From b2eea6b799d31aae8240e41cf3f0e0d29e542255 Mon Sep 17 00:00:00 2001 From: xxchan Date: Mon, 2 Sep 2024 16:50:03 +0800 Subject: [PATCH 01/26] refactor(prost): optimize some Debug representation (#18211) Signed-off-by: xxchan --- src/prost/build.rs | 9 +++- src/prost/src/lib.rs | 98 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 105 insertions(+), 2 deletions(-) diff --git a/src/prost/build.rs b/src/prost/build.rs index 6758c0ef437b4..18bc2d4ae9494 100644 --- a/src/prost/build.rs +++ b/src/prost/build.rs @@ -195,8 +195,13 @@ fn main() -> Result<(), Box> { // If any configuration for `prost_build` is not exposed by `tonic_build`, specify it here. let mut prost_config = prost_build::Config::new(); - prost_config.skip_debug(["meta.SystemParams"]); - + prost_config.skip_debug([ + "meta.SystemParams", + "plan_common.ColumnDesc", + "data.DataType", + // TODO: + //"stream_plan.StreamNode" + ]); // Compile the proto files. tonic_config .out_dir(out_dir.as_path()) diff --git a/src/prost/src/lib.rs b/src/prost/src/lib.rs index 4c4327d049446..c8ad9de582edc 100644 --- a/src/prost/src/lib.rs +++ b/src/prost/src/lib.rs @@ -19,6 +19,7 @@ use std::str::FromStr; +use plan_common::AdditionalColumn; pub use prost::Message; use risingwave_error::tonic::ToTonicStatus; use thiserror::Error; @@ -329,6 +330,103 @@ impl std::fmt::Debug for meta::SystemParams { } } +// More compact formats for debugging + +impl std::fmt::Debug for data::DataType { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let data::DataType { + precision, + scale, + interval_type, + field_type, + field_names, + type_name, + // currently all data types are nullable + is_nullable: _, + } = self; + + let type_name = data::data_type::TypeName::try_from(*type_name) + .map(|t| t.as_str_name()) + .unwrap_or("Unknown"); + + let mut s = f.debug_struct(type_name); + if self.precision != 0 { + s.field("precision", precision); + } + if self.scale != 0 { + s.field("scale", scale); + } + if self.interval_type != 0 { + s.field("interval_type", interval_type); + } + if !self.field_type.is_empty() { + s.field("field_type", field_type); + } + if !self.field_names.is_empty() { + s.field("field_names", field_names); + } + s.finish() + } +} + +impl std::fmt::Debug for plan_common::column_desc::GeneratedOrDefaultColumn { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::GeneratedColumn(arg0) => f.debug_tuple("GeneratedColumn").field(arg0).finish(), + Self::DefaultColumn(arg0) => f.debug_tuple("DefaultColumn").field(arg0).finish(), + } + } +} + +impl std::fmt::Debug for plan_common::ColumnDesc { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + // destruct here to avoid missing new fields in the future. 
+ let plan_common::ColumnDesc { + column_type, + column_id, + name, + field_descs, + type_name, + description, + additional_column_type, + additional_column, + generated_or_default_column, + version, + } = self; + + let mut s = f.debug_struct("ColumnDesc"); + if let Some(column_type) = column_type { + s.field("column_type", column_type); + } else { + s.field("column_type", &"Unknown"); + } + s.field("column_id", column_id).field("name", name); + if !self.field_descs.is_empty() { + s.field("field_descs", field_descs); + } + if !self.type_name.is_empty() { + s.field("type_name", type_name); + } + if let Some(description) = description { + s.field("description", description); + } + if self.additional_column_type != 0 { + s.field("additional_column_type", additional_column_type); + } + s.field("version", version); + if let Some(AdditionalColumn { column_type }) = additional_column { + // AdditionalColumn { None } means a normal column + if let Some(column_type) = column_type { + s.field("additional_column", &column_type); + } + } + if let Some(generated_or_default_column) = generated_or_default_column { + s.field("generated_or_default_column", &generated_or_default_column); + } + s.finish() + } +} + #[cfg(test)] mod tests { use crate::data::{data_type, DataType}; From ea6674ea31f2f6fd1b526c11405168f39a5131e0 Mon Sep 17 00:00:00 2001 From: Bugen Zhao Date: Tue, 3 Sep 2024 11:32:21 +0800 Subject: [PATCH 02/26] feat(risedev): prompt user with `argv[0]` instead of hard-coded `./risedev` (#18358) Signed-off-by: Bugen Zhao --- Makefile.toml | 2 +- risedev | 6 ++++ src/risedevtool/common.toml | 2 +- src/risedevtool/src/bin/risedev-dev.rs | 28 ++++++------------- .../src/task/configure_tmux_service.rs | 3 +- src/risedevtool/src/task/etcd_service.rs | 7 ++++- src/risedevtool/src/task/grafana_service.rs | 3 +- src/risedevtool/src/task/minio_service.rs | 7 ++++- .../src/task/prometheus_service.rs | 7 ++++- src/risedevtool/src/task/pubsub_service.rs | 7 ++++- src/risedevtool/src/task/redis_service.rs | 7 ++++- src/risedevtool/src/task/tempo_service.rs | 7 ++++- src/risedevtool/src/util.rs | 20 +++++++++++++ 13 files changed, 77 insertions(+), 29 deletions(-) diff --git a/Makefile.toml b/Makefile.toml index 554d2df8d3f3c..eec2e6252bcde 100644 --- a/Makefile.toml +++ b/Makefile.toml @@ -1244,7 +1244,7 @@ cat < "${INSTALL_PATH}" #!/usr/bin/env bash set -e cd "$DIR" -./risedev "\\$@" +RISEDEV_CMD="\\$(basename \\"$0")" ./risedev "\\$@" EOF chmod +x "${INSTALL_PATH}" diff --git a/risedev b/risedev index eb379c60cd668..2bb8e190bb476 100755 --- a/risedev +++ b/risedev @@ -12,6 +12,12 @@ fi touch risedev-components.user.env +# RISEDEV_CMD might be set if it is installed to the PATH. +# Otherwise, we set it to the current script name. 
+if [ -z "$RISEDEV_CMD" ]; then + export RISEDEV_CMD="$0" +fi + if [ $# -eq 0 ] || [ "$1" == "-h" ] || [ "$1" == "--help" ]; then cargo make --list-all-steps --hide-uninteresting exit 0 diff --git a/src/risedevtool/common.toml b/src/risedevtool/common.toml index 960ede84de140..165fc3ca88228 100644 --- a/src/risedevtool/common.toml +++ b/src/risedevtool/common.toml @@ -35,7 +35,7 @@ condition = { env_not_set = [ "RISEDEV_CONFIGURED" ] } script = ''' #!/usr/bin/env bash set -e -echo "RiseDev is not configured, please run ./risedev configure" +echo "RiseDev is not configured, please run ${RISEDEV_CMD} configure" exit 1 ''' diff --git a/src/risedevtool/src/bin/risedev-dev.rs b/src/risedevtool/src/bin/risedev-dev.rs index c367473eb8e77..c53453b3f903d 100644 --- a/src/risedevtool/src/bin/risedev-dev.rs +++ b/src/risedevtool/src/bin/risedev-dev.rs @@ -440,6 +440,8 @@ fn main() -> Result<()> { } manager.finish_all(); + use risedev::util::stylized_risedev_subcmd as r; + match task_result { Ok((stat, log_buffer)) => { println!("---- summary of startup time ----"); @@ -458,20 +460,11 @@ fn main() -> Result<()> { print!("{}", log_buffer); - println!( - "* You may find logs using {} command", - style("./risedev l").blue().bold() - ); + println!("* You may find logs using {} command", r("l")); - println!( - "* Run {} to kill cluster.", - style("./risedev k").blue().bold() - ); + println!("* Run {} to kill cluster.", r("k")); - println!( - "* Run {} to run `risedev` anywhere!", - style("./risedev install").blue().bold() - ); + println!("* Run {} to run `risedev` anywhere!", r("install")); Ok(()) } @@ -484,20 +477,17 @@ fn main() -> Result<()> { println!(); println!( "* Use `{}` to enable new components, if they are missing.", - style("./risedev configure").blue().bold(), + r("configure") ); println!( "* Use `{}` to view logs, or visit `{}`", - style("./risedev l").blue().bold(), + r("l"), env::var("PREFIX_LOG")? ); - println!( - "* Run `{}` to clean up cluster.", - style("./risedev k").blue().bold() - ); + println!("* Run `{}` to clean up cluster.", r("k")); println!( "* Run `{}` to clean data, which might potentially fix the issue.", - style("./risedev clean-data").blue().bold() + r("clean-data") ); println!("---"); println!(); diff --git a/src/risedevtool/src/task/configure_tmux_service.rs b/src/risedevtool/src/task/configure_tmux_service.rs index 367ea13c759ba..a20274edfc3c1 100644 --- a/src/risedevtool/src/task/configure_tmux_service.rs +++ b/src/risedevtool/src/task/configure_tmux_service.rs @@ -19,6 +19,7 @@ use std::process::Command; use anyhow::{bail, Context, Result}; use console::style; +use crate::util::stylized_risedev_subcmd; use crate::{ExecuteContext, Task}; pub struct ConfigureTmuxTask; @@ -60,7 +61,7 @@ impl Task for ConfigureTmuxTask { if ctx.run_command(cmd).is_ok() { bail!( "A previous cluster is already running. 
Please kill it first with {}.", - style("./risedev k").blue().bold() + stylized_risedev_subcmd("k"), ); } diff --git a/src/risedevtool/src/task/etcd_service.rs b/src/risedevtool/src/task/etcd_service.rs index b702e2b15435b..fef92568dd6a2 100644 --- a/src/risedevtool/src/task/etcd_service.rs +++ b/src/risedevtool/src/task/etcd_service.rs @@ -19,6 +19,7 @@ use std::process::Command; use anyhow::{anyhow, Result}; use itertools::Itertools; +use crate::util::stylized_risedev_subcmd; use crate::{EtcdConfig, Task}; pub struct EtcdService { @@ -102,7 +103,11 @@ impl Task for EtcdService { let path = Self::path()?; if !path.exists() { - return Err(anyhow!("etcd binary not found in {:?}\nDid you enable etcd feature in `./risedev configure`?", path)); + return Err(anyhow!( + "etcd binary not found in {:?}\nDid you enable etcd feature in `{}`?", + path, + stylized_risedev_subcmd("configure") + )); } let mut cmd = Self::etcd()?; diff --git a/src/risedevtool/src/task/grafana_service.rs b/src/risedevtool/src/task/grafana_service.rs index ab0ef2e71551b..a4b2b880a5d7b 100644 --- a/src/risedevtool/src/task/grafana_service.rs +++ b/src/risedevtool/src/task/grafana_service.rs @@ -19,6 +19,7 @@ use std::process::Command; use anyhow::{anyhow, Result}; use super::{ExecuteContext, Task}; +use crate::util::stylized_risedev_subcmd; use crate::{GrafanaConfig, GrafanaGen}; pub struct GrafanaService { @@ -102,7 +103,7 @@ impl Task for GrafanaService { let path = self.grafana_server_path()?; if !path.exists() { - return Err(anyhow!("grafana-server binary not found in {:?}\nDid you enable monitoring feature in `./risedev configure`?", path)); + return Err(anyhow!("grafana-server binary not found in {:?}\nDid you enable monitoring feature in `{}`?", path, stylized_risedev_subcmd("configure"))); } Self::write_config_files( diff --git a/src/risedevtool/src/task/minio_service.rs b/src/risedevtool/src/task/minio_service.rs index 4c595a96f198e..d73fb93444ad5 100644 --- a/src/risedevtool/src/task/minio_service.rs +++ b/src/risedevtool/src/task/minio_service.rs @@ -19,6 +19,7 @@ use std::process::Command; use anyhow::{anyhow, Result}; use super::{ExecuteContext, Task}; +use crate::util::stylized_risedev_subcmd; use crate::MinioConfig; pub struct MinioService { @@ -91,7 +92,11 @@ impl Task for MinioService { let path = self.minio_path()?; if !path.exists() { - return Err(anyhow!("minio binary not found in {:?}\nDid you enable minio feature in `./risedev configure`?", path)); + return Err(anyhow!( + "minio binary not found in {:?}\nDid you enable minio feature in `{}`?", + path, + stylized_risedev_subcmd("configure") + )); } let mut cmd = self.minio()?; diff --git a/src/risedevtool/src/task/prometheus_service.rs b/src/risedevtool/src/task/prometheus_service.rs index 0606c3edaef0e..878ba40f53347 100644 --- a/src/risedevtool/src/task/prometheus_service.rs +++ b/src/risedevtool/src/task/prometheus_service.rs @@ -19,6 +19,7 @@ use std::process::Command; use anyhow::{anyhow, Result}; use super::{ExecuteContext, Task}; +use crate::util::stylized_risedev_subcmd; use crate::{PrometheusConfig, PrometheusGen}; pub struct PrometheusService { @@ -57,7 +58,11 @@ impl Task for PrometheusService { let path = self.prometheus_path()?; if !path.exists() { - return Err(anyhow!("prometheus binary not found in {:?}\nDid you enable monitoring feature in `./risedev configure`?", path)); + return Err(anyhow!( + "prometheus binary not found in {:?}\nDid you enable monitoring feature in `{}`?", + path, + stylized_risedev_subcmd("configure") + )); } let 
prefix_config = env::var("PREFIX_CONFIG")?; diff --git a/src/risedevtool/src/task/pubsub_service.rs b/src/risedevtool/src/task/pubsub_service.rs index e0ebd628778ff..6d09380f85680 100644 --- a/src/risedevtool/src/task/pubsub_service.rs +++ b/src/risedevtool/src/task/pubsub_service.rs @@ -19,6 +19,7 @@ use std::process::Command; use anyhow::{anyhow, Result}; use super::{ExecuteContext, Task}; +use crate::util::stylized_risedev_subcmd; use crate::PubsubConfig; pub struct PubsubService { @@ -49,7 +50,11 @@ impl Task for PubsubService { let path = self.gcloud_path()?; if !path.exists() { - return Err(anyhow!("gcloud binary not found in {:?}\nDid you enable pubsub-emulator feature in `./risedev configure`?", path)); + return Err(anyhow!( + "gcloud binary not found in {:?}\nDid you enable pubsub-emulator feature in `{}`?", + path, + stylized_risedev_subcmd("configure") + )); } let mut cmd = self.gcloud()?; diff --git a/src/risedevtool/src/task/redis_service.rs b/src/risedevtool/src/task/redis_service.rs index 5415488aa5abc..63c25bef2a5ca 100644 --- a/src/risedevtool/src/task/redis_service.rs +++ b/src/risedevtool/src/task/redis_service.rs @@ -19,6 +19,7 @@ use std::process::Command; use anyhow::{anyhow, Result}; +use crate::util::stylized_risedev_subcmd; use crate::{ExecuteContext, RedisConfig, Task}; pub struct RedisService { @@ -49,7 +50,11 @@ impl Task for RedisService { ctx.pb.set_message("starting"); let path = self.redis_path()?; if !path.exists() { - return Err(anyhow!("Redis binary not found in {:?}\nDid you enable redis feature in `./risedev configure`?", path)); + return Err(anyhow!( + "Redis binary not found in {:?}\nDid you enable redis feature in `{}`?", + path, + stylized_risedev_subcmd("configure") + )); } let mut cmd = self.redis()?; diff --git a/src/risedevtool/src/task/tempo_service.rs b/src/risedevtool/src/task/tempo_service.rs index c60d06273566f..10ca962a99802 100644 --- a/src/risedevtool/src/task/tempo_service.rs +++ b/src/risedevtool/src/task/tempo_service.rs @@ -19,6 +19,7 @@ use std::process::Command; use anyhow::{anyhow, Result}; use super::{ExecuteContext, Task}; +use crate::util::stylized_risedev_subcmd; use crate::{TempoConfig, TempoGen}; pub struct TempoService { @@ -63,7 +64,11 @@ impl Task for TempoService { let path = self.tempo_path()?; if !path.exists() { - return Err(anyhow!("tempo binary not found in {:?}\nDid you enable tracing feature in `./risedev configure`?", path)); + return Err(anyhow!( + "tempo binary not found in {:?}\nDid you enable tracing feature in `{}`?", + path, + stylized_risedev_subcmd("configure") + )); } let prefix_config = env::var("PREFIX_CONFIG")?; diff --git a/src/risedevtool/src/util.rs b/src/risedevtool/src/util.rs index 42aa85730aa6c..5738d9a8041d5 100644 --- a/src/risedevtool/src/util.rs +++ b/src/risedevtool/src/util.rs @@ -12,7 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+use std::fmt::Display; use std::process::Command; +use std::sync::LazyLock; use indicatif::{ProgressBar, ProgressStyle}; use itertools::Itertools; @@ -83,3 +85,21 @@ pub fn is_env_set(var: &str) -> bool { pub fn is_enable_backtrace() -> bool { !is_env_set("DISABLE_BACKTRACE") } + +pub fn risedev_cmd() -> &'static str { + static RISEDEV_CMD: LazyLock = LazyLock::new(|| { + if let Ok(val) = std::env::var("RISEDEV_CMD") { + val + } else { + "./risedev".to_owned() + } + }); + + RISEDEV_CMD.as_str() +} + +pub fn stylized_risedev_subcmd(subcmd: &str) -> impl Display { + console::style(format!("{} {}", risedev_cmd(), subcmd)) + .blue() + .bold() +} From 46bce7e4091c09db08714aad1bbee32b6c3f8da4 Mon Sep 17 00:00:00 2001 From: xxchan Date: Tue, 3 Sep 2024 11:36:51 +0800 Subject: [PATCH 03/26] refactor: add target_offsets to determinine if source backfill finished (#18297) --- .../source/source_backfill_executor.rs | 215 +++++++++++------- 1 file changed, 133 insertions(+), 82 deletions(-) diff --git a/src/stream/src/executor/source/source_backfill_executor.rs b/src/stream/src/executor/source/source_backfill_executor.rs index 39a458b28ff47..6b22293331306 100644 --- a/src/stream/src/executor/source/source_backfill_executor.rs +++ b/src/stream/src/executor/source/source_backfill_executor.rs @@ -13,7 +13,6 @@ // limitations under the License. use std::cmp::Ordering; -use std::collections::hash_map::Entry; use std::collections::{HashMap, HashSet}; use std::time::Instant; @@ -45,7 +44,7 @@ use crate::executor::{AddMutation, UpdateMutation}; pub enum BackfillState { /// `None` means not started yet. It's the initial state. Backfilling(Option), - /// Backfill is stopped at this offset. Source needs to filter out messages before this offset. + /// Backfill is stopped at this offset (inclusive). Source needs to filter out messages before this offset. SourceCachingUp(String), Finished, } @@ -59,54 +58,6 @@ impl BackfillState { pub fn restore_from_json(value: JsonbVal) -> anyhow::Result { serde_json::from_value(value.take()).map_err(|e| anyhow!(e)) } - - /// Returns whether the row from upstream `SourceExecutor` is visible. - fn handle_upstream_row(&mut self, offset: &str) -> bool { - let mut vis = false; - match self { - BackfillState::Backfilling(None) => { - // backfilling for this split is not started yet. Ignore this row - } - BackfillState::Backfilling(Some(backfill_offset)) => { - match compare_kafka_offset(backfill_offset, offset) { - Ordering::Less => { - // continue backfilling. Ignore this row - } - Ordering::Equal => { - // backfilling for this split is finished just right. - *self = BackfillState::Finished; - } - Ordering::Greater => { - // backfilling for this split produced more data than current source's progress. - // We should stop backfilling, and filter out rows from upstream with offset <= backfill_offset. - *self = BackfillState::SourceCachingUp(backfill_offset.clone()); - } - } - } - BackfillState::SourceCachingUp(backfill_offset) => { - match compare_kafka_offset(backfill_offset, offset) { - Ordering::Less => { - // Source caught up, but doesn't contain the last backfilled row. - // This may happen e.g., if Kafka performed compaction. - vis = true; - *self = BackfillState::Finished; - } - Ordering::Equal => { - // Source just caught up with backfilling. - *self = BackfillState::Finished; - } - Ordering::Greater => { - // Source is still behind backfilling. 
- } - } - } - BackfillState::Finished => { - vis = true; - // This split's backfilling is finisehd, we are waiting for other splits - } - } - vis - } } pub struct SourceBackfillExecutor { @@ -138,6 +89,8 @@ pub struct SourceBackfillExecutorInner { } /// Local variables used in the backfill stage. +/// +/// Note: all off the fields should contain all available splits, and we can `unwrap()` safely when `get()`. #[derive(Debug)] struct BackfillStage { states: BackfillStates, @@ -145,9 +98,28 @@ struct BackfillStage { /// /// Note: the offsets are not updated. Should use `state`'s offset to update before using it (`get_latest_unfinished_splits`). splits: Vec, + /// The latest offset from upstream (inclusive). After we reach this offset, we can stop backfilling. + /// TODO: initialize this with high watermark so that we can finish backfilling even when upstream + /// doesn't emit any data. + target_offsets: HashMap>, } impl BackfillStage { + fn debug_assert_consistent(&self) { + if cfg!(debug_assertions) { + let all_splits: HashSet<_> = + self.splits.iter().map(|split| split.id().clone()).collect(); + assert_eq!( + self.states.keys().cloned().collect::>(), + all_splits + ); + assert_eq!( + self.target_offsets.keys().cloned().collect::>(), + all_splits + ); + } + } + /// Get unfinished splits with latest offsets according to the backfill states. fn get_latest_unfinished_splits(&self) -> StreamExecutorResult> { let mut unfinished_splits = Vec::new(); @@ -165,6 +137,92 @@ impl BackfillStage { } Ok(unfinished_splits) } + + /// Updates backfill states and `target_offsets` and returns whether the row from upstream `SourceExecutor` is visible. + fn handle_upstream_row(&mut self, split_id: &str, offset: &str) -> bool { + let mut vis = false; + let state = self.states.get_mut(split_id).unwrap(); + match state { + BackfillState::Backfilling(None) => { + // backfilling for this split is not started yet. Ignore this row + } + BackfillState::Backfilling(Some(backfill_offset)) => { + match compare_kafka_offset(backfill_offset, offset) { + Ordering::Less => { + // continue backfilling. Ignore this row + } + Ordering::Equal => { + // backfilling for this split is finished just right. + *state = BackfillState::Finished; + } + Ordering::Greater => { + // backfilling for this split produced more data than current source's progress. + // We should stop backfilling, and filter out rows from upstream with offset <= backfill_offset. + *state = BackfillState::SourceCachingUp(backfill_offset.clone()); + } + } + } + BackfillState::SourceCachingUp(backfill_offset) => { + match compare_kafka_offset(backfill_offset, offset) { + Ordering::Less => { + // Source caught up, but doesn't contain the last backfilled row. + // This may happen e.g., if Kafka performed compaction. + vis = true; + *state = BackfillState::Finished; + } + Ordering::Equal => { + // Source just caught up with backfilling. + *state = BackfillState::Finished; + } + Ordering::Greater => { + // Source is still behind backfilling. + } + } + } + BackfillState::Finished => { + vis = true; + // This split's backfilling is finished, we are waiting for other splits + } + } + if matches!(state, BackfillState::Backfilling(_)) { + *self.target_offsets.get_mut(split_id).unwrap() = Some(offset.to_string()); + } + if vis { + debug_assert_eq!(*state, BackfillState::Finished); + } + vis + } + + /// Updates backfill states and returns whether the row from upstream `SourceExecutor` is visible. 
+ fn handle_backfill_row(&mut self, split_id: &str, offset: &str) -> bool { + let state = self.states.get_mut(split_id).unwrap(); + match state { + BackfillState::Backfilling(_old_offset) => { + let target_offset = self.target_offsets.get(split_id).unwrap(); + if let Some(target_offset) = target_offset + && compare_kafka_offset(offset, target_offset).is_ge() + { + // Note1: If target_offset = offset, it seems we can mark the state as Finished without waiting for upstream to catch up + // and dropping duplicated messages. + // But it's not true if target_offset is fetched from other places, like Kafka high watermark. + // In this case, upstream hasn't reached the target_offset yet. + // + // Note2: after this, all following rows in the current chunk will be invisible. + // + // Note3: if target_offset is None (e.g., when upstream doesn't emit messages at all), we will + // keep backfilling. + *state = BackfillState::SourceCachingUp(offset.to_string()); + } else { + *state = BackfillState::Backfilling(Some(offset.to_string())); + } + true + } + BackfillState::SourceCachingUp(_) | BackfillState::Finished => { + // backfilling stopped. ignore + false + } + } + } } impl SourceBackfillExecutorInner { @@ -275,9 +333,15 @@ impl SourceBackfillExecutorInner { backfill_states.insert(split_id, backfill_state); } let mut backfill_stage = BackfillStage { + // init with None + target_offsets: backfill_states + .keys() + .map(|split_id| (split_id.clone(), None)) + .collect(), states: backfill_states, splits: owned_splits, }; + backfill_stage.debug_assert_consistent(); tracing::debug!(?backfill_stage, "source backfill started"); // Return the ownership of `stream_source_core` to the source executor. @@ -348,6 +412,7 @@ impl SourceBackfillExecutorInner { let mut last_barrier_time = Instant::now(); let mut self_paused = false; + // The main logic of the loop is in handle_upstream_row and handle_backfill_row. 'backfill_loop: while let Some(either) = backfill_stream.next().await { match either { // Upstream @@ -485,9 +550,7 @@ impl SourceBackfillExecutorInner { for (i, (_, row)) in chunk.rows().enumerate() { let split = row.datum_at(split_idx).unwrap().into_utf8(); let offset = row.datum_at(offset_idx).unwrap().into_utf8(); - let backfill_state = - backfill_stage.states.get_mut(split).unwrap(); - let vis = backfill_state.handle_upstream_row(offset); + let vis = backfill_stage.handle_upstream_row(split, offset); new_vis.set(i, vis); } // emit chunk if vis is not empty. i.e., some splits finished backfilling. @@ -527,36 +590,12 @@ impl SourceBackfillExecutorInner { self.system_params.load().barrier_interval_ms() as u128 * WAIT_BARRIER_MULTIPLE_TIMES; } - // TODO(optimize): actually each msg is from one split. We can - // include split from the message and avoid iterating over all rows. let mut new_vis = BitmapBuilder::zeroed(chunk.visibility().len()); for (i, (_, row)) in chunk.rows().enumerate() { - let split_id: Arc = - row.datum_at(split_idx).unwrap().into_utf8().into(); - let offset: String = - row.datum_at(offset_idx).unwrap().into_utf8().into(); - // update backfill progress - let mut vis = true; - match backfill_stage.states.entry(split_id.clone()) { - Entry::Occupied(mut entry) => { - let state = entry.get_mut(); - match state { - BackfillState::Backfilling(_) => { - *state = - BackfillState::Backfilling(Some(offset.clone())); - } - BackfillState::SourceCachingUp(_) - | BackfillState::Finished => { - // backfilling stopped. 
ignore - vis = false - } - } - } - Entry::Vacant(entry) => { - entry.insert(BackfillState::Backfilling(Some(offset.clone()))); - } - } + let split_id = row.datum_at(split_idx).unwrap().into_utf8(); + let offset = row.datum_at(offset_idx).unwrap().into_utf8(); + let vis = backfill_stage.handle_backfill_row(split_id, offset); new_vis.set(i, vis); } @@ -678,7 +717,7 @@ impl SourceBackfillExecutorInner { // Iterate over the target (assigned) splits // - check if any new splits are added // - build target_state - for split in target_splits { + for split in &target_splits { let split_id = split.id(); if let Some(s) = old_states.get(&split_id) { target_state.insert(split_id, s.clone()); @@ -727,7 +766,19 @@ impl SourceBackfillExecutorInner { debug_assert_eq!(old_states, target_state); } stage.states = target_state; - + stage.splits = target_splits; + let old_target_offsets = std::mem::take(&mut stage.target_offsets); + stage.target_offsets = stage + .states + .keys() + .map(|split_id| { + ( + split_id.clone(), + old_target_offsets.get(split_id).cloned().flatten(), + ) + }) + .collect(); + stage.debug_assert_consistent(); Ok(split_changed) } From 5a314486da5c3f70e71fb13e2398df96ca00ee87 Mon Sep 17 00:00:00 2001 From: Dylan Date: Tue, 3 Sep 2024 11:41:20 +0800 Subject: [PATCH 04/26] feat(compute): give more batch memory for serving node (#18365) --- src/compute/src/memory/config.rs | 11 ++++++++--- src/compute/src/server.rs | 2 +- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/src/compute/src/memory/config.rs b/src/compute/src/memory/config.rs index 234490773f40f..fdba52884f47e 100644 --- a/src/compute/src/memory/config.rs +++ b/src/compute/src/memory/config.rs @@ -43,7 +43,8 @@ const STORAGE_META_CACHE_MEMORY_PROPORTION: f64 = 0.35; const STORAGE_SHARED_BUFFER_MEMORY_PROPORTION: f64 = 0.3; /// The proportion of compute memory used for batch processing. -const COMPUTE_BATCH_MEMORY_PROPORTION: f64 = 0.3; +const COMPUTE_BATCH_MEMORY_PROPORTION_FOR_STREAMING: f64 = 0.3; +const COMPUTE_BATCH_MEMORY_PROPORTION_FOR_SERVING: f64 = 0.6; /// Each compute node reserves some memory for stack and code segment of processes, allocation /// overhead, network buffer, etc. based on gradient reserve memory proportion. 
The reserve memory @@ -299,8 +300,12 @@ pub fn storage_memory_config( } } -pub fn batch_mem_limit(compute_memory_bytes: usize) -> u64 { - (compute_memory_bytes as f64 * COMPUTE_BATCH_MEMORY_PROPORTION) as u64 +pub fn batch_mem_limit(compute_memory_bytes: usize, is_serving_node: bool) -> u64 { + if is_serving_node { + (compute_memory_bytes as f64 * COMPUTE_BATCH_MEMORY_PROPORTION_FOR_SERVING) as u64 + } else { + (compute_memory_bytes as f64 * COMPUTE_BATCH_MEMORY_PROPORTION_FOR_STREAMING) as u64 + } } #[cfg(test)] diff --git a/src/compute/src/server.rs b/src/compute/src/server.rs index f8a673b07c5da..ad9c61b3a428e 100644 --- a/src/compute/src/server.rs +++ b/src/compute/src/server.rs @@ -288,7 +288,7 @@ pub async fn compute_node_serve( let batch_mgr = Arc::new(BatchManager::new( config.batch.clone(), batch_manager_metrics, - batch_mem_limit(compute_memory_bytes), + batch_mem_limit(compute_memory_bytes, opts.role.for_serving()), )); // NOTE: Due to some limits, we use `compute_memory_bytes + storage_memory_bytes` as From 9d6515b6a8af4e8935bea76e46aa14aecc174cf7 Mon Sep 17 00:00:00 2001 From: Li0k Date: Tue, 3 Sep 2024 12:38:18 +0800 Subject: [PATCH 05/26] feat(compaction): support recreate block stream for fast compact iter (#18350) --- .../compactor/fast_compactor_runner.rs | 149 ++++++++++++++---- src/storage/src/hummock/compactor/iterator.rs | 15 +- src/storage/src/hummock/test_utils.rs | 12 ++ .../src/storage_failpoints/test_iterator.rs | 83 +++++++++- 4 files changed, 221 insertions(+), 38 deletions(-) diff --git a/src/storage/src/hummock/compactor/fast_compactor_runner.rs b/src/storage/src/hummock/compactor/fast_compactor_runner.rs index 2c2fa3b781528..6ec194203ff22 100644 --- a/src/storage/src/hummock/compactor/fast_compactor_runner.rs +++ b/src/storage/src/hummock/compactor/fast_compactor_runner.rs @@ -21,6 +21,7 @@ use std::time::Instant; use await_tree::InstrumentAwait; use bytes::Bytes; +use fail::fail_point; use itertools::Itertools; use risingwave_hummock_sdk::compact_task::CompactTask; use risingwave_hummock_sdk::key::FullKey; @@ -49,7 +50,7 @@ use crate::monitor::{CompactorMetrics, StoreLocalStatistic}; /// Iterates over the KV-pairs of an SST while downloading it. pub struct BlockStreamIterator { /// The downloading stream. - block_stream: BlockDataStream, + block_stream: Option, next_block_index: usize, @@ -57,6 +58,13 @@ pub struct BlockStreamIterator { sstable: TableHolder, iter: Option, task_progress: Arc, + + // For block stream recreate + sstable_store: SstableStoreRef, + sstable_info: SstableInfo, + io_retry_times: usize, + max_io_retry_times: usize, + stats_ptr: Arc, } impl BlockStreamIterator { @@ -76,34 +84,105 @@ impl BlockStreamIterator { /// The iterator reads at most `max_block_count` from the stream. 
pub fn new( sstable: TableHolder, - block_stream: BlockDataStream, task_progress: Arc, + sstable_store: SstableStoreRef, + sstable_info: SstableInfo, + max_io_retry_times: usize, + stats_ptr: Arc, ) -> Self { Self { - block_stream, + block_stream: None, next_block_index: 0, sstable, iter: None, task_progress, + sstable_store, + sstable_info, + io_retry_times: 0, + max_io_retry_times, + stats_ptr, } } + async fn create_stream(&mut self) -> HummockResult<()> { + // Fast compact only support the single table compaction.(not split sst) + // So we don't need to filter the block_metas with table_id and key_range + let block_stream = self + .sstable_store + .get_stream_for_blocks( + self.sstable_info.object_id, + &self.sstable.meta.block_metas[self.next_block_index..], + ) + .verbose_instrument_await("stream_iter_get_stream") + .await?; + self.block_stream = Some(block_stream); + Ok(()) + } + /// Wrapper function for `self.block_stream.next()` which allows us to measure the time needed. - async fn download_next_block(&mut self) -> HummockResult, BlockMeta)>> { - let (data, _) = match self.block_stream.next_block_impl().await? { - None => return Ok(None), - Some(ret) => ret, - }; - let meta = self.sstable.meta.block_metas[self.next_block_index].clone(); - let filter_block = self - .sstable - .filter_reader - .get_block_raw_filter(self.next_block_index); - self.next_block_index += 1; - Ok(Some((data, filter_block, meta))) + pub(crate) async fn download_next_block( + &mut self, + ) -> HummockResult, BlockMeta)>> { + let now = Instant::now(); + let _time_stat = scopeguard::guard(self.stats_ptr.clone(), |stats_ptr: Arc| { + let add = (now.elapsed().as_secs_f64() * 1000.0).ceil(); + stats_ptr.fetch_add(add as u64, atomic::Ordering::Relaxed); + }); + loop { + let ret = match &mut self.block_stream { + Some(block_stream) => block_stream.next_block_impl().await, + None => { + self.create_stream().await?; + continue; + } + }; + match ret { + Ok(Some((data, _))) => { + let meta = self.sstable.meta.block_metas[self.next_block_index].clone(); + let filter_block = self + .sstable + .filter_reader + .get_block_raw_filter(self.next_block_index); + self.next_block_index += 1; + return Ok(Some((data, filter_block, meta))); + } + + Ok(None) => break, + + Err(e) => { + if !e.is_object_error() || self.io_retry_times >= self.max_io_retry_times { + return Err(e); + } + + self.block_stream.take(); + self.io_retry_times += 1; + fail_point!("create_stream_err"); + + tracing::warn!( + "fast compact retry create stream for sstable {} times, sstinfo={}", + self.io_retry_times, + format!( + "object_id={}, sst_id={}, meta_offset={}, table_ids={:?}", + self.sstable_info.object_id, + self.sstable_info.sst_id, + self.sstable_info.meta_offset, + self.sstable_info.table_ids + ) + ); + } + } + } + + self.next_block_index = self.sstable.meta.block_metas.len(); + self.iter.take(); + Ok(None) } - fn init_block_iter(&mut self, buf: Bytes, uncompressed_capacity: usize) -> HummockResult<()> { + pub(crate) fn init_block_iter( + &mut self, + buf: Bytes, + uncompressed_capacity: usize, + ) -> HummockResult<()> { let block = Block::decode(buf, uncompressed_capacity)?; let mut iter = BlockIterator::new(BlockHolder::from_owned_block(Box::new(block))); iter.seek_to_first(); @@ -153,9 +232,15 @@ impl BlockStreamIterator { } } - fn is_valid(&self) -> bool { + pub(crate) fn is_valid(&self) -> bool { self.iter.is_some() || self.next_block_index < self.sstable.meta.block_metas.len() } + + #[cfg(test)] + #[cfg(feature = "failpoints")] + pub(crate) fn 
iter_mut(&mut self) -> &mut BlockIterator { + self.iter.as_mut().unwrap() + } } impl Drop for BlockStreamIterator { @@ -180,6 +265,8 @@ pub struct ConcatSstableIterator { stats: StoreLocalStatistic, task_progress: Arc, + + max_io_retry_times: usize, } impl ConcatSstableIterator { @@ -190,6 +277,7 @@ impl ConcatSstableIterator { sst_infos: Vec, sstable_store: SstableStoreRef, task_progress: Arc, + max_io_retry_times: usize, ) -> Self { Self { sstable_iter: None, @@ -198,6 +286,7 @@ impl ConcatSstableIterator { sstable_store, task_progress, stats: StoreLocalStatistic::default(), + max_io_retry_times, } } @@ -239,24 +328,16 @@ impl ConcatSstableIterator { .sstable(sstable_info, &mut self.stats) .verbose_instrument_await("stream_iter_sstable") .await?; - let stats_ptr = self.stats.remote_io_time.clone(); - let now = Instant::now(); self.task_progress.inc_num_pending_read_io(); - // Fast compact only support the single table compaction.(not split sst) - // So we don't need to filter the block_metas with table_id and key_range - let block_stream = self - .sstable_store - .get_stream_for_blocks(sstable.id, &sstable.meta.block_metas) - .verbose_instrument_await("stream_iter_get_stream") - .await?; - - // Determine time needed to open stream. - let add = (now.elapsed().as_secs_f64() * 1000.0).ceil(); - stats_ptr.fetch_add(add as u64, atomic::Ordering::Relaxed); - - let sstable_iter = - BlockStreamIterator::new(sstable, block_stream, self.task_progress.clone()); + let sstable_iter = BlockStreamIterator::new( + sstable, + self.task_progress.clone(), + self.sstable_store.clone(), + sstable_info.clone(), + self.max_io_retry_times, + self.stats.remote_io_time.clone(), + ); self.sstable_iter = Some(sstable_iter); } Ok(()) @@ -335,11 +416,13 @@ impl CompactorRunner { task.input_ssts[0].table_infos.clone(), context.sstable_store.clone(), task_progress.clone(), + context.storage_opts.compactor_iter_max_io_retry_times, )); let right = Box::new(ConcatSstableIterator::new( task.input_ssts[1].table_infos.clone(), context.sstable_store, task_progress.clone(), + context.storage_opts.compactor_iter_max_io_retry_times, )); let state = SkipWatermarkState::from_safe_epoch_watermarks(&task.table_watermarks); diff --git a/src/storage/src/hummock/compactor/iterator.rs b/src/storage/src/hummock/compactor/iterator.rs index d1bb5f9753f84..c53f945af2c50 100644 --- a/src/storage/src/hummock/compactor/iterator.rs +++ b/src/storage/src/hummock/compactor/iterator.rs @@ -192,9 +192,13 @@ impl SstableStreamIterator { /// `self.block_iter` to `None`. async fn next_block(&mut self) -> HummockResult<()> { // Check if we want and if we can load the next block. 
+ let now = Instant::now(); + let _time_stat = scopeguard::guard(self.stats_ptr.clone(), |stats_ptr: Arc| { + let add = (now.elapsed().as_secs_f64() * 1000.0).ceil(); + stats_ptr.fetch_add(add as u64, atomic::Ordering::Relaxed); + }); if self.block_idx < self.block_metas.len() { loop { - let now = Instant::now(); let ret = match &mut self.block_stream { Some(block_stream) => block_stream.next_block().await, None => { @@ -202,7 +206,6 @@ impl SstableStreamIterator { continue; } }; - let add = (now.elapsed().as_secs_f64() * 1000.0).ceil(); match ret { Ok(Some(block)) => { let mut block_iter = @@ -220,10 +223,14 @@ impl SstableStreamIterator { self.block_stream.take(); self.io_retry_times += 1; fail_point!("create_stream_err"); + + tracing::warn!( + "retry create stream for sstable {} times, sstinfo={}", + self.io_retry_times, + self.sst_debug_info() + ); } } - self.stats_ptr - .fetch_add(add as u64, atomic::Ordering::Relaxed); } } self.block_idx = self.block_metas.len(); diff --git a/src/storage/src/hummock/test_utils.rs b/src/storage/src/hummock/test_utils.rs index a9b1e9dfb31b7..8d96e29f5426d 100644 --- a/src/storage/src/hummock/test_utils.rs +++ b/src/storage/src/hummock/test_utils.rs @@ -188,7 +188,19 @@ pub async fn put_sst( .write_block(&data[offset..end_offset], block_meta) .await?; } + + // dummy + let bloom_filter = { + let mut filter_builder = BlockedXor16FilterBuilder::new(100); + for _ in &meta.block_metas { + filter_builder.switch_block(None); + } + + filter_builder.finish(None) + }; + meta.meta_offset = writer.data_len() as u64; + meta.bloom_filter = bloom_filter; let sst = SstableInfo { object_id: sst_object_id, sst_id: sst_object_id, diff --git a/src/storage/src/storage_failpoints/test_iterator.rs b/src/storage/src/storage_failpoints/test_iterator.rs index cb05f0b788c29..ada1bef109b3b 100644 --- a/src/storage/src/storage_failpoints/test_iterator.rs +++ b/src/storage/src/storage_failpoints/test_iterator.rs @@ -13,9 +13,10 @@ // limitations under the License. 
use std::ops::Bound::Unbounded; -use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::atomic::{AtomicBool, AtomicU64, Ordering}; use std::sync::Arc; +use crate::hummock::compactor::fast_compactor_runner::BlockStreamIterator; use crate::hummock::compactor::{SstableStreamIterator, TaskProgress}; use crate::hummock::iterator::test_utils::{ gen_iterator_test_sstable_base, gen_iterator_test_sstable_info, iterator_test_bytes_key_of, @@ -422,3 +423,83 @@ async fn test_failpoints_compactor_iterator_recreate() { assert_eq!(cnt, TEST_KEYS_COUNT); assert!(meet_err.load(Ordering::Acquire)); } + +#[tokio::test] +#[cfg(feature = "failpoints")] +async fn test_failpoints_fast_compactor_iterator_recreate() { + let get_stream_err = "get_stream_err"; + let stream_read_err = "stream_read_err"; + let create_stream_err = "create_stream_err"; + let sstable_store = mock_sstable_store().await; + // when upload data is successful, but upload meta is fail and delete is fail + let has_create = Arc::new(AtomicBool::new(false)); + fail::cfg_callback(get_stream_err, move || { + if has_create.load(Ordering::Acquire) { + fail::remove(stream_read_err); + fail::remove(get_stream_err); + } else { + has_create.store(true, Ordering::Release); + fail::cfg(stream_read_err, "return").unwrap(); + } + }) + .unwrap(); + let meet_err = Arc::new(AtomicBool::new(false)); + let other = meet_err.clone(); + fail::cfg_callback(create_stream_err, move || { + other.store(true, Ordering::Release); + }) + .unwrap(); + + let table_id = 0; + let kv_iter = + (0..TEST_KEYS_COUNT).map(|i| (test_key_of(i), HummockValue::put(test_value_of(i)))); + let (data, meta) = gen_test_sstable_data(default_builder_opt_for_test(), kv_iter).await; + let info = put_sst( + table_id, + data.clone(), + meta.clone(), + sstable_store.clone(), + default_writer_opt_for_test(), + vec![table_id as u32], + ) + .await + .unwrap(); + + let mut stats = StoreLocalStatistic::default(); + + let table = sstable_store.sstable(&info, &mut stats).await.unwrap(); + let mut sstable_iter = BlockStreamIterator::new( + table, + Arc::new(TaskProgress::default()), + sstable_store.clone(), + info.clone(), + 10, + Arc::new(AtomicU64::new(0)), + ); + + let mut cnt = 0; + while sstable_iter.is_valid() { + let (buf, _, meta) = match sstable_iter.download_next_block().await.unwrap() { + Some(x) => x, + None => break, + }; + sstable_iter + .init_block_iter(buf, meta.uncompressed_size as usize) + .unwrap(); + + let block_iter = sstable_iter.iter_mut(); + + while block_iter.is_valid() { + let key = block_iter.key(); + let value = HummockValue::from_slice(block_iter.value()).unwrap(); + assert_eq!(test_key_of(cnt).to_ref(), key); + let expected = test_value_of(cnt); + let expected_slice = expected.as_slice(); + assert_eq!(value.into_user_value().unwrap(), expected_slice); + cnt += 1; + block_iter.next(); + } + } + assert_eq!(cnt, TEST_KEYS_COUNT); + assert!(meet_err.load(Ordering::Acquire)); +} From 124011d94ab7ac44c8a7868688e4b326a10ac0b0 Mon Sep 17 00:00:00 2001 From: StrikeW Date: Tue, 3 Sep 2024 12:43:15 +0800 Subject: [PATCH 06/26] feat(cdc): support constant default value for alter table ADD COLUMN (#18322) --- .../cdc_inline/alter/cdc_table_alter.slt | 12 +-- .../cdc_inline/auto_schema_change_mysql.slt | 19 ++++- src/common/src/catalog/column.rs | 14 ++- src/connector/src/parser/unified/debezium.rs | 39 ++++++++- src/connector/src/source/manager.rs | 14 +-- .../src/handler/alter_table_column.rs | 25 +++--- src/frontend/src/handler/create_sink.rs | 1 + 
src/frontend/src/handler/create_table.rs | 85 ++++++++++++++----- src/frontend/src/rpc/mod.rs | 11 ++- src/meta/service/src/ddl_service.rs | 40 ++++----- 10 files changed, 193 insertions(+), 67 deletions(-) diff --git a/e2e_test/source/cdc_inline/alter/cdc_table_alter.slt b/e2e_test/source/cdc_inline/alter/cdc_table_alter.slt index 6bea5dce2fe45..baecff00c09a4 100644 --- a/e2e_test/source/cdc_inline/alter/cdc_table_alter.slt +++ b/e2e_test/source/cdc_inline/alter/cdc_table_alter.slt @@ -131,13 +131,13 @@ select order_id, product_id, shipment_id from enriched_orders order by order_id; system ok mysql -e " USE testdb1; - ALTER TABLE products ADD COLUMN weight DECIMAL(10, 2) NOT NULL DEFAULT 0.0; + ALTER TABLE products ADD COLUMN weight DECIMAL(10, 2) NOT NULL DEFAULT 1.1; ALTER TABLE orders ADD COLUMN order_comment VARCHAR(255); " # alter cdc tables statement ok -ALTER TABLE my_products ADD COLUMN weight DECIMAL; +ALTER TABLE my_products ADD COLUMN weight DECIMAL DEFAULT 1.1; statement ok ALTER TABLE my_orders ADD COLUMN order_comment VARCHAR; @@ -148,9 +148,9 @@ sleep 3s query ITTT SELECT id,name,description,weight FROM my_products order by id limit 3 ---- -101 scooter Small 2-wheel scooter NULL -102 car battery 12V car battery NULL -103 12-pack drill 12-pack of drill bits with sizes ranging from #40 to #3 NULL +101 scooter Small 2-wheel scooter 1.1 +102 car battery 12V car battery 1.1 +103 12-pack drill 12-pack of drill bits with sizes ranging from #40 to #3 1.1 # update mysql tables @@ -169,7 +169,7 @@ SELECT id,name,description,weight FROM my_products order by id limit 3 ---- 101 scooter Small 2-wheel scooter 10.50 102 car battery 12V car battery 12.50 -103 12-pack drill 12-pack of drill bits with sizes ranging from #40 to #3 NULL +103 12-pack drill 12-pack of drill bits with sizes ranging from #40 to #3 1.1 query ITTT SELECT order_id,order_date,customer_name,product_id,order_status,order_comment FROM my_orders order by order_id limit 2 diff --git a/e2e_test/source/cdc_inline/auto_schema_change_mysql.slt b/e2e_test/source/cdc_inline/auto_schema_change_mysql.slt index 3c386a2718479..f1c94be75ccf5 100644 --- a/e2e_test/source/cdc_inline/auto_schema_change_mysql.slt +++ b/e2e_test/source/cdc_inline/auto_schema_change_mysql.slt @@ -10,8 +10,11 @@ mysql -e " CREATE TABLE customers( id BIGINT PRIMARY KEY, modified DATETIME, + name VARCHAR(32), custinfo JSON ); + INSERT INTO customers VALUES(1, NOW(), 'John', NULL); + INSERT INTO customers VALUES(2, NOW(), 'Doe', NULL); ALTER TABLE customers ADD INDEX zipsa( (CAST(custinfo->'zipcode' AS UNSIGNED ARRAY)) ); " @@ -28,7 +31,7 @@ create source mysql_source with ( ); statement ok -create table rw_customers (id bigint, modified timestamp, custinfo jsonb, primary key (id)) from mysql_source table 'mytest.customers'; +create table rw_customers (id bigint, modified timestamp, name varchar, custinfo jsonb, primary key (id)) from mysql_source table 'mytest.customers'; # Name, Type, Is Hidden, Description query TTTT @@ -36,6 +39,7 @@ describe rw_customers; ---- id bigint false NULL modified timestamp without time zone false NULL +name character varying false NULL custinfo jsonb false NULL primary key id NULL NULL distribution key id NULL NULL @@ -46,8 +50,8 @@ table description rw_customers NULL NULL system ok mysql -e " USE mytest; - ALTER TABLE customers ADD COLUMN v1 VARCHAR(255); - ALTER TABLE customers ADD COLUMN v2 double(5,2); + ALTER TABLE customers ADD COLUMN v1 VARCHAR(255) DEFAULT 'hello'; + ALTER TABLE customers ADD COLUMN v2 double(5,2) 
DEFAULT 88.9; " sleep 3s @@ -58,6 +62,7 @@ describe rw_customers; ---- id bigint false NULL modified timestamp without time zone false NULL +name character varying false NULL custinfo jsonb false NULL v1 character varying false NULL v2 double precision false NULL @@ -65,6 +70,12 @@ primary key id NULL NULL distribution key id NULL NULL table description rw_customers NULL NULL +query TTTT +select id,v1,v2,name from rw_customers order by id; +---- +1 hello 88.9 John +2 hello 88.9 Doe + # rename column on upstream will not be replicated, since we do not support rename column system ok mysql -e " @@ -81,6 +92,7 @@ describe rw_customers; ---- id bigint false NULL modified timestamp without time zone false NULL +name character varying false NULL custinfo jsonb false NULL v1 character varying false NULL v2 double precision false NULL @@ -112,6 +124,7 @@ query TTTT describe rw_customers; ---- id bigint false NULL +name character varying false NULL custinfo jsonb false NULL primary key id NULL NULL distribution key id NULL NULL diff --git a/src/common/src/catalog/column.rs b/src/common/src/catalog/column.rs index b3065defea2a2..f1d1123fbfd4e 100644 --- a/src/common/src/catalog/column.rs +++ b/src/common/src/catalog/column.rs @@ -18,7 +18,7 @@ use itertools::Itertools; use risingwave_pb::expr::ExprNode; use risingwave_pb::plan_common::column_desc::GeneratedOrDefaultColumn; use risingwave_pb::plan_common::{ - AdditionalColumn, ColumnDescVersion, PbColumnCatalog, PbColumnDesc, + AdditionalColumn, ColumnDescVersion, DefaultColumnDesc, PbColumnCatalog, PbColumnDesc, }; use super::{row_id_column_desc, USER_COLUMN_ID_OFFSET}; @@ -140,6 +140,18 @@ impl ColumnDesc { } } + pub fn named_with_default_value( + name: impl Into, + column_id: ColumnId, + data_type: DataType, + default_val: DefaultColumnDesc, + ) -> ColumnDesc { + ColumnDesc { + generated_or_default_column: Some(GeneratedOrDefaultColumn::DefaultColumn(default_val)), + ..Self::named(name, column_id, data_type) + } + } + pub fn named_with_additional_column( name: impl Into, column_id: ColumnId, diff --git a/src/connector/src/parser/unified/debezium.rs b/src/connector/src/parser/unified/debezium.rs index 2dbe78cf32e25..a2c5742b87d44 100644 --- a/src/connector/src/parser/unified/debezium.rs +++ b/src/connector/src/parser/unified/debezium.rs @@ -18,8 +18,12 @@ use risingwave_common::types::{ DataType, Datum, DatumCow, Scalar, ScalarImpl, ScalarRefImpl, Timestamptz, ToDatumRef, ToOwnedDatum, }; +use risingwave_common::util::value_encoding::DatumToProtoExt; use risingwave_connector_codec::decoder::AccessExt; +use risingwave_pb::expr::expr_node::{RexNode, Type as ExprType}; +use risingwave_pb::expr::ExprNode; use risingwave_pb::plan_common::additional_column::ColumnType; +use risingwave_pb::plan_common::DefaultColumnDesc; use thiserror_ext::AsReport; use super::{Access, AccessError, AccessResult, ChangeEvent, ChangeEventOperation}; @@ -221,7 +225,40 @@ pub fn parse_schema_change( } }; - column_descs.push(ColumnDesc::named(name, ColumnId::placeholder(), data_type)); + // handle default value expression, currently we only support constant expression + let column_desc = match col.access_object_field("defaultValueExpression") { + Some(default_val_expr_str) if !default_val_expr_str.is_jsonb_null() => { + let value_text = default_val_expr_str.as_string().unwrap(); + let snapshot_value: Datum = Some( + ScalarImpl::from_text(value_text.as_str(), &data_type).map_err( + |err| { + tracing::error!(target: "auto_schema_change", error=%err.as_report(), "failed to parse 
default value expression"); + AccessError::TypeError { + expected: "constant expression".into(), + got: data_type.to_string(), + value: value_text, + }}, + )?, + ); + // equivalent to `Literal::to_expr_proto` + let default_val_expr_node = ExprNode { + function_type: ExprType::Unspecified as i32, + return_type: Some(data_type.to_protobuf()), + rex_node: Some(RexNode::Constant(snapshot_value.to_protobuf())), + }; + ColumnDesc::named_with_default_value( + name, + ColumnId::placeholder(), + data_type, + DefaultColumnDesc { + expr: Some(default_val_expr_node), + snapshot_value: Some(snapshot_value.to_protobuf()), + }, + ) + } + _ => ColumnDesc::named(name, ColumnId::placeholder(), data_type), + }; + column_descs.push(column_desc); } } diff --git a/src/connector/src/source/manager.rs b/src/connector/src/source/manager.rs index 731d0c4ff8ae8..67826129c8b82 100644 --- a/src/connector/src/source/manager.rs +++ b/src/connector/src/source/manager.rs @@ -19,6 +19,7 @@ use risingwave_common::catalog::{ TABLE_NAME_COLUMN_NAME, }; use risingwave_common::types::DataType; +use risingwave_pb::plan_common::column_desc::GeneratedOrDefaultColumn; use risingwave_pb::plan_common::{AdditionalColumn, ColumnDescVersion}; /// `SourceColumnDesc` is used to describe a column in the Source. @@ -137,11 +138,14 @@ impl From<&ColumnDesc> for SourceColumnDesc { version: _, }: &ColumnDesc, ) -> Self { - debug_assert!( - generated_or_default_column.is_none(), - "source column should not be generated or default: {:?}", - generated_or_default_column.as_ref().unwrap() - ); + if let Some(option) = generated_or_default_column { + debug_assert!( + matches!(option, GeneratedOrDefaultColumn::DefaultColumn(_)), + "source column should not be generated: {:?}", + generated_or_default_column.as_ref().unwrap() + ) + } + Self { name: name.clone(), data_type: data_type.clone(), diff --git a/src/frontend/src/handler/alter_table_column.rs b/src/frontend/src/handler/alter_table_column.rs index f00ff35992b43..4fd624929a175 100644 --- a/src/frontend/src/handler/alter_table_column.rs +++ b/src/frontend/src/handler/alter_table_column.rs @@ -58,6 +58,7 @@ pub async fn replace_table_with_definition( definition, original_catalog, source_schema, + None, ) .await?; @@ -73,7 +74,7 @@ pub async fn replace_table_with_definition( pub async fn get_new_table_definition_for_cdc_table( session: &Arc, table_name: ObjectName, - new_columns: Vec, + new_columns: &[ColumnCatalog], ) -> Result<(Statement, Arc)> { let original_catalog = fetch_table_catalog_for_alter(session.as_ref(), &table_name)?; @@ -96,22 +97,24 @@ pub async fn get_new_table_definition_for_cdc_table( "source schema should be None for CDC table" ); - let orig_column_map: HashMap = HashMap::from_iter( - original_columns + let orig_column_catalog: HashMap = HashMap::from_iter( + original_catalog + .columns() .iter() - .map(|col| (col.name.real_value(), col.clone())), + .map(|col| (col.name().to_string(), col.clone())), ); // update the original columns with new version columns let mut new_column_defs = vec![]; - for col in new_columns { - // if the column exists in the original definitoins, use the original column definition. + for new_col in new_columns { + // if the column exists in the original catalog, use it to construct the column definition. 
// since we don't support altering the column type right now - if let Some(original_col) = orig_column_map.get(col.name()) { - new_column_defs.push(original_col.clone()); + if let Some(original_col) = orig_column_catalog.get(new_col.name()) { + let ty = to_ast_data_type(original_col.data_type())?; + new_column_defs.push(ColumnDef::new(original_col.name().into(), ty, None, vec![])); } else { - let ty = to_ast_data_type(col.data_type())?; - new_column_defs.push(ColumnDef::new(col.name().into(), ty, None, vec![])); + let ty = to_ast_data_type(new_col.data_type())?; + new_column_defs.push(ColumnDef::new(new_col.name().into(), ty, None, vec![])); } } *original_columns = new_column_defs; @@ -162,6 +165,7 @@ pub async fn get_replace_table_plan( definition: Statement, original_catalog: &Arc, source_schema: Option, + new_version_columns: Option>, // only provided in auto schema change ) -> Result<( Option, Table, @@ -202,6 +206,7 @@ pub async fn get_replace_table_plan( on_conflict, with_version_column, cdc_table_info, + new_version_columns, ) .await?; diff --git a/src/frontend/src/handler/create_sink.rs b/src/frontend/src/handler/create_sink.rs index 7ef118891865e..d5d2818f0c357 100644 --- a/src/frontend/src/handler/create_sink.rs +++ b/src/frontend/src/handler/create_sink.rs @@ -693,6 +693,7 @@ pub(crate) async fn reparse_table_for_sink( on_conflict, with_version_column, None, + None, ) .await?; diff --git a/src/frontend/src/handler/create_table.rs b/src/frontend/src/handler/create_table.rs index 0359280d28adc..a10453a43ea4e 100644 --- a/src/frontend/src/handler/create_table.rs +++ b/src/frontend/src/handler/create_table.rs @@ -27,6 +27,7 @@ use risingwave_common::catalog::{ INITIAL_TABLE_VERSION_ID, }; use risingwave_common::license::Feature; +use risingwave_common::util::iter_util::ZipEqFast; use risingwave_common::util::sort_util::{ColumnOrder, OrderType}; use risingwave_common::util::value_encoding::DatumToProtoExt; use risingwave_connector::source::cdc::build_cdc_table_id; @@ -750,10 +751,10 @@ fn gen_table_plan_inner( /// in create table workflow, the `table_id` is a placeholder will be filled in the Meta #[allow(clippy::too_many_arguments)] pub(crate) fn gen_create_table_plan_for_cdc_table( - handler_args: HandlerArgs, - explain_options: ExplainOptions, + context: OptimizerContextRef, source: Arc, external_table_name: String, + column_defs: Vec, mut columns: Vec, pk_names: Vec, connect_properties: WithOptionsSecResolved, @@ -761,12 +762,12 @@ pub(crate) fn gen_create_table_plan_for_cdc_table( on_conflict: Option, with_version_column: Option, include_column_options: IncludeOption, - resolved_table_name: String, + table_name: ObjectName, + resolved_table_name: String, // table name without schema prefix database_id: DatabaseId, schema_id: SchemaId, table_id: TableId, ) -> Result<(PlanRef, PbTable)> { - let context: OptimizerContextRef = OptimizerContext::new(handler_args, explain_options).into(); let session = context.session_ctx().clone(); // append additional columns to the end @@ -781,9 +782,18 @@ pub(crate) fn gen_create_table_plan_for_cdc_table( c.column_desc.column_id = col_id_gen.generate(c.name()) } - let (columns, pk_column_ids, _row_id_index) = + let (mut columns, pk_column_ids, _row_id_index) = bind_pk_and_row_id_on_relation(columns, pk_names, true)?; + // NOTES: In auto schema change, default value is not provided in column definition. 
+ bind_sql_column_constraints( + context.session_ctx(), + table_name.real_value(), + &mut columns, + column_defs, + &pk_column_ids, + )?; + let definition = context.normalized_sql().to_owned(); let pk_column_indices = { @@ -986,7 +996,7 @@ pub(super) async fn handle_create_table_plan( let session = &handler_args.session; let db_name = session.database(); let (schema_name, resolved_table_name) = - Binder::resolve_schema_qualified_name(db_name, table_name)?; + Binder::resolve_schema_qualified_name(db_name, table_name.clone())?; let (database_id, schema_id) = session.get_database_and_schema_id_for_create(schema_name.clone())?; @@ -1020,11 +1030,13 @@ pub(super) async fn handle_create_table_plan( ) .await?; + let context: OptimizerContextRef = + OptimizerContext::new(handler_args, explain_options).into(); let (plan, table) = gen_create_table_plan_for_cdc_table( - handler_args, - explain_options, + context, source, cdc_table.external_table_name.clone(), + column_defs, columns, pk_names, connect_properties, @@ -1032,6 +1044,7 @@ pub(super) async fn handle_create_table_plan( on_conflict, with_version_column, include_column_options, + table_name, resolved_table_name, database_id, schema_id, @@ -1120,13 +1133,20 @@ fn sanity_check_for_cdc_table( Ok(()) } +struct CdcSchemaChangeArgs { + /// original table catalog + original_catalog: Arc, + /// new version table columns, only provided in auto schema change + new_version_columns: Option>, +} + +/// Derive schema for cdc table when create a new Table or alter an existing Table async fn derive_schema_for_cdc_table( column_defs: &Vec, constraints: &Vec, connect_properties: WithOptionsSecResolved, need_auto_schema_map: bool, - // original table catalog available in auto schema change process - original_catalog: Option>, + schema_change_args: Option, ) -> Result<(Vec, Vec)> { // read cdc table schema from external db or parsing the schema from SQL definitions if need_auto_schema_map { @@ -1158,14 +1178,32 @@ async fn derive_schema_for_cdc_table( table.pk_names().clone(), )) } else { - let columns = bind_sql_columns(column_defs)?; - // For table created by `create table t (*)` the constraint is empty, we need to - // retrieve primary key names from original table catalog if available - let pk_names = if let Some(original_catalog) = original_catalog { - original_catalog + let mut columns = bind_sql_columns(column_defs)?; + let pk_names = if let Some(args) = schema_change_args { + // If new_version_columns is provided, we are in the process of auto schema change. + // update the default value column since the default value column is not set in the + // column sql definition. + if let Some(new_version_columns) = args.new_version_columns { + for (col, new_version_col) in columns + .iter_mut() + .zip_eq_fast(new_version_columns.into_iter()) + { + assert_eq!(col.name(), new_version_col.name()); + col.column_desc.generated_or_default_column = + new_version_col.column_desc.generated_or_default_column; + } + } + + // For table created by `create table t (*)` the constraint is empty, we need to + // retrieve primary key names from original table catalog if available + args.original_catalog .pk .iter() - .map(|x| original_catalog.columns[x.column_index].name().to_string()) + .map(|x| { + args.original_catalog.columns[x.column_index] + .name() + .to_string() + }) .collect() } else { bind_sql_pk_names(column_defs, constraints)? 
@@ -1289,6 +1327,7 @@ pub async fn generate_stream_graph_for_table( on_conflict: Option, with_version_column: Option, cdc_table_info: Option, + new_version_columns: Option>, ) -> Result<(StreamFragmentGraph, Table, Option, TableJobType)> { use risingwave_pb::catalog::table::OptionalAssociatedSourceId; @@ -1342,22 +1381,28 @@ pub async fn generate_stream_graph_for_table( &constraints, connect_properties.clone(), false, - Some(original_catalog.clone()), + Some(CdcSchemaChangeArgs { + original_catalog: original_catalog.clone(), + new_version_columns, + }), ) .await?; + let context: OptimizerContextRef = + OptimizerContext::new(handler_args, ExplainOptions::default()).into(); let (plan, table) = gen_create_table_plan_for_cdc_table( - handler_args, - ExplainOptions::default(), + context, source, cdc_table.external_table_name.clone(), + column_defs, columns, pk_names, connect_properties, col_id_gen, on_conflict, with_version_column, - vec![], // empty include options + IncludeOption::default(), + table_name, resolved_table_name, database_id, schema_id, diff --git a/src/frontend/src/rpc/mod.rs b/src/frontend/src/rpc/mod.rs index 257695cc99e48..b0472a431c2dd 100644 --- a/src/frontend/src/rpc/mod.rs +++ b/src/frontend/src/rpc/mod.rs @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +use itertools::Itertools; use pgwire::pg_server::{BoxedError, SessionManager}; use risingwave_pb::ddl_service::{ReplaceTablePlan, TableSchemaChange}; use risingwave_pb::frontend_service::frontend_service_server::FrontendService; @@ -90,16 +91,22 @@ async fn get_new_table_plan( // get a session object for the corresponding user and database let session = session_mgr.create_dummy_session(database_id, owner)?; - let new_columns = table_change.columns.into_iter().map(|c| c.into()).collect(); + let new_version_columns = table_change + .columns + .into_iter() + .map(|c| c.into()) + .collect_vec(); let table_name = ObjectName::from(vec![table_name.as_str().into()]); let (new_table_definition, original_catalog) = - get_new_table_definition_for_cdc_table(&session, table_name.clone(), new_columns).await?; + get_new_table_definition_for_cdc_table(&session, table_name.clone(), &new_version_columns) + .await?; let (_, table, graph, col_index_mapping, job_type) = get_replace_table_plan( &session, table_name, new_table_definition, &original_catalog, None, + Some(new_version_columns), ) .await?; diff --git a/src/meta/service/src/ddl_service.rs b/src/meta/service/src/ddl_service.rs index 1e8e1c9641d8a..641bc392f7228 100644 --- a/src/meta/service/src/ddl_service.rs +++ b/src/meta/service/src/ddl_service.rs @@ -19,6 +19,7 @@ use anyhow::anyhow; use rand::seq::SliceRandom; use rand::thread_rng; use risingwave_common::catalog::ColumnCatalog; +use risingwave_common::types::DataType; use risingwave_common::util::column_index_mapping::ColIndexMapping; use risingwave_connector::sink::catalog::SinkId; use risingwave_meta::manager::{EventLogManagerRef, MetadataManager}; @@ -969,40 +970,41 @@ impl DdlService for DdlServiceImpl { for table in tables { // Since we only support `ADD` and `DROP` column, we check whether the new columns and the original columns // is a subset of the other. 
- let original_column_names: HashSet = HashSet::from_iter( - table - .columns - .iter() - .map(|col| ColumnCatalog::from(col.clone()).column_desc.name), - ); - let new_column_names: HashSet = HashSet::from_iter( - table_change - .columns - .iter() - .map(|col| ColumnCatalog::from(col.clone()).column_desc.name), - ); - if !(original_column_names.is_subset(&new_column_names) - || original_column_names.is_superset(&new_column_names)) + let original_columns: HashSet<(String, DataType)> = + HashSet::from_iter(table.columns.iter().map(|col| { + let col = ColumnCatalog::from(col.clone()); + let data_type = col.data_type().clone(); + (col.column_desc.name, data_type) + })); + let new_columns: HashSet<(String, DataType)> = + HashSet::from_iter(table_change.columns.iter().map(|col| { + let col = ColumnCatalog::from(col.clone()); + let data_type = col.data_type().clone(); + (col.column_desc.name, data_type) + })); + + if !(original_columns.is_subset(&new_columns) + || original_columns.is_superset(&new_columns)) { tracing::warn!(target: "auto_schema_change", table_id = table.id, cdc_table_id = table.cdc_table_id, upstraem_ddl = table_change.upstream_ddl, - original_columns = ?original_column_names, - new_columns = ?new_column_names, + original_columns = ?original_columns, + new_columns = ?new_columns, "New columns should be a subset or superset of the original columns, since only `ADD COLUMN` and `DROP COLUMN` is supported"); return Err(Status::invalid_argument( "New columns should be a subset or superset of the original columns", )); } // skip the schema change if there is no change to original columns - if original_column_names == new_column_names { + if original_columns == new_columns { tracing::warn!(target: "auto_schema_change", table_id = table.id, cdc_table_id = table.cdc_table_id, upstraem_ddl = table_change.upstream_ddl, - original_columns = ?original_column_names, - new_columns = ?new_column_names, + original_columns = ?original_columns, + new_columns = ?new_columns, "No change to columns, skipping the schema change"); continue; } From f05d549efb5c1c2287047c07750ed210645c8b81 Mon Sep 17 00:00:00 2001 From: Bugen Zhao Date: Tue, 3 Sep 2024 13:18:18 +0800 Subject: [PATCH 07/26] fix(streaming): correctly retrieve initial split assignments from combined mutation for sink-into-table (#18356) Signed-off-by: Bugen Zhao --- e2e_test/source_inline/kafka/issue_18308.slt | 50 +++++++++++++++++ src/stream/src/executor/mod.rs | 54 +++++++++++++++++-- .../src/executor/source/fs_source_executor.rs | 15 ++---- .../source/source_backfill_executor.rs | 18 ++----- .../src/executor/source/source_executor.rs | 22 ++------ 5 files changed, 112 insertions(+), 47 deletions(-) create mode 100644 e2e_test/source_inline/kafka/issue_18308.slt diff --git a/e2e_test/source_inline/kafka/issue_18308.slt b/e2e_test/source_inline/kafka/issue_18308.slt new file mode 100644 index 0000000000000..f7c0fa2f8062e --- /dev/null +++ b/e2e_test/source_inline/kafka/issue_18308.slt @@ -0,0 +1,50 @@ +control substitution on + +system ok +rpk topic create test-topic-18308 + +statement ok +CREATE SOURCE kafkasource ( + id int, + name string, +) +WITH ( + ${RISEDEV_KAFKA_WITH_OPTIONS_COMMON}, + topic = 'test-topic-18308', + scan.startup.mode = 'earliest' +) FORMAT PLAIN ENCODE JSON; + +statement ok +CREATE TABLE compact_table ( + id int, + name varchar, + PRIMARY KEY (id) +); + +statement ok +CREATE SINK table_sink INTO compact_table AS SELECT * FROM kafkasource; + +system ok +echo '{ "id": 1, "name": "xxchan" }' | rpk topic produce 
test-topic-18308 + +sleep 5s + +statement ok +flush; + +query IT +SELECT * FROM compact_table; +---- +1 xxchan + +statement ok +DROP SINK table_sink; + +statement ok +DROP TABLE compact_table; + +statement ok +DROP SOURCE kafkasource; + +system ok +rpk topic delete test-topic-18308 diff --git a/src/stream/src/executor/mod.rs b/src/stream/src/executor/mod.rs index 5ade4c8243e03..7b22a48a25ab6 100644 --- a/src/stream/src/executor/mod.rs +++ b/src/stream/src/executor/mod.rs @@ -264,6 +264,7 @@ where pub const INVALID_EPOCH: u64 = 0; type UpstreamFragmentId = FragmentId; +type SplitAssignments = HashMap>; #[derive(Debug, Clone, PartialEq)] pub struct UpdateMutation { @@ -271,7 +272,7 @@ pub struct UpdateMutation { pub merges: HashMap<(ActorId, UpstreamFragmentId), MergeUpdate>, pub vnode_bitmaps: HashMap>, pub dropped_actors: HashSet, - pub actor_splits: HashMap>, + pub actor_splits: SplitAssignments, pub actor_new_dispatchers: HashMap>, } @@ -280,7 +281,7 @@ pub struct AddMutation { pub adds: HashMap>, pub added_actors: HashSet, // TODO: remove this and use `SourceChangesSplit` after we support multiple mutations. - pub splits: HashMap>, + pub splits: SplitAssignments, pub pause: bool, /// (`upstream_mv_table_id`, `subscriber_id`) pub subscriptions_to_add: Vec<(TableId, u32)>, @@ -292,7 +293,7 @@ pub enum Mutation { Stop(HashSet), Update(UpdateMutation), Add(AddMutation), - SourceChangeSplit(HashMap>), + SourceChangeSplit(SplitAssignments), Pause, Resume, Throttle(HashMap>), @@ -382,6 +383,51 @@ impl Barrier { .map_or(false, |actors| actors.contains(&actor_id)) } + /// Get the initial split assignments for the actor with `actor_id`. + /// + /// This should only be called on the initial barrier received by the executor. It must be + /// + /// - `Add` mutation when it's a new streaming job, or recovery. + /// - `Update` mutation when it's created for scaling. + /// - `AddAndUpdate` mutation when it's created for sink-into-table. + /// + /// Note that `SourceChangeSplit` is **not** included, because it's only used for changing splits + /// of existing executors. + pub fn initial_split_assignment(&self, actor_id: ActorId) -> Option<&[SplitImpl]> { + match self.mutation.as_deref()? { + Mutation::Update(UpdateMutation { actor_splits, .. }) + | Mutation::Add(AddMutation { + splits: actor_splits, + .. + }) => actor_splits.get(&actor_id), + + Mutation::AddAndUpdate( + AddMutation { + splits: add_actor_splits, + .. + }, + UpdateMutation { + actor_splits: update_actor_splits, + .. + }, + ) => add_actor_splits + .get(&actor_id) + // `Add` and `Update` should apply to different fragments, so we don't need to merge them. + .or_else(|| update_actor_splits.get(&actor_id)), + + _ => { + if cfg!(debug_assertions) { + panic!( + "the initial mutation of the barrier should not be {:?}", + self.mutation + ); + } + None + } + } + .map(|s| s.as_slice()) + } + /// Get all actors that to be stopped (dropped) by this barrier. 
pub fn all_stop_actors(&self) -> Option<&HashSet> { match self.mutation.as_deref() { @@ -563,7 +609,7 @@ impl Mutation { } fn to_protobuf(&self) -> PbMutation { - let actor_splits_to_protobuf = |actor_splits: &HashMap>| { + let actor_splits_to_protobuf = |actor_splits: &SplitAssignments| { actor_splits .iter() .map(|(&actor_id, splits)| { diff --git a/src/stream/src/executor/source/fs_source_executor.rs b/src/stream/src/executor/source/fs_source_executor.rs index 6754570c4930b..70f0ce5f4f24b 100644 --- a/src/stream/src/executor/source/fs_source_executor.rs +++ b/src/stream/src/executor/source/fs_source_executor.rs @@ -41,7 +41,7 @@ use super::{ use crate::common::rate_limit::limited_chunk_size; use crate::executor::prelude::*; use crate::executor::stream_reader::StreamReaderWithPause; -use crate::executor::{AddMutation, UpdateMutation}; +use crate::executor::UpdateMutation; /// A constant to multiply when calculating the maximum time to wait for a barrier. This is due to /// some latencies in network and cost in meta. @@ -316,17 +316,8 @@ impl FsSourceExecutor { let start_with_paused = barrier.is_pause_on_startup(); let mut boot_state = Vec::default(); - if let Some( - Mutation::Add(AddMutation { splits, .. }) - | Mutation::Update(UpdateMutation { - actor_splits: splits, - .. - }), - ) = barrier.mutation.as_deref() - { - if let Some(splits) = splits.get(&self.actor_ctx.id) { - boot_state.clone_from(splits); - } + if let Some(splits) = barrier.initial_split_assignment(self.actor_ctx.id) { + boot_state = splits.to_vec(); } self.stream_source_core diff --git a/src/stream/src/executor/source/source_backfill_executor.rs b/src/stream/src/executor/source/source_backfill_executor.rs index 6b22293331306..9c3336878f952 100644 --- a/src/stream/src/executor/source/source_backfill_executor.rs +++ b/src/stream/src/executor/source/source_backfill_executor.rs @@ -38,7 +38,7 @@ use super::{apply_rate_limit, get_split_offset_col_idx}; use crate::common::rate_limit::limited_chunk_size; use crate::executor::prelude::*; use crate::executor::source::source_executor::WAIT_BARRIER_MULTIPLE_TIMES; -use crate::executor::{AddMutation, UpdateMutation}; +use crate::executor::UpdateMutation; #[derive(Clone, Debug, Deserialize, Serialize, PartialEq)] pub enum BackfillState { @@ -305,20 +305,10 @@ impl SourceBackfillExecutorInner { }; let mut owned_splits = Vec::default(); - if let Some(mutation) = barrier.mutation.as_ref() { - match mutation.as_ref() { - Mutation::Add(AddMutation { splits, .. }) - | Mutation::Update(UpdateMutation { - actor_splits: splits, - .. - }) => { - if let Some(splits) = splits.get(&self.actor_ctx.id) { - owned_splits.clone_from(splits); - } - } - _ => {} - } + if let Some(splits) = barrier.initial_split_assignment(self.actor_ctx.id) { + owned_splits = splits.to_vec(); } + self.backfill_state_store.init_epoch(barrier.epoch); let mut backfill_states: BackfillStates = HashMap::new(); diff --git a/src/stream/src/executor/source/source_executor.rs b/src/stream/src/executor/source/source_executor.rs index 1531dca93b909..dd93ac85d1f1c 100644 --- a/src/stream/src/executor/source/source_executor.rs +++ b/src/stream/src/executor/source/source_executor.rs @@ -45,7 +45,7 @@ use super::{ use crate::common::rate_limit::limited_chunk_size; use crate::executor::prelude::*; use crate::executor::stream_reader::StreamReaderWithPause; -use crate::executor::{AddMutation, UpdateMutation}; +use crate::executor::UpdateMutation; /// A constant to multiply when calculating the maximum time to wait for a barrier. 
This is due to /// some latencies in network and cost in meta. @@ -445,22 +445,9 @@ impl SourceExecutor { }; let mut boot_state = Vec::default(); - if let Some( - Mutation::Add(AddMutation { splits, .. }) - | Mutation::Update(UpdateMutation { - actor_splits: splits, - .. - }), - ) = barrier.mutation.as_deref() - { - if let Some(splits) = splits.get(&self.actor_ctx.id) { - tracing::debug!( - "source exector: actor {:?} boot with splits: {:?}", - self.actor_ctx.id, - splits - ); - boot_state.clone_from(splits); - } + if let Some(splits) = barrier.initial_split_assignment(self.actor_ctx.id) { + tracing::debug!(?splits, "boot with splits"); + boot_state = splits.to_vec(); } core.split_state_store.init_epoch(barrier.epoch); @@ -889,6 +876,7 @@ mod tests { use super::*; use crate::executor::source::{default_source_internal_table, SourceStateTableHandler}; + use crate::executor::AddMutation; const MOCK_SOURCE_NAME: &str = "mock_source"; From 5d8b1650ecd0d9e34a3eed519a9723752c5ea633 Mon Sep 17 00:00:00 2001 From: Dylan Date: Tue, 3 Sep 2024 13:40:45 +0800 Subject: [PATCH 08/26] feat(iceberg): support create table for jdbc catalog (#18364) --- .../connector/catalog/JniCatalogWrapper.java | 34 +++++ src/connector/src/sink/iceberg/jni_catalog.rs | 123 ++++++++++++++++-- 2 files changed, 147 insertions(+), 10 deletions(-) diff --git a/java/connector-node/risingwave-sink-iceberg/src/main/java/com/risingwave/connector/catalog/JniCatalogWrapper.java b/java/connector-node/risingwave-sink-iceberg/src/main/java/com/risingwave/connector/catalog/JniCatalogWrapper.java index 583747f3b2f3f..30e8230a6dc9b 100644 --- a/java/connector-node/risingwave-sink-iceberg/src/main/java/com/risingwave/connector/catalog/JniCatalogWrapper.java +++ b/java/connector-node/risingwave-sink-iceberg/src/main/java/com/risingwave/connector/catalog/JniCatalogWrapper.java @@ -22,8 +22,10 @@ import java.util.Objects; import org.apache.iceberg.CatalogUtil; import org.apache.iceberg.catalog.Catalog; +import org.apache.iceberg.catalog.Namespace; import org.apache.iceberg.catalog.TableIdentifier; import org.apache.iceberg.rest.CatalogHandlers; +import org.apache.iceberg.rest.requests.CreateTableRequest; import org.apache.iceberg.rest.requests.UpdateTableRequest; import org.apache.iceberg.rest.responses.LoadTableResponse; @@ -62,6 +64,38 @@ public String updateTable(String updateTableRequest) throws Exception { return RESTObjectMapper.mapper().writer().writeValueAsString(resp); } + /** + * Create table through this prox. + * + * @param namespaceStr String. + * @param createTableRequest Request serialized using json. + * @return Response serialized using json. + * @throws Exception + */ + public String createTable(String namespaceStr, String createTableRequest) throws Exception { + Namespace namespace; + if (namespaceStr == null) { + namespace = Namespace.empty(); + } else { + namespace = Namespace.of(namespaceStr); + } + CreateTableRequest req = + RESTObjectMapper.mapper().readValue(createTableRequest, CreateTableRequest.class); + LoadTableResponse resp = CatalogHandlers.createTable(catalog, namespace, req); + return RESTObjectMapper.mapper().writer().writeValueAsString(resp); + } + + /** + * Checks if a table exists in the catalog. + * + * @param tableIdentifier The identifier of the table to check. + * @return true if the table exists, false otherwise. 
+ */ + public boolean tableExists(String tableIdentifier) { + TableIdentifier id = TableIdentifier.parse(tableIdentifier); + return catalog.tableExists(id); + } + /** * Create JniCatalogWrapper instance. * diff --git a/src/connector/src/sink/iceberg/jni_catalog.rs b/src/connector/src/sink/iceberg/jni_catalog.rs index 6ef251878ff94..b80a6a305870f 100644 --- a/src/connector/src/sink/iceberg/jni_catalog.rs +++ b/src/connector/src/sink/iceberg/jni_catalog.rs @@ -21,7 +21,7 @@ use std::sync::Arc; use anyhow::Context; use async_trait::async_trait; use iceberg::io::FileIO; -use iceberg::spec::TableMetadata; +use iceberg::spec::{Schema, SortOrder, TableMetadata, UnboundPartitionSpec}; use iceberg::table::Table as TableV2; use iceberg::{ Catalog as CatalogV2, Namespace, NamespaceIdent, TableCommit, TableCreation, TableIdent, @@ -34,9 +34,10 @@ use icelake::{ErrorKind, Table, TableIdentifier}; use itertools::Itertools; use jni::objects::{GlobalRef, JObject}; use jni::JavaVM; +use risingwave_common::bail; use risingwave_jni_core::call_method; use risingwave_jni_core::jvm_runtime::{execute_with_jni_env, jobj_to_str, JVM}; -use serde::Deserialize; +use serde::{Deserialize, Serialize}; use crate::error::ConnectorResult; @@ -48,6 +49,36 @@ struct LoadTableResponse { pub _config: Option>, } +#[derive(Debug, Serialize)] +#[serde(rename_all = "kebab-case")] +struct CreateTableRequest { + /// The name of the table. + pub name: String, + /// The location of the table. + pub location: Option, + /// The schema of the table. + pub schema: Schema, + /// The partition spec of the table, could be None. + pub partition_spec: Option, + /// The sort order of the table. + pub write_order: Option, + /// The properties of the table. + pub properties: HashMap, +} + +impl From<&TableCreation> for CreateTableRequest { + fn from(value: &TableCreation) -> Self { + Self { + name: value.name.clone(), + location: value.location.clone(), + schema: value.schema.clone(), + partition_spec: value.partition_spec.clone(), + write_order: value.sort_order.clone(), + properties: value.properties.clone(), + } + } +} + #[derive(Debug)] pub struct JniCatalog { java_catalog: GlobalRef, @@ -206,10 +237,58 @@ impl CatalogV2 for JniCatalog { /// Create a new table inside the namespace. async fn create_table( &self, - _namespace: &NamespaceIdent, - _creation: TableCreation, + namespace: &NamespaceIdent, + creation: TableCreation, ) -> iceberg::Result { - todo!() + execute_with_jni_env(self.jvm, |env| { + let namespace_jstr = if namespace.is_empty() { + env.new_string("").unwrap() + } else { + if namespace.len() > 1 { + bail!("Namespace with more than one level is not supported!") + } + env.new_string(&namespace[0]).unwrap() + }; + + let creation_str = serde_json::to_string(&CreateTableRequest::from(&creation))?; + + let creation_jstr = env.new_string(&creation_str).unwrap(); + + let result_json = + call_method!(env, self.java_catalog.as_obj(), {String createTable(String, String)}, + &namespace_jstr, &creation_jstr) + .with_context(|| format!("Failed to create iceberg table: {}", creation.name))?; + + let rust_json_str = jobj_to_str(env, result_json)?; + + let resp: LoadTableResponse = serde_json::from_str(&rust_json_str)?; + + let metadata_location = resp.metadata_location.ok_or_else(|| { + iceberg::Error::new( + iceberg::ErrorKind::FeatureUnsupported, + "Loading uncommitted table is not supported!", + ) + })?; + + let table_metadata = resp.metadata; + + let file_io = FileIO::from_path(&metadata_location)? 
+ .with_props(self.config.table_io_configs.iter()) + .build()?; + + Ok(TableV2::builder() + .file_io(file_io) + .identifier(TableIdent::new(namespace.clone(), creation.name)) + .metadata(table_metadata) + .build()) + }) + .map_err(|e| { + iceberg::Error::new( + iceberg::ErrorKind::Unexpected, + "Failed to crete iceberg table.", + ) + .with_source(e) + }) } /// Load table from the catalog. @@ -233,8 +312,8 @@ impl CatalogV2 for JniCatalog { let resp: LoadTableResponse = serde_json::from_str(&rust_json_str)?; let metadata_location = resp.metadata_location.ok_or_else(|| { - icelake::Error::new( - ErrorKind::IcebergFeatureUnsupported, + iceberg::Error::new( + iceberg::ErrorKind::FeatureUnsupported, "Loading uncommitted table is not supported!", ) })?; @@ -268,8 +347,32 @@ impl CatalogV2 for JniCatalog { } /// Check if a table exists in the catalog. - async fn table_exists(&self, _table: &TableIdent) -> iceberg::Result { - todo!() + async fn table_exists(&self, table: &TableIdent) -> iceberg::Result { + execute_with_jni_env(self.jvm, |env| { + let table_name_str = format!( + "{}.{}", + table.namespace().clone().inner().into_iter().join("."), + table.name() + ); + + let table_name_jstr = env.new_string(&table_name_str).unwrap(); + + let exists = + call_method!(env, self.java_catalog.as_obj(), {boolean tableExists(String)}, + &table_name_jstr) + .with_context(|| { + format!("Failed to check iceberg table exists: {table_name_str}") + })?; + + Ok(exists) + }) + .map_err(|e| { + iceberg::Error::new( + iceberg::ErrorKind::Unexpected, + "Failed to load iceberg table.", + ) + .with_source(e) + }) } /// Rename a table in the catalog. @@ -326,7 +429,7 @@ impl JniCatalog { config: base_config, }) }) - .map_err(Into::into) + .map_err(Into::into) } pub fn build_catalog( From 5ab2a59ed708f2d704455bbaa5e51e56e58db757 Mon Sep 17 00:00:00 2001 From: Dylan Date: Tue, 3 Sep 2024 14:30:41 +0800 Subject: [PATCH 09/26] feat(iceberg): support iceberg sink create table (#18362) --- e2e_test/sink/iceberg_sink.slt | 55 ++++++++++-- src/connector/src/sink/iceberg/mod.rs | 88 ++++++++++++++++++- .../src/sink/iceberg/storage_catalog.rs | 65 ++++++++++++-- src/connector/with_options_sink.yaml | 4 + src/frontend/src/handler/create_sink.rs | 4 + 5 files changed, 199 insertions(+), 17 deletions(-) diff --git a/e2e_test/sink/iceberg_sink.slt b/e2e_test/sink/iceberg_sink.slt index d55ec5d28b1f9..e3917908f651b 100644 --- a/e2e_test/sink/iceberg_sink.slt +++ b/e2e_test/sink/iceberg_sink.slt @@ -48,13 +48,55 @@ CREATE SOURCE iceberg_demo_source WITH ( table.name='e2e_demo_table' ); +statement ok +CREATE SINK s7 AS select mv6.v1 as v1, mv6.v2 as v2, mv6.v3 as v3 from mv6 WITH ( + connector = 'iceberg', + type = 'upsert', + primary_key = 'v1', + warehouse.path = 's3a://hummock001', + s3.endpoint = 'http://127.0.0.1:9301', + s3.access.key = secret iceberg_s3_access_key, + s3.secret.key = secret iceberg_s3_secret_key, + s3.region = 'us-east-1', + catalog.name = 'demo', + catalog.type = 'storage', + database.name='demo_db', + table.name='e2e_auto_create_table', + commit_checkpoint_interval = 1, + create_table_if_not_exists = 'true' +); + +statement ok +CREATE SOURCE iceberg_e2e_auto_create_table WITH ( + connector = 'iceberg', + warehouse.path = 's3a://hummock001', + s3.endpoint = 'http://127.0.0.1:9301', + s3.access.key = secret iceberg_s3_access_key, + s3.secret.key = secret iceberg_s3_secret_key, + s3.region = 'us-east-1', + catalog.name = 'demo', + catalog.type = 'storage', + database.name='demo_db', + 
table.name='e2e_auto_create_table' +); + statement ok INSERT INTO t6 VALUES (1, 2, '1-2'), (2, 2, '2-2'), (3, 2, '3-2'), (5, 2, '5-2'), (8, 2, '8-2'), (13, 2, '13-2'), (21, 2, '21-2'); statement ok FLUSH; -sleep 5s +sleep 20s + +query I +select count(*) from rw_iceberg_snapshots where source_name = 'iceberg_demo_source'; +---- +1 + +query I +select count(*) from rw_iceberg_snapshots where source_name = 'iceberg_e2e_auto_create_table'; +---- +1 query I select sum(record_count) from rw_iceberg_files where source_name = 'iceberg_demo_source'; @@ -62,9 +104,9 @@ select sum(record_count) from rw_iceberg_files where source_name = 'iceberg_demo 7 query I -select count(*) from rw_iceberg_snapshots where source_name = 'iceberg_demo_source'; +select sum(record_count) from rw_iceberg_files where source_name = 'iceberg_e2e_auto_create_table'; ---- -1 +7 statement ok INSERT INTO t6 VALUES (1, 50, '1-50'); @@ -78,10 +120,7 @@ statement ok DROP SOURCE iceberg_demo_source; statement ok -DROP SINK s6; - -statement ok -DROP MATERIALIZED VIEW mv6; +DROP SOURCE iceberg_e2e_auto_create_table; statement ok -DROP TABLE t6; +DROP TABLE t6 cascade; diff --git a/src/connector/src/sink/iceberg/mod.rs b/src/connector/src/sink/iceberg/mod.rs index 14dce5fd30ce0..b68e74b1f5d95 100644 --- a/src/connector/src/sink/iceberg/mod.rs +++ b/src/connector/src/sink/iceberg/mod.rs @@ -31,7 +31,7 @@ use async_trait::async_trait; use iceberg::io::{S3_ACCESS_KEY_ID, S3_ENDPOINT, S3_REGION, S3_SECRET_ACCESS_KEY}; use iceberg::spec::TableMetadata; use iceberg::table::Table as TableV2; -use iceberg::{Catalog as CatalogV2, TableIdent}; +use iceberg::{Catalog as CatalogV2, NamespaceIdent, TableCreation, TableIdent}; use iceberg_catalog_glue::{AWS_ACCESS_KEY_ID, AWS_REGION_NAME, AWS_SECRET_ACCESS_KEY}; use icelake::catalog::{ load_catalog, load_iceberg_base_catalog_config, BaseCatalogConfig, CatalogRef, CATALOG_NAME, @@ -43,9 +43,10 @@ use icelake::io_v2::{ DataFileWriterBuilder, EqualityDeltaWriterBuilder, IcebergWriterBuilder, DELETE_OP, INSERT_OP, }; use icelake::transaction::Transaction; -use icelake::types::{data_file_from_json, data_file_to_json, Any, DataFile}; +use icelake::types::{data_file_from_json, data_file_to_json, Any, DataFile, COLUMN_ID_META_KEY}; use icelake::{Table, TableIdentifier}; use itertools::Itertools; +use parquet::arrow::PARQUET_FIELD_ID_META_KEY; use risingwave_common::array::arrow::IcebergArrowConvert; use risingwave_common::array::{Op, StreamChunk}; use risingwave_common::bail; @@ -151,6 +152,9 @@ pub struct IcebergConfig { #[serde(default = "default_commit_checkpoint_interval")] #[serde_as(as = "DisplayFromStr")] pub commit_checkpoint_interval: u64, + + #[serde(default, deserialize_with = "deserialize_bool_from_string")] + pub create_table_if_not_exists: bool, } impl IcebergConfig { @@ -701,6 +705,10 @@ impl Debug for IcebergSink { impl IcebergSink { async fn create_and_validate_table(&self) -> Result { + if self.config.create_table_if_not_exists { + self.create_table_if_not_exists().await?; + } + let table = self .config .load_table() @@ -722,6 +730,79 @@ impl IcebergSink { Ok(table) } + async fn create_table_if_not_exists(&self) -> Result<()> { + let catalog = self.config.create_catalog_v2().await?; + let table_id = self + .config + .full_table_name_v2() + .context("Unable to parse table name")?; + if !catalog + .table_exists(&table_id) + .await + .map_err(|e| SinkError::Iceberg(anyhow!(e)))? 
+ { + let namespace = if let Some(database_name) = &self.config.database_name { + NamespaceIdent::new(database_name.clone()) + } else { + bail!("database name must be set if you want to create table") + }; + + // convert risingwave schema -> arrow schema -> iceberg schema + let arrow_fields = self + .param + .columns + .iter() + .map(|column| { + let mut arrow_field = IcebergArrowConvert + .to_arrow_field(&column.name, &column.data_type) + .map_err(|e| SinkError::Iceberg(anyhow!(e))) + .context(format!( + "failed to convert {}: {} to arrow type", + &column.name, &column.data_type + ))?; + let mut metadata = HashMap::new(); + metadata.insert( + PARQUET_FIELD_ID_META_KEY.to_string(), + column.column_id.get_id().to_string(), + ); + metadata.insert( + COLUMN_ID_META_KEY.to_string(), + column.column_id.get_id().to_string(), + ); + arrow_field.set_metadata(metadata); + Ok(arrow_field) + }) + .collect::>>()?; + let arrow_schema = arrow_schema_iceberg::Schema::new(arrow_fields); + let iceberg_schema = iceberg::arrow::arrow_schema_to_schema(&arrow_schema) + .map_err(|e| SinkError::Iceberg(anyhow!(e))) + .context("failed to convert arrow schema to iceberg schema")?; + + let location = { + let mut names = namespace.clone().inner(); + names.push(self.config.table_name.to_string()); + if self.config.path.ends_with('/') { + format!("{}{}", self.config.path, names.join("/")) + } else { + format!("{}/{}", self.config.path, names.join("/")) + } + }; + + let table_creation = TableCreation::builder() + .name(self.config.table_name.clone()) + .schema(iceberg_schema) + .location(location) + .build(); + + catalog + .create_table(&namespace, table_creation) + .await + .map_err(|e| SinkError::Iceberg(anyhow!(e))) + .context("failed to create iceberg table")?; + } + Ok(()) + } + pub fn new(config: IcebergConfig, param: SinkParam) -> Result { let unique_column_ids = if config.r#type == SINK_TYPE_UPSERT && !config.force_append_only { if let Some(pk) = &config.primary_key { @@ -1292,6 +1373,8 @@ pub fn try_matches_arrow_schema( let compatible = match (&converted_arrow_data_type, arrow_field.data_type()) { (ArrowDataType::Decimal128(_, _), ArrowDataType::Decimal128(_, _)) => true, + (ArrowDataType::Binary, ArrowDataType::LargeBinary) => true, + (ArrowDataType::LargeBinary, ArrowDataType::Binary) => true, (left, right) => left == right, }; if !compatible { @@ -1394,6 +1477,7 @@ mod test { .map(|(k, v)| (k.to_string(), v.to_string())) .collect(), commit_checkpoint_interval: DEFAULT_COMMIT_CHECKPOINT_INTERVAL, + create_table_if_not_exists: false, }; assert_eq!(iceberg_config, expected_iceberg_config); diff --git a/src/connector/src/sink/iceberg/storage_catalog.rs b/src/connector/src/sink/iceberg/storage_catalog.rs index 7fd025020e1d9..01adb510882a2 100644 --- a/src/connector/src/sink/iceberg/storage_catalog.rs +++ b/src/connector/src/sink/iceberg/storage_catalog.rs @@ -18,7 +18,7 @@ use std::collections::HashMap; use async_trait::async_trait; use iceberg::io::{FileIO, S3_ACCESS_KEY_ID, S3_ENDPOINT, S3_REGION, S3_SECRET_ACCESS_KEY}; -use iceberg::spec::TableMetadata; +use iceberg::spec::{TableMetadata, TableMetadataBuilder}; use iceberg::table::Table; use iceberg::{ Catalog, Error, ErrorKind, Namespace, NamespaceIdent, Result, TableCommit, TableCreation, @@ -218,10 +218,42 @@ impl Catalog for StorageCatalog { /// Create a new table inside the namespace. async fn create_table( &self, - _namespace: &NamespaceIdent, - _creation: TableCreation, + namespace: &NamespaceIdent, + creation: TableCreation, ) -> iceberg::Result
{ - todo!() + let table_ident = TableIdent::new(namespace.clone(), creation.name.clone()); + let table_path = { + let mut names = table_ident.namespace.clone().inner(); + names.push(table_ident.name.to_string()); + if self.warehouse.ends_with('/') { + format!("{}{}", self.warehouse, names.join("/")) + } else { + format!("{}/{}", self.warehouse, names.join("/")) + } + }; + + // Create the metadata directory + let metadata_path = format!("{table_path}/metadata"); + + // Create the initial table metadata + let table_metadata = TableMetadataBuilder::from_table_creation(creation)?.build()?; + + // Write the initial metadata file + let metadata_file_path = format!("{metadata_path}/v1.metadata.json"); + let metadata_json = serde_json::to_string(&table_metadata)?; + let output = self.file_io.new_output(&metadata_file_path)?; + output.write(metadata_json.into()).await?; + + // Write the version hint file + let version_hint_path = format!("{table_path}/metadata/version-hint.text"); + let version_hint_output = self.file_io.new_output(&version_hint_path)?; + version_hint_output.write("1".into()).await?; + + Ok(Table::builder() + .metadata(table_metadata) + .identifier(table_ident) + .file_io(self.file_io.clone()) + .build()) } /// Load table from the catalog. @@ -229,7 +261,11 @@ impl Catalog for StorageCatalog { let table_path = { let mut names = table.namespace.clone().inner(); names.push(table.name.to_string()); - format!("{}/{}", self.warehouse, names.join("/")) + if self.warehouse.ends_with('/') { + format!("{}{}", self.warehouse, names.join("/")) + } else { + format!("{}/{}", self.warehouse, names.join("/")) + } }; let path = if self.is_version_hint_exist(&table_path).await? { let version_hint = self.read_version_hint(&table_path).await?; @@ -262,8 +298,23 @@ impl Catalog for StorageCatalog { } /// Check if a table exists in the catalog. - async fn table_exists(&self, _table: &TableIdent) -> iceberg::Result { - todo!() + async fn table_exists(&self, table: &TableIdent) -> iceberg::Result { + let table_path = { + let mut names = table.namespace.clone().inner(); + names.push(table.name.to_string()); + if self.warehouse.ends_with('/') { + format!("{}{}", self.warehouse, names.join("/")) + } else { + format!("{}/{}", self.warehouse, names.join("/")) + } + }; + let metadata_path = format!("{table_path}/metadata/version-hint.text"); + self.file_io.is_exist(&metadata_path).await.map_err(|err| { + Error::new( + ErrorKind::Unexpected, + format!("Failed to check if table exists: {}", err.as_report()), + ) + }) } /// Rename a table in the catalog. diff --git a/src/connector/with_options_sink.yaml b/src/connector/with_options_sink.yaml index f321de880c72c..d028ef5e30198 100644 --- a/src/connector/with_options_sink.yaml +++ b/src/connector/with_options_sink.yaml @@ -348,6 +348,10 @@ IcebergConfig: comments: Commit every n(>0) checkpoints, default is 10. 
required: false default: DEFAULT_COMMIT_CHECKPOINT_INTERVAL + - name: create_table_if_not_exists + field_type: bool + required: false + default: Default::default KafkaConfig: fields: - name: properties.bootstrap.server diff --git a/src/frontend/src/handler/create_sink.rs b/src/frontend/src/handler/create_sink.rs index d5d2818f0c357..d0bd1d0cc8f2f 100644 --- a/src/frontend/src/handler/create_sink.rs +++ b/src/frontend/src/handler/create_sink.rs @@ -342,6 +342,10 @@ pub async fn get_partition_compute_info( async fn get_partition_compute_info_for_iceberg( iceberg_config: &IcebergConfig, ) -> Result> { + // TODO: check table if exists + if iceberg_config.create_table_if_not_exists { + return Ok(None); + } let table = iceberg_config.load_table().await?; let Some(partition_spec) = table.current_table_metadata().current_partition_spec().ok() else { return Ok(None); From 51d3c63f702ddf8860f6223a1e9a425e034d3375 Mon Sep 17 00:00:00 2001 From: xxchan Date: Tue, 3 Sep 2024 16:03:15 +0800 Subject: [PATCH 10/26] fix: `DROP DATABASE` doesn't clean up the source stream job (in v1) (#18033) Signed-off-by: xxchan --- src/meta/src/manager/catalog/mod.rs | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/meta/src/manager/catalog/mod.rs b/src/meta/src/manager/catalog/mod.rs index 81e4f1c4d96c3..12c1596841f67 100644 --- a/src/meta/src/manager/catalog/mod.rs +++ b/src/meta/src/manager/catalog/mod.rs @@ -625,20 +625,21 @@ impl CatalogManager { .notify_frontend(Operation::Delete, Info::Database(database)) .await; - let catalog_deleted_ids = tables_to_drop + let streaming_job_deleted_ids = tables_to_drop .into_iter() .filter(|table| valid_table_name(&table.name)) .map(|table| StreamingJobId::new(table.id)) + .chain(sources_to_drop.iter().filter_map(|source| { + source + .info + .as_ref() + .and_then(|info| info.is_shared().then(|| StreamingJobId::new(source.id))) + })) .chain( sinks_to_drop .into_iter() .map(|sink| StreamingJobId::new(sink.id)), ) - .chain( - subscriptions_to_drop - .into_iter() - .map(|subscription| StreamingJobId::new(subscription.id)), - ) .collect_vec(); let source_deleted_ids = sources_to_drop .into_iter() @@ -647,7 +648,7 @@ impl CatalogManager { Ok(( version, - catalog_deleted_ids, + streaming_job_deleted_ids, source_deleted_ids, connections_dropped, )) From c519f0dbb13cd66d1b5cb5bfb1325150f0e5d70b Mon Sep 17 00:00:00 2001 From: Bohan Zhang Date: Tue, 3 Sep 2024 16:13:14 +0800 Subject: [PATCH 11/26] feat(telemetry): add telemetry data point when checking license (#18371) Signed-off-by: tabVersion --- Cargo.lock | 3 +++ src/license/Cargo.toml | 3 +++ src/license/src/feature.rs | 18 +++++++++++++++--- src/license/src/lib.rs | 23 +++++++++++++++++++++++ 4 files changed, 44 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c8bb3bb7afa86..c1b1ec57fdece 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -11166,7 +11166,10 @@ name = "risingwave_license" version = "2.1.0-alpha" dependencies = [ "expect-test", + "jsonbb", "jsonwebtoken", + "risingwave_pb", + "risingwave_telemetry_event", "serde", "thiserror", "thiserror-ext", diff --git a/src/license/Cargo.toml b/src/license/Cargo.toml index 47e00228626b8..b435747467e21 100644 --- a/src/license/Cargo.toml +++ b/src/license/Cargo.toml @@ -15,7 +15,10 @@ ignored = ["workspace-hack"] normal = ["workspace-hack"] [dependencies] +jsonbb = { workspace = true } jsonwebtoken = "9" +risingwave_pb = { workspace = true } +risingwave_telemetry_event = { workspace = true } serde = { version = "1", features = 
["derive"] } thiserror = "1" thiserror-ext = { workspace = true } diff --git a/src/license/src/feature.rs b/src/license/src/feature.rs index b7082c01dd7b4..583ef93a45863 100644 --- a/src/license/src/feature.rs +++ b/src/license/src/feature.rs @@ -14,7 +14,7 @@ use thiserror::Error; -use super::{License, LicenseKeyError, LicenseManager, Tier}; +use super::{report_telemetry, License, LicenseKeyError, LicenseManager, Tier}; /// Define all features that are available based on the tier of the license. /// @@ -84,6 +84,14 @@ macro_rules! def_feature { )* } } + + fn get_feature_name(&self) -> &'static str { + match &self { + $( + Self::$name => stringify!($name), + )* + } + } } }; } @@ -113,7 +121,7 @@ pub enum FeatureNotAvailable { impl Feature { /// Check whether the feature is available based on the current license. pub fn check_available(self) -> Result<(), FeatureNotAvailable> { - match LicenseManager::get().license() { + let check_res = match LicenseManager::get().license() { Ok(license) => { if license.tier >= self.min_tier() { Ok(()) @@ -136,6 +144,10 @@ impl Feature { }) } } - } + }; + + report_telemetry(&self, self.get_feature_name(), check_res.is_ok()); + + check_res } } diff --git a/src/license/src/lib.rs b/src/license/src/lib.rs index e2a3275780098..cf62dbab1d491 100644 --- a/src/license/src/lib.rs +++ b/src/license/src/lib.rs @@ -20,3 +20,26 @@ mod manager; pub use feature::*; pub use key::*; pub use manager::*; +use risingwave_pb::telemetry::PbTelemetryEventStage; +use risingwave_telemetry_event::report_event_common; + +pub(crate) fn report_telemetry(feature: &Feature, feature_name: &str, success_flag: bool) { + if matches!(feature, Feature::TestPaid) { + let mut attr_builder = jsonbb::Builder::>::new(); + attr_builder.begin_object(); + attr_builder.add_string("success"); + attr_builder.add_value(jsonbb::ValueRef::Bool(success_flag)); + attr_builder.end_object(); + let attr = attr_builder.finish(); + + report_event_common( + PbTelemetryEventStage::Unspecified, + feature_name, + 0, + None, + None, + Some(attr), + "paywall".to_string(), + ); + } +} From c0ce8a8a6b4d51e463a605fb307fa4b014e0f609 Mon Sep 17 00:00:00 2001 From: Xinhao Xu <84456268+xxhZs@users.noreply.github.com> Date: Tue, 3 Sep 2024 16:53:03 +0800 Subject: [PATCH 12/26] feat(sink): support async for bigquery sink (#17488) --- src/connector/src/sink/big_query.rs | 379 ++++++++++++++++----------- src/connector/with_options_sink.yaml | 8 - 2 files changed, 231 insertions(+), 156 deletions(-) diff --git a/src/connector/src/sink/big_query.rs b/src/connector/src/sink/big_query.rs index 22146e86d0d1d..235b1ff5b6539 100644 --- a/src/connector/src/sink/big_query.rs +++ b/src/connector/src/sink/big_query.rs @@ -12,19 +12,21 @@ // See the License for the specific language governing permissions and // limitations under the License. 
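+// The sink below decouples request submission from acknowledgement: append requests are
+// pushed into an unbounded channel feeding the BigQuery storage write API, while the
+// corresponding `AppendRowsResponse` stream is awaited separately, so the log store is
+// truncated only once the matching responses have arrived.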
+use core::pin::Pin; use core::time::Duration; -use std::collections::{BTreeMap, HashMap}; -use std::sync::Arc; +use std::collections::{BTreeMap, HashMap, VecDeque}; use anyhow::{anyhow, Context}; -use async_trait::async_trait; +use futures::future::pending; +use futures::prelude::Future; +use futures::{Stream, StreamExt}; +use futures_async_stream::try_stream; use gcp_bigquery_client::error::BQError; use gcp_bigquery_client::model::query_request::QueryRequest; use gcp_bigquery_client::model::table::Table; use gcp_bigquery_client::model::table_field_schema::TableFieldSchema; use gcp_bigquery_client::model::table_schema::TableSchema; use gcp_bigquery_client::Client; -use google_cloud_bigquery::grpc::apiv1::bigquery_client::StreamingWriteClient; use google_cloud_bigquery::grpc::apiv1::conn_pool::{WriteConnectionManager, DOMAIN}; use google_cloud_gax::conn::{ConnectionOptions, Environment}; use google_cloud_gax::grpc::Request; @@ -32,7 +34,7 @@ use google_cloud_googleapis::cloud::bigquery::storage::v1::append_rows_request:: ProtoData, Rows as AppendRowsRequestRows, }; use google_cloud_googleapis::cloud::bigquery::storage::v1::{ - AppendRowsRequest, ProtoRows, ProtoSchema, + AppendRowsRequest, AppendRowsResponse, ProtoRows, ProtoSchema, }; use google_cloud_pubsub::client::google_cloud_auth; use google_cloud_pubsub::client::google_cloud_auth::credentials::CredentialsFile; @@ -42,32 +44,35 @@ use prost_types::{ FileDescriptorSet, }; use risingwave_common::array::{Op, StreamChunk}; -use risingwave_common::bitmap::Bitmap; use risingwave_common::catalog::{Field, Schema}; use risingwave_common::types::DataType; use serde_derive::Deserialize; use serde_with::{serde_as, DisplayFromStr}; use simd_json::prelude::ArrayTrait; +use tokio::sync::mpsc; +use tonic::{async_trait, Response, Status}; use url::Url; use uuid::Uuid; use with_options::WithOptions; use yup_oauth2::ServiceAccountKey; use super::encoder::{ProtoEncoder, ProtoHeader, RowEncoder, SerTo}; -use super::writer::LogSinkerOf; -use super::{SinkError, SINK_TYPE_APPEND_ONLY, SINK_TYPE_OPTION, SINK_TYPE_UPSERT}; +use super::log_store::{LogStoreReadItem, TruncateOffset}; +use super::{ + LogSinker, SinkError, SinkLogReader, SINK_TYPE_APPEND_ONLY, SINK_TYPE_OPTION, SINK_TYPE_UPSERT, +}; use crate::aws_utils::load_file_descriptor_from_s3; use crate::connector_common::AwsAuthProps; -use crate::sink::writer::SinkWriterExt; -use crate::sink::{ - DummySinkCommitCoordinator, Result, Sink, SinkParam, SinkWriter, SinkWriterParam, -}; +use crate::sink::{DummySinkCommitCoordinator, Result, Sink, SinkParam, SinkWriterParam}; pub const BIGQUERY_SINK: &str = "bigquery"; pub const CHANGE_TYPE: &str = "_CHANGE_TYPE"; const DEFAULT_GRPC_CHANNEL_NUMS: usize = 4; const CONNECT_TIMEOUT: Option = Some(Duration::from_secs(30)); const CONNECTION_TIMEOUT: Option = None; +const BIGQUERY_SEND_FUTURE_BUFFER_MAX_SIZE: usize = 65536; +// < 10MB, we set 8MB +const MAX_ROW_SIZE: usize = 8 * 1024 * 1024; #[serde_as] #[derive(Deserialize, Debug, Clone, WithOptions)] @@ -82,23 +87,100 @@ pub struct BigQueryCommon { pub dataset: String, #[serde(rename = "bigquery.table")] pub table: String, - #[serde(rename = "bigquery.max_batch_rows", default = "default_max_batch_rows")] - #[serde_as(as = "DisplayFromStr")] - pub max_batch_rows: usize, - #[serde(rename = "bigquery.retry_times", default = "default_retry_times")] - #[serde_as(as = "DisplayFromStr")] - pub retry_times: usize, #[serde(default)] // default false #[serde_as(as = "DisplayFromStr")] pub auto_create: bool, } -fn 
default_max_batch_rows() -> usize { - 1024 +struct BigQueryFutureManager { + // `offset_queue` holds the Some corresponding to each future. + // When TruncateOffset is barrier, the num is 0, we don't need to wait for the return of `resp_stream`. + // When TruncateOffset is chunk: + // 1. chunk has no rows. we didn't send, the num is 0, we don't need to wait for the return of `resp_stream`. + // 2. chunk is less than `MAX_ROW_SIZE`, we only sent once, the num is 1 and we only have to wait once for `resp_stream`. + // 3. chunk is less than `MAX_ROW_SIZE`, we only sent n, the num is n and we need to wait n times for r. + offset_queue: VecDeque<(TruncateOffset, usize)>, + resp_stream: Pin> + Send>>, } +impl BigQueryFutureManager { + pub fn new( + max_future_num: usize, + resp_stream: impl Stream> + Send + 'static, + ) -> Self { + let offset_queue = VecDeque::with_capacity(max_future_num); + Self { + offset_queue, + resp_stream: Box::pin(resp_stream), + } + } + + pub fn add_offset(&mut self, offset: TruncateOffset, resp_num: usize) { + self.offset_queue.push_back((offset, resp_num)); + } -fn default_retry_times() -> usize { - 5 + pub async fn next_offset(&mut self) -> Result { + if let Some((_offset, remaining_resp_num)) = self.offset_queue.front_mut() { + if *remaining_resp_num == 0 { + return Ok(self.offset_queue.pop_front().unwrap().0); + } + while *remaining_resp_num > 0 { + self.resp_stream + .next() + .await + .ok_or_else(|| SinkError::BigQuery(anyhow::anyhow!("end of stream")))??; + *remaining_resp_num -= 1; + } + Ok(self.offset_queue.pop_front().unwrap().0) + } else { + pending().await + } + } +} +pub struct BigQueryLogSinker { + writer: BigQuerySinkWriter, + bigquery_future_manager: BigQueryFutureManager, + future_num: usize, +} +impl BigQueryLogSinker { + pub fn new( + writer: BigQuerySinkWriter, + resp_stream: impl Stream> + Send + 'static, + future_num: usize, + ) -> Self { + Self { + writer, + bigquery_future_manager: BigQueryFutureManager::new(future_num, resp_stream), + future_num, + } + } +} + +#[async_trait] +impl LogSinker for BigQueryLogSinker { + async fn consume_log_and_sink(mut self, log_reader: &mut impl SinkLogReader) -> Result { + loop { + tokio::select!( + offset = self.bigquery_future_manager.next_offset() => { + log_reader.truncate(offset?)?; + } + item_result = log_reader.next_item(), if self.bigquery_future_manager.offset_queue.len() <= self.future_num => { + let (epoch, item) = item_result?; + match item { + LogStoreReadItem::StreamChunk { chunk_id, chunk } => { + let resp_num = self.writer.write_chunk(chunk)?; + self.bigquery_future_manager + .add_offset(TruncateOffset::Chunk { epoch, chunk_id },resp_num); + } + LogStoreReadItem::Barrier { .. 
} => { + self.bigquery_future_manager + .add_offset(TruncateOffset::Barrier { epoch },0); + } + LogStoreReadItem::UpdateVnodeBitmap(_) => {} + } + } + ) + } + } } impl BigQueryCommon { @@ -116,14 +198,13 @@ impl BigQueryCommon { async fn build_writer_client( &self, aws_auth_props: &AwsAuthProps, - ) -> Result { + ) -> Result<(StorageWriterClient, impl Stream>)> { let auth_json = self.get_auth_json_from_path(aws_auth_props).await?; let credentials_file = CredentialsFile::new_from_str(&auth_json) .await .map_err(|e| SinkError::BigQuery(e.into()))?; - let client = StorageWriterClient::new(credentials_file).await?; - Ok(client) + StorageWriterClient::new(credentials_file).await } async fn get_auth_json_from_path(&self, aws_auth_props: &AwsAuthProps) -> Result { @@ -342,19 +423,23 @@ impl BigQuerySink { impl Sink for BigQuerySink { type Coordinator = DummySinkCommitCoordinator; - type LogSinker = LogSinkerOf; + type LogSinker = BigQueryLogSinker; const SINK_NAME: &'static str = BIGQUERY_SINK; - async fn new_log_sinker(&self, writer_param: SinkWriterParam) -> Result { - Ok(BigQuerySinkWriter::new( + async fn new_log_sinker(&self, _writer_param: SinkWriterParam) -> Result { + let (writer, resp_stream) = BigQuerySinkWriter::new( self.config.clone(), self.schema.clone(), self.pk_indices.clone(), self.is_append_only, ) - .await? - .into_log_sinker(writer_param.sink_metrics)) + .await?; + Ok(BigQueryLogSinker::new( + writer, + resp_stream, + BIGQUERY_SEND_FUTURE_BUFFER_MAX_SIZE, + )) } async fn validate(&self) -> Result<()> { @@ -446,8 +531,6 @@ pub struct BigQuerySinkWriter { message_descriptor: MessageDescriptor, write_stream: String, proto_field: Option, - write_rows: Vec, - write_rows_count: usize, } impl TryFrom for BigQuerySink { @@ -471,8 +554,8 @@ impl BigQuerySinkWriter { schema: Schema, pk_indices: Vec, is_append_only: bool, - ) -> Result { - let client = config + ) -> Result<(Self, impl Stream>)> { + let (client, resp_stream) = config .common .build_writer_client(&config.aws_auth_props) .await?; @@ -519,25 +602,26 @@ impl BigQuerySinkWriter { message_descriptor.clone(), ProtoHeader::None, )?; - Ok(Self { - write_stream: format!( - "projects/{}/datasets/{}/tables/{}/streams/_default", - config.common.project, config.common.dataset, config.common.table - ), - config, - schema, - pk_indices, - client, - is_append_only, - row_encoder, - message_descriptor, - proto_field, - writer_pb_schema: ProtoSchema { - proto_descriptor: Some(descriptor_proto), + Ok(( + Self { + write_stream: format!( + "projects/{}/datasets/{}/tables/{}/streams/_default", + config.common.project, config.common.dataset, config.common.table + ), + config, + schema, + pk_indices, + client, + is_append_only, + row_encoder, + message_descriptor, + proto_field, + writer_pb_schema: ProtoSchema { + proto_descriptor: Some(descriptor_proto), + }, }, - write_rows: vec![], - write_rows_count: 0, - }) + resp_stream, + )) } fn append_only(&mut self, chunk: StreamChunk) -> Result>> { @@ -588,82 +672,96 @@ impl BigQuerySinkWriter { Ok(serialized_rows) } - async fn write_rows(&mut self) -> Result<()> { - if self.write_rows.is_empty() { - return Ok(()); - } - let mut errs = Vec::with_capacity(self.config.common.retry_times); - for _ in 0..self.config.common.retry_times { - match self - .client - .append_rows(self.write_rows.clone(), self.write_stream.clone()) - .await - { - Ok(_) => { - self.write_rows_count = 0; - self.write_rows.clear(); - return Ok(()); - } - Err(e) => errs.push(e), - } - } - Err(SinkError::BigQuery(anyhow::anyhow!( - 
"Insert error {:?}", - errs - ))) - } -} - -#[async_trait] -impl SinkWriter for BigQuerySinkWriter { - async fn write_batch(&mut self, chunk: StreamChunk) -> Result<()> { + fn write_chunk(&mut self, chunk: StreamChunk) -> Result { let serialized_rows = if self.is_append_only { self.append_only(chunk)? } else { self.upsert(chunk)? }; - if !serialized_rows.is_empty() { - self.write_rows_count += serialized_rows.len(); + if serialized_rows.is_empty() { + return Ok(0); + } + let mut result = Vec::new(); + let mut result_inner = Vec::new(); + let mut size_count = 0; + for i in serialized_rows { + size_count += i.len(); + if size_count > MAX_ROW_SIZE { + result.push(result_inner); + result_inner = Vec::new(); + size_count = i.len(); + } + result_inner.push(i); + } + if !result_inner.is_empty() { + result.push(result_inner); + } + let len = result.len(); + for serialized_rows in result { let rows = AppendRowsRequestRows::ProtoRows(ProtoData { writer_schema: Some(self.writer_pb_schema.clone()), rows: Some(ProtoRows { serialized_rows }), }); - self.write_rows.push(rows); - - if self.write_rows_count >= self.config.common.max_batch_rows { - self.write_rows().await?; - } + self.client.append_rows(rows, self.write_stream.clone())?; } - Ok(()) - } - - async fn begin_epoch(&mut self, _epoch: u64) -> Result<()> { - Ok(()) - } - - async fn abort(&mut self) -> Result<()> { - Ok(()) + Ok(len) } +} - async fn barrier(&mut self, is_checkpoint: bool) -> Result<()> { - if is_checkpoint { - self.write_rows().await?; +#[try_stream(ok = (), error = SinkError)] +pub async fn resp_to_stream( + resp_stream: impl Future< + Output = std::result::Result< + Response>, + Status, + >, + > + + 'static + + Send, +) { + let mut resp_stream = resp_stream + .await + .map_err(|e| SinkError::BigQuery(e.into()))? + .into_inner(); + loop { + match resp_stream + .message() + .await + .map_err(|e| SinkError::BigQuery(e.into()))? 
+ { + Some(append_rows_response) => { + if !append_rows_response.row_errors.is_empty() { + return Err(SinkError::BigQuery(anyhow::anyhow!( + "bigquery insert error {:?}", + append_rows_response.row_errors + ))); + } + if let Some(google_cloud_googleapis::cloud::bigquery::storage::v1::append_rows_response::Response::Error(status)) = append_rows_response.response{ + return Err(SinkError::BigQuery(anyhow::anyhow!( + "bigquery insert error {:?}", + status + ))); + } + yield (); + } + None => { + return Err(SinkError::BigQuery(anyhow::anyhow!( + "bigquery insert error: end of resp stream", + ))); + } } - Ok(()) - } - - async fn update_vnode_bitmap(&mut self, _vnode_bitmap: Arc) -> Result<()> { - Ok(()) } } struct StorageWriterClient { - client: StreamingWriteClient, #[expect(dead_code)] environment: Environment, + request_sender: mpsc::UnboundedSender, } impl StorageWriterClient { - pub async fn new(credentials: CredentialsFile) -> Result { + pub async fn new( + credentials: CredentialsFile, + ) -> Result<(Self, impl Stream>)> { let ts_grpc = google_cloud_auth::token::DefaultTokenSourceProvider::new_with_credentials( Self::bigquery_grpc_auth_config(), Box::new(credentials), @@ -683,49 +781,34 @@ impl StorageWriterClient { ) .await .map_err(|e| SinkError::BigQuery(e.into()))?; - let client = conn.conn(); - Ok(StorageWriterClient { - client, - environment, - }) + let mut client = conn.conn(); + + let (tx, rx) = mpsc::unbounded_channel(); + let stream = tokio_stream::wrappers::UnboundedReceiverStream::new(rx); + + let resp = async move { client.append_rows(Request::new(stream)).await }; + let resp_stream = resp_to_stream(resp); + + Ok(( + StorageWriterClient { + environment, + request_sender: tx, + }, + resp_stream, + )) } - pub async fn append_rows( - &mut self, - rows: Vec, - write_stream: String, - ) -> Result<()> { - let mut resp_count = rows.len(); - let append_req: Vec = rows - .into_iter() - .map(|row| AppendRowsRequest { - write_stream: write_stream.clone(), - offset: None, - trace_id: Uuid::new_v4().hyphenated().to_string(), - missing_value_interpretations: HashMap::default(), - rows: Some(row), - }) - .collect(); - let mut resp = self - .client - .append_rows(Request::new(tokio_stream::iter(append_req))) - .await - .map_err(|e| SinkError::BigQuery(e.into()))? - .into_inner(); - while let Some(append_rows_response) = resp - .message() - .await - .map_err(|e| SinkError::BigQuery(e.into()))? 
- { - resp_count -= 1; - if !append_rows_response.row_errors.is_empty() { - return Err(SinkError::BigQuery(anyhow::anyhow!( - "Insert error {:?}", - append_rows_response.row_errors - ))); - } - } - assert_eq!(resp_count,0,"bigquery sink insert error: the number of response inserted is not equal to the number of request"); + pub fn append_rows(&mut self, row: AppendRowsRequestRows, write_stream: String) -> Result<()> { + let append_req = AppendRowsRequest { + write_stream: write_stream.clone(), + offset: None, + trace_id: Uuid::new_v4().hyphenated().to_string(), + missing_value_interpretations: HashMap::default(), + rows: Some(row), + }; + self.request_sender + .send(append_req) + .map_err(|e| SinkError::BigQuery(e.into()))?; Ok(()) } diff --git a/src/connector/with_options_sink.yaml b/src/connector/with_options_sink.yaml index d028ef5e30198..cc92f9a0a664a 100644 --- a/src/connector/with_options_sink.yaml +++ b/src/connector/with_options_sink.yaml @@ -41,14 +41,6 @@ BigQueryConfig: - name: bigquery.table field_type: String required: true - - name: bigquery.max_batch_rows - field_type: usize - required: false - default: '1024' - - name: bigquery.retry_times - field_type: usize - required: false - default: '5' - name: auto_create field_type: bool required: false From c843edcd298be55a0bdc938dfa3c240676fba9c1 Mon Sep 17 00:00:00 2001 From: xxchan Date: Tue, 3 Sep 2024 19:45:27 +0800 Subject: [PATCH 13/26] feat: support arrow map -> rw map (#18375) Signed-off-by: xxchan --- src/common/src/array/arrow/arrow_impl.rs | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/src/common/src/array/arrow/arrow_impl.rs b/src/common/src/array/arrow/arrow_impl.rs index 7d69b50afed49..acc39bc951975 100644 --- a/src/common/src/array/arrow/arrow_impl.rs +++ b/src/common/src/array/arrow/arrow_impl.rs @@ -514,6 +514,12 @@ pub trait FromArrow { LargeBinary => self.from_large_binary()?, List(field) => DataType::List(Box::new(self.from_field(field)?)), Struct(fields) => DataType::Struct(self.from_fields(fields)?), + Map(field, _is_sorted) => { + let entries = self.from_field(field)?; + DataType::Map(MapType::try_from_entries(entries).map_err(|e| { + ArrayError::from_arrow(format!("invalid arrow map field: {field:?}, err: {e}")) + })?) 
+ } t => { return Err(ArrayError::from_arrow(format!( "unsupported arrow data type: {t:?}" @@ -588,6 +594,7 @@ pub trait FromArrow { LargeBinary => self.from_large_binary_array(array.as_any().downcast_ref().unwrap()), List(_) => self.from_list_array(array.as_any().downcast_ref().unwrap()), Struct(_) => self.from_struct_array(array.as_any().downcast_ref().unwrap()), + Map(_, _) => self.from_map_array(array.as_any().downcast_ref().unwrap()), t => Err(ArrayError::from_arrow(format!( "unsupported arrow data type: {t:?}", ))), @@ -754,6 +761,21 @@ pub trait FromArrow { (0..array.len()).map(|i| array.is_valid(i)).collect(), ))) } + + fn from_map_array(&self, array: &arrow_array::MapArray) -> Result { + use arrow_array::Array; + let struct_array = self.from_struct_array(array.entries())?; + let list_array = ListArray { + value: Box::new(struct_array), + bitmap: match array.nulls() { + Some(nulls) => nulls.iter().collect(), + None => Bitmap::ones(array.len()), + }, + offsets: array.offsets().iter().map(|o| *o as u32).collect(), + }; + + Ok(ArrayImpl::Map(MapArray { inner: list_array })) + } } impl From<&Bitmap> for arrow_buffer::NullBuffer { From e9d77a4d5166f78cb65d6b3a8d6d5d67f56b5b05 Mon Sep 17 00:00:00 2001 From: stonepage <40830455+st1page@users.noreply.github.com> Date: Wed, 4 Sep 2024 00:31:12 +0800 Subject: [PATCH 14/26] feat(streaming): add `must_output_per_barrier` flag for stream simple agg (#18374) Co-authored-by: Eric Fu --- proto/stream_plan.proto | 3 + .../tests/testdata/output/agg.yaml | 200 +++++++++--------- .../tests/testdata/output/append_only.yaml | 2 +- .../tests/testdata/output/bushy_join.yaml | 2 +- .../tests/testdata/output/ch_benchmark.yaml | 35 +-- .../tests/testdata/output/cse_expr.yaml | 2 +- .../tests/testdata/output/dynamic_filter.yaml | 16 +- .../testdata/output/lateral_subquery.yaml | 2 +- .../tests/testdata/output/limit.yaml | 4 +- .../tests/testdata/output/mv_column_name.yaml | 2 +- .../tests/testdata/output/nexmark.yaml | 8 +- .../tests/testdata/output/nexmark_source.yaml | 8 +- .../output/nexmark_temporal_filter.yaml | 8 +- .../testdata/output/nexmark_watermark.yaml | 8 +- .../tests/testdata/output/share.yaml | 16 +- .../testdata/output/stream_dist_agg.yaml | 72 +++---- .../tests/testdata/output/temporal_join.yaml | 2 +- .../tests/testdata/output/tpch.yaml | 32 +-- .../src/optimizer/plan_node/logical_agg.rs | 72 ++++--- .../optimizer/plan_node/stream_simple_agg.rs | 22 +- .../plan_node/stream_stateless_simple_agg.rs | 1 + src/stream/src/executor/agg_common.rs | 4 +- src/stream/src/executor/integration_tests.rs | 1 + src/stream/src/executor/simple_agg.rs | 95 ++++++++- src/stream/src/executor/test_utils.rs | 5 +- src/stream/src/from_proto/simple_agg.rs | 5 +- 26 files changed, 380 insertions(+), 247 deletions(-) diff --git a/proto/stream_plan.proto b/proto/stream_plan.proto index 4148e78690745..5ea2f018eee20 100644 --- a/proto/stream_plan.proto +++ b/proto/stream_plan.proto @@ -365,6 +365,9 @@ message SimpleAggNode { map distinct_dedup_tables = 6; uint32 row_count_index = 7; AggNodeVersion version = 8; + // Required by the downstream `RowMergeNode`, + // currently only used by the `approx_percentile`'s two phase plan + bool must_output_per_barrier = 9; } message HashAggNode { diff --git a/src/frontend/planner_test/tests/testdata/output/agg.yaml b/src/frontend/planner_test/tests/testdata/output/agg.yaml index e44426caa3a49..da2a391a8c603 100644 --- a/src/frontend/planner_test/tests/testdata/output/agg.yaml +++ 
b/src/frontend/planner_test/tests/testdata/output/agg.yaml @@ -71,7 +71,7 @@ └─BatchScan { table: t, columns: [t.v1, t.v2, t.v3], distribution: SomeShard } stream_plan: |- StreamMaterialize { columns: [agg], stream_key: [], pk_columns: [], pk_conflict: NoCheck } - └─StreamProject { exprs: [(min(min(t.v1)) + (max(max(t.v2)) * sum0(count(t.v3)))) as $expr1], noop_update_hint: true } + └─StreamProject { exprs: [(min(min(t.v1)) + (max(max(t.v2)) * sum0(count(t.v3)))) as $expr1] } └─StreamSimpleAgg { aggs: [min(min(t.v1)), max(max(t.v2)), sum0(count(t.v3)), count] } └─StreamExchange { dist: Single } └─StreamHashAgg { group_key: [_vnode], aggs: [min(t.v1), max(t.v2), count(t.v3), count] } @@ -273,7 +273,7 @@ └─BatchScan { table: t, columns: [t.v1, t.v2], distribution: SomeShard } stream_plan: |- StreamMaterialize { columns: [cnt, sum], stream_key: [], pk_columns: [], pk_conflict: NoCheck } - └─StreamProject { exprs: [sum0(count($expr1)), sum(sum($expr1))], noop_update_hint: true } + └─StreamProject { exprs: [sum0(count($expr1)), sum(sum($expr1))] } └─StreamSimpleAgg { aggs: [sum0(count($expr1)), sum(sum($expr1)), count] } └─StreamExchange { dist: Single } └─StreamStatelessSimpleAgg { aggs: [count($expr1), sum($expr1)] } @@ -571,7 +571,7 @@ └─BatchScan { table: t, columns: [t.v1, t.v2, t.v3], distribution: SomeShard } stream_plan: |- StreamMaterialize { columns: [agg], stream_key: [], pk_columns: [], pk_conflict: NoCheck } - └─StreamProject { exprs: [(min(min(t.v1)) + (max(max(t.v3)) * sum0(count(t.v2)))) as $expr1], noop_update_hint: true } + └─StreamProject { exprs: [(min(min(t.v1)) + (max(max(t.v3)) * sum0(count(t.v2)))) as $expr1] } └─StreamSimpleAgg { aggs: [min(min(t.v1)), max(max(t.v3)), sum0(count(t.v2)), count] } └─StreamExchange { dist: Single } └─StreamHashAgg { group_key: [_vnode], aggs: [min(t.v1), max(t.v3), count(t.v2), count] } @@ -628,7 +628,7 @@ └─LogicalScan { table: t, columns: [t.v1] } stream_plan: |- StreamMaterialize { columns: [s1], stream_key: [], pk_columns: [], pk_conflict: NoCheck } - └─StreamProject { exprs: [sum(sum(t.v1))], noop_update_hint: true } + └─StreamProject { exprs: [sum(sum(t.v1))] } └─StreamSimpleAgg { aggs: [sum(sum(t.v1)), count] } └─StreamExchange { dist: Single } └─StreamStatelessSimpleAgg { aggs: [sum(t.v1)] } @@ -647,7 +647,7 @@ └─LogicalScan { table: t, columns: [t.v1] } stream_plan: |- StreamMaterialize { columns: [s1], stream_key: [], pk_columns: [], pk_conflict: NoCheck } - └─StreamProject { exprs: [sum(sum(t.v1))], noop_update_hint: true } + └─StreamProject { exprs: [sum(sum(t.v1))] } └─StreamSimpleAgg { aggs: [sum(sum(t.v1)), count] } └─StreamExchange { dist: Single } └─StreamStatelessSimpleAgg { aggs: [sum(t.v1)] } @@ -666,7 +666,7 @@ └─LogicalScan { table: t, columns: [t.v1] } stream_plan: |- StreamMaterialize { columns: [s1], stream_key: [], pk_columns: [], pk_conflict: NoCheck } - └─StreamProject { exprs: [sum(sum(t.v1))], noop_update_hint: true } + └─StreamProject { exprs: [sum(sum(t.v1))] } └─StreamSimpleAgg { aggs: [sum(sum(t.v1)), count] } └─StreamExchange { dist: Single } └─StreamStatelessSimpleAgg { aggs: [sum(t.v1)] } @@ -685,7 +685,7 @@ └─LogicalScan { table: t, columns: [t.v1] } stream_plan: |- StreamMaterialize { columns: [sa], stream_key: [], pk_columns: [], pk_conflict: NoCheck } - └─StreamProject { exprs: [sum(sum(t.v1) filter((t.v1 > 0:Int32)))], noop_update_hint: true } + └─StreamProject { exprs: [sum(sum(t.v1) filter((t.v1 > 0:Int32)))] } └─StreamSimpleAgg { aggs: [sum(sum(t.v1) filter((t.v1 > 0:Int32))), count] } 
└─StreamExchange { dist: Single } └─StreamStatelessSimpleAgg { aggs: [sum(t.v1) filter((t.v1 > 0:Int32))] } @@ -720,7 +720,7 @@ └─LogicalScan { table: t, columns: [t.a, t.b] } stream_plan: |- StreamMaterialize { columns: [sab], stream_key: [], pk_columns: [], pk_conflict: NoCheck } - └─StreamProject { exprs: [max(max($expr1) filter((t.a < t.b) AND ((t.a + t.b) < 100:Int32) AND ((t.a * t.b) <> ((t.a + t.b) - 1:Int32))))], noop_update_hint: true } + └─StreamProject { exprs: [max(max($expr1) filter((t.a < t.b) AND ((t.a + t.b) < 100:Int32) AND ((t.a * t.b) <> ((t.a + t.b) - 1:Int32))))] } └─StreamSimpleAgg { aggs: [max(max($expr1) filter((t.a < t.b) AND ((t.a + t.b) < 100:Int32) AND ((t.a * t.b) <> ((t.a + t.b) - 1:Int32)))), count] } └─StreamExchange { dist: Single } └─StreamHashAgg { group_key: [$expr2], aggs: [max($expr1) filter((t.a < t.b) AND ((t.a + t.b) < 100:Int32) AND ((t.a * t.b) <> ((t.a + t.b) - 1:Int32))), count] } @@ -759,7 +759,7 @@ └─LogicalScan { table: t, columns: [t.a, t.b] } stream_plan: |- StreamMaterialize { columns: [cnt_agb], stream_key: [], pk_columns: [], pk_conflict: NoCheck } - └─StreamProject { exprs: [sum0(count filter((t.a > t.b)))], noop_update_hint: true } + └─StreamProject { exprs: [sum0(count filter((t.a > t.b)))] } └─StreamSimpleAgg { aggs: [sum0(count filter((t.a > t.b))), count] } └─StreamExchange { dist: Single } └─StreamStatelessSimpleAgg { aggs: [count filter((t.a > t.b))] } @@ -813,7 +813,7 @@ └─BatchScan { table: t, columns: [t.v2], distribution: SomeShard } stream_plan: |- StreamMaterialize { columns: [b], stream_key: [], pk_columns: [], pk_conflict: NoCheck } - └─StreamProject { exprs: [sum(sum(t.v2) filter((t.v2 < 5:Int32)))], noop_update_hint: true } + └─StreamProject { exprs: [sum(sum(t.v2) filter((t.v2 < 5:Int32)))] } └─StreamSimpleAgg { aggs: [sum(sum(t.v2) filter((t.v2 < 5:Int32))), count] } └─StreamExchange { dist: Single } └─StreamStatelessSimpleAgg { aggs: [sum(t.v2) filter((t.v2 < 5:Int32))] } @@ -896,7 +896,7 @@ └─BatchScan { table: t, columns: [t.x, t.y], distribution: SomeShard } stream_plan: |- StreamMaterialize { columns: [string_agg, count], stream_key: [], pk_columns: [], pk_conflict: NoCheck } - └─StreamProject { exprs: [string_agg(t.y, ',':Varchar), count(distinct t.x)], noop_update_hint: true } + └─StreamProject { exprs: [string_agg(t.y, ',':Varchar), count(distinct t.x)] } └─StreamSimpleAgg { aggs: [string_agg(t.y, ',':Varchar), count(distinct t.x), count] } └─StreamExchange { dist: Single } └─StreamProject { exprs: [t.y, ',':Varchar, t.x, t._row_id] } @@ -917,7 +917,7 @@ └─BatchScan { table: t, columns: [t.x, t.y], distribution: SomeShard } stream_plan: |- StreamMaterialize { columns: [string_agg, count], stream_key: [], pk_columns: [], pk_conflict: NoCheck } - └─StreamProject { exprs: [string_agg(t.y, ',':Varchar order_by(t.y ASC)), count(distinct t.x)], noop_update_hint: true } + └─StreamProject { exprs: [string_agg(t.y, ',':Varchar order_by(t.y ASC)), count(distinct t.x)] } └─StreamSimpleAgg { aggs: [string_agg(t.y, ',':Varchar order_by(t.y ASC)), count(distinct t.x), count] } └─StreamExchange { dist: Single } └─StreamProject { exprs: [t.y, ',':Varchar, t.x, t._row_id] } @@ -938,7 +938,7 @@ └─BatchScan { table: t, columns: [t.x, t.y], distribution: SomeShard } stream_plan: |- StreamMaterialize { columns: [string_agg, count], stream_key: [], pk_columns: [], pk_conflict: NoCheck } - └─StreamProject { exprs: [string_agg(distinct t.y, ',':Varchar order_by(t.y ASC)), count(distinct t.x)], noop_update_hint: true } + 
└─StreamProject { exprs: [string_agg(distinct t.y, ',':Varchar order_by(t.y ASC)), count(distinct t.x)] } └─StreamSimpleAgg { aggs: [string_agg(distinct t.y, ',':Varchar order_by(t.y ASC)), count(distinct t.x), count] } └─StreamExchange { dist: Single } └─StreamProject { exprs: [t.y, ',':Varchar, t.x, t._row_id] } @@ -1006,7 +1006,7 @@ └─LogicalScan { table: t, columns: [t.a, t.b] } stream_plan: |- StreamMaterialize { columns: [s1], stream_key: [], pk_columns: [], pk_conflict: NoCheck } - └─StreamProject { exprs: [sum(sum($expr1) filter((t.b < 100:Int32) AND ((t.b * 2:Int32) > 10:Int32)))], noop_update_hint: true } + └─StreamProject { exprs: [sum(sum($expr1) filter((t.b < 100:Int32) AND ((t.b * 2:Int32) > 10:Int32)))] } └─StreamSimpleAgg { aggs: [sum(sum($expr1) filter((t.b < 100:Int32) AND ((t.b * 2:Int32) > 10:Int32))), count] } └─StreamExchange { dist: Single } └─StreamStatelessSimpleAgg { aggs: [sum($expr1) filter((t.b < 100:Int32) AND ((t.b * 2:Int32) > 10:Int32))] } @@ -1313,7 +1313,7 @@ stream_plan: |- StreamMaterialize { columns: [stddev_samp, stddev_pop], stream_key: [], pk_columns: [], pk_conflict: NoCheck } └─StreamProject { exprs: [Case((sum0(count(t.v1)) <= 1:Int64), null:Decimal, Sqrt(((sum(sum($expr1))::Decimal - (($expr2 * $expr2) / $expr3)) / (sum0(count(t.v1)) - 1:Int64)::Decimal))) as $expr4, Sqrt(((sum(sum($expr1))::Decimal - (($expr2 * $expr2) / $expr3)) / $expr3)) as $expr5] } - └─StreamProject { exprs: [sum(sum($expr1)), sum(sum(t.v1)), sum0(count(t.v1)), sum(sum(t.v1))::Decimal as $expr2, sum0(count(t.v1))::Decimal as $expr3], noop_update_hint: true } + └─StreamProject { exprs: [sum(sum($expr1)), sum(sum(t.v1)), sum0(count(t.v1)), sum(sum(t.v1))::Decimal as $expr2, sum0(count(t.v1))::Decimal as $expr3] } └─StreamSimpleAgg { aggs: [sum(sum($expr1)), sum(sum(t.v1)), sum0(count(t.v1)), count] } └─StreamExchange { dist: Single } └─StreamStatelessSimpleAgg { aggs: [sum($expr1), sum(t.v1), count(t.v1)] } @@ -1370,7 +1370,7 @@ └─BatchScan { table: t, columns: [t.v1, t.v2], distribution: SomeShard } stream_plan: |- StreamMaterialize { columns: [min, sum], stream_key: [], pk_columns: [], pk_conflict: NoCheck } - └─StreamProject { exprs: [min(min(t.v1)), sum(sum(t.v2))], noop_update_hint: true } + └─StreamProject { exprs: [min(min(t.v1)), sum(sum(t.v2))] } └─StreamSimpleAgg { aggs: [min(min(t.v1)), sum(sum(t.v2)), count] } └─StreamExchange { dist: Single } └─StreamHashAgg { group_key: [_vnode], aggs: [min(t.v1), sum(t.v2), count] } @@ -1388,7 +1388,7 @@ └─BatchScan { table: t, columns: [t.v1, t.v2], distribution: SomeShard } stream_plan: |- StreamMaterialize { columns: [min, sum], stream_key: [], pk_columns: [], pk_conflict: NoCheck } - └─StreamProject { exprs: [min(t.v1), sum(t.v2)], noop_update_hint: true } + └─StreamProject { exprs: [min(t.v1), sum(t.v2)] } └─StreamSimpleAgg { aggs: [min(t.v1), sum(t.v2), count] } └─StreamExchange { dist: Single } └─StreamTableScan { table: t, columns: [t.v1, t.v2, t._row_id], stream_scan_type: ArrangementBackfill, stream_key: [t._row_id], pk: [_row_id], dist: UpstreamHashShard(t._row_id) } @@ -1677,7 +1677,7 @@ └─BatchScan { table: t, columns: [t.x, t.y], distribution: SomeShard } stream_plan: |- StreamMaterialize { columns: [first_value], stream_key: [], pk_columns: [], pk_conflict: NoCheck } - └─StreamProject { exprs: [first_value(t.x order_by(t.y ASC))], noop_update_hint: true } + └─StreamProject { exprs: [first_value(t.x order_by(t.y ASC))] } └─StreamSimpleAgg { aggs: [first_value(t.x order_by(t.y ASC)), count] } └─StreamExchange { 
dist: Single } └─StreamTableScan { table: t, columns: [t.x, t.y, t._row_id], stream_scan_type: ArrangementBackfill, stream_key: [t._row_id], pk: [_row_id], dist: UpstreamHashShard(t._row_id) } @@ -1685,7 +1685,7 @@ Fragment 0 StreamMaterialize { columns: [first_value], stream_key: [], pk_columns: [], pk_conflict: NoCheck } ├── tables: [ Materialize: 4294967294 ] - └── StreamProject { exprs: [first_value(t.x order_by(t.y ASC))], noop_update_hint: true } + └── StreamProject { exprs: [first_value(t.x order_by(t.y ASC))] } └── StreamSimpleAgg { aggs: [first_value(t.x order_by(t.y ASC)), count] } ├── tables: [ SimpleAggState: 1, SimpleAggCall0: 0 ] └── StreamExchange Single from 1 @@ -1717,7 +1717,7 @@ Fragment 0 StreamMaterialize { columns: [first_value], stream_key: [], pk_columns: [], pk_conflict: NoCheck } ├── tables: [ Materialize: 4294967294 ] - └── StreamProject { exprs: [first_value(distinct t.x order_by(t.x ASC))], noop_update_hint: true } + └── StreamProject { exprs: [first_value(distinct t.x order_by(t.x ASC))] } └── StreamSimpleAgg { aggs: [first_value(distinct t.x order_by(t.x ASC)), count] } ├── tables: [ SimpleAggState: 1, SimpleAggCall0: 0, SimpleAggDedupForCol0: 2 ] └── StreamExchange Single from 1 @@ -1753,7 +1753,7 @@ └─BatchScan { table: t, columns: [t.x, t.y], distribution: SomeShard } stream_plan: |- StreamMaterialize { columns: [last_value], stream_key: [], pk_columns: [], pk_conflict: NoCheck } - └─StreamProject { exprs: [last_value(t.x order_by(t.y DESC NULLS LAST))], noop_update_hint: true } + └─StreamProject { exprs: [last_value(t.x order_by(t.y DESC NULLS LAST))] } └─StreamSimpleAgg { aggs: [last_value(t.x order_by(t.y DESC NULLS LAST)), count] } └─StreamExchange { dist: Single } └─StreamTableScan { table: t, columns: [t.x, t.y, t._row_id], stream_scan_type: ArrangementBackfill, stream_key: [t._row_id], pk: [_row_id], dist: UpstreamHashShard(t._row_id) } @@ -1874,7 +1874,7 @@ └─LogicalScan { table: t, columns: [t.v1, t._row_id] } stream_plan: |- StreamMaterialize { columns: [x, y, z, w], stream_key: [], pk_columns: [], pk_conflict: NoCheck } - └─StreamProject { exprs: [sum(sum(t.v1)), sum0(count(t.v1)), sum(sum(t.v1)), sum0(count(t.v1))], noop_update_hint: true } + └─StreamProject { exprs: [sum(sum(t.v1)), sum0(count(t.v1)), sum(sum(t.v1)), sum0(count(t.v1))] } └─StreamSimpleAgg { aggs: [sum(sum(t.v1)), sum0(count(t.v1)), count] } └─StreamExchange { dist: Single } └─StreamStatelessSimpleAgg { aggs: [sum(t.v1), count(t.v1)] } @@ -1895,12 +1895,11 @@ └─BatchScan { table: t, columns: [t.v1], distribution: SomeShard } stream_plan: |- StreamMaterialize { columns: [approx_percentile], stream_key: [], pk_columns: [], pk_conflict: NoCheck } - └─StreamProject { exprs: [approx_percentile], noop_update_hint: true } - └─StreamGlobalApproxPercentile { quantile: 0.5:Float64, relative_error: 0.01:Float64 } - └─StreamExchange { dist: Single } - └─StreamLocalApproxPercentile { percentile_col: $expr1, quantile: 0.5:Float64, relative_error: 0.01:Float64 } - └─StreamProject { exprs: [t.v1::Float64 as $expr1, t._row_id] } - └─StreamTableScan { table: t, columns: [t.v1, t._row_id], stream_scan_type: ArrangementBackfill, stream_key: [t._row_id], pk: [_row_id], dist: UpstreamHashShard(t._row_id) } + └─StreamGlobalApproxPercentile { quantile: 0.5:Float64, relative_error: 0.01:Float64 } + └─StreamExchange { dist: Single } + └─StreamLocalApproxPercentile { percentile_col: $expr1, quantile: 0.5:Float64, relative_error: 0.01:Float64 } + └─StreamProject { exprs: [t.v1::Float64 as $expr1, 
t._row_id] } + └─StreamTableScan { table: t, columns: [t.v1, t._row_id], stream_scan_type: ArrangementBackfill, stream_key: [t._row_id], pk: [_row_id], dist: UpstreamHashShard(t._row_id) } - name: test simple approx_percentile with other simple aggs sql: | CREATE TABLE t (v1 int); @@ -1917,20 +1916,19 @@ └─BatchScan { table: t, columns: [t.v1], distribution: SomeShard } stream_plan: |- StreamMaterialize { columns: [approx_percentile, sum], stream_key: [], pk_columns: [], pk_conflict: NoCheck } - └─StreamProject { exprs: [approx_percentile, sum(sum(t.v1))], noop_update_hint: true } - └─StreamRowMerge { output: [approx_percentile:Float64, sum(sum(t.v1)):Int64] } - ├─StreamGlobalApproxPercentile { quantile: 0.5:Float64, relative_error: 0.01:Float64 } - │ └─StreamExchange { dist: Single } - │ └─StreamLocalApproxPercentile { percentile_col: $expr1, quantile: 0.5:Float64, relative_error: 0.01:Float64 } - │ └─StreamShare { id: 2 } - │ └─StreamProject { exprs: [t.v1::Float64 as $expr1, t.v1, t._row_id] } - │ └─StreamTableScan { table: t, columns: [t.v1, t._row_id], stream_scan_type: ArrangementBackfill, stream_key: [t._row_id], pk: [_row_id], dist: UpstreamHashShard(t._row_id) } - └─StreamSimpleAgg { aggs: [sum(sum(t.v1)), count] } - └─StreamExchange { dist: Single } - └─StreamStatelessSimpleAgg { aggs: [sum(t.v1)] } - └─StreamShare { id: 2 } - └─StreamProject { exprs: [t.v1::Float64 as $expr1, t.v1, t._row_id] } - └─StreamTableScan { table: t, columns: [t.v1, t._row_id], stream_scan_type: ArrangementBackfill, stream_key: [t._row_id], pk: [_row_id], dist: UpstreamHashShard(t._row_id) } + └─StreamRowMerge { output: [approx_percentile:Float64, sum(sum(t.v1)):Int64] } + ├─StreamGlobalApproxPercentile { quantile: 0.5:Float64, relative_error: 0.01:Float64 } + │ └─StreamExchange { dist: Single } + │ └─StreamLocalApproxPercentile { percentile_col: $expr1, quantile: 0.5:Float64, relative_error: 0.01:Float64 } + │ └─StreamShare { id: 2 } + │ └─StreamProject { exprs: [t.v1::Float64 as $expr1, t.v1, t._row_id] } + │ └─StreamTableScan { table: t, columns: [t.v1, t._row_id], stream_scan_type: ArrangementBackfill, stream_key: [t._row_id], pk: [_row_id], dist: UpstreamHashShard(t._row_id) } + └─StreamSimpleAgg { aggs: [sum(sum(t.v1)), count], must_output_per_barrier: true } + └─StreamExchange { dist: Single } + └─StreamStatelessSimpleAgg { aggs: [sum(t.v1)] } + └─StreamShare { id: 2 } + └─StreamProject { exprs: [t.v1::Float64 as $expr1, t.v1, t._row_id] } + └─StreamTableScan { table: t, columns: [t.v1, t._row_id], stream_scan_type: ArrangementBackfill, stream_key: [t._row_id], pk: [_row_id], dist: UpstreamHashShard(t._row_id) } - name: test simple approx_percentile with other simple aggs (sum, count) sql: | CREATE TABLE t (v1 int); @@ -1948,7 +1946,7 @@ └─BatchScan { table: t, columns: [t.v1], distribution: SomeShard } stream_plan: |- StreamMaterialize { columns: [s1, approx_percentile, s2, count], stream_key: [], pk_columns: [], pk_conflict: NoCheck } - └─StreamProject { exprs: [sum(sum(t.v1)), approx_percentile, sum(sum(t.v1)), sum0(count(t.v1))], noop_update_hint: true } + └─StreamProject { exprs: [sum(sum(t.v1)), approx_percentile, sum(sum(t.v1)), sum0(count(t.v1))] } └─StreamRowMerge { output: [sum(sum(t.v1)):Int64, approx_percentile:Float64, sum0(count(t.v1)):Int64] } ├─StreamGlobalApproxPercentile { quantile: 0.5:Float64, relative_error: 0.01:Float64 } │ └─StreamExchange { dist: Single } @@ -1956,7 +1954,7 @@ │ └─StreamShare { id: 2 } │ └─StreamProject { exprs: [t.v1, t.v1::Float64 as $expr1, t._row_id] } 
│ └─StreamTableScan { table: t, columns: [t.v1, t._row_id], stream_scan_type: ArrangementBackfill, stream_key: [t._row_id], pk: [_row_id], dist: UpstreamHashShard(t._row_id) } - └─StreamSimpleAgg { aggs: [sum(sum(t.v1)), sum0(count(t.v1)), count] } + └─StreamSimpleAgg { aggs: [sum(sum(t.v1)), sum0(count(t.v1)), count], must_output_per_barrier: true } └─StreamExchange { dist: Single } └─StreamStatelessSimpleAgg { aggs: [sum(t.v1), count(t.v1)] } └─StreamShare { id: 2 } @@ -1973,7 +1971,7 @@ └─LogicalScan { table: t, columns: [t.v1, t._row_id] } stream_plan: |- StreamMaterialize { columns: [x, y], stream_key: [], pk_columns: [], pk_conflict: NoCheck } - └─StreamProject { exprs: [approx_percentile, approx_percentile], noop_update_hint: true } + └─StreamProject { exprs: [approx_percentile, approx_percentile] } └─StreamGlobalApproxPercentile { quantile: 0.5:Float64, relative_error: 0.01:Float64 } └─StreamExchange { dist: Single } └─StreamLocalApproxPercentile { percentile_col: $expr1, quantile: 0.5:Float64, relative_error: 0.01:Float64 } @@ -1995,20 +1993,19 @@ └─BatchScan { table: t, columns: [t.v1, t.v2], distribution: SomeShard } stream_plan: |- StreamMaterialize { columns: [x, y], stream_key: [], pk_columns: [], pk_conflict: NoCheck } - └─StreamProject { exprs: [approx_percentile, approx_percentile], noop_update_hint: true } - └─StreamRowMerge { output: [approx_percentile:Float64, approx_percentile:Float64] } - ├─StreamGlobalApproxPercentile { quantile: 0.5:Float64, relative_error: 0.01:Float64 } - │ └─StreamExchange { dist: Single } - │ └─StreamLocalApproxPercentile { percentile_col: $expr1, quantile: 0.5:Float64, relative_error: 0.01:Float64 } - │ └─StreamShare { id: 2 } - │ └─StreamProject { exprs: [t.v1::Float64 as $expr1, t.v2::Float64 as $expr2, t._row_id] } - │ └─StreamTableScan { table: t, columns: [t.v1, t.v2, t._row_id], stream_scan_type: ArrangementBackfill, stream_key: [t._row_id], pk: [_row_id], dist: UpstreamHashShard(t._row_id) } - └─StreamGlobalApproxPercentile { quantile: 0.5:Float64, relative_error: 0.01:Float64 } - └─StreamExchange { dist: Single } - └─StreamLocalApproxPercentile { percentile_col: $expr2, quantile: 0.5:Float64, relative_error: 0.01:Float64 } - └─StreamShare { id: 2 } - └─StreamProject { exprs: [t.v1::Float64 as $expr1, t.v2::Float64 as $expr2, t._row_id] } - └─StreamTableScan { table: t, columns: [t.v1, t.v2, t._row_id], stream_scan_type: ArrangementBackfill, stream_key: [t._row_id], pk: [_row_id], dist: UpstreamHashShard(t._row_id) } + └─StreamRowMerge { output: [approx_percentile:Float64, approx_percentile:Float64] } + ├─StreamGlobalApproxPercentile { quantile: 0.5:Float64, relative_error: 0.01:Float64 } + │ └─StreamExchange { dist: Single } + │ └─StreamLocalApproxPercentile { percentile_col: $expr1, quantile: 0.5:Float64, relative_error: 0.01:Float64 } + │ └─StreamShare { id: 2 } + │ └─StreamProject { exprs: [t.v1::Float64 as $expr1, t.v2::Float64 as $expr2, t._row_id] } + │ └─StreamTableScan { table: t, columns: [t.v1, t.v2, t._row_id], stream_scan_type: ArrangementBackfill, stream_key: [t._row_id], pk: [_row_id], dist: UpstreamHashShard(t._row_id) } + └─StreamGlobalApproxPercentile { quantile: 0.5:Float64, relative_error: 0.01:Float64 } + └─StreamExchange { dist: Single } + └─StreamLocalApproxPercentile { percentile_col: $expr2, quantile: 0.5:Float64, relative_error: 0.01:Float64 } + └─StreamShare { id: 2 } + └─StreamProject { exprs: [t.v1::Float64 as $expr1, t.v2::Float64 as $expr2, t._row_id] } + └─StreamTableScan { table: t, columns: [t.v1, t.v2, 
t._row_id], stream_scan_type: ArrangementBackfill, stream_key: [t._row_id], pk: [_row_id], dist: UpstreamHashShard(t._row_id) } - name: test simple approx_percentile with different approx_percentile interleaved with stateless simple aggs sql: | CREATE TABLE t (v1 int, v2 int); @@ -2026,7 +2023,7 @@ └─BatchScan { table: t, columns: [t.v1, t.v2], distribution: SomeShard } stream_plan: |- StreamMaterialize { columns: [s1, x, count, y], stream_key: [], pk_columns: [], pk_conflict: NoCheck } - └─StreamProject { exprs: [sum(sum(t.v1)), approx_percentile, sum0(count), (sum(sum(t.v2))::Float64 + approx_percentile) as $expr3], noop_update_hint: true } + └─StreamProject { exprs: [sum(sum(t.v1)), approx_percentile, sum0(count), (sum(sum(t.v2))::Float64 + approx_percentile) as $expr3] } └─StreamRowMerge { output: [sum(sum(t.v1)):Int64, approx_percentile:Float64, sum0(count):Int64, sum(sum(t.v2)):Int64, approx_percentile:Float64] } ├─StreamRowMerge { output: [approx_percentile:Float64, approx_percentile:Float64] } │ ├─StreamGlobalApproxPercentile { quantile: 0.5:Float64, relative_error: 0.01:Float64 } @@ -2041,7 +2038,7 @@ │ └─StreamShare { id: 2 } │ └─StreamProject { exprs: [t.v1, t.v1::Float64 as $expr1, t.v2, t.v2::Float64 as $expr2, t._row_id] } │ └─StreamTableScan { table: t, columns: [t.v1, t.v2, t._row_id], stream_scan_type: ArrangementBackfill, stream_key: [t._row_id], pk: [_row_id], dist: UpstreamHashShard(t._row_id) } - └─StreamSimpleAgg { aggs: [sum(sum(t.v1)), sum0(count), sum(sum(t.v2)), count] } + └─StreamSimpleAgg { aggs: [sum(sum(t.v1)), sum0(count), sum(sum(t.v2)), count], must_output_per_barrier: true } └─StreamExchange { dist: Single } └─StreamStatelessSimpleAgg { aggs: [sum(t.v1), count, sum(t.v2)] } └─StreamShare { id: 2 } @@ -2064,7 +2061,7 @@ └─BatchScan { table: t, columns: [t.v1, t.v2], distribution: SomeShard } stream_plan: |- StreamMaterialize { columns: [s1, x, count, y], stream_key: [], pk_columns: [], pk_conflict: NoCheck } - └─StreamProject { exprs: [sum(sum(t.v1)), approx_percentile, sum0(count), (sum(sum(t.v2))::Float64 + approx_percentile) as $expr3], noop_update_hint: true } + └─StreamProject { exprs: [sum(sum(t.v1)), approx_percentile, sum0(count), (sum(sum(t.v2))::Float64 + approx_percentile) as $expr3] } └─StreamRowMerge { output: [sum(sum(t.v1)):Int64, approx_percentile:Float64, sum0(count):Int64, sum(sum(t.v2)):Int64, approx_percentile:Float64] } ├─StreamRowMerge { output: [approx_percentile:Float64, approx_percentile:Float64] } │ ├─StreamGlobalApproxPercentile { quantile: 0.5:Float64, relative_error: 0.01:Float64 } @@ -2079,7 +2076,7 @@ │ └─StreamShare { id: 2 } │ └─StreamProject { exprs: [t.v1, t.v1::Float64 as $expr1, t.v2, t.v2::Float64 as $expr2, t._row_id] } │ └─StreamTableScan { table: t, columns: [t.v1, t.v2, t._row_id], stream_scan_type: ArrangementBackfill, stream_key: [t._row_id], pk: [_row_id], dist: UpstreamHashShard(t._row_id) } - └─StreamSimpleAgg { aggs: [sum(sum(t.v1)), sum0(count), sum(sum(t.v2)), count] } + └─StreamSimpleAgg { aggs: [sum(sum(t.v1)), sum0(count), sum(sum(t.v2)), count], must_output_per_barrier: true } └─StreamExchange { dist: Single } └─StreamStatelessSimpleAgg { aggs: [sum(t.v1), count, sum(t.v2)] } └─StreamShare { id: 2 } @@ -2101,20 +2098,19 @@ └─BatchScan { table: t, columns: [t.v1], distribution: SomeShard } stream_plan: |- StreamMaterialize { columns: [s1, approx_percentile], stream_key: [], pk_columns: [], pk_conflict: NoCheck } - └─StreamProject { exprs: [sum(sum(t.v1)), approx_percentile], noop_update_hint: true } - 
└─StreamRowMerge { output: [sum(sum(t.v1)):Int64, approx_percentile:Float64] } - ├─StreamGlobalApproxPercentile { quantile: 0.8:Float64, relative_error: 0.01:Float64 } - │ └─StreamExchange { dist: Single } - │ └─StreamLocalApproxPercentile { percentile_col: $expr1, quantile: 0.8:Float64, relative_error: 0.01:Float64 } - │ └─StreamShare { id: 2 } - │ └─StreamProject { exprs: [t.v1, t.v1::Float64 as $expr1, t._row_id] } - │ └─StreamTableScan { table: t, columns: [t.v1, t._row_id], stream_scan_type: ArrangementBackfill, stream_key: [t._row_id], pk: [_row_id], dist: UpstreamHashShard(t._row_id) } - └─StreamSimpleAgg { aggs: [sum(sum(t.v1)), count] } - └─StreamExchange { dist: Single } - └─StreamStatelessSimpleAgg { aggs: [sum(t.v1)] } - └─StreamShare { id: 2 } - └─StreamProject { exprs: [t.v1, t.v1::Float64 as $expr1, t._row_id] } - └─StreamTableScan { table: t, columns: [t.v1, t._row_id], stream_scan_type: ArrangementBackfill, stream_key: [t._row_id], pk: [_row_id], dist: UpstreamHashShard(t._row_id) } + └─StreamRowMerge { output: [sum(sum(t.v1)):Int64, approx_percentile:Float64] } + ├─StreamGlobalApproxPercentile { quantile: 0.8:Float64, relative_error: 0.01:Float64 } + │ └─StreamExchange { dist: Single } + │ └─StreamLocalApproxPercentile { percentile_col: $expr1, quantile: 0.8:Float64, relative_error: 0.01:Float64 } + │ └─StreamShare { id: 2 } + │ └─StreamProject { exprs: [t.v1, t.v1::Float64 as $expr1, t._row_id] } + │ └─StreamTableScan { table: t, columns: [t.v1, t._row_id], stream_scan_type: ArrangementBackfill, stream_key: [t._row_id], pk: [_row_id], dist: UpstreamHashShard(t._row_id) } + └─StreamSimpleAgg { aggs: [sum(sum(t.v1)), count], must_output_per_barrier: true } + └─StreamExchange { dist: Single } + └─StreamStatelessSimpleAgg { aggs: [sum(t.v1)] } + └─StreamShare { id: 2 } + └─StreamProject { exprs: [t.v1, t.v1::Float64 as $expr1, t._row_id] } + └─StreamTableScan { table: t, columns: [t.v1, t._row_id], stream_scan_type: ArrangementBackfill, stream_key: [t._row_id], pk: [_row_id], dist: UpstreamHashShard(t._row_id) } - name: test simple approx_percentile with different approx_percentile interleaved with stateless + stateful simple aggs sql: | CREATE TABLE t (v1 int, v2 int); @@ -2131,26 +2127,25 @@ └─BatchScan { table: t, columns: [t.v1, t.v2], distribution: SomeShard } stream_plan: |- StreamMaterialize { columns: [s1, x, count, m2, y], stream_key: [], pk_columns: [], pk_conflict: NoCheck } - └─StreamProject { exprs: [sum(sum(t.v1)), approx_percentile, sum0(count), max(max(t.v2)), approx_percentile], noop_update_hint: true } - └─StreamRowMerge { output: [sum(sum(t.v1)):Int64, approx_percentile:Float64, sum0(count):Int64, max(max(t.v2)):Int32, approx_percentile:Float64] } - ├─StreamRowMerge { output: [approx_percentile:Float64, approx_percentile:Float64] } - │ ├─StreamGlobalApproxPercentile { quantile: 0.5:Float64, relative_error: 0.01:Float64 } - │ │ └─StreamExchange { dist: Single } - │ │ └─StreamLocalApproxPercentile { percentile_col: $expr1, quantile: 0.5:Float64, relative_error: 0.01:Float64 } - │ │ └─StreamShare { id: 2 } - │ │ └─StreamProject { exprs: [t.v1, t.v1::Float64 as $expr1, t.v2, t.v2::Float64 as $expr2, t._row_id] } - │ │ └─StreamTableScan { table: t, columns: [t.v1, t.v2, t._row_id], stream_scan_type: ArrangementBackfill, stream_key: [t._row_id], pk: [_row_id], dist: UpstreamHashShard(t._row_id) } - │ └─StreamGlobalApproxPercentile { quantile: 0.5:Float64, relative_error: 0.01:Float64 } - │ └─StreamExchange { dist: Single } - │ └─StreamLocalApproxPercentile { 
percentile_col: $expr2, quantile: 0.5:Float64, relative_error: 0.01:Float64 } - │ └─StreamShare { id: 2 } - │ └─StreamProject { exprs: [t.v1, t.v1::Float64 as $expr1, t.v2, t.v2::Float64 as $expr2, t._row_id] } - │ └─StreamTableScan { table: t, columns: [t.v1, t.v2, t._row_id], stream_scan_type: ArrangementBackfill, stream_key: [t._row_id], pk: [_row_id], dist: UpstreamHashShard(t._row_id) } - └─StreamSimpleAgg { aggs: [sum(sum(t.v1)), sum0(count), max(max(t.v2)), count] } - └─StreamExchange { dist: Single } - └─StreamHashAgg { group_key: [$expr5], aggs: [sum(t.v1), count, max(t.v2)] } - └─StreamProject { exprs: [t.v1, t.v1::Float64 as $expr3, t.v2, t.v2::Float64 as $expr4, t._row_id, Vnode(t._row_id) as $expr5] } - └─StreamTableScan { table: t, columns: [t.v1, t.v2, t._row_id], stream_scan_type: ArrangementBackfill, stream_key: [t._row_id], pk: [_row_id], dist: UpstreamHashShard(t._row_id) } + └─StreamRowMerge { output: [sum(sum(t.v1)):Int64, approx_percentile:Float64, sum0(count):Int64, max(max(t.v2)):Int32, approx_percentile:Float64] } + ├─StreamRowMerge { output: [approx_percentile:Float64, approx_percentile:Float64] } + │ ├─StreamGlobalApproxPercentile { quantile: 0.5:Float64, relative_error: 0.01:Float64 } + │ │ └─StreamExchange { dist: Single } + │ │ └─StreamLocalApproxPercentile { percentile_col: $expr1, quantile: 0.5:Float64, relative_error: 0.01:Float64 } + │ │ └─StreamShare { id: 2 } + │ │ └─StreamProject { exprs: [t.v1, t.v1::Float64 as $expr1, t.v2, t.v2::Float64 as $expr2, t._row_id] } + │ │ └─StreamTableScan { table: t, columns: [t.v1, t.v2, t._row_id], stream_scan_type: ArrangementBackfill, stream_key: [t._row_id], pk: [_row_id], dist: UpstreamHashShard(t._row_id) } + │ └─StreamGlobalApproxPercentile { quantile: 0.5:Float64, relative_error: 0.01:Float64 } + │ └─StreamExchange { dist: Single } + │ └─StreamLocalApproxPercentile { percentile_col: $expr2, quantile: 0.5:Float64, relative_error: 0.01:Float64 } + │ └─StreamShare { id: 2 } + │ └─StreamProject { exprs: [t.v1, t.v1::Float64 as $expr1, t.v2, t.v2::Float64 as $expr2, t._row_id] } + │ └─StreamTableScan { table: t, columns: [t.v1, t.v2, t._row_id], stream_scan_type: ArrangementBackfill, stream_key: [t._row_id], pk: [_row_id], dist: UpstreamHashShard(t._row_id) } + └─StreamSimpleAgg { aggs: [sum(sum(t.v1)), sum0(count), max(max(t.v2)), count], must_output_per_barrier: true } + └─StreamExchange { dist: Single } + └─StreamHashAgg { group_key: [$expr5], aggs: [sum(t.v1), count, max(t.v2)] } + └─StreamProject { exprs: [t.v1, t.v1::Float64 as $expr3, t.v2, t.v2::Float64 as $expr4, t._row_id, Vnode(t._row_id) as $expr5] } + └─StreamTableScan { table: t, columns: [t.v1, t.v2, t._row_id], stream_scan_type: ArrangementBackfill, stream_key: [t._row_id], pk: [_row_id], dist: UpstreamHashShard(t._row_id) } - name: test hash approx_percentile sql: | CREATE TABLE t (v1 int, v2 int); @@ -2198,9 +2193,8 @@ └─BatchScan { table: t, columns: [t.v1], distribution: SomeShard } stream_plan: |- StreamMaterialize { columns: [approx_percentile], stream_key: [], pk_columns: [], pk_conflict: NoCheck } - └─StreamProject { exprs: [approx_percentile], noop_update_hint: true } - └─StreamGlobalApproxPercentile { quantile: 0.5:Float64, relative_error: 0.01:Float64 } - └─StreamExchange { dist: Single } - └─StreamLocalApproxPercentile { percentile_col: $expr1, quantile: 0.5:Float64, relative_error: 0.01:Float64 } - └─StreamProject { exprs: [t.v1::Float64 as $expr1, t._row_id] } - └─StreamTableScan { table: t, columns: [t.v1, t._row_id], stream_scan_type: 
ArrangementBackfill, stream_key: [t._row_id], pk: [_row_id], dist: UpstreamHashShard(t._row_id) } + └─StreamGlobalApproxPercentile { quantile: 0.5:Float64, relative_error: 0.01:Float64 } + └─StreamExchange { dist: Single } + └─StreamLocalApproxPercentile { percentile_col: $expr1, quantile: 0.5:Float64, relative_error: 0.01:Float64 } + └─StreamProject { exprs: [t.v1::Float64 as $expr1, t._row_id] } + └─StreamTableScan { table: t, columns: [t.v1, t._row_id], stream_scan_type: ArrangementBackfill, stream_key: [t._row_id], pk: [_row_id], dist: UpstreamHashShard(t._row_id) } diff --git a/src/frontend/planner_test/tests/testdata/output/append_only.yaml b/src/frontend/planner_test/tests/testdata/output/append_only.yaml index e76813e05f759..d0701675c3617 100644 --- a/src/frontend/planner_test/tests/testdata/output/append_only.yaml +++ b/src/frontend/planner_test/tests/testdata/output/append_only.yaml @@ -33,7 +33,7 @@ select max(v1) as max_v1 from t1; stream_plan: |- StreamMaterialize { columns: [max_v1], stream_key: [], pk_columns: [], pk_conflict: NoCheck } - └─StreamProject { exprs: [max(max(t1.v1))], noop_update_hint: true } + └─StreamProject { exprs: [max(max(t1.v1))] } └─StreamSimpleAgg [append_only] { aggs: [max(max(t1.v1)), count] } └─StreamExchange { dist: Single } └─StreamStatelessSimpleAgg { aggs: [max(t1.v1)] } diff --git a/src/frontend/planner_test/tests/testdata/output/bushy_join.yaml b/src/frontend/planner_test/tests/testdata/output/bushy_join.yaml index a785ac443901a..9d042f1e60c8b 100644 --- a/src/frontend/planner_test/tests/testdata/output/bushy_join.yaml +++ b/src/frontend/planner_test/tests/testdata/output/bushy_join.yaml @@ -8,7 +8,7 @@ sql: select count(*) from t t1 join t t2 on t1.id = t2.id join t t3 on t1.id = t3.id join t t4 on t1.id = t4.id join t t5 on t1.id = t5.id join t t6 on t1.id = t6.id join t t7 on t1.id = t7.id join t t8 on t1.id = t8.id; stream_plan: |- StreamMaterialize { columns: [count], stream_key: [], pk_columns: [], pk_conflict: NoCheck } - └─StreamProject { exprs: [sum0(count)], noop_update_hint: true } + └─StreamProject { exprs: [sum0(count)] } └─StreamSimpleAgg { aggs: [sum0(count), count] } └─StreamExchange { dist: Single } └─StreamStatelessSimpleAgg { aggs: [count] } diff --git a/src/frontend/planner_test/tests/testdata/output/ch_benchmark.yaml b/src/frontend/planner_test/tests/testdata/output/ch_benchmark.yaml index 3b96806aabc7a..ce98b8bea75c9 100644 --- a/src/frontend/planner_test/tests/testdata/output/ch_benchmark.yaml +++ b/src/frontend/planner_test/tests/testdata/output/ch_benchmark.yaml @@ -869,7 +869,7 @@ └─BatchScan { table: order_line, columns: [order_line.ol_amount, order_line.ol_delivery_d, order_line.ol_quantity], distribution: SomeShard } stream_plan: |- StreamMaterialize { columns: [revenue], stream_key: [], pk_columns: [], pk_conflict: NoCheck } - └─StreamProject { exprs: [sum(sum(order_line.ol_amount))], noop_update_hint: true } + └─StreamProject { exprs: [sum(sum(order_line.ol_amount))] } └─StreamSimpleAgg { aggs: [sum(sum(order_line.ol_amount)), count] } └─StreamExchange { dist: Single } └─StreamStatelessSimpleAgg { aggs: [sum(order_line.ol_amount)] } @@ -880,7 +880,7 @@ Fragment 0 StreamMaterialize { columns: [revenue], stream_key: [], pk_columns: [], pk_conflict: NoCheck } ├── tables: [ Materialize: 4294967294 ] - └── StreamProject { exprs: [sum(sum(order_line.ol_amount))], noop_update_hint: true } + └── StreamProject { exprs: [sum(sum(order_line.ol_amount))] } └── StreamSimpleAgg { aggs: [sum(sum(order_line.ol_amount)), count] } 
├── tables: [ SimpleAggState: 0 ] └── StreamExchange Single from 1 @@ -1940,7 +1940,7 @@ │ └─StreamProject { exprs: [stock.s_i_id, stock.s_order_cnt, ((stock.s_w_id * stock.s_i_id) % 10000:Int32)::Int64 as $expr1, stock.s_w_id] } │ └─StreamTableScan { table: stock, columns: [stock.s_i_id, stock.s_w_id, stock.s_order_cnt], stream_scan_type: ArrangementBackfill, stream_key: [stock.s_w_id, stock.s_i_id], pk: [s_w_id, s_i_id], dist: UpstreamHashShard(stock.s_w_id, stock.s_i_id) } └─StreamExchange { dist: Broadcast } - └─StreamProject { exprs: [(sum(sum(stock.s_order_cnt))::Decimal * 0.005:Decimal) as $expr3], noop_update_hint: true } + └─StreamProject { exprs: [(sum(sum(stock.s_order_cnt))::Decimal * 0.005:Decimal) as $expr3] } └─StreamSimpleAgg { aggs: [sum(sum(stock.s_order_cnt)), count] } └─StreamExchange { dist: Single } └─StreamStatelessSimpleAgg { aggs: [sum(stock.s_order_cnt)] } @@ -2008,7 +2008,7 @@ └── BatchPlanNode Fragment 7 - StreamProject { exprs: [(sum(sum(stock.s_order_cnt))::Decimal * 0.005:Decimal) as $expr3], noop_update_hint: true } + StreamProject { exprs: [(sum(sum(stock.s_order_cnt))::Decimal * 0.005:Decimal) as $expr3] } └── StreamSimpleAgg { aggs: [sum(sum(stock.s_order_cnt)), count] } { tables: [ SimpleAggState: 14 ] } └── StreamExchange Single from 8 @@ -2265,7 +2265,7 @@ └─BatchScan { table: order_line, columns: [order_line.ol_i_id, order_line.ol_amount, order_line.ol_delivery_d], distribution: SomeShard } stream_plan: |- StreamMaterialize { columns: [promo_revenue], stream_key: [], pk_columns: [], pk_conflict: NoCheck } - └─StreamProject { exprs: [((100.00:Decimal * sum(sum($expr1))) / (1:Decimal + sum(sum(order_line.ol_amount)))) as $expr2], noop_update_hint: true } + └─StreamProject { exprs: [((100.00:Decimal * sum(sum($expr1))) / (1:Decimal + sum(sum(order_line.ol_amount)))) as $expr2] } └─StreamSimpleAgg { aggs: [sum(sum($expr1)), sum(sum(order_line.ol_amount)), count] } └─StreamExchange { dist: Single } └─StreamStatelessSimpleAgg { aggs: [sum($expr1), sum(order_line.ol_amount)] } @@ -2279,9 +2279,11 @@ └─StreamTableScan { table: item, columns: [item.i_id, item.i_data], stream_scan_type: ArrangementBackfill, stream_key: [item.i_id], pk: [i_id], dist: UpstreamHashShard(item.i_id) } stream_dist_plan: |+ Fragment 0 - StreamMaterialize { columns: [promo_revenue], stream_key: [], pk_columns: [], pk_conflict: NoCheck } { tables: [ Materialize: 4294967294 ] } - └── StreamProject { exprs: [((100.00:Decimal * sum(sum($expr1))) / (1:Decimal + sum(sum(order_line.ol_amount)))) as $expr2], noop_update_hint: true } - └── StreamSimpleAgg { aggs: [sum(sum($expr1)), sum(sum(order_line.ol_amount)), count] } { tables: [ SimpleAggState: 0 ] } + StreamMaterialize { columns: [promo_revenue], stream_key: [], pk_columns: [], pk_conflict: NoCheck } + ├── tables: [ Materialize: 4294967294 ] + └── StreamProject { exprs: [((100.00:Decimal * sum(sum($expr1))) / (1:Decimal + sum(sum(order_line.ol_amount)))) as $expr2] } + └── StreamSimpleAgg { aggs: [sum(sum($expr1)), sum(sum(order_line.ol_amount)), count] } + ├── tables: [ SimpleAggState: 0 ] └── StreamExchange Single from 1 Fragment 1 @@ -2356,7 +2358,7 @@ │ └─StreamProject { exprs: [revenue1.total_revenue, revenue1.supplier_no::Int64 as $expr1, revenue1.supplier_no] } │ └─StreamTableScan { table: revenue1, columns: [revenue1.supplier_no, revenue1.total_revenue], stream_scan_type: ArrangementBackfill, stream_key: [revenue1.supplier_no], pk: [supplier_no], dist: UpstreamHashShard(revenue1.supplier_no) } └─StreamExchange { dist: 
HashShard(max(max(revenue1.total_revenue))) } - └─StreamProject { exprs: [max(max(revenue1.total_revenue))], noop_update_hint: true } + └─StreamProject { exprs: [max(max(revenue1.total_revenue))] } └─StreamSimpleAgg { aggs: [max(max(revenue1.total_revenue)), count] } └─StreamExchange { dist: Single } └─StreamHashAgg { group_key: [_vnode], aggs: [max(revenue1.total_revenue), count] } @@ -2394,7 +2396,7 @@ └── BatchPlanNode Fragment 5 - StreamProject { exprs: [max(max(revenue1.total_revenue))], noop_update_hint: true } + StreamProject { exprs: [max(max(revenue1.total_revenue))] } └── StreamSimpleAgg { aggs: [max(max(revenue1.total_revenue)), count] } { tables: [ SimpleAggState: 11, SimpleAggCall0: 10 ] } └── StreamExchange Single from 6 @@ -2626,7 +2628,7 @@ └─BatchScan { table: order_line, columns: [order_line.ol_i_id, order_line.ol_quantity], distribution: SomeShard } stream_plan: |- StreamMaterialize { columns: [avg_yearly], stream_key: [], pk_columns: [], pk_conflict: NoCheck } - └─StreamProject { exprs: [(sum(sum(order_line.ol_amount)) / 2.0:Decimal) as $expr3], noop_update_hint: true } + └─StreamProject { exprs: [(sum(sum(order_line.ol_amount)) / 2.0:Decimal) as $expr3] } └─StreamSimpleAgg { aggs: [sum(sum(order_line.ol_amount)), count] } └─StreamExchange { dist: Single } └─StreamStatelessSimpleAgg { aggs: [sum(order_line.ol_amount)] } @@ -2649,8 +2651,9 @@ Fragment 0 StreamMaterialize { columns: [avg_yearly], stream_key: [], pk_columns: [], pk_conflict: NoCheck } ├── tables: [ Materialize: 4294967294 ] - └── StreamProject { exprs: [(sum(sum(order_line.ol_amount)) / 2.0:Decimal) as $expr3], noop_update_hint: true } - └── StreamSimpleAgg { aggs: [sum(sum(order_line.ol_amount)), count] } { tables: [ SimpleAggState: 0 ] } + └── StreamProject { exprs: [(sum(sum(order_line.ol_amount)) / 2.0:Decimal) as $expr3] } + └── StreamSimpleAgg { aggs: [sum(sum(order_line.ol_amount)), count] } + ├── tables: [ SimpleAggState: 0 ] └── StreamExchange Single from 1 Fragment 1 @@ -2858,7 +2861,7 @@ └─BatchScan { table: order_line, columns: [order_line.ol_w_id, order_line.ol_i_id, order_line.ol_amount, order_line.ol_quantity], distribution: SomeShard } stream_plan: |- StreamMaterialize { columns: [revenue], stream_key: [], pk_columns: [], pk_conflict: NoCheck } - └─StreamProject { exprs: [sum(sum(order_line.ol_amount))], noop_update_hint: true } + └─StreamProject { exprs: [sum(sum(order_line.ol_amount))] } └─StreamSimpleAgg { aggs: [sum(sum(order_line.ol_amount)), count] } └─StreamExchange { dist: Single } └─StreamStatelessSimpleAgg { aggs: [sum(order_line.ol_amount)] } @@ -2877,7 +2880,7 @@ Fragment 0 StreamMaterialize { columns: [revenue], stream_key: [], pk_columns: [], pk_conflict: NoCheck } ├── tables: [ Materialize: 4294967294 ] - └── StreamProject { exprs: [sum(sum(order_line.ol_amount))], noop_update_hint: true } + └── StreamProject { exprs: [sum(sum(order_line.ol_amount))] } └── StreamSimpleAgg { aggs: [sum(sum(order_line.ol_amount)), count] } ├── tables: [ SimpleAggState: 0 ] └── StreamExchange Single from 1 @@ -3407,7 +3410,7 @@ │ └─StreamProject { exprs: [orders.o_c_id, orders.o_w_id, orders.o_d_id, orders.o_id] } │ └─StreamTableScan { table: orders, columns: [orders.o_d_id, orders.o_w_id, orders.o_c_id, orders.o_id], stream_scan_type: ArrangementBackfill, stream_key: [orders.o_w_id, orders.o_d_id, orders.o_id], pk: [o_w_id, o_d_id, o_id], dist: UpstreamHashShard(orders.o_w_id, orders.o_d_id, orders.o_id) } └─StreamExchange { dist: Broadcast } - └─StreamProject { exprs: 
[(sum(sum(customer.c_balance)) / sum0(count(customer.c_balance))::Decimal) as $expr1], noop_update_hint: true } + └─StreamProject { exprs: [(sum(sum(customer.c_balance)) / sum0(count(customer.c_balance))::Decimal) as $expr1] } └─StreamSimpleAgg { aggs: [sum(sum(customer.c_balance)), sum0(count(customer.c_balance)), count] } └─StreamExchange { dist: Single } └─StreamStatelessSimpleAgg { aggs: [sum(customer.c_balance), count(customer.c_balance)] } diff --git a/src/frontend/planner_test/tests/testdata/output/cse_expr.yaml b/src/frontend/planner_test/tests/testdata/output/cse_expr.yaml index ce97db3c0d33e..abbc0aae184e0 100644 --- a/src/frontend/planner_test/tests/testdata/output/cse_expr.yaml +++ b/src/frontend/planner_test/tests/testdata/output/cse_expr.yaml @@ -74,7 +74,7 @@ └─StreamProject { exprs: [Sqrt($expr5) as $expr6, Case((sum0(count(t.v)) <= 1:Int64), null:Decimal, Sqrt(($expr4 / (sum0(count(t.v)) - 1:Int64)::Decimal))) as $expr7, $expr5, Case((sum0(count(t.v)) <= 1:Int64), null:Decimal, ($expr4 / (sum0(count(t.v)) - 1:Int64)::Decimal)) as $expr8] } └─StreamProject { exprs: [sum(sum($expr1)), sum(sum(t.v)), sum0(count(t.v)), ($expr4 / $expr3) as $expr5, $expr4] } └─StreamProject { exprs: [sum(sum($expr1)), sum(sum(t.v)), sum0(count(t.v)), (sum(sum($expr1))::Decimal - (($expr2 * $expr2) / $expr3)) as $expr4, $expr3] } - └─StreamProject { exprs: [sum(sum($expr1)), sum(sum(t.v)), sum0(count(t.v)), sum(sum(t.v))::Decimal as $expr2, sum0(count(t.v))::Decimal as $expr3], noop_update_hint: true } + └─StreamProject { exprs: [sum(sum($expr1)), sum(sum(t.v)), sum0(count(t.v)), sum(sum(t.v))::Decimal as $expr2, sum0(count(t.v))::Decimal as $expr3] } └─StreamSimpleAgg { aggs: [sum(sum($expr1)), sum(sum(t.v)), sum0(count(t.v)), count] } └─StreamExchange { dist: Single } └─StreamStatelessSimpleAgg { aggs: [sum($expr1), sum(t.v), count(t.v)] } diff --git a/src/frontend/planner_test/tests/testdata/output/dynamic_filter.yaml b/src/frontend/planner_test/tests/testdata/output/dynamic_filter.yaml index 922723851944b..89aea24f2bd80 100644 --- a/src/frontend/planner_test/tests/testdata/output/dynamic_filter.yaml +++ b/src/frontend/planner_test/tests/testdata/output/dynamic_filter.yaml @@ -18,7 +18,7 @@ └─StreamDynamicFilter { predicate: (t1.v1 > max(max(t2.v2))), output: [t1.v1, t1._row_id] } ├─StreamTableScan { table: t1, columns: [t1.v1, t1._row_id], stream_scan_type: ArrangementBackfill, stream_key: [t1._row_id], pk: [_row_id], dist: UpstreamHashShard(t1._row_id) } └─StreamExchange { dist: Broadcast } - └─StreamProject { exprs: [max(max(t2.v2))], noop_update_hint: true } + └─StreamProject { exprs: [max(max(t2.v2))] } └─StreamSimpleAgg { aggs: [max(max(t2.v2)), count] } └─StreamExchange { dist: Single } └─StreamHashAgg { group_key: [_vnode], aggs: [max(t2.v2), count] } @@ -77,7 +77,7 @@ ├─StreamProject { exprs: [t1.v1, (t1.v1 + t1.v1) as $expr1, t1._row_id] } │ └─StreamTableScan { table: t1, columns: [t1.v1, t1._row_id], stream_scan_type: ArrangementBackfill, stream_key: [t1._row_id], pk: [_row_id], dist: UpstreamHashShard(t1._row_id) } └─StreamExchange { dist: Broadcast } - └─StreamProject { exprs: [max(max(t2.v2))], noop_update_hint: true } + └─StreamProject { exprs: [max(max(t2.v2))] } └─StreamSimpleAgg { aggs: [max(max(t2.v2)), count] } └─StreamExchange { dist: Single } └─StreamHashAgg { group_key: [_vnode], aggs: [max(t2.v2), count] } @@ -129,7 +129,7 @@ ├─StreamExchange { dist: HashShard(t1.v1) } │ └─StreamTableScan { table: t1, columns: [t1.v1, t1._row_id], stream_scan_type: ArrangementBackfill, 
stream_key: [t1._row_id], pk: [_row_id], dist: UpstreamHashShard(t1._row_id) } └─StreamExchange { dist: HashShard(max(max(t2.v2))) } - └─StreamProject { exprs: [max(max(t2.v2))], noop_update_hint: true } + └─StreamProject { exprs: [max(max(t2.v2))] } └─StreamSimpleAgg { aggs: [max(max(t2.v2)), count] } └─StreamExchange { dist: Single } └─StreamHashAgg { group_key: [_vnode], aggs: [max(t2.v2), count] } @@ -153,7 +153,7 @@ ├─StreamProject { exprs: [t1.v1, t1.v1::Int64 as $expr1, t1._row_id] } │ └─StreamTableScan { table: t1, columns: [t1.v1, t1._row_id], stream_scan_type: ArrangementBackfill, stream_key: [t1._row_id], pk: [_row_id], dist: UpstreamHashShard(t1._row_id) } └─StreamExchange { dist: Broadcast } - └─StreamProject { exprs: [max(max(t2.v2))], noop_update_hint: true } + └─StreamProject { exprs: [max(max(t2.v2))] } └─StreamSimpleAgg { aggs: [max(max(t2.v2)), count] } └─StreamExchange { dist: Single } └─StreamHashAgg { group_key: [_vnode], aggs: [max(t2.v2), count] } @@ -169,7 +169,7 @@ └─StreamDynamicFilter { predicate: (t1.v1 > max(max(t2.v2))), output: [t1.v1, t1._row_id] } ├─StreamTableScan { table: t1, columns: [t1.v1, t1._row_id], stream_scan_type: ArrangementBackfill, stream_key: [t1._row_id], pk: [_row_id], dist: UpstreamHashShard(t1._row_id) } └─StreamExchange { dist: Broadcast } - └─StreamProject { exprs: [max(max(t2.v2))], noop_update_hint: true } + └─StreamProject { exprs: [max(max(t2.v2))] } └─StreamSimpleAgg { aggs: [max(max(t2.v2)), count] } └─StreamExchange { dist: Single } └─StreamHashAgg { group_key: [_vnode], aggs: [max(t2.v2), count] } @@ -191,7 +191,7 @@ └─StreamDynamicFilter { predicate: (t1.v1 > $expr1), output: [t1.v1, t1._row_id] } ├─StreamTableScan { table: t1, columns: [t1.v1, t1._row_id], stream_scan_type: ArrangementBackfill, stream_key: [t1._row_id], pk: [_row_id], dist: UpstreamHashShard(t1._row_id) } └─StreamExchange { dist: Broadcast } - └─StreamProject { exprs: [(2:Int32 * max(max(t2.v2))) as $expr1], noop_update_hint: true } + └─StreamProject { exprs: [(2:Int32 * max(max(t2.v2))) as $expr1] } └─StreamSimpleAgg { aggs: [max(max(t2.v2)), count] } └─StreamExchange { dist: Single } └─StreamHashAgg { group_key: [_vnode], aggs: [max(t2.v2), count] } @@ -220,7 +220,7 @@ │ ├─StreamTableScan { table: t1, columns: [t1.v1, t1._row_id], stream_scan_type: ArrangementBackfill, stream_key: [t1._row_id], pk: [_row_id], dist: UpstreamHashShard(t1._row_id) } │ └─StreamExchange { dist: Broadcast } │ └─StreamShare { id: 6 } - │ └─StreamProject { exprs: [max(max(t2.v2))], noop_update_hint: true } + │ └─StreamProject { exprs: [max(max(t2.v2))] } │ └─StreamSimpleAgg { aggs: [max(max(t2.v2)), count] } │ └─StreamExchange { dist: Single } │ └─StreamHashAgg { group_key: [_vnode], aggs: [max(t2.v2), count] } @@ -229,7 +229,7 @@ └─StreamExchange { dist: Broadcast } └─StreamProject { exprs: [(max(max(t2.v2)) + 5:Int32) as $expr1] } └─StreamShare { id: 6 } - └─StreamProject { exprs: [max(max(t2.v2))], noop_update_hint: true } + └─StreamProject { exprs: [max(max(t2.v2))] } └─StreamSimpleAgg { aggs: [max(max(t2.v2)), count] } └─StreamExchange { dist: Single } └─StreamHashAgg { group_key: [_vnode], aggs: [max(t2.v2), count] } diff --git a/src/frontend/planner_test/tests/testdata/output/lateral_subquery.yaml b/src/frontend/planner_test/tests/testdata/output/lateral_subquery.yaml index e7a1951ffde54..815890d6a73b8 100644 --- a/src/frontend/planner_test/tests/testdata/output/lateral_subquery.yaml +++ b/src/frontend/planner_test/tests/testdata/output/lateral_subquery.yaml @@ -180,7 +180,7 
@@ where path_val = t1.id; stream_plan: |- StreamMaterialize { columns: [array_agg], stream_key: [], pk_columns: [], pk_conflict: NoCheck } - └─StreamProject { exprs: [array_agg(t1.n order_by($expr1 ASC))], noop_update_hint: true } + └─StreamProject { exprs: [array_agg(t1.n order_by($expr1 ASC))] } └─StreamSimpleAgg { aggs: [array_agg(t1.n order_by($expr1 ASC)), count] } └─StreamExchange { dist: Single } └─StreamProject { exprs: [t1.n, (projected_row_id + 1:Int64) as $expr1, t1._row_id, t2.p, t2.p, t2.d, t2.d, projected_row_id, t1.id, t2._row_id] } diff --git a/src/frontend/planner_test/tests/testdata/output/limit.yaml b/src/frontend/planner_test/tests/testdata/output/limit.yaml index 500dbe1dd5824..22fb2add9d30c 100644 --- a/src/frontend/planner_test/tests/testdata/output/limit.yaml +++ b/src/frontend/planner_test/tests/testdata/output/limit.yaml @@ -131,7 +131,7 @@ stream_plan: |- StreamMaterialize { columns: [count], stream_key: [], pk_columns: [], pk_conflict: NoCheck } └─StreamTopN { order: [sum0(count) ASC], limit: 1, offset: 0 } - └─StreamProject { exprs: [sum0(count)], noop_update_hint: true } + └─StreamProject { exprs: [sum0(count)] } └─StreamSimpleAgg { aggs: [sum0(count), count] } └─StreamExchange { dist: Single } └─StreamStatelessSimpleAgg { aggs: [count] } @@ -154,7 +154,7 @@ stream_plan: |- StreamMaterialize { columns: [count], stream_key: [], pk_columns: [], pk_conflict: NoCheck } └─StreamTopN { order: [sum0(count) ASC], limit: 1, offset: 0 } - └─StreamProject { exprs: [sum0(count)], noop_update_hint: true } + └─StreamProject { exprs: [sum0(count)] } └─StreamSimpleAgg { aggs: [sum0(count), count] } └─StreamExchange { dist: Single } └─StreamStatelessSimpleAgg { aggs: [count] } diff --git a/src/frontend/planner_test/tests/testdata/output/mv_column_name.yaml b/src/frontend/planner_test/tests/testdata/output/mv_column_name.yaml index 91352992bb17a..3db4034336315 100644 --- a/src/frontend/planner_test/tests/testdata/output/mv_column_name.yaml +++ b/src/frontend/planner_test/tests/testdata/output/mv_column_name.yaml @@ -63,7 +63,7 @@ select count(*), max(a) from t; stream_plan: |- StreamMaterialize { columns: [count, max], stream_key: [], pk_columns: [], pk_conflict: NoCheck } - └─StreamProject { exprs: [sum0(count), max(max(t.a))], noop_update_hint: true } + └─StreamProject { exprs: [sum0(count), max(max(t.a))] } └─StreamSimpleAgg { aggs: [sum0(count), max(max(t.a)), count] } └─StreamExchange { dist: Single } └─StreamHashAgg { group_key: [_vnode], aggs: [count, max(t.a)] } diff --git a/src/frontend/planner_test/tests/testdata/output/nexmark.yaml b/src/frontend/planner_test/tests/testdata/output/nexmark.yaml index 1ea7349b24769..d6b90da0a8c1a 100644 --- a/src/frontend/planner_test/tests/testdata/output/nexmark.yaml +++ b/src/frontend/planner_test/tests/testdata/output/nexmark.yaml @@ -1891,7 +1891,7 @@ │ └─StreamExchange { dist: HashShard(bid.auction) } │ └─StreamTableScan { table: bid, columns: [bid.auction, bid._row_id], stream_scan_type: ArrangementBackfill, stream_key: [bid._row_id], pk: [_row_id], dist: UpstreamHashShard(bid._row_id) } └─StreamExchange { dist: Broadcast } - └─StreamProject { exprs: [(sum0(sum0(count)) / sum0(count(bid.auction))) as $expr1], noop_update_hint: true } + └─StreamProject { exprs: [(sum0(sum0(count)) / sum0(count(bid.auction))) as $expr1] } └─StreamSimpleAgg { aggs: [sum0(sum0(count)), sum0(count(bid.auction)), count] } └─StreamExchange { dist: Single } └─StreamStatelessSimpleAgg { aggs: [sum0(count), count(bid.auction)] } @@ -1926,7 +1926,7 @@ └── 
BatchPlanNode Fragment 3 - StreamProject { exprs: [(sum0(sum0(count)) / sum0(count(bid.auction))) as $expr1], noop_update_hint: true } + StreamProject { exprs: [(sum0(sum0(count)) / sum0(count(bid.auction))) as $expr1] } └── StreamSimpleAgg { aggs: [sum0(sum0(count)), sum0(count(bid.auction)), count] } { tables: [ SimpleAggState: 9 ] } └── StreamExchange Single from 4 @@ -2331,7 +2331,7 @@ └─BatchScan { table: bid, columns: [bid.auction, bid.price, bid.date_time], distribution: SomeShard } stream_plan: |- StreamMaterialize { columns: [min_final], stream_key: [], pk_columns: [], pk_conflict: NoCheck } - └─StreamProject { exprs: [min(min(max(bid.price)))], noop_update_hint: true } + └─StreamProject { exprs: [min(min(max(bid.price)))] } └─StreamSimpleAgg { aggs: [min(min(max(bid.price))), count] } └─StreamExchange { dist: Single } └─StreamHashAgg { group_key: [$expr1], aggs: [min(max(bid.price)), count] } @@ -2348,7 +2348,7 @@ Fragment 0 StreamMaterialize { columns: [min_final], stream_key: [], pk_columns: [], pk_conflict: NoCheck } ├── tables: [ Materialize: 4294967294 ] - └── StreamProject { exprs: [min(min(max(bid.price)))], noop_update_hint: true } + └── StreamProject { exprs: [min(min(max(bid.price)))] } └── StreamSimpleAgg { aggs: [min(min(max(bid.price))), count] } ├── tables: [ SimpleAggState: 1, SimpleAggCall0: 0 ] └── StreamExchange Single from 1 diff --git a/src/frontend/planner_test/tests/testdata/output/nexmark_source.yaml b/src/frontend/planner_test/tests/testdata/output/nexmark_source.yaml index 15e1647721d53..35713c9682a35 100644 --- a/src/frontend/planner_test/tests/testdata/output/nexmark_source.yaml +++ b/src/frontend/planner_test/tests/testdata/output/nexmark_source.yaml @@ -1878,7 +1878,7 @@ │ └─StreamRowIdGen { row_id_index: 7 } │ └─StreamSource { source: bid, columns: [auction, bidder, price, channel, url, date_time, extra, _row_id] } └─StreamExchange { dist: Broadcast } - └─StreamProject { exprs: [(sum0(sum0(count)) / sum0(count(auction))) as $expr1], noop_update_hint: true } + └─StreamProject { exprs: [(sum0(sum0(count)) / sum0(count(auction))) as $expr1] } └─StreamSimpleAgg { aggs: [sum0(sum0(count)), sum0(count(auction)), count] } └─StreamExchange { dist: Single } └─StreamStatelessSimpleAgg { aggs: [sum0(count), count(auction)] } @@ -1915,7 +1915,7 @@ └── StreamSource { source: bid, columns: [auction, bidder, price, channel, url, date_time, extra, _row_id] } { tables: [ Source: 8 ] } Fragment 4 - StreamProject { exprs: [(sum0(sum0(count)) / sum0(count(auction))) as $expr1], noop_update_hint: true } + StreamProject { exprs: [(sum0(sum0(count)) / sum0(count(auction))) as $expr1] } └── StreamSimpleAgg { aggs: [sum0(sum0(count)), sum0(count(auction)), count] } { tables: [ SimpleAggState: 9 ] } └── StreamExchange Single from 5 @@ -2277,7 +2277,7 @@ └─BatchSource { source: bid, columns: [auction, bidder, price, channel, url, date_time, extra, _row_id] } stream_plan: |- StreamMaterialize { columns: [min_final], stream_key: [], pk_columns: [], pk_conflict: NoCheck } - └─StreamProject { exprs: [min(min(max(price)))], noop_update_hint: true } + └─StreamProject { exprs: [min(min(max(price)))] } └─StreamSimpleAgg { aggs: [min(min(max(price))), count] } └─StreamExchange { dist: Single } └─StreamHashAgg { group_key: [$expr1], aggs: [min(max(price)), count] } @@ -2296,7 +2296,7 @@ Fragment 0 StreamMaterialize { columns: [min_final], stream_key: [], pk_columns: [], pk_conflict: NoCheck } ├── tables: [ Materialize: 4294967294 ] - └── StreamProject { exprs: [min(min(max(price)))], 
noop_update_hint: true } + └── StreamProject { exprs: [min(min(max(price)))] } └── StreamSimpleAgg { aggs: [min(min(max(price))), count] } ├── tables: [ SimpleAggState: 1, SimpleAggCall0: 0 ] └── StreamExchange Single from 1 diff --git a/src/frontend/planner_test/tests/testdata/output/nexmark_temporal_filter.yaml b/src/frontend/planner_test/tests/testdata/output/nexmark_temporal_filter.yaml index 0658030573dd1..d5d948e5b507c 100644 --- a/src/frontend/planner_test/tests/testdata/output/nexmark_temporal_filter.yaml +++ b/src/frontend/planner_test/tests/testdata/output/nexmark_temporal_filter.yaml @@ -1517,7 +1517,7 @@ │ └─StreamProject { exprs: [SubtractWithTimeZone(now, '00:05:00':Interval, 'UTC':Varchar) as $expr4], output_watermarks: [$expr4] } │ └─StreamNow { output: [now] } └─StreamExchange { dist: Broadcast } - └─StreamProject { exprs: [(sum0(sum0(count)) / sum0(count($expr5))) as $expr6], noop_update_hint: true } + └─StreamProject { exprs: [(sum0(sum0(count)) / sum0(count($expr5))) as $expr6] } └─StreamSimpleAgg { aggs: [sum0(sum0(count)), sum0(count($expr5)), count] } └─StreamExchange { dist: Single } └─StreamStatelessSimpleAgg { aggs: [sum0(count), count($expr5)] } @@ -1578,7 +1578,7 @@ └── StreamNow { output: [now] } { tables: [ Now: 10 ] } Fragment 6 - StreamProject { exprs: [(sum0(sum0(count)) / sum0(count($expr5))) as $expr6], noop_update_hint: true } + StreamProject { exprs: [(sum0(sum0(count)) / sum0(count($expr5))) as $expr6] } └── StreamSimpleAgg { aggs: [sum0(sum0(count)), sum0(count($expr5)), count] } { tables: [ SimpleAggState: 11 ] } └── StreamExchange Single from 7 @@ -2000,7 +2000,7 @@ ) stream_plan: |- StreamMaterialize { columns: [min_final], stream_key: [], pk_columns: [], pk_conflict: NoCheck } - └─StreamProject { exprs: [min(min(max($expr7)))], noop_update_hint: true } + └─StreamProject { exprs: [min(min(max($expr7)))] } └─StreamSimpleAgg { aggs: [min(min(max($expr7))), count] } └─StreamExchange { dist: Single } └─StreamHashAgg { group_key: [$expr9], aggs: [min(max($expr7)), count] } @@ -2035,7 +2035,7 @@ Fragment 0 StreamMaterialize { columns: [min_final], stream_key: [], pk_columns: [], pk_conflict: NoCheck } ├── tables: [ Materialize: 4294967294 ] - └── StreamProject { exprs: [min(min(max($expr7)))], noop_update_hint: true } + └── StreamProject { exprs: [min(min(max($expr7)))] } └── StreamSimpleAgg { aggs: [min(min(max($expr7))), count] } ├── tables: [ SimpleAggState: 1, SimpleAggCall0: 0 ] └── StreamExchange Single from 1 diff --git a/src/frontend/planner_test/tests/testdata/output/nexmark_watermark.yaml b/src/frontend/planner_test/tests/testdata/output/nexmark_watermark.yaml index c577b72eaafd6..f065ba33c252d 100644 --- a/src/frontend/planner_test/tests/testdata/output/nexmark_watermark.yaml +++ b/src/frontend/planner_test/tests/testdata/output/nexmark_watermark.yaml @@ -2059,7 +2059,7 @@ │ └─StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] } │ └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] } └─StreamExchange { dist: Broadcast } - └─StreamProject { exprs: [(sum0(sum0(count)) / sum0(count($expr4))) as $expr5], noop_update_hint: true } + └─StreamProject { exprs: [(sum0(sum0(count)) / sum0(count($expr4))) as $expr5] } └─StreamSimpleAgg { aggs: [sum0(sum0(count)), sum0(count($expr4)), count] } └─StreamExchange { dist: Single } └─StreamStatelessSimpleAgg { aggs: [sum0(count), 
count($expr4)] } @@ -2111,7 +2111,7 @@ └── StreamExchange NoShuffle from 2 Fragment 5 - StreamProject { exprs: [(sum0(sum0(count)) / sum0(count($expr4))) as $expr5], noop_update_hint: true } + StreamProject { exprs: [(sum0(sum0(count)) / sum0(count($expr4))) as $expr5] } └── StreamSimpleAgg { aggs: [sum0(sum0(count)), sum0(count($expr4)), count] } { tables: [ SimpleAggState: 9 ] } └── StreamExchange Single from 6 @@ -2533,7 +2533,7 @@ └─BatchSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] } stream_plan: |- StreamMaterialize { columns: [min_final], stream_key: [], pk_columns: [], pk_conflict: NoCheck } - └─StreamProject { exprs: [min(min(max($expr5)))], noop_update_hint: true } + └─StreamProject { exprs: [min(min(max($expr5)))] } └─StreamSimpleAgg { aggs: [min(min(max($expr5))), count] } └─StreamExchange { dist: Single } └─StreamHashAgg { group_key: [$expr6], aggs: [min(max($expr5)), count] } @@ -2564,7 +2564,7 @@ Fragment 0 StreamMaterialize { columns: [min_final], stream_key: [], pk_columns: [], pk_conflict: NoCheck } ├── tables: [ Materialize: 4294967294 ] - └── StreamProject { exprs: [min(min(max($expr5)))], noop_update_hint: true } + └── StreamProject { exprs: [min(min(max($expr5)))] } └── StreamSimpleAgg { aggs: [min(min(max($expr5))), count] } ├── tables: [ SimpleAggState: 1, SimpleAggCall0: 0 ] └── StreamExchange Single from 1 diff --git a/src/frontend/planner_test/tests/testdata/output/share.yaml b/src/frontend/planner_test/tests/testdata/output/share.yaml index 7962e4724f347..2cf3aee9fe043 100644 --- a/src/frontend/planner_test/tests/testdata/output/share.yaml +++ b/src/frontend/planner_test/tests/testdata/output/share.yaml @@ -33,7 +33,7 @@ └─BatchSource { source: auction, columns: [id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, extra, _row_id] } stream_plan: |- StreamMaterialize { columns: [cnt], stream_key: [], pk_columns: [], pk_conflict: NoCheck } - └─StreamProject { exprs: [sum0(count)], noop_update_hint: true } + └─StreamProject { exprs: [sum0(count)] } └─StreamSimpleAgg [append_only] { aggs: [sum0(count), count] } └─StreamExchange { dist: Single } └─StreamStatelessSimpleAgg { aggs: [count] } @@ -155,7 +155,7 @@ ├─StreamExchange { dist: HashShard(0:Int32) } │ └─StreamProject { exprs: [sum0(count), 0:Int32] } │ └─StreamShare { id: 5 } - │ └─StreamProject { exprs: [sum0(count)], noop_update_hint: true } + │ └─StreamProject { exprs: [sum0(count)] } │ └─StreamSimpleAgg { aggs: [sum0(count), count] } │ └─StreamExchange { dist: Single } │ └─StreamStatelessSimpleAgg { aggs: [count] } @@ -163,7 +163,7 @@ └─StreamExchange { dist: HashShard(1:Int32) } └─StreamProject { exprs: [sum0(count), 1:Int32] } └─StreamShare { id: 5 } - └─StreamProject { exprs: [sum0(count)], noop_update_hint: true } + └─StreamProject { exprs: [sum0(count)] } └─StreamSimpleAgg { aggs: [sum0(count), count] } └─StreamExchange { dist: Single } └─StreamStatelessSimpleAgg { aggs: [count] } @@ -176,13 +176,13 @@ StreamMaterialize { columns: [count, $src(hidden)], stream_key: [$src], pk_columns: [$src], pk_conflict: NoCheck } └─StreamUnion { all: true } ├─StreamExchange { dist: HashShard(0:Int32) } - │ └─StreamProject { exprs: [sum0(count), 0:Int32], noop_update_hint: true } + │ └─StreamProject { exprs: [sum0(count), 0:Int32] } │ └─StreamSimpleAgg { aggs: [sum0(count), count] } │ └─StreamExchange { dist: Single } │ └─StreamStatelessSimpleAgg { aggs: [count] } │ └─StreamTableScan { table: t, columns: [t._row_id], stream_scan_type: 
ArrangementBackfill, stream_key: [t._row_id], pk: [_row_id], dist: UpstreamHashShard(t._row_id) } └─StreamExchange { dist: HashShard(1:Int32) } - └─StreamProject { exprs: [sum0(count), 1:Int32], noop_update_hint: true } + └─StreamProject { exprs: [sum0(count), 1:Int32] } └─StreamSimpleAgg { aggs: [sum0(count), count] } └─StreamExchange { dist: Single } └─StreamStatelessSimpleAgg { aggs: [count] } @@ -195,7 +195,7 @@ select count(*) cnt from auction A join auction B on A.id = B.id; stream_plan: |- StreamMaterialize { columns: [cnt], stream_key: [], pk_columns: [], pk_conflict: NoCheck } - └─StreamProject { exprs: [sum0(count)], noop_update_hint: true } + └─StreamProject { exprs: [sum0(count)] } └─StreamSimpleAgg [append_only] { aggs: [sum0(count), count] } └─StreamExchange { dist: Single } └─StreamStatelessSimpleAgg { aggs: [count] } @@ -216,7 +216,7 @@ with cte as (select a, sum(b) sum from t group by a) select count(*) from cte c1 join cte c2 on c1.a = c2.a; stream_plan: |- StreamMaterialize { columns: [count], stream_key: [], pk_columns: [], pk_conflict: NoCheck } - └─StreamProject { exprs: [sum0(count)], noop_update_hint: true } + └─StreamProject { exprs: [sum0(count)] } └─StreamSimpleAgg { aggs: [sum0(count), count] } └─StreamExchange { dist: Single } └─StreamStatelessSimpleAgg { aggs: [count] } @@ -235,7 +235,7 @@ Fragment 0 StreamMaterialize { columns: [count], stream_key: [], pk_columns: [], pk_conflict: NoCheck } ├── tables: [ Materialize: 4294967294 ] - └── StreamProject { exprs: [sum0(count)], noop_update_hint: true } + └── StreamProject { exprs: [sum0(count)] } └── StreamSimpleAgg { aggs: [sum0(count), count] } { tables: [ SimpleAggState: 0 ] } └── StreamExchange Single from 1 diff --git a/src/frontend/planner_test/tests/testdata/output/stream_dist_agg.yaml b/src/frontend/planner_test/tests/testdata/output/stream_dist_agg.yaml index 0b7d7d7f2f2bf..48caec86bd940 100644 --- a/src/frontend/planner_test/tests/testdata/output/stream_dist_agg.yaml +++ b/src/frontend/planner_test/tests/testdata/output/stream_dist_agg.yaml @@ -17,13 +17,13 @@ └─BatchScan { table: s, columns: [s.v], distribution: Single } stream_plan: |- StreamMaterialize { columns: [a1], stream_key: [], pk_columns: [], pk_conflict: NoCheck } - └─StreamProject { exprs: [max(s.v)], noop_update_hint: true } + └─StreamProject { exprs: [max(s.v)] } └─StreamSimpleAgg { aggs: [max(s.v), count] } └─StreamTableScan { table: s, columns: [s.v, s.o, s.t._row_id], stream_scan_type: ArrangementBackfill, stream_key: [s.t._row_id], pk: [o, t._row_id], dist: Single } stream_dist_plan: |+ Fragment 0 StreamMaterialize { columns: [a1], stream_key: [], pk_columns: [], pk_conflict: NoCheck } { tables: [ Materialize: 4294967294 ] } - └── StreamProject { exprs: [max(s.v)], noop_update_hint: true } + └── StreamProject { exprs: [max(s.v)] } └── StreamSimpleAgg { aggs: [max(s.v), count] } { tables: [ SimpleAggState: 1, SimpleAggCall0: 0 ] } └── StreamTableScan { table: s, columns: [s.v, s.o, s.t._row_id], stream_scan_type: ArrangementBackfill, stream_key: [s.t._row_id], pk: [o, t._row_id], dist: Single } ├── tables: [ StreamScan: 2 ] @@ -55,13 +55,13 @@ └─BatchScan { table: s, columns: [s.v], distribution: Single } stream_plan: |- StreamMaterialize { columns: [a1], stream_key: [], pk_columns: [], pk_conflict: NoCheck } - └─StreamProject { exprs: [sum(s.v)], noop_update_hint: true } + └─StreamProject { exprs: [sum(s.v)] } └─StreamSimpleAgg { aggs: [sum(s.v), count] } └─StreamTableScan { table: s, columns: [s.v, s.o, s.t._row_id], stream_scan_type: 
ArrangementBackfill, stream_key: [s.t._row_id], pk: [o, t._row_id], dist: Single } stream_dist_plan: |+ Fragment 0 StreamMaterialize { columns: [a1], stream_key: [], pk_columns: [], pk_conflict: NoCheck } { tables: [ Materialize: 4294967294 ] } - └── StreamProject { exprs: [sum(s.v)], noop_update_hint: true } + └── StreamProject { exprs: [sum(s.v)] } └── StreamSimpleAgg { aggs: [sum(s.v), count] } { tables: [ SimpleAggState: 0 ] } └── StreamTableScan { table: s, columns: [s.v, s.o, s.t._row_id], stream_scan_type: ArrangementBackfill, stream_key: [s.t._row_id], pk: [o, t._row_id], dist: Single } ├── tables: [ StreamScan: 1 ] @@ -91,13 +91,13 @@ └─BatchScan { table: s, columns: [s.v], distribution: Single } stream_plan: |- StreamMaterialize { columns: [a1], stream_key: [], pk_columns: [], pk_conflict: NoCheck } - └─StreamProject { exprs: [count(s.v)], noop_update_hint: true } + └─StreamProject { exprs: [count(s.v)] } └─StreamSimpleAgg { aggs: [count(s.v), count] } └─StreamTableScan { table: s, columns: [s.v, s.o, s.t._row_id], stream_scan_type: ArrangementBackfill, stream_key: [s.t._row_id], pk: [o, t._row_id], dist: Single } stream_dist_plan: |+ Fragment 0 StreamMaterialize { columns: [a1], stream_key: [], pk_columns: [], pk_conflict: NoCheck } { tables: [ Materialize: 4294967294 ] } - └── StreamProject { exprs: [count(s.v)], noop_update_hint: true } + └── StreamProject { exprs: [count(s.v)] } └── StreamSimpleAgg { aggs: [count(s.v), count] } { tables: [ SimpleAggState: 0 ] } └── StreamTableScan { table: s, columns: [s.v, s.o, s.t._row_id], stream_scan_type: ArrangementBackfill, stream_key: [s.t._row_id], pk: [o, t._row_id], dist: Single } ├── tables: [ StreamScan: 1 ] @@ -128,14 +128,14 @@ └─BatchScan { table: s, columns: [s.v, s.s], distribution: Single } stream_plan: |- StreamMaterialize { columns: [a1], stream_key: [], pk_columns: [], pk_conflict: NoCheck } - └─StreamProject { exprs: [string_agg(s.s, ',':Varchar order_by(s.v ASC))], noop_update_hint: true } + └─StreamProject { exprs: [string_agg(s.s, ',':Varchar order_by(s.v ASC))] } └─StreamSimpleAgg { aggs: [string_agg(s.s, ',':Varchar order_by(s.v ASC)), count] } └─StreamProject { exprs: [s.s, ',':Varchar, s.v, s.t._row_id] } └─StreamTableScan { table: s, columns: [s.v, s.s, s.o, s.t._row_id], stream_scan_type: ArrangementBackfill, stream_key: [s.t._row_id], pk: [o, t._row_id], dist: Single } stream_dist_plan: |+ Fragment 0 StreamMaterialize { columns: [a1], stream_key: [], pk_columns: [], pk_conflict: NoCheck } { tables: [ Materialize: 4294967294 ] } - └── StreamProject { exprs: [string_agg(s.s, ',':Varchar order_by(s.v ASC))], noop_update_hint: true } + └── StreamProject { exprs: [string_agg(s.s, ',':Varchar order_by(s.v ASC))] } └── StreamSimpleAgg { aggs: [string_agg(s.s, ',':Varchar order_by(s.v ASC)), count] } { tables: [ SimpleAggState: 1, SimpleAggCall0: 0 ] } └── StreamProject { exprs: [s.s, ',':Varchar, s.v, s.t._row_id] } └── StreamTableScan { table: s, columns: [s.v, s.s, s.o, s.t._row_id], stream_scan_type: ArrangementBackfill, stream_key: [s.t._row_id], pk: [o, t._row_id], dist: Single } @@ -169,7 +169,7 @@ └─BatchScan { table: t, columns: [t.v], distribution: SomeShard } stream_plan: |- StreamMaterialize { columns: [a1], stream_key: [], pk_columns: [], pk_conflict: NoCheck } - └─StreamProject { exprs: [max(max(t.v))], noop_update_hint: true } + └─StreamProject { exprs: [max(max(t.v))] } └─StreamSimpleAgg { aggs: [max(max(t.v)), count] } └─StreamExchange { dist: Single } └─StreamHashAgg { group_key: [_vnode], aggs: 
[max(t.v), count] } @@ -179,7 +179,7 @@ Fragment 0 StreamMaterialize { columns: [a1], stream_key: [], pk_columns: [], pk_conflict: NoCheck } ├── tables: [ Materialize: 4294967294 ] - └── StreamProject { exprs: [max(max(t.v))], noop_update_hint: true } + └── StreamProject { exprs: [max(max(t.v))] } └── StreamSimpleAgg { aggs: [max(max(t.v)), count] } ├── tables: [ SimpleAggState: 1, SimpleAggCall0: 0 ] └── StreamExchange Single from 1 @@ -223,7 +223,7 @@ select max(v) as a1 from AO; stream_plan: |- StreamMaterialize { columns: [a1], stream_key: [], pk_columns: [], pk_conflict: NoCheck } - └─StreamProject { exprs: [max(max(ao.v))], noop_update_hint: true } + └─StreamProject { exprs: [max(max(ao.v))] } └─StreamSimpleAgg [append_only] { aggs: [max(max(ao.v)), count] } └─StreamExchange { dist: Single } └─StreamStatelessSimpleAgg { aggs: [max(ao.v)] } @@ -232,7 +232,7 @@ Fragment 0 StreamMaterialize { columns: [a1], stream_key: [], pk_columns: [], pk_conflict: NoCheck } ├── tables: [ Materialize: 4294967294 ] - └── StreamProject { exprs: [max(max(ao.v))], noop_update_hint: true } + └── StreamProject { exprs: [max(max(ao.v))] } └── StreamSimpleAgg [append_only] { aggs: [max(max(ao.v)), count] } ├── tables: [ SimpleAggState: 0 ] └── StreamExchange Single from 1 @@ -268,7 +268,7 @@ └─BatchScan { table: t, columns: [t.v], distribution: SomeShard } stream_plan: |- StreamMaterialize { columns: [a1], stream_key: [], pk_columns: [], pk_conflict: NoCheck } - └─StreamProject { exprs: [sum(sum(t.v))], noop_update_hint: true } + └─StreamProject { exprs: [sum(sum(t.v))] } └─StreamSimpleAgg { aggs: [sum(sum(t.v)), count] } └─StreamExchange { dist: Single } └─StreamStatelessSimpleAgg { aggs: [sum(t.v)] } @@ -277,7 +277,7 @@ Fragment 0 StreamMaterialize { columns: [a1], stream_key: [], pk_columns: [], pk_conflict: NoCheck } ├── tables: [ Materialize: 4294967294 ] - └── StreamProject { exprs: [sum(sum(t.v))], noop_update_hint: true } + └── StreamProject { exprs: [sum(sum(t.v))] } └── StreamSimpleAgg { aggs: [sum(sum(t.v)), count] } ├── tables: [ SimpleAggState: 0 ] └── StreamExchange Single from 1 @@ -308,7 +308,7 @@ select sum(v) as a1 from AO; stream_plan: |- StreamMaterialize { columns: [a1], stream_key: [], pk_columns: [], pk_conflict: NoCheck } - └─StreamProject { exprs: [sum(sum(ao.v))], noop_update_hint: true } + └─StreamProject { exprs: [sum(sum(ao.v))] } └─StreamSimpleAgg [append_only] { aggs: [sum(sum(ao.v)), count] } └─StreamExchange { dist: Single } └─StreamStatelessSimpleAgg { aggs: [sum(ao.v)] } @@ -317,7 +317,7 @@ Fragment 0 StreamMaterialize { columns: [a1], stream_key: [], pk_columns: [], pk_conflict: NoCheck } ├── tables: [ Materialize: 4294967294 ] - └── StreamProject { exprs: [sum(sum(ao.v))], noop_update_hint: true } + └── StreamProject { exprs: [sum(sum(ao.v))] } └── StreamSimpleAgg [append_only] { aggs: [sum(sum(ao.v)), count] } ├── tables: [ SimpleAggState: 0 ] └── StreamExchange Single from 1 @@ -353,7 +353,7 @@ └─BatchScan { table: t, columns: [t.v], distribution: SomeShard } stream_plan: |- StreamMaterialize { columns: [a1], stream_key: [], pk_columns: [], pk_conflict: NoCheck } - └─StreamProject { exprs: [sum0(count(t.v))], noop_update_hint: true } + └─StreamProject { exprs: [sum0(count(t.v))] } └─StreamSimpleAgg { aggs: [sum0(count(t.v)), count] } └─StreamExchange { dist: Single } └─StreamStatelessSimpleAgg { aggs: [count(t.v)] } @@ -362,7 +362,7 @@ Fragment 0 StreamMaterialize { columns: [a1], stream_key: [], pk_columns: [], pk_conflict: NoCheck } ├── tables: [ Materialize: 
4294967294 ] - └── StreamProject { exprs: [sum0(count(t.v))], noop_update_hint: true } + └── StreamProject { exprs: [sum0(count(t.v))] } └── StreamSimpleAgg { aggs: [sum0(count(t.v)), count] } ├── tables: [ SimpleAggState: 0 ] └── StreamExchange Single from 1 @@ -393,7 +393,7 @@ select count(v) as a1 from AO; stream_plan: |- StreamMaterialize { columns: [a1], stream_key: [], pk_columns: [], pk_conflict: NoCheck } - └─StreamProject { exprs: [sum0(count(ao.v))], noop_update_hint: true } + └─StreamProject { exprs: [sum0(count(ao.v))] } └─StreamSimpleAgg [append_only] { aggs: [sum0(count(ao.v)), count] } └─StreamExchange { dist: Single } └─StreamStatelessSimpleAgg { aggs: [count(ao.v)] } @@ -402,7 +402,7 @@ Fragment 0 StreamMaterialize { columns: [a1], stream_key: [], pk_columns: [], pk_conflict: NoCheck } ├── tables: [ Materialize: 4294967294 ] - └── StreamProject { exprs: [sum0(count(ao.v))], noop_update_hint: true } + └── StreamProject { exprs: [sum0(count(ao.v))] } └── StreamSimpleAgg [append_only] { aggs: [sum0(count(ao.v)), count] } ├── tables: [ SimpleAggState: 0 ] └── StreamExchange Single from 1 @@ -438,7 +438,7 @@ └─BatchScan { table: t, columns: [t.o, t.s], distribution: SomeShard } stream_plan: |- StreamMaterialize { columns: [a1], stream_key: [], pk_columns: [], pk_conflict: NoCheck } - └─StreamProject { exprs: [string_agg(t.s, ',':Varchar order_by(t.o ASC))], noop_update_hint: true } + └─StreamProject { exprs: [string_agg(t.s, ',':Varchar order_by(t.o ASC))] } └─StreamSimpleAgg { aggs: [string_agg(t.s, ',':Varchar order_by(t.o ASC)), count] } └─StreamExchange { dist: Single } └─StreamProject { exprs: [t.s, ',':Varchar, t.o, t._row_id] } @@ -447,7 +447,7 @@ Fragment 0 StreamMaterialize { columns: [a1], stream_key: [], pk_columns: [], pk_conflict: NoCheck } ├── tables: [ Materialize: 4294967294 ] - └── StreamProject { exprs: [string_agg(t.s, ',':Varchar order_by(t.o ASC))], noop_update_hint: true } + └── StreamProject { exprs: [string_agg(t.s, ',':Varchar order_by(t.o ASC))] } └── StreamSimpleAgg { aggs: [string_agg(t.s, ',':Varchar order_by(t.o ASC)), count] } ├── tables: [ SimpleAggState: 1, SimpleAggCall0: 0 ] └── StreamExchange Single from 1 @@ -480,7 +480,7 @@ select string_agg(s, ',' order by o) as a1 from AO; stream_plan: |- StreamMaterialize { columns: [a1], stream_key: [], pk_columns: [], pk_conflict: NoCheck } - └─StreamProject { exprs: [string_agg(ao.s, ',':Varchar order_by(ao.o ASC))], noop_update_hint: true } + └─StreamProject { exprs: [string_agg(ao.s, ',':Varchar order_by(ao.o ASC))] } └─StreamSimpleAgg [append_only] { aggs: [string_agg(ao.s, ',':Varchar order_by(ao.o ASC)), count] } └─StreamExchange { dist: Single } └─StreamProject { exprs: [ao.s, ',':Varchar, ao.o, ao._row_id] } @@ -489,7 +489,7 @@ Fragment 0 StreamMaterialize { columns: [a1], stream_key: [], pk_columns: [], pk_conflict: NoCheck } ├── tables: [ Materialize: 4294967294 ] - └── StreamProject { exprs: [string_agg(ao.s, ',':Varchar order_by(ao.o ASC))], noop_update_hint: true } + └── StreamProject { exprs: [string_agg(ao.s, ',':Varchar order_by(ao.o ASC))] } └── StreamSimpleAgg [append_only] { aggs: [string_agg(ao.s, ',':Varchar order_by(ao.o ASC)), count] } ├── tables: [ SimpleAggState: 1, SimpleAggCall0: 0 ] └── StreamExchange Single from 1 @@ -527,7 +527,7 @@ └─BatchScan { table: t, columns: [t.v], distribution: SomeShard } stream_plan: |- StreamMaterialize { columns: [a1, a2], stream_key: [], pk_columns: [], pk_conflict: NoCheck } - └─StreamProject { exprs: [max(max(t.v)), sum0(count(t.v))], 
noop_update_hint: true } + └─StreamProject { exprs: [max(max(t.v)), sum0(count(t.v))] } └─StreamSimpleAgg { aggs: [max(max(t.v)), sum0(count(t.v)), count] } └─StreamExchange { dist: Single } └─StreamHashAgg { group_key: [_vnode], aggs: [max(t.v), count(t.v), count] } @@ -537,7 +537,7 @@ Fragment 0 StreamMaterialize { columns: [a1, a2], stream_key: [], pk_columns: [], pk_conflict: NoCheck } ├── tables: [ Materialize: 4294967294 ] - └── StreamProject { exprs: [max(max(t.v)), sum0(count(t.v))], noop_update_hint: true } + └── StreamProject { exprs: [max(max(t.v)), sum0(count(t.v))] } └── StreamSimpleAgg { aggs: [max(max(t.v)), sum0(count(t.v)), count] } ├── tables: [ SimpleAggState: 1, SimpleAggCall0: 0 ] └── StreamExchange Single from 1 @@ -587,7 +587,7 @@ select max(v) as a1, count(v) as a2 from AO; stream_plan: |- StreamMaterialize { columns: [a1, a2], stream_key: [], pk_columns: [], pk_conflict: NoCheck } - └─StreamProject { exprs: [max(max(ao.v)), sum0(count(ao.v))], noop_update_hint: true } + └─StreamProject { exprs: [max(max(ao.v)), sum0(count(ao.v))] } └─StreamSimpleAgg [append_only] { aggs: [max(max(ao.v)), sum0(count(ao.v)), count] } └─StreamExchange { dist: Single } └─StreamStatelessSimpleAgg { aggs: [max(ao.v), count(ao.v)] } @@ -596,7 +596,7 @@ Fragment 0 StreamMaterialize { columns: [a1, a2], stream_key: [], pk_columns: [], pk_conflict: NoCheck } ├── tables: [ Materialize: 4294967294 ] - └── StreamProject { exprs: [max(max(ao.v)), sum0(count(ao.v))], noop_update_hint: true } + └── StreamProject { exprs: [max(max(ao.v)), sum0(count(ao.v))] } └── StreamSimpleAgg [append_only] { aggs: [max(max(ao.v)), sum0(count(ao.v)), count] } ├── tables: [ SimpleAggState: 0 ] └── StreamExchange Single from 1 @@ -632,7 +632,7 @@ └─BatchScan { table: t, columns: [t.v, t.o, t.s], distribution: SomeShard } stream_plan: |- StreamMaterialize { columns: [a1, a2], stream_key: [], pk_columns: [], pk_conflict: NoCheck } - └─StreamProject { exprs: [count(t.v), string_agg(t.s, ',':Varchar order_by(t.o ASC))], noop_update_hint: true } + └─StreamProject { exprs: [count(t.v), string_agg(t.s, ',':Varchar order_by(t.o ASC))] } └─StreamSimpleAgg { aggs: [count(t.v), string_agg(t.s, ',':Varchar order_by(t.o ASC)), count] } └─StreamExchange { dist: Single } └─StreamProject { exprs: [t.v, t.s, ',':Varchar, t.o, t._row_id] } @@ -641,7 +641,7 @@ Fragment 0 StreamMaterialize { columns: [a1, a2], stream_key: [], pk_columns: [], pk_conflict: NoCheck } ├── tables: [ Materialize: 4294967294 ] - └── StreamProject { exprs: [count(t.v), string_agg(t.s, ',':Varchar order_by(t.o ASC))], noop_update_hint: true } + └── StreamProject { exprs: [count(t.v), string_agg(t.s, ',':Varchar order_by(t.o ASC))] } └── StreamSimpleAgg { aggs: [count(t.v), string_agg(t.s, ',':Varchar order_by(t.o ASC)), count] } ├── tables: [ SimpleAggState: 1, SimpleAggCall1: 0 ] └── StreamExchange Single from 1 @@ -679,7 +679,7 @@ select count(v) as a1, string_agg(s, ',' order by o) as a2 from AO; stream_plan: |- StreamMaterialize { columns: [a1, a2], stream_key: [], pk_columns: [], pk_conflict: NoCheck } - └─StreamProject { exprs: [count(ao.v), string_agg(ao.s, ',':Varchar order_by(ao.o ASC))], noop_update_hint: true } + └─StreamProject { exprs: [count(ao.v), string_agg(ao.s, ',':Varchar order_by(ao.o ASC))] } └─StreamSimpleAgg [append_only] { aggs: [count(ao.v), string_agg(ao.s, ',':Varchar order_by(ao.o ASC)), count] } └─StreamExchange { dist: Single } └─StreamProject { exprs: [ao.v, ao.s, ',':Varchar, ao.o, ao._row_id] } @@ -688,7 +688,7 @@ Fragment 0 
StreamMaterialize { columns: [a1, a2], stream_key: [], pk_columns: [], pk_conflict: NoCheck } ├── tables: [ Materialize: 4294967294 ] - └── StreamProject { exprs: [count(ao.v), string_agg(ao.s, ',':Varchar order_by(ao.o ASC))], noop_update_hint: true } + └── StreamProject { exprs: [count(ao.v), string_agg(ao.s, ',':Varchar order_by(ao.o ASC))] } └── StreamSimpleAgg [append_only] { aggs: [count(ao.v), string_agg(ao.s, ',':Varchar order_by(ao.o ASC)), count] } ├── tables: [ SimpleAggState: 1, SimpleAggCall1: 0 ] └── StreamExchange Single from 1 @@ -726,7 +726,7 @@ └─BatchScan { table: t, columns: [t.v, t.o, t.s], distribution: SomeShard } stream_plan: |- StreamMaterialize { columns: [a1, a2], stream_key: [], pk_columns: [], pk_conflict: NoCheck } - └─StreamProject { exprs: [max(t.v), string_agg(t.s, ',':Varchar order_by(t.o ASC))], noop_update_hint: true } + └─StreamProject { exprs: [max(t.v), string_agg(t.s, ',':Varchar order_by(t.o ASC))] } └─StreamSimpleAgg { aggs: [max(t.v), string_agg(t.s, ',':Varchar order_by(t.o ASC)), count] } └─StreamExchange { dist: Single } └─StreamProject { exprs: [t.v, t.s, ',':Varchar, t.o, t._row_id] } @@ -735,7 +735,7 @@ Fragment 0 StreamMaterialize { columns: [a1, a2], stream_key: [], pk_columns: [], pk_conflict: NoCheck } ├── tables: [ Materialize: 4294967294 ] - └── StreamProject { exprs: [max(t.v), string_agg(t.s, ',':Varchar order_by(t.o ASC))], noop_update_hint: true } + └── StreamProject { exprs: [max(t.v), string_agg(t.s, ',':Varchar order_by(t.o ASC))] } └── StreamSimpleAgg { aggs: [max(t.v), string_agg(t.s, ',':Varchar order_by(t.o ASC)), count] } ├── tables: [ SimpleAggState: 2, SimpleAggCall0: 0, SimpleAggCall1: 1 ] └── StreamExchange Single from 1 @@ -770,7 +770,7 @@ select max(v) as a1, string_agg(s, ',' order by o) as a2 from AO; stream_plan: |- StreamMaterialize { columns: [a1, a2], stream_key: [], pk_columns: [], pk_conflict: NoCheck } - └─StreamProject { exprs: [max(ao.v), string_agg(ao.s, ',':Varchar order_by(ao.o ASC))], noop_update_hint: true } + └─StreamProject { exprs: [max(ao.v), string_agg(ao.s, ',':Varchar order_by(ao.o ASC))] } └─StreamSimpleAgg [append_only] { aggs: [max(ao.v), string_agg(ao.s, ',':Varchar order_by(ao.o ASC)), count] } └─StreamExchange { dist: Single } └─StreamProject { exprs: [ao.v, ao.s, ',':Varchar, ao.o, ao._row_id] } @@ -779,7 +779,7 @@ Fragment 0 StreamMaterialize { columns: [a1, a2], stream_key: [], pk_columns: [], pk_conflict: NoCheck } ├── tables: [ Materialize: 4294967294 ] - └── StreamProject { exprs: [max(ao.v), string_agg(ao.s, ',':Varchar order_by(ao.o ASC))], noop_update_hint: true } + └── StreamProject { exprs: [max(ao.v), string_agg(ao.s, ',':Varchar order_by(ao.o ASC))] } └── StreamSimpleAgg [append_only] { aggs: [max(ao.v), string_agg(ao.s, ',':Varchar order_by(ao.o ASC)), count] } ├── tables: [ SimpleAggState: 1, SimpleAggCall1: 0 ] └── StreamExchange Single from 1 diff --git a/src/frontend/planner_test/tests/testdata/output/temporal_join.yaml b/src/frontend/planner_test/tests/testdata/output/temporal_join.yaml index 49d14526af640..5cdfdf6cf45ea 100644 --- a/src/frontend/planner_test/tests/testdata/output/temporal_join.yaml +++ b/src/frontend/planner_test/tests/testdata/output/temporal_join.yaml @@ -61,7 +61,7 @@ select count(*) from stream left join version FOR SYSTEM_TIME AS OF PROCTIME() on id1 = id2 where a2 < 10; stream_plan: |- StreamMaterialize { columns: [count], stream_key: [], pk_columns: [], pk_conflict: NoCheck } - └─StreamProject { exprs: [sum0(count)], noop_update_hint: true } + 
└─StreamProject { exprs: [sum0(count)] } └─StreamSimpleAgg [append_only] { aggs: [sum0(count), count] } └─StreamExchange { dist: Single } └─StreamStatelessSimpleAgg { aggs: [count] } diff --git a/src/frontend/planner_test/tests/testdata/output/tpch.yaml b/src/frontend/planner_test/tests/testdata/output/tpch.yaml index dbb7a5c08a62a..3c43faa8d2494 100644 --- a/src/frontend/planner_test/tests/testdata/output/tpch.yaml +++ b/src/frontend/planner_test/tests/testdata/output/tpch.yaml @@ -1160,7 +1160,7 @@ └─BatchScan { table: lineitem, columns: [lineitem.l_extendedprice, lineitem.l_discount, lineitem.l_quantity, lineitem.l_shipdate], distribution: SomeShard } stream_plan: |- StreamMaterialize { columns: [revenue], stream_key: [], pk_columns: [], pk_conflict: NoCheck } - └─StreamProject { exprs: [sum(sum($expr1))], noop_update_hint: true } + └─StreamProject { exprs: [sum(sum($expr1))] } └─StreamSimpleAgg { aggs: [sum(sum($expr1)), count] } └─StreamExchange { dist: Single } └─StreamStatelessSimpleAgg { aggs: [sum($expr1)] } @@ -1171,7 +1171,7 @@ Fragment 0 StreamMaterialize { columns: [revenue], stream_key: [], pk_columns: [], pk_conflict: NoCheck } ├── tables: [ Materialize: 4294967294 ] - └── StreamProject { exprs: [sum(sum($expr1))], noop_update_hint: true } + └── StreamProject { exprs: [sum(sum($expr1))] } └── StreamSimpleAgg { aggs: [sum(sum($expr1)), count] } { tables: [ SimpleAggState: 0 ] } └── StreamExchange Single from 1 @@ -2389,7 +2389,7 @@ │ └─StreamFilter { predicate: (nation.n_name = 'ARGENTINA':Varchar) } │ └─StreamTableScan { table: nation, columns: [nation.n_nationkey, nation.n_name], stream_scan_type: ArrangementBackfill, stream_key: [nation.n_nationkey], pk: [n_nationkey], dist: UpstreamHashShard(nation.n_nationkey) } └─StreamExchange { dist: Broadcast } - └─StreamProject { exprs: [(sum(sum($expr2)) * 0.0001000000:Decimal) as $expr3], noop_update_hint: true } + └─StreamProject { exprs: [(sum(sum($expr2)) * 0.0001000000:Decimal) as $expr3] } └─StreamSimpleAgg { aggs: [sum(sum($expr2)), count] } └─StreamExchange { dist: Single } └─StreamStatelessSimpleAgg { aggs: [sum($expr2)] } @@ -2461,7 +2461,7 @@ └── BatchPlanNode Fragment 8 - StreamProject { exprs: [(sum(sum($expr2)) * 0.0001000000:Decimal) as $expr3], noop_update_hint: true } + StreamProject { exprs: [(sum(sum($expr2)) * 0.0001000000:Decimal) as $expr3] } └── StreamSimpleAgg { aggs: [sum(sum($expr2)), count] } { tables: [ SimpleAggState: 16 ] } └── StreamExchange Single from 9 @@ -2818,7 +2818,7 @@ └─BatchScan { table: lineitem, columns: [lineitem.l_partkey, lineitem.l_extendedprice, lineitem.l_discount, lineitem.l_shipdate], distribution: SomeShard } stream_plan: |- StreamMaterialize { columns: [promo_revenue], stream_key: [], pk_columns: [], pk_conflict: NoCheck } - └─StreamProject { exprs: [((100.00:Decimal * sum(sum($expr1))) / sum(sum($expr2))) as $expr3], noop_update_hint: true } + └─StreamProject { exprs: [((100.00:Decimal * sum(sum($expr1))) / sum(sum($expr2))) as $expr3] } └─StreamSimpleAgg { aggs: [sum(sum($expr1)), sum(sum($expr2)), count] } └─StreamExchange { dist: Single } └─StreamStatelessSimpleAgg { aggs: [sum($expr1), sum($expr2)] } @@ -2834,8 +2834,9 @@ Fragment 0 StreamMaterialize { columns: [promo_revenue], stream_key: [], pk_columns: [], pk_conflict: NoCheck } ├── tables: [ Materialize: 4294967294 ] - └── StreamProject { exprs: [((100.00:Decimal * sum(sum($expr1))) / sum(sum($expr2))) as $expr3], noop_update_hint: true } - └── StreamSimpleAgg { aggs: [sum(sum($expr1)), sum(sum($expr2)), count] } { 
tables: [ SimpleAggState: 0 ] } + └── StreamProject { exprs: [((100.00:Decimal * sum(sum($expr1))) / sum(sum($expr2))) as $expr3] } + └── StreamSimpleAgg { aggs: [sum(sum($expr1)), sum(sum($expr2)), count] } + ├── tables: [ SimpleAggState: 0 ] └── StreamExchange Single from 1 Fragment 1 @@ -2965,7 +2966,7 @@ │ └─StreamFilter { predicate: (lineitem.l_shipdate >= '1993-01-01':Date) AND (lineitem.l_shipdate < '1993-04-01 00:00:00':Timestamp) } │ └─StreamTableScan { table: lineitem, columns: [lineitem.l_suppkey, lineitem.l_extendedprice, lineitem.l_discount, lineitem.l_orderkey, lineitem.l_linenumber, lineitem.l_shipdate], stream_scan_type: ArrangementBackfill, stream_key: [lineitem.l_orderkey, lineitem.l_linenumber], pk: [l_orderkey, l_linenumber], dist: UpstreamHashShard(lineitem.l_orderkey, lineitem.l_linenumber) } └─StreamExchange { dist: HashShard(max(max(sum($expr1)))) } - └─StreamProject { exprs: [max(max(sum($expr1)))], noop_update_hint: true } + └─StreamProject { exprs: [max(max(sum($expr1)))] } └─StreamSimpleAgg { aggs: [max(max(sum($expr1))), count] } └─StreamExchange { dist: Single } └─StreamHashAgg { group_key: [_vnode], aggs: [max(sum($expr1)), count] } @@ -3019,7 +3020,7 @@ └── BatchPlanNode Fragment 6 - StreamProject { exprs: [max(max(sum($expr1)))], noop_update_hint: true } + StreamProject { exprs: [max(max(sum($expr1)))] } └── StreamSimpleAgg { aggs: [max(max(sum($expr1))), count] } { tables: [ SimpleAggState: 14, SimpleAggCall0: 13 ] } └── StreamExchange Single from 7 @@ -3295,7 +3296,7 @@ └─BatchScan { table: lineitem, columns: [lineitem.l_partkey, lineitem.l_quantity], distribution: SomeShard } stream_plan: |- StreamMaterialize { columns: [avg_yearly], stream_key: [], pk_columns: [], pk_conflict: NoCheck } - └─StreamProject { exprs: [(sum(sum(lineitem.l_extendedprice)) / 7.0:Decimal) as $expr2], noop_update_hint: true } + └─StreamProject { exprs: [(sum(sum(lineitem.l_extendedprice)) / 7.0:Decimal) as $expr2] } └─StreamSimpleAgg { aggs: [sum(sum(lineitem.l_extendedprice)), count] } └─StreamExchange { dist: Single } └─StreamStatelessSimpleAgg { aggs: [sum(lineitem.l_extendedprice)] } @@ -3318,8 +3319,9 @@ Fragment 0 StreamMaterialize { columns: [avg_yearly], stream_key: [], pk_columns: [], pk_conflict: NoCheck } ├── tables: [ Materialize: 4294967294 ] - └── StreamProject { exprs: [(sum(sum(lineitem.l_extendedprice)) / 7.0:Decimal) as $expr2], noop_update_hint: true } - └── StreamSimpleAgg { aggs: [sum(sum(lineitem.l_extendedprice)), count] } { tables: [ SimpleAggState: 0 ] } + └── StreamProject { exprs: [(sum(sum(lineitem.l_extendedprice)) / 7.0:Decimal) as $expr2] } + └── StreamSimpleAgg { aggs: [sum(sum(lineitem.l_extendedprice)), count] } + ├── tables: [ SimpleAggState: 0 ] └── StreamExchange Single from 1 Fragment 1 @@ -3670,7 +3672,7 @@ └─BatchScan { table: lineitem, columns: [lineitem.l_partkey, lineitem.l_quantity, lineitem.l_extendedprice, lineitem.l_discount, lineitem.l_shipinstruct, lineitem.l_shipmode], distribution: SomeShard } stream_plan: |- StreamMaterialize { columns: [revenue], stream_key: [], pk_columns: [], pk_conflict: NoCheck } - └─StreamProject { exprs: [sum(sum($expr1))], noop_update_hint: true } + └─StreamProject { exprs: [sum(sum($expr1))] } └─StreamSimpleAgg { aggs: [sum(sum($expr1)), count] } └─StreamExchange { dist: Single } └─StreamStatelessSimpleAgg { aggs: [sum($expr1)] } @@ -3688,7 +3690,7 @@ Fragment 0 StreamMaterialize { columns: [revenue], stream_key: [], pk_columns: [], pk_conflict: NoCheck } ├── tables: [ Materialize: 4294967294 ] - └── 
StreamProject { exprs: [sum(sum($expr1))], noop_update_hint: true } + └── StreamProject { exprs: [sum(sum($expr1))] } └── StreamSimpleAgg { aggs: [sum(sum($expr1)), count] } { tables: [ SimpleAggState: 0 ] } └── StreamExchange Single from 1 @@ -4340,7 +4342,7 @@ │ └─StreamExchange { dist: HashShard(orders.o_custkey) } │ └─StreamTableScan { table: orders, columns: [orders.o_custkey, orders.o_orderkey], stream_scan_type: ArrangementBackfill, stream_key: [orders.o_orderkey], pk: [o_orderkey], dist: UpstreamHashShard(orders.o_orderkey) } └─StreamExchange { dist: Broadcast } - └─StreamProject { exprs: [(sum(sum(customer.c_acctbal)) / sum0(count(customer.c_acctbal))::Decimal) as $expr1], noop_update_hint: true } + └─StreamProject { exprs: [(sum(sum(customer.c_acctbal)) / sum0(count(customer.c_acctbal))::Decimal) as $expr1] } └─StreamSimpleAgg { aggs: [sum(sum(customer.c_acctbal)), sum0(count(customer.c_acctbal)), count] } └─StreamExchange { dist: Single } └─StreamStatelessSimpleAgg { aggs: [sum(customer.c_acctbal), count(customer.c_acctbal)] } diff --git a/src/frontend/src/optimizer/plan_node/logical_agg.rs b/src/frontend/src/optimizer/plan_node/logical_agg.rs index 9e774628fc262..b0ad102ee693c 100644 --- a/src/frontend/src/optimizer/plan_node/logical_agg.rs +++ b/src/frontend/src/optimizer/plan_node/logical_agg.rs @@ -86,6 +86,8 @@ impl LogicalAgg { bail!("expected at least one agg call"); } + let need_row_merge: bool = Self::need_row_merge(&approx_percentile); + // ====== Handle normal aggs let total_agg_calls = core .agg_calls @@ -98,8 +100,12 @@ impl LogicalAgg { let local_agg = StreamStatelessSimpleAgg::new(core); let exchange = RequiredDist::single().enforce_if_not_satisfies(local_agg.into(), &Order::any())?; - let global_agg = - new_stream_simple_agg(Agg::new(total_agg_calls, IndexSet::empty(), exchange)); + + let must_output_per_barrier = need_row_merge; + let global_agg = new_stream_simple_agg( + Agg::new(total_agg_calls, IndexSet::empty(), exchange), + must_output_per_barrier, + ); // ====== Merge approx percentile and normal aggs Self::add_row_merge_if_needed( @@ -129,6 +135,7 @@ impl LogicalAgg { }; bail!("expected at least one agg call"); } + let need_row_merge = Self::need_row_merge(&approx_percentile); // Generate vnode via project // TODO(kwannoel): We should apply Project optimization rules here. @@ -157,19 +164,26 @@ impl LogicalAgg { let global_agg = if self.group_key().is_empty() { let exchange = RequiredDist::single().enforce_if_not_satisfies(local_agg.into(), &Order::any())?; - let global_agg = new_stream_simple_agg(Agg::new( - core.agg_calls - .iter() - .enumerate() - .map(|(partial_output_idx, agg_call)| { - agg_call.partial_to_total_agg_call(n_local_group_key + partial_output_idx) - }) - .collect(), - global_group_key.into_iter().collect(), - exchange, - )); + let must_output_per_barrier = need_row_merge; + let global_agg = new_stream_simple_agg( + Agg::new( + core.agg_calls + .iter() + .enumerate() + .map(|(partial_output_idx, agg_call)| { + agg_call + .partial_to_total_agg_call(n_local_group_key + partial_output_idx) + }) + .collect(), + global_group_key.into_iter().collect(), + exchange, + ), + must_output_per_barrier, + ); global_agg.into() } else { + // the `RowMergeExec` has not supported keyed merge + assert!(!need_row_merge); let exchange = RequiredDist::shard_by_key(input_col_num, &global_group_key) .enforce_if_not_satisfies(local_agg.into(), &Order::any())?; // Local phase should have reordered the group keys into their required order. 
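A note on the planning change above, with a minimal sketch of the decision it encodes (the free-standing function and its parameters are illustrative, not the actual `LogicalAgg` API): `must_output_per_barrier` is derived solely from whether an approx_percentile call forces a two-phase plan with a downstream `StreamRowMerge`, and the keyed (hash agg) path asserts it stays off because `RowMerge` has no keyed variant yet. This is also why the planner-test diffs above only drop `noop_update_hint: true` from the simple-agg `StreamProject`s: the new flag is only ever set for approx_percentile plans, and no-op filtering for plain simple aggs moves into the `SimpleAggExecutor` (see the `simple_agg.rs` hunk further down).

// Sketch only: how the flag is decided during planning (names are hypothetical).
fn must_output_per_barrier(has_approx_percentile: bool, group_key_is_empty: bool) -> bool {
    // RowMerge is only inserted for approx_percentile, and it has no keyed
    // variant yet, so the flag can only be set on the ungrouped (simple) agg.
    assert!(group_key_is_empty || !has_approx_percentile);
    has_approx_percentile
}
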
@@ -203,7 +217,7 @@ impl LogicalAgg { let mut core = self.core.clone(); let input = RequiredDist::single().enforce_if_not_satisfies(stream_input, &Order::any())?; core.input = input; - Ok(new_stream_simple_agg(core).into()) + Ok(new_stream_simple_agg(core, false).into()) } fn gen_shuffle_plan(&self, stream_input: PlanRef) -> Result { @@ -339,6 +353,10 @@ impl LogicalAgg { )) } + fn need_row_merge(approx_percentile: &Option) -> bool { + approx_percentile.is_some() + } + /// Add `RowMerge` if needed fn add_row_merge_if_needed( approx_percentile: Option, @@ -346,7 +364,11 @@ impl LogicalAgg { approx_percentile_col_mapping: ColIndexMapping, non_approx_percentile_col_mapping: ColIndexMapping, ) -> Result { + // just for assert + let need_row_merge = Self::need_row_merge(&approx_percentile); + if let Some(approx_percentile) = approx_percentile { + assert!(need_row_merge); let row_merge = StreamRowMerge::new( approx_percentile, global_agg, @@ -355,6 +377,7 @@ impl LogicalAgg { )?; Ok(row_merge.into()) } else { + assert!(!need_row_merge); Ok(global_agg) } } @@ -1305,9 +1328,9 @@ fn find_or_append_row_count(mut logical: Agg) -> (Agg, usize) (logical, row_count_idx) } -fn new_stream_simple_agg(core: Agg) -> StreamSimpleAgg { +fn new_stream_simple_agg(core: Agg, must_output_per_barrier: bool) -> StreamSimpleAgg { let (logical, row_count_idx) = find_or_append_row_count(core); - StreamSimpleAgg::new(logical, row_count_idx) + StreamSimpleAgg::new(logical, row_count_idx, must_output_per_barrier) } fn new_stream_hash_agg(core: Agg, vnode_col_idx: Option) -> StreamHashAgg { @@ -1386,19 +1409,12 @@ impl ToStream for LogicalAgg { panic!("the root PlanNode must be StreamHashAgg, StreamSimpleAgg, StreamGlobalApproxPercentile, or StreamRowMerge"); }; - let is_hash_agg = !self.group_key().is_empty(); - // "Simple Agg" includes normal simple agg, as well as approx percentile simple 2 phase agg. - let is_simple_agg = !is_hash_agg; - if self.agg_calls().len() == n_final_agg_calls && is_hash_agg { + if self.agg_calls().len() == n_final_agg_calls { // an existing `count(*)` is used as row count column in `StreamXxxAgg` Ok(plan) } else { - // For hash agg, a `count(*)` is appended, should project the output. - // For simple agg, we output every epoch, so we will always add a project - // to filter out no-op updates, and we don't need the following assert. - if is_hash_agg { - assert_eq!(self.agg_calls().len() + 1, n_final_agg_calls); - } + // a `count(*)` is appended, should project the output + assert_eq!(self.agg_calls().len() + 1, n_final_agg_calls); Ok(StreamProject::new(generic::Project::with_out_col_idx( plan, 0..self.schema().len(), @@ -1407,9 +1423,7 @@ impl ToStream for LogicalAgg { // Since it'll be pruned immediately in `StreamProject`, the update records are likely to be // no-op. So we set the hint to instruct the executor to eliminate them. // See https://github.com/risingwavelabs/risingwave/issues/17030. - // Further for simple agg, we also have to set the hint to eliminate no-op updates. - // Since we will output every epoch. 
- .with_noop_update_hint(self.agg_calls().is_empty() || is_simple_agg) + .with_noop_update_hint(self.agg_calls().is_empty()) .into()) } } diff --git a/src/frontend/src/optimizer/plan_node/stream_simple_agg.rs b/src/frontend/src/optimizer/plan_node/stream_simple_agg.rs index 6ecaa4c308f5e..f9f125654f402 100644 --- a/src/frontend/src/optimizer/plan_node/stream_simple_agg.rs +++ b/src/frontend/src/optimizer/plan_node/stream_simple_agg.rs @@ -33,10 +33,18 @@ pub struct StreamSimpleAgg { /// The index of `count(*)` in `agg_calls`. row_count_idx: usize, + + // Required by the downstream `RowMerge`, + // currently only used by the `approx_percentile`'s two phase plan + must_output_per_barrier: bool, } impl StreamSimpleAgg { - pub fn new(core: generic::Agg, row_count_idx: usize) -> Self { + pub fn new( + core: generic::Agg, + row_count_idx: usize, + must_output_per_barrier: bool, + ) -> Self { assert_eq!(core.agg_calls[row_count_idx], PlanAggCall::count_star()); let input = core.input.clone(); @@ -62,6 +70,7 @@ impl StreamSimpleAgg { base, core, row_count_idx, + must_output_per_barrier, } } @@ -75,7 +84,11 @@ impl Distill for StreamSimpleAgg { let name = plan_node_name!("StreamSimpleAgg", { "append_only", self.input().append_only() }, ); - childless_record(name, self.core.fields_pretty()) + let mut vec = self.core.fields_pretty(); + if self.must_output_per_barrier { + vec.push(("must_output_per_barrier", "true".into())); + } + childless_record(name, vec) } } @@ -89,7 +102,7 @@ impl PlanTreeNodeUnary for StreamSimpleAgg { input, ..self.core.clone() }; - Self::new(logical, self.row_count_idx) + Self::new(logical, self.row_count_idx, self.must_output_per_barrier) } } impl_plan_tree_node_for_unary! { StreamSimpleAgg } @@ -137,6 +150,7 @@ impl StreamNode for StreamSimpleAgg { .collect(), row_count_index: self.row_count_idx as u32, version: PbAggNodeVersion::Issue13465 as _, + must_output_per_barrier: self.must_output_per_barrier, }) } } @@ -149,7 +163,7 @@ impl ExprRewritable for StreamSimpleAgg { fn rewrite_exprs(&self, r: &mut dyn ExprRewriter) -> PlanRef { let mut core = self.core.clone(); core.rewrite_exprs(r); - Self::new(core, self.row_count_idx).into() + Self::new(core, self.row_count_idx, self.must_output_per_barrier).into() } } diff --git a/src/frontend/src/optimizer/plan_node/stream_stateless_simple_agg.rs b/src/frontend/src/optimizer/plan_node/stream_stateless_simple_agg.rs index 93c56efad3d5f..edb9121baf595 100644 --- a/src/frontend/src/optimizer/plan_node/stream_stateless_simple_agg.rs +++ b/src/frontend/src/optimizer/plan_node/stream_stateless_simple_agg.rs @@ -102,6 +102,7 @@ impl StreamNode for StreamStatelessSimpleAgg { is_append_only: self.input().append_only(), distinct_dedup_tables: Default::default(), version: AggNodeVersion::Issue13465 as _, + must_output_per_barrier: false, // this is not used }) } } diff --git a/src/stream/src/executor/agg_common.rs b/src/stream/src/executor/agg_common.rs index 2cb3cad8fb2d8..c185222298d80 100644 --- a/src/stream/src/executor/agg_common.rs +++ b/src/stream/src/executor/agg_common.rs @@ -46,7 +46,9 @@ pub struct AggExecutorArgs { pub trait AggExecutorExtraArgs {} -pub struct SimpleAggExecutorExtraArgs {} +pub struct SimpleAggExecutorExtraArgs { + pub must_output_per_barrier: bool, +} impl AggExecutorExtraArgs for SimpleAggExecutorExtraArgs {} /// Extra arguments needed to construct an `HashAggExecutor`. 
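Before the executor diffs that follow, here is the effect of threading `SimpleAggExecutorExtraArgs::must_output_per_barrier` through, reduced to a self-contained sketch (the `Row`/`Change` types and the `keep_change` helper are stand-ins, not the crate's types): at each barrier the simple agg builds a change from its state, and unless the flag is set for a downstream `RowMerge`, an update whose old and new rows are equal is dropped instead of being emitted.

// Self-contained illustration; `Row`, `Change` and `keep_change` are
// hypothetical stand-ins for the executor's real types.
#[derive(PartialEq)]
struct Row(Vec<i64>);

#[allow(dead_code)]
enum Change {
    Insert(Row),
    Delete(Row),
    Update { old_row: Row, new_row: Row },
}

fn keep_change(change: &Change, must_output_per_barrier: bool) -> bool {
    if must_output_per_barrier {
        // A downstream RowMerge (approx_percentile two-phase plan) expects a
        // row on every barrier, even when the aggregate result is unchanged.
        return true;
    }
    // Otherwise a no-op update (old == new) is dropped before building the chunk.
    !matches!(change, Change::Update { old_row, new_row } if old_row == new_row)
}

fn main() {
    let noop = Change::Update { old_row: Row(vec![42]), new_row: Row(vec![42]) };
    assert!(!keep_change(&noop, false)); // normal simple agg: drop it
    assert!(keep_change(&noop, true));   // approx_percentile plan: keep it
}
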
diff --git a/src/stream/src/executor/integration_tests.rs b/src/stream/src/executor/integration_tests.rs index b03189e932a87..d65abc5a5ce53 100644 --- a/src/stream/src/executor/integration_tests.rs +++ b/src/stream/src/executor/integration_tests.rs @@ -192,6 +192,7 @@ async fn test_merger_sum_aggr() { 2, // row_count_index vec![], 2, + false, ) .await; diff --git a/src/stream/src/executor/simple_agg.rs b/src/stream/src/executor/simple_agg.rs index a08049268e5b4..fdecd5b7a4502 100644 --- a/src/stream/src/executor/simple_agg.rs +++ b/src/stream/src/executor/simple_agg.rs @@ -14,6 +14,7 @@ use std::collections::HashMap; +use risingwave_common::array::stream_record::Record; use risingwave_common::util::epoch::EpochPair; use risingwave_common::util::iter_util::ZipEqFast; use risingwave_expr::aggregate::{build_retractable, AggCall, BoxedAggregateFunction}; @@ -83,6 +84,10 @@ struct ExecutorInner { /// Extreme state cache size extreme_cache_size: usize, + + /// Required by the downstream `RowMergeExecutor`, + /// currently only used by the `approx_percentile`'s two phase plan + must_output_per_barrier: bool, } impl ExecutorInner { @@ -129,6 +134,7 @@ impl SimpleAggExecutor { distinct_dedup_tables: args.distinct_dedup_tables, watermark_epoch: args.watermark_epoch, extreme_cache_size: args.extreme_cache_size, + must_output_per_barrier: args.extra.must_output_per_barrier, }, }) } @@ -201,7 +207,16 @@ impl SimpleAggExecutor { .agg_group .build_change(&this.storages, &this.agg_funcs) .await? - .map(|change| change.to_stream_chunk(&this.info.schema.data_types())); + .and_then(|change| { + if !this.must_output_per_barrier { + if let Record::Update { old_row, new_row } = &change { + if old_row == new_row { + return None; + } + }; + } + Some(change.to_stream_chunk(&this.info.schema.data_types())) + }); // Commit all state tables. futures::future::try_join_all(this.all_state_tables_mut().map(|table| table.commit(epoch))) @@ -343,6 +358,7 @@ mod tests { 0, vec![2], 1, + false, ) .await; let mut simple_agg = simple_agg.execute(); @@ -431,6 +447,7 @@ mod tests { 0, vec![2], 1, + true, ) .await; let mut simple_agg = simple_agg.execute(); @@ -481,4 +498,80 @@ mod tests { Message::Barrier { .. } ); } + + // NOTE(kwannoel): `approx_percentile` + `keyed_merge` depend on this property for correctness. 
+ #[tokio::test] + async fn test_simple_aggregation_omit_noop_update() { + let store = MemoryStateStore::new(); + let schema = Schema { + fields: vec![ + Field::unnamed(DataType::Int64), + Field::unnamed(DataType::Int64), + // primary key column` + Field::unnamed(DataType::Int64), + ], + }; + let (mut tx, source) = MockSource::channel(); + let source = source.into_executor(schema, vec![2]); + // initial barrier + tx.push_barrier(test_epoch(1), false); + // next barrier + tx.push_barrier(test_epoch(2), false); + tx.push_chunk(StreamChunk::from_pretty( + " I I I + + 100 200 1001 + - 100 200 1001", + )); + tx.push_barrier(test_epoch(3), false); + tx.push_barrier(test_epoch(4), false); + + let agg_calls = vec![ + AggCall::from_pretty("(count:int8)"), + AggCall::from_pretty("(sum:int8 $0:int8)"), + AggCall::from_pretty("(sum:int8 $1:int8)"), + AggCall::from_pretty("(min:int8 $0:int8)"), + ]; + + let simple_agg = new_boxed_simple_agg_executor( + ActorContext::for_test(123), + store, + source, + false, + agg_calls, + 0, + vec![2], + 1, + false, + ) + .await; + let mut simple_agg = simple_agg.execute(); + + // Consume the init barrier + simple_agg.next().await.unwrap().unwrap(); + // Consume stream chunk + let msg = simple_agg.next().await.unwrap().unwrap(); + assert_eq!( + *msg.as_chunk().unwrap(), + StreamChunk::from_pretty( + " I I I I + + 0 . . . " + ) + ); + assert_matches!( + simple_agg.next().await.unwrap().unwrap(), + Message::Barrier { .. } + ); + + // No stream chunk + assert_matches!( + simple_agg.next().await.unwrap().unwrap(), + Message::Barrier { .. } + ); + + // No stream chunk + assert_matches!( + simple_agg.next().await.unwrap().unwrap(), + Message::Barrier { .. } + ); + } } diff --git a/src/stream/src/executor/test_utils.rs b/src/stream/src/executor/test_utils.rs index db024411ea0ad..4744bae374bfb 100644 --- a/src/stream/src/executor/test_utils.rs +++ b/src/stream/src/executor/test_utils.rs @@ -515,6 +515,7 @@ pub mod agg_executor { row_count_index: usize, pk_indices: PkIndices, executor_id: u64, + must_output_per_barrier: bool, ) -> Executor { let storages = future::join_all(agg_calls.iter().enumerate().map(|(idx, agg_call)| { create_agg_state_storage( @@ -560,7 +561,9 @@ pub mod agg_executor { intermediate_state_table, distinct_dedup_tables: Default::default(), watermark_epoch: Arc::new(AtomicU64::new(0)), - extra: SimpleAggExecutorExtraArgs {}, + extra: SimpleAggExecutorExtraArgs { + must_output_per_barrier, + }, }) .unwrap(); (info, exec).into() diff --git a/src/stream/src/from_proto/simple_agg.rs b/src/stream/src/from_proto/simple_agg.rs index 16809edb8bcaf..689acc7d16a9c 100644 --- a/src/stream/src/from_proto/simple_agg.rs +++ b/src/stream/src/from_proto/simple_agg.rs @@ -54,6 +54,7 @@ impl ExecutorBuilder for SimpleAggExecutorBuilder { let distinct_dedup_tables = build_distinct_dedup_table_from_proto(node.get_distinct_dedup_tables(), store, None) .await; + let must_output_per_barrier = node.get_must_output_per_barrier(); let exec = SimpleAggExecutor::new(AggExecutorArgs { version: node.version(), @@ -70,7 +71,9 @@ impl ExecutorBuilder for SimpleAggExecutorBuilder { intermediate_state_table, distinct_dedup_tables, watermark_epoch: params.watermark_epoch, - extra: SimpleAggExecutorExtraArgs {}, + extra: SimpleAggExecutorExtraArgs { + must_output_per_barrier, + }, })?; Ok((params.info, exec).into()) From 847610a0e1007207d44dcadd6c61f5373ed4bdb9 Mon Sep 17 00:00:00 2001 From: xxchan Date: Wed, 4 Sep 2024 09:58:19 +0800 Subject: [PATCH 15/26] refactor: use high watermark to 
finish backfill faster (#18342) --- .../source_inline/kafka/shared_source.slt | 95 ++++++++++++------- src/connector/src/source/base.rs | 27 ++++++ .../src/source/kafka/enumerator/client.rs | 1 + .../src/source/kafka/source/reader.rs | 34 ++++++- src/connector/src/source/reader/reader.rs | 72 +++++++++++++- .../source/source_backfill_executor.rs | 55 +++++++++-- 6 files changed, 233 insertions(+), 51 deletions(-) diff --git a/e2e_test/source_inline/kafka/shared_source.slt b/e2e_test/source_inline/kafka/shared_source.slt index c481e609ffccd..51a9f1e5ee1b3 100644 --- a/e2e_test/source_inline/kafka/shared_source.slt +++ b/e2e_test/source_inline/kafka/shared_source.slt @@ -6,6 +6,29 @@ SET rw_enable_shared_source TO true; system ok rpk topic create shared_source -p 4 +# Test create source before produing data. +statement ok +create source s_before_produce (v1 int, v2 varchar) with ( + ${RISEDEV_KAFKA_WITH_OPTIONS_COMMON}, + topic = 'shared_source', + scan.startup.mode = 'earliest' +) FORMAT PLAIN ENCODE JSON; + +statement ok +create materialized view mv_before_produce as select * from s_before_produce; + +sleep 2s + +# All partitions starts with backfill_info: NoDataToBackfill, so it finishes immediately. +system ok +internal_table.mjs --name mv_before_produce --type sourcebackfill +---- +0,"""Finished""" +1,"""Finished""" +2,"""Finished""" +3,"""Finished""" + + system ok cat << EOF | rpk topic produce shared_source -f "%p %v\n" -p 0 0 {"v1": 1, "v2": "a"} @@ -21,7 +44,7 @@ create source s0 (v1 int, v2 varchar) with ( scan.startup.mode = 'earliest' ) FORMAT PLAIN ENCODE JSON; -query I +query ? select count(*) from rw_internal_tables where name like '%s0%'; ---- 1 @@ -41,21 +64,24 @@ create materialized view mv_1 as select * from s0; # Wait enough time to ensure SourceExecutor consumes all Kafka data. sleep 2s -# SourceExecutor's ingestion started, but it only starts from latest. +# SourceExecutor's ingestion started, but it only starts from latest (offset 1). system ok internal_table.mjs --name s0 --type source ---- (empty) -# offset 0 must be backfilled, not from upstream. +# SourceBackfill starts from offset 0, with backfill_info: HasDataToBackfill { latest_offset: "0" } (decided by kafka high watermark). +# (meaning upstream already consumed offset 0, so we only need to backfill to offset 0) +# After backfilling offset 0, it enters SourceCachingUp state. Now the backfill is finished. +# We wait for SourceExecutor to produce offset > 0. system ok internal_table.mjs --name mv_1 --type sourcebackfill ---- -0,"{""Backfilling"": ""0""}" -1,"{""Backfilling"": ""0""}" -2,"{""Backfilling"": ""0""}" -3,"{""Backfilling"": ""0""}" +0,"{""SourceCachingUp"": ""0""}" +1,"{""SourceCachingUp"": ""0""}" +2,"{""SourceCachingUp"": ""0""}" +3,"{""SourceCachingUp"": ""0""}" # This does not affect the behavior for CREATE MATERIALIZED VIEW below. It also uses the shared source, and creates SourceBackfillExecutor. @@ -67,7 +93,7 @@ create materialized view mv_2 as select * from s0; sleep 2s -query IT rowsort +query ?? rowsort select v1, v2 from s0; ---- 1 a @@ -75,7 +101,7 @@ select v1, v2 from s0; 3 c 4 d -query IT rowsort +query ?? rowsort select v1, v2 from mv_1; ---- 1 a @@ -83,7 +109,7 @@ select v1, v2 from mv_1; 3 c 4 d -query IT rowsort +query ?? 
rowsort select v1, v2 from mv_2; ---- 1 a @@ -111,7 +137,7 @@ internal_table.mjs --name s0 --type source 3,"{""split_info"": {""partition"": 3, ""start_offset"": 1, ""stop_offset"": null, ""topic"": ""shared_source""}, ""split_type"": ""kafka""}" -query IT rowsort +query ?? rowsort select v1, v2 from s0; ---- 1 a @@ -123,7 +149,7 @@ select v1, v2 from s0; 4 d 4 dd -query IT rowsort +query ?? rowsort select v1, v2 from mv_1; ---- 1 a @@ -146,18 +172,14 @@ internal_table.mjs --name s0 --type source 3,"{""split_info"": {""partition"": 3, ""start_offset"": 1, ""stop_offset"": null, ""topic"": ""shared_source""}, ""split_type"": ""kafka""}" -# The result is non-deterministic: -# If the upstream row comes before the backfill row, it will be ignored, and the result state is "{""Backfilling"": ""1""}". -# If the upstream row comes after the backfill row, the result state is Finished. -# Uncomment below and run manually to see the result. - -# system ok -# internal_table.mjs --name mv_1 --type sourcebackfill -# ---- -# 0,"{""Finished""}" -# 1,"{""Finished""}" -# 2,"{""Finished""}" -# 3,"{""Finished""}" +# Transition from SourceCachingUp to Finished after consuming one upstream message. +system ok +internal_table.mjs --name mv_1 --type sourcebackfill +---- +0,"""Finished""" +1,"""Finished""" +2,"""Finished""" +3,"""Finished""" system ok @@ -173,7 +195,7 @@ done sleep 3s -query IT rowsort +query ?? rowsort select v1, count(*) from s0 group by v1; ---- 1 12 @@ -181,7 +203,7 @@ select v1, count(*) from s0 group by v1; 3 12 4 12 -query IT rowsort +query ?? rowsort select v1, count(*) from mv_1 group by v1; ---- 1 12 @@ -189,6 +211,14 @@ select v1, count(*) from mv_1 group by v1; 3 12 4 12 +query ?? rowsort +select v1, count(*) from mv_before_produce group by v1; +---- +1 12 +2 12 +3 12 +4 12 + # start_offset changed to 11 system ok @@ -200,15 +230,8 @@ internal_table.mjs --name s0 --type source 3,"{""split_info"": {""partition"": 3, ""start_offset"": 11, ""stop_offset"": null, ""topic"": ""shared_source""}, ""split_type"": ""kafka""}" -# Now it is highly probable that all partitions have finished. -system ok -internal_table.mjs --name mv_1 --type sourcebackfill ----- -0,"""Finished""" -1,"""Finished""" -2,"""Finished""" -3,"""Finished""" - - statement ok drop source s0 cascade; + +statement ok +drop source s_before_produce cascade; diff --git a/src/connector/src/source/base.rs b/src/connector/src/source/base.rs index 38c2f25eb0336..d2b5aa1e88b4c 100644 --- a/src/connector/src/source/base.rs +++ b/src/connector/src/source/base.rs @@ -370,6 +370,33 @@ pub trait SplitReader: Sized + Send { ) -> crate::error::ConnectorResult; fn into_stream(self) -> BoxChunkSourceStream; + + fn backfill_info(&self) -> HashMap { + HashMap::new() + } +} + +/// Information used to determine whether we should start and finish source backfill. +/// +/// XXX: if a connector cannot provide the latest offsets (but we want to make it shareable), +/// perhaps we should ban blocking DDL for it. +#[derive(Debug, Clone)] +pub enum BackfillInfo { + HasDataToBackfill { + /// The last available offsets for each split (**inclusive**). + /// + /// This will be used to determine whether source backfill is finished when + /// there are no _new_ messages coming from upstream `SourceExecutor`. Otherwise, + /// blocking DDL cannot finish until new messages come. + /// + /// When there are upstream messages, we will use the latest offsets from the upstream. 
+ latest_offset: String, + }, + /// If there are no messages in the split at all, we don't need to start backfill. + /// In this case, there will be no message from the backfill stream too. + /// If we started backfill, we cannot finish it until new messages come. + /// So we mark this a special case for optimization. + NoDataToBackfill, } for_all_sources!(impl_connector_properties); diff --git a/src/connector/src/source/kafka/enumerator/client.rs b/src/connector/src/source/kafka/enumerator/client.rs index ff007076c1338..5551c12b433b3 100644 --- a/src/connector/src/source/kafka/enumerator/client.rs +++ b/src/connector/src/source/kafka/enumerator/client.rs @@ -170,6 +170,7 @@ impl KafkaSplitEnumerator { self.report_high_watermark(*partition, high); map.insert(*partition, (low, high)); } + tracing::debug!("fetch kafka watermarks: {map:?}"); Ok(map) } diff --git a/src/connector/src/source/kafka/source/reader.rs b/src/connector/src/source/kafka/source/reader.rs index 5ace1820b4249..72d4c36377c81 100644 --- a/src/connector/src/source/kafka/source/reader.rs +++ b/src/connector/src/source/kafka/source/reader.rs @@ -34,13 +34,14 @@ use crate::source::kafka::{ KafkaContextCommon, KafkaProperties, KafkaSplit, RwConsumerContext, KAFKA_ISOLATION_LEVEL, }; use crate::source::{ - into_chunk_stream, BoxChunkSourceStream, Column, SourceContextRef, SplitId, SplitMetaData, - SplitReader, + into_chunk_stream, BackfillInfo, BoxChunkSourceStream, Column, SourceContextRef, SplitId, + SplitMetaData, SplitReader, }; pub struct KafkaSplitReader { consumer: StreamConsumer, offsets: HashMap, Option)>, + backfill_info: HashMap, bytes_per_second: usize, max_num_messages: usize, parser_config: ParserConfig, @@ -106,7 +107,7 @@ impl SplitReader for KafkaSplitReader { let mut tpl = TopicPartitionList::with_capacity(splits.len()); let mut offsets = HashMap::new(); - + let mut backfill_info = HashMap::new(); for split in splits { offsets.insert(split.id(), (split.start_offset, split.stop_offset)); @@ -121,7 +122,29 @@ impl SplitReader for KafkaSplitReader { } else { tpl.add_partition(split.topic.as_str(), split.partition); } + + let (low, high) = consumer + .fetch_watermarks( + split.topic.as_str(), + split.partition, + properties.common.sync_call_timeout, + ) + .await?; + tracing::debug!("fetch kafka watermarks: low: {low}, high: {high}, split: {split:?}"); + // note: low is inclusive, high is exclusive + if low == high { + backfill_info.insert(split.id(), BackfillInfo::NoDataToBackfill); + } else { + debug_assert!(high > 0); + backfill_info.insert( + split.id(), + BackfillInfo::HasDataToBackfill { + latest_offset: (high - 1).to_string(), + }, + ); + } } + tracing::debug!("backfill_info: {:?}", backfill_info); consumer.assign(&tpl)?; @@ -143,6 +166,7 @@ impl SplitReader for KafkaSplitReader { Ok(Self { consumer, offsets, + backfill_info, bytes_per_second, max_num_messages, parser_config, @@ -155,6 +179,10 @@ impl SplitReader for KafkaSplitReader { let source_context = self.source_ctx.clone(); into_chunk_stream(self.into_data_stream(), parser_config, source_context) } + + fn backfill_info(&self) -> HashMap { + self.backfill_info.clone() + } } impl KafkaSplitReader { diff --git a/src/connector/src/source/reader/reader.rs b/src/connector/src/source/reader/reader.rs index 61468bd72a4b6..95764792c0025 100644 --- a/src/connector/src/source/reader/reader.rs +++ b/src/connector/src/source/reader/reader.rs @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+use std::collections::HashMap; use std::sync::Arc; use anyhow::Context; @@ -34,8 +35,9 @@ use crate::source::filesystem::opendal_source::{ }; use crate::source::filesystem::{FsPageItem, OpendalFsSplit}; use crate::source::{ - create_split_reader, BoxChunkSourceStream, BoxTryStream, Column, ConnectorProperties, - ConnectorState, SourceColumnDesc, SourceContext, SplitReader, WaitCheckpointTask, + create_split_reader, BackfillInfo, BoxChunkSourceStream, BoxTryStream, Column, + ConnectorProperties, ConnectorState, SourceColumnDesc, SourceContext, SplitId, SplitReader, + WaitCheckpointTask, }; use crate::{dispatch_source_prop, WithOptionsSecResolved}; @@ -129,6 +131,72 @@ impl SourceReader { }) } + pub async fn build_stream_for_backfill( + &self, + state: ConnectorState, + column_ids: Vec, + source_ctx: Arc, + ) -> ConnectorResult<(BoxChunkSourceStream, HashMap)> { + let Some(splits) = state else { + return Ok((pending().boxed(), HashMap::new())); + }; + let config = self.config.clone(); + let columns = self.get_target_columns(column_ids)?; + + let data_gen_columns = Some( + columns + .iter() + .map(|col| Column { + name: col.name.clone(), + data_type: col.data_type.clone(), + is_visible: col.is_visible(), + }) + .collect_vec(), + ); + + let parser_config = ParserConfig { + specific: self.parser_config.clone(), + common: CommonParserConfig { + rw_columns: columns, + }, + }; + + let support_multiple_splits = config.support_multiple_splits(); + dispatch_source_prop!(config, prop, { + let readers = if support_multiple_splits { + tracing::debug!( + "spawning connector split reader for multiple splits {:?}", + splits + ); + let reader = + create_split_reader(*prop, splits, parser_config, source_ctx, data_gen_columns) + .await?; + + vec![reader] + } else { + let to_reader_splits = splits.into_iter().map(|split| vec![split]); + try_join_all(to_reader_splits.into_iter().map(|splits| { + tracing::debug!(?splits, "spawning connector split reader"); + let props = prop.clone(); + let data_gen_columns = data_gen_columns.clone(); + let parser_config = parser_config.clone(); + // TODO: is this reader split across multiple threads...? Realistically, we want + // source_ctx to live in a single actor. + let source_ctx = source_ctx.clone(); + create_split_reader(*props, splits, parser_config, source_ctx, data_gen_columns) + })) + .await? + }; + + let backfill_info = readers.iter().flat_map(|r| r.backfill_info()).collect(); + + Ok(( + select_all(readers.into_iter().map(|r| r.into_stream())).boxed(), + backfill_info, + )) + }) + } + /// Build `SplitReader`s and then `BoxChunkSourceStream` from the given `ConnectorState` (`SplitImpl`s). 
pub async fn build_stream( &self, diff --git a/src/stream/src/executor/source/source_backfill_executor.rs b/src/stream/src/executor/source/source_backfill_executor.rs index 9c3336878f952..b28c707bdedd0 100644 --- a/src/stream/src/executor/source/source_backfill_executor.rs +++ b/src/stream/src/executor/source/source_backfill_executor.rs @@ -27,7 +27,8 @@ use risingwave_common::system_param::reader::SystemParamsRead; use risingwave_common::types::JsonbVal; use risingwave_connector::source::reader::desc::{SourceDesc, SourceDescBuilder}; use risingwave_connector::source::{ - BoxChunkSourceStream, SourceContext, SourceCtrlOpts, SplitId, SplitImpl, SplitMetaData, + BackfillInfo, BoxChunkSourceStream, SourceContext, SourceCtrlOpts, SplitId, SplitImpl, + SplitMetaData, }; use serde::{Deserialize, Serialize}; use thiserror_ext::AsReport; @@ -43,6 +44,7 @@ use crate::executor::UpdateMutation; #[derive(Clone, Debug, Deserialize, Serialize, PartialEq)] pub enum BackfillState { /// `None` means not started yet. It's the initial state. + /// XXX: perhaps we can also set to low-watermark instead of `None` Backfilling(Option), /// Backfill is stopped at this offset (inclusive). Source needs to filter out messages before this offset. SourceCachingUp(String), @@ -90,6 +92,8 @@ pub struct SourceBackfillExecutorInner { /// Local variables used in the backfill stage. /// +/// See for a state diagram about how it works. +/// /// Note: all off the fields should contain all available splits, and we can `unwrap()` safely when `get()`. #[derive(Debug)] struct BackfillStage { @@ -99,8 +103,8 @@ struct BackfillStage { /// Note: the offsets are not updated. Should use `state`'s offset to update before using it (`get_latest_unfinished_splits`). splits: Vec, /// The latest offset from upstream (inclusive). After we reach this offset, we can stop backfilling. - /// TODO: initialize this with high watermark so that we can finish backfilling even when upstream - /// doesn't emit any data. + /// This is initialized with the latest available offset in the connector (if the connector provides the ability to fetch it) + /// so that we can finish backfilling even when upstream doesn't emit any data. target_offsets: HashMap>, } @@ -259,7 +263,7 @@ impl SourceBackfillExecutorInner { &self, source_desc: &SourceDesc, splits: Vec, - ) -> StreamExecutorResult { + ) -> StreamExecutorResult<(BoxChunkSourceStream, HashMap)> { let column_ids = source_desc .columns .iter() @@ -278,12 +282,22 @@ impl SourceBackfillExecutorInner { source_desc.source.config.clone(), None, ); - let stream = source_desc + + // We will check watermark to decide whether we need to backfill. + // e.g., when there's a Kafka topic-partition without any data, + // we don't need to backfill at all. But if we do not check here, + // the executor can only know it's finished when data coming in. + // For blocking DDL, this would be annoying. + + let (stream, backfill_info) = source_desc .source - .build_stream(Some(splits), column_ids, Arc::new(source_ctx)) + .build_stream_for_backfill(Some(splits), column_ids, Arc::new(source_ctx)) .await .map_err(StreamExecutorError::connector_error)?; - Ok(apply_rate_limit(stream, self.rate_limit_rps).boxed()) + Ok(( + apply_rate_limit(stream, self.rate_limit_rps).boxed(), + backfill_info, + )) } #[try_stream(ok = Message, error = StreamExecutorError)] @@ -337,13 +351,25 @@ impl SourceBackfillExecutorInner { // Return the ownership of `stream_source_core` to the source executor. 
self.stream_source_core = core; - let source_chunk_reader = self + let (source_chunk_reader, backfill_info) = self .build_stream_source_reader( &source_desc, backfill_stage.get_latest_unfinished_splits()?, ) .instrument_await("source_build_reader") .await?; + for (split_id, info) in &backfill_info { + match info { + BackfillInfo::NoDataToBackfill => { + *backfill_stage.states.get_mut(split_id).unwrap() = BackfillState::Finished; + } + BackfillInfo::HasDataToBackfill { latest_offset } => { + // Note: later we will override it with the offset from the source message, and it's possible to become smaller than this value. + *backfill_stage.target_offsets.get_mut(split_id).unwrap() = + Some(latest_offset.clone()); + } + } + } fn select_strategy(_: &mut ()) -> PollNext { futures::stream::PollNext::Left @@ -422,7 +448,7 @@ impl SourceBackfillExecutorInner { self.actor_ctx.fragment_id.to_string(), ]); - let reader = self + let (reader, _backfill_info) = self .build_stream_source_reader( &source_desc, backfill_stage.get_latest_unfinished_splits()?, @@ -504,7 +530,7 @@ impl SourceBackfillExecutorInner { ); // Replace the source reader with a new one of the new state. - let reader = self + let (reader, _backfill_info) = self .build_stream_source_reader( &source_desc, latest_unfinished_splits, @@ -602,6 +628,15 @@ impl SourceBackfillExecutorInner { } let mut splits: HashSet = backfill_stage.states.keys().cloned().collect(); + // Make sure `Finished` state is persisted. + self.backfill_state_store + .set_states( + splits + .iter() + .map(|s| (s.clone(), BackfillState::Finished)) + .collect(), + ) + .await?; // All splits finished backfilling. Now we only forward the source data. #[for_await] From a7480e1c6225e6bd32314fec9f5e0a686a9850b9 Mon Sep 17 00:00:00 2001 From: Bugen Zhao Date: Wed, 4 Sep 2024 10:27:31 +0800 Subject: [PATCH 16/26] feat(risedev): attempt to kill existing session when launching `risedev-dev` (#18370) Signed-off-by: Bugen Zhao --- .../src/task/configure_tmux_service.rs | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/src/risedevtool/src/task/configure_tmux_service.rs b/src/risedevtool/src/task/configure_tmux_service.rs index a20274edfc3c1..af4581456611b 100644 --- a/src/risedevtool/src/task/configure_tmux_service.rs +++ b/src/risedevtool/src/task/configure_tmux_service.rs @@ -16,10 +16,10 @@ use std::env; use std::path::Path; use std::process::Command; -use anyhow::{bail, Context, Result}; +use anyhow::{Context, Result}; use console::style; -use crate::util::stylized_risedev_subcmd; +use crate::util::{risedev_cmd, stylized_risedev_subcmd}; use crate::{ExecuteContext, Task}; pub struct ConfigureTmuxTask; @@ -59,10 +59,17 @@ impl Task for ConfigureTmuxTask { let mut cmd = new_tmux_command(); cmd.arg("list-sessions"); if ctx.run_command(cmd).is_ok() { - bail!( - "A previous cluster is already running. Please kill it first with {}.", - stylized_risedev_subcmd("k"), - ); + ctx.pb.set_message("killing previous session..."); + + let mut cmd = Command::new(risedev_cmd()); + cmd.arg("k"); + ctx.run_command(cmd).with_context(|| { + format!( + "A previous cluster is already running while `risedev-dev` failed to kill it. 
\ + Please kill it manually with {}.", + stylized_risedev_subcmd("k") + ) + })?; } ctx.pb.set_message("creating new session..."); From 0a4ccde5c4b8762d825b150a0a73a80afdec6253 Mon Sep 17 00:00:00 2001 From: xiangjinwu <17769960+xiangjinwu@users.noreply.github.com> Date: Wed, 4 Sep 2024 13:12:56 +0800 Subject: [PATCH 17/26] fix(sqlparser): display create items with comma properly (#18393) --- src/sqlparser/src/ast/statement.rs | 12 ++++++++++-- src/sqlparser/tests/testdata/create.yaml | 6 ++++++ 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/src/sqlparser/src/ast/statement.rs b/src/sqlparser/src/ast/statement.rs index f297530ac6aff..b8e1aa245b4ec 100644 --- a/src/sqlparser/src/ast/statement.rs +++ b/src/sqlparser/src/ast/statement.rs @@ -412,6 +412,7 @@ pub(super) fn fmt_create_items( || !watermarks.is_empty() || wildcard_idx.is_some(); has_items.then(|| write!(&mut items, "(")); + if let Some(wildcard_idx) = wildcard_idx { let (columns_l, columns_r) = columns.split_at(wildcard_idx); write!(&mut items, "{}", display_comma_separated(columns_l))?; @@ -426,14 +427,21 @@ pub(super) fn fmt_create_items( } else { write!(&mut items, "{}", display_comma_separated(columns))?; } - if !columns.is_empty() && (!constraints.is_empty() || !watermarks.is_empty()) { + let mut leading_items = !columns.is_empty() || wildcard_idx.is_some(); + + if leading_items && !constraints.is_empty() { write!(&mut items, ", ")?; } write!(&mut items, "{}", display_comma_separated(constraints))?; - if !columns.is_empty() && !constraints.is_empty() && !watermarks.is_empty() { + leading_items |= !constraints.is_empty(); + + if leading_items && !watermarks.is_empty() { write!(&mut items, ", ")?; } write!(&mut items, "{}", display_comma_separated(watermarks))?; + // uncomment this when adding more sections below + // leading_items |= !watermarks.is_empty(); + has_items.then(|| write!(&mut items, ")")); Ok(items) } diff --git a/src/sqlparser/tests/testdata/create.yaml b/src/sqlparser/tests/testdata/create.yaml index bcd94d53f1ed7..9fca29ad98527 100644 --- a/src/sqlparser/tests/testdata/create.yaml +++ b/src/sqlparser/tests/testdata/create.yaml @@ -42,6 +42,12 @@ - input: CREATE SOURCE IF NOT EXISTS src WITH (kafka.topic = 'abc', kafka.brokers = 'localhost:1001') FORMAT PLAIN ENCODE PROTOBUF (message = 'Foo', schema.registry = 'http://') formatted_sql: CREATE SOURCE IF NOT EXISTS src WITH (kafka.topic = 'abc', kafka.brokers = 'localhost:1001') FORMAT PLAIN ENCODE PROTOBUF (message = 'Foo', schema.registry = 'http://') formatted_ast: 'CreateSource { stmt: CreateSourceStatement { temporary: false, if_not_exists: true, columns: [], wildcard_idx: None, constraints: [], source_name: ObjectName([Ident { value: "src", quote_style: None }]), with_properties: WithProperties([SqlOption { name: ObjectName([Ident { value: "kafka", quote_style: None }, Ident { value: "topic", quote_style: None }]), value: SingleQuotedString("abc") }, SqlOption { name: ObjectName([Ident { value: "kafka", quote_style: None }, Ident { value: "brokers", quote_style: None }]), value: SingleQuotedString("localhost:1001") }]), source_schema: V2(ConnectorSchema { format: Plain, row_encode: Protobuf, row_options: [SqlOption { name: ObjectName([Ident { value: "message", quote_style: None }]), value: SingleQuotedString("Foo") }, SqlOption { name: ObjectName([Ident { value: "schema", quote_style: None }, Ident { value: "registry", quote_style: None }]), value: SingleQuotedString("http://") }], key_encode: None }), source_watermarks: [], 
include_column_options: [] } }' +- input: CREATE SOURCE IF NOT EXISTS src (*, WATERMARK FOR event_time AS event_time - INTERVAL '60' SECOND) WITH (kafka.topic = 'abc', kafka.brokers = 'localhost:1001') FORMAT PLAIN ENCODE PROTOBUF (message = 'Foo', schema.registry = 'http://') + formatted_sql: CREATE SOURCE IF NOT EXISTS src (*, WATERMARK FOR event_time AS event_time - INTERVAL '60' SECOND) WITH (kafka.topic = 'abc', kafka.brokers = 'localhost:1001') FORMAT PLAIN ENCODE PROTOBUF (message = 'Foo', schema.registry = 'http://') + formatted_ast: 'CreateSource { stmt: CreateSourceStatement { temporary: false, if_not_exists: true, columns: [], wildcard_idx: Some(0), constraints: [], source_name: ObjectName([Ident { value: "src", quote_style: None }]), with_properties: WithProperties([SqlOption { name: ObjectName([Ident { value: "kafka", quote_style: None }, Ident { value: "topic", quote_style: None }]), value: SingleQuotedString("abc") }, SqlOption { name: ObjectName([Ident { value: "kafka", quote_style: None }, Ident { value: "brokers", quote_style: None }]), value: SingleQuotedString("localhost:1001") }]), source_schema: V2(ConnectorSchema { format: Plain, row_encode: Protobuf, row_options: [SqlOption { name: ObjectName([Ident { value: "message", quote_style: None }]), value: SingleQuotedString("Foo") }, SqlOption { name: ObjectName([Ident { value: "schema", quote_style: None }, Ident { value: "registry", quote_style: None }]), value: SingleQuotedString("http://") }], key_encode: None }), source_watermarks: [SourceWatermark { column: Ident { value: "event_time", quote_style: None }, expr: BinaryOp { left: Identifier(Ident { value: "event_time", quote_style: None }), op: Minus, right: Value(Interval { value: "60", leading_field: Some(Second), leading_precision: None, last_field: None, fractional_seconds_precision: None }) } }], include_column_options: [] } }' +- input: CREATE SOURCE IF NOT EXISTS src (PRIMARY KEY (event_id), WATERMARK FOR event_time AS event_time - INTERVAL '60' SECOND) WITH (kafka.topic = 'abc', kafka.brokers = 'localhost:1001') FORMAT PLAIN ENCODE PROTOBUF (message = 'Foo', schema.registry = 'http://') + formatted_sql: CREATE SOURCE IF NOT EXISTS src (PRIMARY KEY (event_id), WATERMARK FOR event_time AS event_time - INTERVAL '60' SECOND) WITH (kafka.topic = 'abc', kafka.brokers = 'localhost:1001') FORMAT PLAIN ENCODE PROTOBUF (message = 'Foo', schema.registry = 'http://') + formatted_ast: 'CreateSource { stmt: CreateSourceStatement { temporary: false, if_not_exists: true, columns: [], wildcard_idx: None, constraints: [Unique { name: None, columns: [Ident { value: "event_id", quote_style: None }], is_primary: true }], source_name: ObjectName([Ident { value: "src", quote_style: None }]), with_properties: WithProperties([SqlOption { name: ObjectName([Ident { value: "kafka", quote_style: None }, Ident { value: "topic", quote_style: None }]), value: SingleQuotedString("abc") }, SqlOption { name: ObjectName([Ident { value: "kafka", quote_style: None }, Ident { value: "brokers", quote_style: None }]), value: SingleQuotedString("localhost:1001") }]), source_schema: V2(ConnectorSchema { format: Plain, row_encode: Protobuf, row_options: [SqlOption { name: ObjectName([Ident { value: "message", quote_style: None }]), value: SingleQuotedString("Foo") }, SqlOption { name: ObjectName([Ident { value: "schema", quote_style: None }, Ident { value: "registry", quote_style: None }]), value: SingleQuotedString("http://") }], key_encode: None }), source_watermarks: [SourceWatermark { column: Ident 
{ value: "event_time", quote_style: None }, expr: BinaryOp { left: Identifier(Ident { value: "event_time", quote_style: None }), op: Minus, right: Value(Interval { value: "60", leading_field: Some(Second), leading_precision: None, last_field: None, fractional_seconds_precision: None }) } }], include_column_options: [] } }' - input: CREATE SOURCE bid (auction INTEGER, bidder INTEGER, price INTEGER, WATERMARK FOR auction AS auction - 1, "date_time" TIMESTAMP) with (connector = 'nexmark', nexmark.table.type = 'Bid', nexmark.split.num = '12', nexmark.min.event.gap.in.ns = '0') formatted_sql: CREATE SOURCE bid (auction INT, bidder INT, price INT, "date_time" TIMESTAMP, WATERMARK FOR auction AS auction - 1) WITH (connector = 'nexmark', nexmark.table.type = 'Bid', nexmark.split.num = '12', nexmark.min.event.gap.in.ns = '0') FORMAT NATIVE ENCODE NATIVE formatted_ast: 'CreateSource { stmt: CreateSourceStatement { temporary: false, if_not_exists: false, columns: [ColumnDef { name: Ident { value: "auction", quote_style: None }, data_type: Some(Int), collation: None, options: [] }, ColumnDef { name: Ident { value: "bidder", quote_style: None }, data_type: Some(Int), collation: None, options: [] }, ColumnDef { name: Ident { value: "price", quote_style: None }, data_type: Some(Int), collation: None, options: [] }, ColumnDef { name: Ident { value: "date_time", quote_style: Some(''"'') }, data_type: Some(Timestamp(false)), collation: None, options: [] }], wildcard_idx: None, constraints: [], source_name: ObjectName([Ident { value: "bid", quote_style: None }]), with_properties: WithProperties([SqlOption { name: ObjectName([Ident { value: "connector", quote_style: None }]), value: SingleQuotedString("nexmark") }, SqlOption { name: ObjectName([Ident { value: "nexmark", quote_style: None }, Ident { value: "table", quote_style: None }, Ident { value: "type", quote_style: None }]), value: SingleQuotedString("Bid") }, SqlOption { name: ObjectName([Ident { value: "nexmark", quote_style: None }, Ident { value: "split", quote_style: None }, Ident { value: "num", quote_style: None }]), value: SingleQuotedString("12") }, SqlOption { name: ObjectName([Ident { value: "nexmark", quote_style: None }, Ident { value: "min", quote_style: None }, Ident { value: "event", quote_style: None }, Ident { value: "gap", quote_style: None }, Ident { value: "in", quote_style: None }, Ident { value: "ns", quote_style: None }]), value: SingleQuotedString("0") }]), source_schema: V2(ConnectorSchema { format: Native, row_encode: Native, row_options: [], key_encode: None }), source_watermarks: [SourceWatermark { column: Ident { value: "auction", quote_style: None }, expr: BinaryOp { left: Identifier(Ident { value: "auction", quote_style: None }), op: Minus, right: Value(Number("1")) } }], include_column_options: [] } }' From c4b1dd4555c329a58fc92955b4a7aa9a3d957d89 Mon Sep 17 00:00:00 2001 From: xxchan Date: Wed, 4 Sep 2024 13:16:08 +0800 Subject: [PATCH 18/26] feat(expr): support `jsonb_populate_map` (#18378) Signed-off-by: xxchan --- e2e_test/batch/types/map.slt.part | 57 +++++++++++++++++++ proto/expr.proto | 1 + src/common/src/types/jsonb.rs | 26 ++++++++- src/expr/impl/src/scalar/jsonb_record.rs | 18 +++++- .../binder/expr/function/builtin_scalar.rs | 1 + src/frontend/src/expr/pure.rs | 1 + .../src/optimizer/plan_expr_visitor/strong.rs | 1 + 7 files changed, 103 insertions(+), 2 deletions(-) diff --git a/e2e_test/batch/types/map.slt.part b/e2e_test/batch/types/map.slt.part index b4b4be7e5cba7..fe98fa3633000 100644 --- 
a/e2e_test/batch/types/map.slt.part +++ b/e2e_test/batch/types/map.slt.part @@ -122,6 +122,63 @@ select to_jsonb(m1), to_jsonb(m2), to_jsonb(m3), to_jsonb(l), to_jsonb(s) from t {"a": 1.0, "b": 2.0, "c": 3.0} null null null null {"a": 1.0, "b": 2.0, "c": 3.0} {"1": true, "2": false, "3": true} {"a": {"a1": "a2"}, "b": {"b1": "b2"}} [{"a": 1, "b": 2, "c": 3}, {"d": 4, "e": 5, "f": 6}] {"m": {"a": {"x": 1}, "b": {"x": 2}, "c": {"x": 3}}} +query ? +select jsonb_populate_map( + null::map(varchar, int), + '{"a": 1, "b": 2}'::jsonb +); +---- +{a:1,b:2} + + +query ? +select jsonb_populate_map( + MAP {'a': 1, 'b': 2}, + '{"b": 3, "c": 4}'::jsonb +); +---- +{a:1,b:3,c:4} + + +# implicit cast (int -> varchar) +query ? +select jsonb_populate_map( + MAP {'a': 'a', 'b': 'b'}, + '{"b": 3, "c": 4}'::jsonb +); +---- +{a:a,b:3,c:4} + + +query error +select jsonb_populate_map( + MAP {'a': 1, 'b': 2}, + '{"b": "3", "c": 4}'::jsonb +); +---- +db error: ERROR: Failed to run the query + +Caused by these errors (recent errors listed first): + 1: Expr error + 2: error while evaluating expression `jsonb_populate_map('{a:1,b:2}', '{"b": "3", "c": 4}')` + 3: Parse error: cannot cast jsonb string to type number + + +query error +select jsonb_populate_map( + null::map(int, int), + '{"a": 1, "b": 2}'::jsonb +); +---- +db error: ERROR: Failed to run the query + +Caused by these errors (recent errors listed first): + 1: Expr error + 2: error while evaluating expression `jsonb_populate_map(NULL, '{"a": 1, "b": 2}')` + 3: Parse error: cannot convert jsonb to a map with non-string keys + + + statement ok drop table t; diff --git a/proto/expr.proto b/proto/expr.proto index e5b5fb73ba8ff..53bba96cc587b 100644 --- a/proto/expr.proto +++ b/proto/expr.proto @@ -282,6 +282,7 @@ message ExprNode { JSONB_POPULATE_RECORD = 629; JSONB_TO_RECORD = 630; JSONB_SET = 631; + JSONB_POPULATE_MAP = 632; // Map functions MAP_FROM_ENTRIES = 700; diff --git a/src/common/src/types/jsonb.rs b/src/common/src/types/jsonb.rs index fa80069080ff4..6363864fd73e2 100644 --- a/src/common/src/types/jsonb.rs +++ b/src/common/src/types/jsonb.rs @@ -20,7 +20,9 @@ use jsonbb::{Value, ValueRef}; use postgres_types::{accepts, to_sql_checked, FromSql, IsNull, ToSql, Type}; use risingwave_common_estimate_size::EstimateSize; -use super::{Datum, IntoOrdered, ListValue, ScalarImpl, StructRef, ToOwnedDatum, F64}; +use super::{ + Datum, IntoOrdered, ListValue, MapType, MapValue, ScalarImpl, StructRef, ToOwnedDatum, F64, +}; use crate::types::{DataType, Scalar, ScalarRef, StructType, StructValue}; use crate::util::iter_util::ZipEqDebug; @@ -464,6 +466,28 @@ impl<'a> JsonbRef<'a> { Ok(StructValue::new(fields)) } + pub fn to_map(self, ty: &MapType) -> Result { + let object = self + .0 + .as_object() + .ok_or_else(|| format!("cannot convert to map from a jsonb {}", self.type_name()))?; + if !matches!(ty.key(), DataType::Varchar) { + return Err("cannot convert jsonb to a map with non-string keys".to_string()); + } + + let mut keys: Vec = Vec::with_capacity(object.len()); + let mut values: Vec = Vec::with_capacity(object.len()); + for (k, v) in object.iter() { + let v = Self(v).to_datum(ty.value())?; + keys.push(Some(ScalarImpl::Utf8(k.to_owned().into()))); + values.push(v); + } + MapValue::try_from_kv( + ListValue::from_datum_iter(ty.key(), keys), + ListValue::from_datum_iter(ty.value(), values), + ) + } + /// Expands the top-level JSON object to a row having the struct type of the `base` argument. 
pub fn populate_struct( self, diff --git a/src/expr/impl/src/scalar/jsonb_record.rs b/src/expr/impl/src/scalar/jsonb_record.rs index b85feb9190d2a..a6def7cb25643 100644 --- a/src/expr/impl/src/scalar/jsonb_record.rs +++ b/src/expr/impl/src/scalar/jsonb_record.rs @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -use risingwave_common::types::{JsonbRef, StructRef, StructValue}; +use risingwave_common::types::{JsonbRef, MapRef, MapValue, Scalar, StructRef, StructValue}; use risingwave_expr::expr::Context; use risingwave_expr::{function, ExprError, Result}; @@ -60,6 +60,22 @@ fn jsonb_populate_record( jsonb.populate_struct(output_type, base).map_err(parse_err) } +#[function("jsonb_populate_map(anymap, jsonb) -> anymap")] +pub fn jsonb_populate_map( + base: Option>, + v: JsonbRef<'_>, + ctx: &Context, +) -> Result { + let output_type = ctx.return_type.as_map(); + let jsonb_map = v + .to_map(output_type) + .map_err(|e| ExprError::Parse(e.into()))?; + match base { + Some(base) => Ok(MapValue::concat(base, jsonb_map.as_scalar_ref())), + None => Ok(jsonb_map), + } +} + /// Expands the top-level JSON array of objects to a set of rows having the composite type of the /// base argument. Each element of the JSON array is processed as described above for /// `jsonb_populate_record`. diff --git a/src/frontend/src/binder/expr/function/builtin_scalar.rs b/src/frontend/src/binder/expr/function/builtin_scalar.rs index 73eb722b26011..d46681c51ab3e 100644 --- a/src/frontend/src/binder/expr/function/builtin_scalar.rs +++ b/src/frontend/src/binder/expr/function/builtin_scalar.rs @@ -399,6 +399,7 @@ impl Binder { ("jsonb_path_query_array", raw_call(ExprType::JsonbPathQueryArray)), ("jsonb_path_query_first", raw_call(ExprType::JsonbPathQueryFirst)), ("jsonb_set", raw_call(ExprType::JsonbSet)), + ("jsonb_populate_map", raw_call(ExprType::JsonbPopulateMap)), // map ("map_from_entries", raw_call(ExprType::MapFromEntries)), ("map_access",raw_call(ExprType::MapAccess)), diff --git a/src/frontend/src/expr/pure.rs b/src/frontend/src/expr/pure.rs index 3e6c83d8330fb..d47cc3851f641 100644 --- a/src/frontend/src/expr/pure.rs +++ b/src/frontend/src/expr/pure.rs @@ -211,6 +211,7 @@ impl ExprVisitor for ImpureAnalyzer { | Type::JsonbPathQueryArray | Type::JsonbPathQueryFirst | Type::JsonbSet + | Type::JsonbPopulateMap | Type::IsJson | Type::ToJsonb | Type::Sind diff --git a/src/frontend/src/optimizer/plan_expr_visitor/strong.rs b/src/frontend/src/optimizer/plan_expr_visitor/strong.rs index 2c14fc730877d..673a5f41746bb 100644 --- a/src/frontend/src/optimizer/plan_expr_visitor/strong.rs +++ b/src/frontend/src/optimizer/plan_expr_visitor/strong.rs @@ -291,6 +291,7 @@ impl Strong { | ExprType::JsonbPopulateRecord | ExprType::JsonbToRecord | ExprType::JsonbSet + | ExprType::JsonbPopulateMap | ExprType::MapFromEntries | ExprType::MapAccess | ExprType::MapKeys From 6402328e7ff51b16691c6b72858b7075250246fe Mon Sep 17 00:00:00 2001 From: xxchan Date: Wed, 4 Sep 2024 13:34:29 +0800 Subject: [PATCH 19/26] refactor: add some comments for source splits (#18034) --- proto/meta.proto | 2 ++ proto/stream_plan.proto | 3 +++ src/connector/src/with_options.rs | 4 ++++ src/frontend/src/handler/create_source.rs | 3 ++- src/meta/src/barrier/command.rs | 1 + src/meta/src/model/stream.rs | 3 ++- 6 files changed, 14 insertions(+), 2 deletions(-) diff --git a/proto/meta.proto b/proto/meta.proto index d75494625edd4..8932dcbc9e033 100644 --- a/proto/meta.proto +++ 
b/proto/meta.proto @@ -99,6 +99,7 @@ message TableFragments { State state = 2; map fragments = 3; map actor_status = 4; + // `Source` and `SourceBackfill` are handled together here. map actor_splits = 5; stream_plan.StreamContext ctx = 6; @@ -513,6 +514,7 @@ message GetClusterInfoRequest {} message GetClusterInfoResponse { repeated common.WorkerNode worker_nodes = 1; repeated TableFragments table_fragments = 2; + // `Source` and `SourceBackfill` are handled together here. map actor_splits = 3; map source_infos = 4; uint64 revision = 5; diff --git a/proto/stream_plan.proto b/proto/stream_plan.proto index 5ea2f018eee20..a96f54818146e 100644 --- a/proto/stream_plan.proto +++ b/proto/stream_plan.proto @@ -23,6 +23,7 @@ message AddMutation { // All actors to be added (to the main connected component of the graph) in this update. repeated uint32 added_actors = 3; // We may embed a source change split mutation here. + // `Source` and `SourceBackfill` are handled together here. // TODO: we may allow multiple mutations in a single barrier. map actor_splits = 2; // We may embed a pause mutation here. @@ -70,6 +71,7 @@ message UpdateMutation { // All actors to be dropped in this update. repeated uint32 dropped_actors = 4; // Source updates. + // `Source` and `SourceBackfill` are handled together here. map actor_splits = 5; // When modifying the Materialized View, we need to recreate the Dispatcher from the old upstream to the new TableFragment. // Consistent with the semantics in AddMutation. @@ -77,6 +79,7 @@ message UpdateMutation { } message SourceChangeSplitMutation { + // `Source` and `SourceBackfill` are handled together here. map actor_splits = 2; } diff --git a/src/connector/src/with_options.rs b/src/connector/src/with_options.rs index ae2d432fdfd74..065c9394b8a49 100644 --- a/src/connector/src/with_options.rs +++ b/src/connector/src/with_options.rs @@ -126,6 +126,10 @@ pub trait WithPropertiesExt: Get + Sized { CdcTableType::from_properties(self).enable_transaction_metadata() } + fn is_shareable_non_cdc_connector(&self) -> bool { + self.is_kafka_connector() + } + #[inline(always)] fn is_iceberg_connector(&self) -> bool { let Some(connector) = self.get_connector() else { diff --git a/src/frontend/src/handler/create_source.rs b/src/frontend/src/handler/create_source.rs index f006ca929f54c..432f814cd4c41 100644 --- a/src/frontend/src/handler/create_source.rs +++ b/src/frontend/src/handler/create_source.rs @@ -1640,7 +1640,8 @@ pub async fn handle_create_source( let create_cdc_source_job = with_properties.is_shareable_cdc_connector(); let is_shared = create_cdc_source_job - || (with_properties.is_kafka_connector() && session.config().rw_enable_shared_source()); + || (with_properties.is_shareable_non_cdc_connector() + && session.config().rw_enable_shared_source()); let (columns_from_resolve_source, mut source_info) = if create_cdc_source_job { bind_columns_from_source_for_cdc(&session, &source_schema)? diff --git a/src/meta/src/barrier/command.rs b/src/meta/src/barrier/command.rs index 0bea5f37940d6..cf6f251b359c2 100644 --- a/src/meta/src/barrier/command.rs +++ b/src/meta/src/barrier/command.rs @@ -78,6 +78,7 @@ pub struct Reschedule { /// Reassigned splits for source actors. /// It becomes the `actor_splits` in [`UpdateMutation`]. + /// `Source` and `SourceBackfill` are handled together here. pub actor_splits: HashMap>, /// Whether this fragment is injectable. 
The injectable means whether the fragment contains diff --git a/src/meta/src/model/stream.rs b/src/meta/src/model/stream.rs index bec6b95cfb0f9..447cf5cf85645 100644 --- a/src/meta/src/model/stream.rs +++ b/src/meta/src/model/stream.rs @@ -106,7 +106,8 @@ pub struct TableFragments { /// The status of actors pub actor_status: BTreeMap, - /// The splits of actors + /// The splits of actors, + /// incl. both `Source` and `SourceBackfill` actors. pub actor_splits: HashMap>, /// The streaming context associated with this stream plan and its fragments From ee33271dd98cfa84e390656e5b5836f7453b07cc Mon Sep 17 00:00:00 2001 From: Bohan Zhang Date: Wed, 4 Sep 2024 13:50:46 +0800 Subject: [PATCH 20/26] fix: parquet test missing comma (#18397) Signed-off-by: tabVersion --- e2e_test/s3/fs_parquet_source_and_sink.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/e2e_test/s3/fs_parquet_source_and_sink.py b/e2e_test/s3/fs_parquet_source_and_sink.py index 3ae00d3fcee15..033cb73ffbe70 100644 --- a/e2e_test/s3/fs_parquet_source_and_sink.py +++ b/e2e_test/s3/fs_parquet_source_and_sink.py @@ -137,7 +137,7 @@ def _table(): s3.bucket_name = '{config['S3_BUCKET']}', s3.credentials.access = '{config['S3_ACCESS_KEY']}', s3.credentials.secret = '{config['S3_SECRET_KEY']}', - s3.endpoint_url = 'https://{config['S3_ENDPOINT']}' + s3.endpoint_url = 'https://{config['S3_ENDPOINT']}', s3.path = '', s3.file_type = 'parquet', type = 'append-only', From 9923c3abb8f87a6f643d5f753585e2b31f4de7e0 Mon Sep 17 00:00:00 2001 From: xxchan Date: Wed, 4 Sep 2024 15:52:57 +0800 Subject: [PATCH 21/26] feat(risedev): support providing env var in yaml config (#18396) Signed-off-by: xxchan --- risedev.yml | 6 ++- src/risedevtool/src/bin/risedev-compose.rs | 4 +- src/risedevtool/src/bin/risedev-dev.rs | 7 +-- src/risedevtool/src/config.rs | 45 +++++++++++++++++-- .../src/task/configure_tmux_service.rs | 15 ++++--- 5 files changed, 62 insertions(+), 15 deletions(-) diff --git a/risedev.yml b/risedev.yml index 3c7f8e0e09be4..22c4569adb610 100644 --- a/risedev.yml +++ b/risedev.yml @@ -16,8 +16,12 @@ profile: # The default configuration will start 1 compute node, 1 meta node and 1 frontend. 
default: - # Specify a configuration file to override the default settings + # # Specify a configuration file to override the default settings # config-path: src/config/example.toml + # # Specify custom environment variables + # env: + # RUST_LOG: "info,risingwave_storage::hummock=off" + # RW_ENABLE_PRETTY_LOG: "true" steps: # If you want to use the local s3 storage, enable the following line # - use: minio diff --git a/src/risedevtool/src/bin/risedev-compose.rs b/src/risedevtool/src/bin/risedev-compose.rs index 0547fda6b7008..e51961831056a 100644 --- a/src/risedevtool/src/bin/risedev-compose.rs +++ b/src/risedevtool/src/bin/risedev-compose.rs @@ -82,11 +82,11 @@ fn main() -> Result<()> { ) .collect(); - let (config_path, expanded_config) = + let (config_path, _env, expanded_config) = ConfigExpander::expand_with_extra_info(".", &opts.profile, extra_info)?; (expanded_config, Some(compose_deploy_config), config_path) } else { - let (config_path, expanded_config) = ConfigExpander::expand(".", &opts.profile)?; + let (config_path, _env, expanded_config) = ConfigExpander::expand(".", &opts.profile)?; (expanded_config, None, config_path) }; diff --git a/src/risedevtool/src/bin/risedev-dev.rs b/src/risedevtool/src/bin/risedev-dev.rs index c53453b3f903d..80415e321d805 100644 --- a/src/risedevtool/src/bin/risedev-dev.rs +++ b/src/risedevtool/src/bin/risedev-dev.rs @@ -66,6 +66,7 @@ impl ProgressManager { fn task_main( manager: &mut ProgressManager, services: &Vec, + env: Vec, ) -> Result<(Vec<(String, Duration)>, String)> { let log_path = env::var("PREFIX_LOG")?; @@ -82,7 +83,7 @@ fn task_main( // Start Tmux and kill previous services { let mut ctx = ExecuteContext::new(&mut logger, manager.new_progress(), status_dir.clone()); - let mut service = ConfigureTmuxTask::new()?; + let mut service = ConfigureTmuxTask::new(env)?; service.execute(&mut ctx)?; writeln!( @@ -392,7 +393,7 @@ fn main() -> Result<()> { .nth(1) .unwrap_or_else(|| "default".to_string()); - let (config_path, risedev_config) = ConfigExpander::expand(".", &task_name)?; + let (config_path, env, risedev_config) = ConfigExpander::expand(".", &task_name)?; if let Some(config_path) = &config_path { let target = Path::new(&env::var("PREFIX_CONFIG")?).join("risingwave.toml"); @@ -420,7 +421,7 @@ fn main() -> Result<()> { services.len(), task_name )); - let task_result = task_main(&mut manager, &services); + let task_result = task_main(&mut manager, &services, env); match task_result { Ok(_) => { diff --git a/src/risedevtool/src/config.rs b/src/risedevtool/src/config.rs index e4ba9acdf4e19..1abab635b88c1 100644 --- a/src/risedevtool/src/config.rs +++ b/src/risedevtool/src/config.rs @@ -50,6 +50,24 @@ impl ConfigExpander { /// Transforms `risedev.yml` and `risedev-profiles.user.yml` to a fully expanded yaml file. /// + /// Format: + /// + /// ```yaml + /// my-profile: + /// config-path: src/config/ci-recovery.toml + /// env: + /// RUST_LOG: "info,risingwave_storage::hummock=off" + /// RW_ENABLE_PRETTY_LOG: "true" + /// steps: + /// - use: minio + /// - use: sqlite + /// - use: meta-node + /// meta-backend: sqlite + /// - use: compute-node + /// parallelism: 1 + /// - use: frontend + /// ``` + /// /// # Arguments /// /// * `root` is the root directory of these YAML files. 
@@ -58,8 +76,11 @@ impl ConfigExpander { /// /// # Returns /// - /// A pair of `config_path` and expanded steps (items in `{profile}.steps` section in YAML) - pub fn expand(root: impl AsRef, profile: &str) -> Result<(Option, Yaml)> { + /// `(config_path, env, steps)` + pub fn expand( + root: impl AsRef, + profile: &str, + ) -> Result<(Option, Vec, Yaml)> { Self::expand_with_extra_info(root, profile, HashMap::new()) } @@ -72,7 +93,7 @@ impl ConfigExpander { root: impl AsRef, profile: &str, extra_info: HashMap, - ) -> Result<(Option, Yaml)> { + ) -> Result<(Option, Vec, Yaml)> { let global_path = root.as_ref().join(RISEDEV_CONFIG_FILE); let global_yaml = Self::load_yaml(global_path)?; let global_config = global_yaml @@ -120,6 +141,22 @@ impl ConfigExpander { .get(&Yaml::String("config-path".to_string())) .and_then(|s| s.as_str()) .map(|s| s.to_string()); + let mut env = vec![]; + if let Some(env_section) = profile_section.get(&Yaml::String("env".to_string())) { + let env_section = env_section + .as_hash() + .ok_or_else(|| anyhow!("expect `env` section to be a hashmap"))?; + + for (k, v) in env_section { + let key = k + .as_str() + .ok_or_else(|| anyhow!("expect env key to be a string"))?; + let value = v + .as_str() + .ok_or_else(|| anyhow!("expect env value to be a string"))?; + env.push(format!("{}={}", key, value)); + } + } let steps = profile_section .get(&Yaml::String("steps".to_string())) @@ -131,7 +168,7 @@ impl ConfigExpander { let steps = IdExpander::new(&steps)?.visit(steps)?; let steps = ProvideExpander::new(&steps)?.visit(steps)?; - Ok((config_path, steps)) + Ok((config_path, env, steps)) } /// Parses the expanded yaml into [`ServiceConfig`]s. diff --git a/src/risedevtool/src/task/configure_tmux_service.rs b/src/risedevtool/src/task/configure_tmux_service.rs index af4581456611b..925cb2de38444 100644 --- a/src/risedevtool/src/task/configure_tmux_service.rs +++ b/src/risedevtool/src/task/configure_tmux_service.rs @@ -22,7 +22,9 @@ use console::style; use crate::util::{risedev_cmd, stylized_risedev_subcmd}; use crate::{ExecuteContext, Task}; -pub struct ConfigureTmuxTask; +pub struct ConfigureTmuxTask { + env: Vec, +} pub const RISEDEV_NAME: &str = "risedev"; @@ -33,8 +35,8 @@ pub fn new_tmux_command() -> Command { } impl ConfigureTmuxTask { - pub fn new() -> Result { - Ok(Self) + pub fn new(env: Vec) -> Result { + Ok(Self { env }) } } @@ -78,8 +80,11 @@ impl Task for ConfigureTmuxTask { cmd.arg("new-session") // this will automatically create the `risedev` tmux server .arg("-d") .arg("-s") - .arg(RISEDEV_NAME) - .arg("-c") + .arg(RISEDEV_NAME); + for e in &self.env { + cmd.arg("-e").arg(e); + } + cmd.arg("-c") .arg(Path::new(&prefix_path)) .arg(Path::new(&prefix_bin).join("welcome.sh")); From 0dd06ffa9a34b0cb584393990f8de8a41267718d Mon Sep 17 00:00:00 2001 From: William Wen <44139337+wenym1@users.noreply.github.com> Date: Wed, 4 Sep 2024 17:48:03 +0800 Subject: [PATCH 22/26] feat(snapshot-backfill): only receive mutation from barrier worker for snapshot backfill (#18210) --- src/meta/src/barrier/command.rs | 21 +- src/meta/src/barrier/creating_job/mod.rs | 10 +- src/meta/src/barrier/creating_job/status.rs | 21 +- src/meta/src/barrier/mod.rs | 14 + src/meta/src/barrier/rpc.rs | 69 ++--- .../executor/backfill/snapshot_backfill.rs | 266 ++++++++++++++---- src/stream/src/executor/mod.rs | 34 ++- 7 files changed, 321 insertions(+), 114 deletions(-) diff --git a/src/meta/src/barrier/command.rs b/src/meta/src/barrier/command.rs index cf6f251b359c2..6e4ebe40b93b0 100644 --- 
a/src/meta/src/barrier/command.rs +++ b/src/meta/src/barrier/command.rs @@ -497,16 +497,16 @@ impl CommandContext { } } -impl CommandContext { +impl Command { /// Generate a mutation for the given command. - pub fn to_mutation(&self) -> Option { + pub fn to_mutation(&self, current_paused_reason: Option<&PausedReason>) -> Option { let mutation = - match &self.command { + match self { Command::Plain(mutation) => mutation.clone(), Command::Pause(_) => { // Only pause when the cluster is not already paused. - if self.current_paused_reason.is_none() { + if current_paused_reason.is_none() { Some(Mutation::Pause(PauseMutation {})) } else { None @@ -515,7 +515,7 @@ impl CommandContext { Command::Resume(reason) => { // Only resume when the cluster is paused with the same reason. - if self.current_paused_reason == Some(*reason) { + if current_paused_reason == Some(reason) { Some(Mutation::Resume(ResumeMutation {})) } else { None @@ -607,7 +607,7 @@ impl CommandContext { added_actors, actor_splits, // If the cluster is already paused, the new actors should be paused too. - pause: self.current_paused_reason.is_some(), + pause: current_paused_reason.is_some(), subscriptions_to_add, })); @@ -846,7 +846,7 @@ impl CommandContext { } pub fn actors_to_create(&self) -> Option>> { - match &self.command { + match self { Command::CreateStreamingJob { info, job_type } => { let mut map = match job_type { CreateStreamingJobType::Normal => HashMap::new(), @@ -914,6 +914,13 @@ impl CommandContext { ..Default::default() })) } +} + +impl CommandContext { + pub fn to_mutation(&self) -> Option { + self.command + .to_mutation(self.current_paused_reason.as_ref()) + } /// Returns the paused reason after executing the current command. pub fn next_paused_reason(&self) -> Option { diff --git a/src/meta/src/barrier/creating_job/mod.rs b/src/meta/src/barrier/creating_job/mod.rs index c5a52437e2b7d..9e4e52b0e36b8 100644 --- a/src/meta/src/barrier/creating_job/mod.rs +++ b/src/meta/src/barrier/creating_job/mod.rs @@ -28,6 +28,7 @@ use risingwave_common::util::epoch::Epoch; use risingwave_pb::common::WorkerNode; use risingwave_pb::ddl_service::DdlProgress; use risingwave_pb::hummock::HummockVersionStats; +use risingwave_pb::stream_plan::barrier_mutation::Mutation; use risingwave_pb::stream_service::{BarrierCompleteResponse, BuildActorInfo}; use tracing::{debug, info}; @@ -67,6 +68,7 @@ impl CreatingStreamingJobControl { backfill_epoch: u64, version_stat: &HummockVersionStats, metrics: &MetaMetrics, + initial_mutation: Mutation, ) -> Self { info!( table_id = info.table_fragments.table_id().table_id, @@ -108,7 +110,7 @@ impl CreatingStreamingJobControl { backfill_epoch, pending_non_checkpoint_barriers: vec![], snapshot_backfill_actors, - actors_to_create: Some( + initial_barrier_info: Some(( actors_to_create .into_iter() .map(|(worker_id, actors)| { @@ -124,7 +126,8 @@ impl CreatingStreamingJobControl { ) }) .collect(), - ), + initial_mutation, + )), }, upstream_lag: metrics .snapshot_backfill_lag @@ -283,11 +286,12 @@ impl CreatingStreamingJobControl { prev_epoch, kind, new_actors, + mutation, } in barriers_to_inject { let node_to_collect = control_stream_manager.inject_barrier( Some(table_id), - None, + mutation, (&curr_epoch, &prev_epoch), &kind, graph_info, diff --git a/src/meta/src/barrier/creating_job/status.rs b/src/meta/src/barrier/creating_job/status.rs index 0569752b1056b..f5d4c37d247a6 100644 --- a/src/meta/src/barrier/creating_job/status.rs +++ b/src/meta/src/barrier/creating_job/status.rs @@ -18,6 +18,7 @@ use 
std::sync::Arc; use risingwave_common::util::epoch::Epoch; use risingwave_pb::hummock::HummockVersionStats; +use risingwave_pb::stream_plan::barrier_mutation::Mutation; use risingwave_pb::stream_service::barrier_complete_response::CreateMviewProgress; use risingwave_pb::stream_service::BuildActorInfo; @@ -40,7 +41,9 @@ pub(super) enum CreatingStreamingJobStatus { /// The `prev_epoch` of pending non checkpoint barriers pending_non_checkpoint_barriers: Vec, snapshot_backfill_actors: HashMap>, - actors_to_create: Option>>, + /// Info of the first barrier: (`actors_to_create`, `mutation`) + /// Take the mutation out when injecting the first barrier + initial_barrier_info: Option<(HashMap>, Mutation)>, }, ConsumingLogStore { graph_info: InflightGraphInfo, @@ -60,6 +63,7 @@ pub(super) struct CreatingJobInjectBarrierInfo { pub prev_epoch: TracedEpoch, pub kind: BarrierKind, pub new_actors: Option>>, + pub mutation: Option, } impl CreatingStreamingJobStatus { @@ -104,12 +108,12 @@ impl CreatingStreamingJobStatus { graph_info, pending_non_checkpoint_barriers, ref backfill_epoch, - actors_to_create, + initial_barrier_info, .. } = self { if create_mview_tracker.has_pending_finished_jobs() { - assert!(actors_to_create.is_none()); + assert!(initial_barrier_info.is_none()); pending_non_checkpoint_barriers.push(*backfill_epoch); let prev_epoch = Epoch::from_physical_time(*prev_epoch_fake_physical_time); @@ -119,6 +123,7 @@ impl CreatingStreamingJobStatus { prev_epoch: TracedEpoch::new(prev_epoch), kind: BarrierKind::Checkpoint(take(pending_non_checkpoint_barriers)), new_actors: None, + mutation: None, }] .into_iter() .chain(pending_commands.drain(..).map(|command_ctx| { @@ -127,6 +132,7 @@ impl CreatingStreamingJobStatus { prev_epoch: command_ctx.prev_epoch.clone(), kind: command_ctx.kind.clone(), new_actors: None, + mutation: None, } })) .collect(); @@ -145,12 +151,19 @@ impl CreatingStreamingJobStatus { } else { BarrierKind::Barrier }; + let (new_actors, mutation) = + if let Some((new_actors, mutation)) = initial_barrier_info.take() { + (Some(new_actors), Some(mutation)) + } else { + Default::default() + }; Some(( vec![CreatingJobInjectBarrierInfo { curr_epoch, prev_epoch, kind, - new_actors: actors_to_create.take(), + new_actors, + mutation, }], None, )) diff --git a/src/meta/src/barrier/mod.rs b/src/meta/src/barrier/mod.rs index 5fc9dc5112a65..daa82306bff6d 100644 --- a/src/meta/src/barrier/mod.rs +++ b/src/meta/src/barrier/mod.rs @@ -965,6 +965,19 @@ impl GlobalBarrierManager { info, } = &command { + if self.state.paused_reason().is_some() { + warn!("cannot create streaming job with snapshot backfill when paused"); + for notifier in notifiers { + notifier.notify_start_failed( + anyhow!("cannot create streaming job with snapshot backfill when paused",) + .into(), + ); + } + return Ok(()); + } + let mutation = command + .to_mutation(None) + .expect("should have some mutation in `CreateStreamingJob` command"); self.checkpoint_control .creating_streaming_job_controls .insert( @@ -975,6 +988,7 @@ impl GlobalBarrierManager { prev_epoch.value().0, &self.checkpoint_control.hummock_version_stats, &self.context.metrics, + mutation, ), ); } diff --git a/src/meta/src/barrier/rpc.rs b/src/meta/src/barrier/rpc.rs index 7ad468b04aa4c..14ee8b0c15f7b 100644 --- a/src/meta/src/barrier/rpc.rs +++ b/src/meta/src/barrier/rpc.rs @@ -263,39 +263,42 @@ impl ControlStreamManager { pre_applied_graph_info, applied_graph_info, actor_ids_to_pre_sync_mutation, - command_ctx.actors_to_create().map(|actors_to_create| { - 
actors_to_create - .into_iter() - .map(|(worker_id, actors)| { - ( - worker_id, - actors - .into_iter() - .map(|actor| BuildActorInfo { - actor: Some(actor), - // TODO: consider subscriber of backfilling mv - related_subscriptions: command_ctx - .subscription_info - .mv_depended_subscriptions - .iter() - .map(|(table_id, subscriptions)| { - ( - table_id.table_id, - SubscriptionIds { - subscription_ids: subscriptions - .keys() - .cloned() - .collect(), - }, - ) - }) - .collect(), - }) - .collect_vec(), - ) - }) - .collect() - }), + command_ctx + .command + .actors_to_create() + .map(|actors_to_create| { + actors_to_create + .into_iter() + .map(|(worker_id, actors)| { + ( + worker_id, + actors + .into_iter() + .map(|actor| BuildActorInfo { + actor: Some(actor), + // TODO: consider subscriber of backfilling mv + related_subscriptions: command_ctx + .subscription_info + .mv_depended_subscriptions + .iter() + .map(|(table_id, subscriptions)| { + ( + table_id.table_id, + SubscriptionIds { + subscription_ids: subscriptions + .keys() + .cloned() + .collect(), + }, + ) + }) + .collect(), + }) + .collect_vec(), + ) + }) + .collect() + }), ) } diff --git a/src/stream/src/executor/backfill/snapshot_backfill.rs b/src/stream/src/executor/backfill/snapshot_backfill.rs index 35adc33b81c4f..ac625f53a02dd 100644 --- a/src/stream/src/executor/backfill/snapshot_backfill.rs +++ b/src/stream/src/executor/backfill/snapshot_backfill.rs @@ -32,14 +32,16 @@ use risingwave_storage::table::batch_table::storage_table::StorageTable; use risingwave_storage::table::ChangeLogRow; use risingwave_storage::StateStore; use tokio::select; +use tokio::sync::mpsc; use tokio::sync::mpsc::UnboundedReceiver; use crate::executor::backfill::utils::{create_builder, mapping_chunk}; use crate::executor::monitor::StreamingMetrics; use crate::executor::prelude::{try_stream, StreamExt}; use crate::executor::{ - expect_first_barrier, ActorContextRef, BackfillExecutor, Barrier, BoxedMessageStream, Execute, - Executor, Message, Mutation, StreamExecutorError, StreamExecutorResult, + expect_first_barrier, ActorContextRef, BackfillExecutor, Barrier, BoxedMessageStream, + DispatcherBarrier, DispatcherMessage, Execute, Executor, Message, Mutation, + StreamExecutorError, StreamExecutorResult, }; use crate::task::CreateMviewProgress; @@ -99,7 +101,7 @@ impl SnapshotBackfillExecutor { #[try_stream(ok = Message, error = StreamExecutorError)] async fn execute_inner(mut self) { debug!("snapshot backfill executor start"); - let mut upstream = self.upstream.execute(); + let mut upstream = erase_upstream_mutation(self.upstream.execute()); let upstream_table_id = self.upstream_table.table_id(); let first_barrier = expect_first_barrier(&mut upstream).await?; debug!(epoch = ?first_barrier.epoch, "get first upstream barrier"); @@ -109,7 +111,7 @@ impl SnapshotBackfillExecutor { { if should_backfill { - let subscriber_ids = first_barrier + let subscriber_ids = first_recv_barrier .added_subscriber_on_mv_table(upstream_table_id) .collect_vec(); let snapshot_backfill_table_fragment_id = match subscriber_ids.as_slice() { @@ -183,12 +185,15 @@ impl SnapshotBackfillExecutor { let recv_barrier = self.barrier_rx.recv().await.expect("should exist"); assert_eq!(first_barrier.epoch, recv_barrier.epoch); - yield Message::Barrier(first_barrier); + yield Message::Barrier(recv_barrier); } + let mut upstream_buffer = + upstream_buffer.start_consuming_log_store(&mut self.barrier_rx); + let mut barrier_epoch = first_barrier_epoch; - let initial_pending_barrier = 
upstream_buffer.barrier.len(); + let initial_pending_barrier = upstream_buffer.state.barrier_count(); info!( ?barrier_epoch, table_id = self.upstream_table.table_id().table_id, @@ -207,8 +212,6 @@ impl SnapshotBackfillExecutor { // Phase 2: consume upstream log store while let Some(barrier) = upstream_buffer.take_buffered_barrier().await? { - let recv_barrier = receive_next_barrier(&mut self.barrier_rx).await?; - assert_eq!(barrier.epoch, recv_barrier.epoch); assert_eq!(barrier_epoch.curr, barrier.epoch.prev); barrier_epoch = barrier.epoch; @@ -254,16 +257,20 @@ impl SnapshotBackfillExecutor { ); let first_recv_barrier = receive_next_barrier(&mut self.barrier_rx).await?; assert_eq!(first_barrier.epoch, first_recv_barrier.epoch); - yield Message::Barrier(first_barrier); + yield Message::Barrier(first_recv_barrier); } } // Phase 3: consume upstream while let Some(msg) = upstream.try_next().await? { - if let Message::Barrier(barrier) = &msg { - let recv_barrier = receive_next_barrier(&mut self.barrier_rx).await?; - assert_eq!(barrier.epoch, recv_barrier.epoch); - } - yield msg; + yield match msg { + DispatcherMessage::Chunk(chunk) => Message::Chunk(chunk), + DispatcherMessage::Watermark(watermark) => Message::Watermark(watermark), + DispatcherMessage::Barrier(barrier) => { + let recv_barrier = receive_next_barrier(&mut self.barrier_rx).await?; + assert_eq!(barrier.epoch, recv_barrier.epoch); + Message::Barrier(recv_barrier) + } + }; } } } @@ -324,101 +331,236 @@ async fn read_change_log( } } -struct UpstreamBuffer<'a> { - upstream: &'a mut BoxedMessageStream, - // newer barrier at the front - barrier: VecDeque, - consume_upstream_row_count: LabelGuardedIntCounter<3>, +trait UpstreamBufferState { + // The future must be cancellation-safe + async fn is_finished(&mut self) -> StreamExecutorResult; + fn on_upstream_barrier(&mut self, upstream_barrier: DispatcherBarrier); +} + +struct StateOfConsumingSnapshot { + pending_barriers: Vec, +} + +impl UpstreamBufferState for StateOfConsumingSnapshot { + async fn is_finished(&mut self) -> StreamExecutorResult { + // never finish when consuming snapshot + Ok(false) + } + + fn on_upstream_barrier(&mut self, upstream_barrier: DispatcherBarrier) { + self.pending_barriers.push(upstream_barrier) + } +} + +struct StateOfConsumingLogStore<'a> { + barrier_rx: &'a mut mpsc::UnboundedReceiver, + /// Barriers received from upstream but not yet received the barrier from local barrier worker + /// newer barrier at the front + upstream_pending_barriers: VecDeque, + /// Barriers received from both upstream and local barrier worker + /// newer barrier at the front + barriers: VecDeque, is_finished: bool, + current_subscriber_id: u32, + upstream_table_id: TableId, +} + +impl<'a> StateOfConsumingLogStore<'a> { + fn barrier_count(&self) -> usize { + self.upstream_pending_barriers.len() + self.barriers.len() + } + + async fn handle_one_pending_barrier(&mut self) -> StreamExecutorResult { + assert!(!self.is_finished); + let barrier = receive_next_barrier(self.barrier_rx).await?; + assert_eq!( + self.upstream_pending_barriers + .pop_back() + .expect("non-empty") + .epoch, + barrier.epoch + ); + if is_finish_barrier(&barrier, self.current_subscriber_id, self.upstream_table_id) { + self.is_finished = true; + } + Ok(barrier) + } +} + +impl<'a> UpstreamBufferState for StateOfConsumingLogStore<'a> { + async fn is_finished(&mut self) -> StreamExecutorResult { + while !self.upstream_pending_barriers.is_empty() { + let barrier = self.handle_one_pending_barrier().await?; + 
self.barriers.push_front(barrier); + } + if self.is_finished { + assert!(self.upstream_pending_barriers.is_empty()); + } + Ok(self.is_finished) + } + + fn on_upstream_barrier(&mut self, upstream_barrier: DispatcherBarrier) { + self.upstream_pending_barriers.push_front(upstream_barrier); + } +} + +mod erase_upstream_mutation { + use futures::TryStreamExt; + + use crate::executor::prelude::Stream; + use crate::executor::{BoxedMessageStream, DispatcherMessageStreamItem}; + + pub(super) fn erase_upstream_mutation(upstream: BoxedMessageStream) -> UpstreamStream { + upstream.map_ok(|msg| { + msg.map_mutation(|mutation| { + if let Some(mutation) = mutation { + // TODO: assert none mutation after we explicitly erase mutation + warn!( + ?mutation, + "receive non-empty mutation from upstream. ignored" + ); + }; + }) + }) + } + + pub(super) type UpstreamStream = impl Stream + Unpin; +} + +use erase_upstream_mutation::*; + +struct UpstreamBuffer<'a, S> { + upstream: &'a mut UpstreamStream, + state: S, + consume_upstream_row_count: LabelGuardedIntCounter<3>, upstream_table_id: TableId, current_subscriber_id: u32, } -impl<'a> UpstreamBuffer<'a> { +impl<'a> UpstreamBuffer<'a, StateOfConsumingSnapshot> { fn new( - upstream: &'a mut BoxedMessageStream, + upstream: &'a mut UpstreamStream, upstream_table_id: TableId, current_subscriber_id: u32, consume_upstream_row_count: LabelGuardedIntCounter<3>, ) -> Self { Self { upstream, - barrier: Default::default(), + state: StateOfConsumingSnapshot { + pending_barriers: vec![], + }, consume_upstream_row_count, - is_finished: false, upstream_table_id, current_subscriber_id, } } + fn start_consuming_log_store<'s>( + self, + barrier_rx: &'s mut UnboundedReceiver, + ) -> UpstreamBuffer<'a, StateOfConsumingLogStore<'s>> { + let StateOfConsumingSnapshot { pending_barriers } = self.state; + let mut upstream_pending_barriers = VecDeque::with_capacity(pending_barriers.len()); + for pending_barrier in pending_barriers { + upstream_pending_barriers.push_front(pending_barrier); + } + UpstreamBuffer { + upstream: self.upstream, + state: StateOfConsumingLogStore { + barrier_rx, + upstream_pending_barriers, + barriers: Default::default(), + is_finished: false, + current_subscriber_id: self.current_subscriber_id, + upstream_table_id: self.upstream_table_id, + }, + consume_upstream_row_count: self.consume_upstream_row_count, + upstream_table_id: self.upstream_table_id, + current_subscriber_id: self.current_subscriber_id, + } + } +} + +impl<'a, S: UpstreamBufferState> UpstreamBuffer<'a, S> { async fn concurrently_consume_upstream(&mut self) -> StreamExecutorError { - while !self.is_finished { - let result = self.consume_until_next_barrier().await; - let barrier = match result { - Ok(barrier) => barrier, - Err(e) => { - return e; - } - }; - self.barrier.push_front(barrier); + if let Err(e) = try { + while !self.state.is_finished().await? { + self.consume_until_next_barrier().await?; + } + } { + return e; } pending().await } - async fn consume_until_next_barrier(&mut self) -> StreamExecutorResult { - assert!(!self.is_finished); + /// Consume the upstream until seeing the next barrier. + /// `pending_barriers` must be non-empty after this method returns. + async fn consume_until_next_barrier(&mut self) -> StreamExecutorResult<()> { loop { - let msg: Message = self + let msg: DispatcherMessage = self .upstream .try_next() .await? 
.ok_or_else(|| anyhow!("end of upstream"))?; match msg { - Message::Chunk(chunk) => { + DispatcherMessage::Chunk(chunk) => { self.consume_upstream_row_count .inc_by(chunk.cardinality() as _); } - Message::Barrier(barrier) => { - self.is_finished = self.is_finish_barrier(&barrier); - break Ok(barrier); + DispatcherMessage::Barrier(barrier) => { + self.state.on_upstream_barrier(barrier); + break Ok(()); } - Message::Watermark(_) => {} + DispatcherMessage::Watermark(_) => {} } } } +} +impl<'a, 's> UpstreamBuffer<'a, StateOfConsumingLogStore<'s>> { async fn take_buffered_barrier(&mut self) -> StreamExecutorResult> { - Ok(if let Some(barrier) = self.barrier.pop_back() { + Ok(if let Some(barrier) = self.state.barriers.pop_back() { Some(barrier) - } else if self.is_finished { + } else if !self.state.upstream_pending_barriers.is_empty() { + let barrier = self.state.handle_one_pending_barrier().await?; + Some(barrier) + } else if self.state.is_finished { None } else { - Some(self.consume_until_next_barrier().await?) + self.consume_until_next_barrier().await?; + let barrier = self.state.handle_one_pending_barrier().await?; + Some(barrier) }) } +} - fn is_finish_barrier(&self, barrier: &Barrier) -> bool { - if let Some(Mutation::DropSubscriptions { - subscriptions_to_drop, - }) = barrier.mutation.as_deref() - { - let is_finished = subscriptions_to_drop - .iter() - .any(|(subscriber_id, _)| *subscriber_id == self.current_subscriber_id); - if is_finished { - assert!(subscriptions_to_drop.iter().any( - |(subscriber_id, subscribed_upstream_table_id)| { - *subscriber_id == self.current_subscriber_id - && self.upstream_table_id == *subscribed_upstream_table_id - } - )) - } - is_finished - } else { - false +fn is_finish_barrier( + barrier: &Barrier, + current_subscriber_id: u32, + upstream_table_id: TableId, +) -> bool { + if let Some(Mutation::DropSubscriptions { + subscriptions_to_drop, + }) = barrier.mutation.as_deref() + { + let is_finished = subscriptions_to_drop + .iter() + .any(|(subscriber_id, _)| *subscriber_id == current_subscriber_id); + if is_finished { + assert!(subscriptions_to_drop.iter().any( + |(subscriber_id, subscribed_upstream_table_id)| { + *subscriber_id == current_subscriber_id + && upstream_table_id == *subscribed_upstream_table_id + } + )) } + is_finished + } else { + false } +} +impl<'a, S: UpstreamBufferState> UpstreamBuffer<'a, S> { /// Run a future while concurrently polling the upstream so that the upstream /// won't be back-pressured. async fn run_future>( diff --git a/src/stream/src/executor/mod.rs b/src/stream/src/executor/mod.rs index 7b22a48a25ab6..8b9f7b3f2242b 100644 --- a/src/stream/src/executor/mod.rs +++ b/src/stream/src/executor/mod.rs @@ -164,13 +164,17 @@ pub use wrapper::WrapperExecutor; use self::barrier_align::AlignedMessageStream; -pub type MessageStreamItem = StreamExecutorResult; +pub type MessageStreamItemInner = StreamExecutorResult>; +pub type MessageStreamItem = MessageStreamItemInner; +pub type DispatcherMessageStreamItem = MessageStreamItemInner<()>; pub type BoxedMessageStream = BoxStream<'static, MessageStreamItem>; pub use risingwave_common::util::epoch::task_local::{curr_epoch, epoch, prev_epoch}; use risingwave_pb::stream_plan::throttle_mutation::RateLimit; -pub trait MessageStream = futures::Stream + Send; +pub trait MessageStreamInner = Stream> + Send; +pub trait MessageStream = Stream + Send; +pub trait DispatcherMessageStream = Stream + Send; /// Static information of an executor. 
#[derive(Debug, Default, Clone)] @@ -913,6 +917,16 @@ impl BarrierInner { tracing_context: TracingContext::from_protobuf(&prost.tracing_context), }) } + + pub fn map_mutation(self, f: impl FnOnce(M) -> M2) -> BarrierInner { + BarrierInner { + epoch: self.epoch, + mutation: f(self.mutation), + kind: self.kind, + tracing_context: self.tracing_context, + passed_actors: self.passed_actors, + } + } } impl DispatcherBarrier { @@ -1017,6 +1031,16 @@ pub enum MessageInner { Watermark(Watermark), } +impl MessageInner { + pub fn map_mutation(self, f: impl FnOnce(M) -> M2) -> MessageInner { + match self { + MessageInner::Chunk(chunk) => MessageInner::Chunk(chunk), + MessageInner::Barrier(barrier) => MessageInner::Barrier(barrier.map_mutation(f)), + MessageInner::Watermark(watermark) => MessageInner::Watermark(watermark), + } + } +} + pub type Message = MessageInner; pub type DispatcherMessage = MessageInner<()>; @@ -1102,9 +1126,9 @@ pub type PkIndicesRef<'a> = &'a [usize]; pub type PkDataTypes = SmallVec<[DataType; 1]>; /// Expect the first message of the given `stream` as a barrier. -pub async fn expect_first_barrier( - stream: &mut (impl MessageStream + Unpin), -) -> StreamExecutorResult { +pub async fn expect_first_barrier( + stream: &mut (impl MessageStreamInner + Unpin), +) -> StreamExecutorResult> { let message = stream .next() .instrument_await("expect_first_barrier") From 670a94f118d61462b720d51d62f164986d23cb23 Mon Sep 17 00:00:00 2001 From: Bugen Zhao Date: Thu, 5 Sep 2024 13:29:27 +0800 Subject: [PATCH 23/26] feat: variable vnode count support in table distribution (#18373) Signed-off-by: Bugen Zhao --- .../executor/join/distributed_lookup_join.rs | 8 +- .../src/executor/join/local_lookup_join.rs | 9 +- src/batch/src/executor/log_row_seq_scan.rs | 6 +- src/batch/src/executor/row_seq_scan.rs | 5 +- src/common/src/hash/consistent_hash/bitmap.rs | 14 +++ src/common/src/hash/consistent_hash/vnode.rs | 5 + src/common/src/hash/table_distribution.rs | 119 ++++++++---------- src/common/src/util/scan_range.rs | 4 +- src/ctl/src/cmd_impl/table/scan.rs | 8 +- src/frontend/src/scheduler/plan_fragmenter.rs | 3 +- .../hummock_test/src/state_store_tests.rs | 5 +- .../src/hummock/iterator/change_log.rs | 5 +- .../log_store_impl/kv_log_store/serde.rs | 22 ++-- .../src/common/table/test_state_table.rs | 16 +-- src/stream/src/executor/watermark_filter.rs | 13 +- src/stream/src/from_proto/mview.rs | 2 +- src/stream/src/from_proto/watermark_filter.rs | 5 +- 17 files changed, 127 insertions(+), 122 deletions(-) diff --git a/src/batch/src/executor/join/distributed_lookup_join.rs b/src/batch/src/executor/join/distributed_lookup_join.rs index 1068ffd7f3349..74d7843013e4d 100644 --- a/src/batch/src/executor/join/distributed_lookup_join.rs +++ b/src/batch/src/executor/join/distributed_lookup_join.rs @@ -17,8 +17,9 @@ use std::mem::swap; use futures::pin_mut; use itertools::Itertools; +use risingwave_common::bitmap::Bitmap; use risingwave_common::catalog::{ColumnDesc, ColumnId, Field, Schema}; -use risingwave_common::hash::{HashKey, HashKeyDispatcher}; +use risingwave_common::hash::{HashKey, HashKeyDispatcher, VirtualNode}; use risingwave_common::memory::MemoryContext; use risingwave_common::row::OwnedRow; use risingwave_common::types::{DataType, Datum}; @@ -30,7 +31,7 @@ use risingwave_pb::batch_plan::plan_node::NodeBody; use risingwave_pb::common::BatchQueryEpoch; use risingwave_storage::store::PrefetchOptions; use risingwave_storage::table::batch_table::storage_table::StorageTable; -use 
risingwave_storage::table::{TableDistribution, TableIter}; +use risingwave_storage::table::TableIter; use risingwave_storage::{dispatch_state_store, StateStore}; use crate::error::Result; @@ -194,7 +195,8 @@ impl BoxedExecutorBuilder for DistributedLookupJoinExecutorBuilder { .collect(); // Lookup Join always contains distribution key, so we don't need vnode bitmap - let vnodes = Some(TableDistribution::all_vnodes()); + // TODO(var-vnode): use vnode count from table desc + let vnodes = Some(Bitmap::ones(VirtualNode::COUNT).into()); dispatch_state_store!(source.context().state_store(), state_store, { let table = StorageTable::new_partial(state_store, column_ids, vnodes, table_desc); let inner_side_builder = InnerSideExecutorBuilder::new( diff --git a/src/batch/src/executor/join/local_lookup_join.rs b/src/batch/src/executor/join/local_lookup_join.rs index a3be00fc39a22..7c7a08af5d873 100644 --- a/src/batch/src/executor/join/local_lookup_join.rs +++ b/src/batch/src/executor/join/local_lookup_join.rs @@ -17,7 +17,7 @@ use std::marker::PhantomData; use anyhow::Context; use itertools::Itertools; -use risingwave_common::bitmap::BitmapBuilder; +use risingwave_common::bitmap::{Bitmap, BitmapBuilder}; use risingwave_common::catalog::{ColumnDesc, Field, Schema}; use risingwave_common::hash::table_distribution::TableDistribution; use risingwave_common::hash::{ @@ -408,12 +408,11 @@ impl BoxedExecutorBuilder for LocalLookupJoinExecutorBuilder { }) .collect(); + // TODO(var-vnode): use vnode count from table desc + let vnodes = Some(Bitmap::ones(VirtualNode::COUNT).into()); let inner_side_builder = InnerSideExecutorBuilder { table_desc: table_desc.clone(), - table_distribution: TableDistribution::new_from_storage_table_desc( - Some(TableDistribution::all_vnodes()), - table_desc, - ), + table_distribution: TableDistribution::new_from_storage_table_desc(vnodes, table_desc), vnode_mapping, outer_side_key_types, inner_side_schema, diff --git a/src/batch/src/executor/log_row_seq_scan.rs b/src/batch/src/executor/log_row_seq_scan.rs index 7106eaec1b760..be2a11b756946 100644 --- a/src/batch/src/executor/log_row_seq_scan.rs +++ b/src/batch/src/executor/log_row_seq_scan.rs @@ -22,13 +22,14 @@ use prometheus::Histogram; use risingwave_common::array::{DataChunk, Op}; use risingwave_common::bitmap::Bitmap; use risingwave_common::catalog::{ColumnId, Field, Schema}; +use risingwave_common::hash::VirtualNode; use risingwave_common::row::{Row, RowExt}; use risingwave_common::types::ScalarImpl; use risingwave_pb::batch_plan::plan_node::NodeBody; use risingwave_pb::common::{batch_query_epoch, BatchQueryEpoch}; use risingwave_pb::plan_common::StorageTableDesc; use risingwave_storage::table::batch_table::storage_table::StorageTable; -use risingwave_storage::table::{collect_data_chunk, TableDistribution}; +use risingwave_storage::table::collect_data_chunk; use risingwave_storage::{dispatch_state_store, StateStore}; use super::{BoxedDataChunkStream, BoxedExecutor, BoxedExecutorBuilder, Executor, ExecutorBuilder}; @@ -106,7 +107,8 @@ impl BoxedExecutorBuilder for LogStoreRowSeqScanExecutorBuilder { Some(vnodes) => Some(Bitmap::from(vnodes).into()), // This is possible for dml. vnode_bitmap is not filled by scheduler. // Or it's single distribution, e.g., distinct agg. We scan in a single executor. 
- None => Some(TableDistribution::all_vnodes()), + // TODO(var-vnode): use vnode count from table desc + None => Some(Bitmap::ones(VirtualNode::COUNT).into()), }; let chunk_size = source.context.get_config().developer.chunk_size as u32; diff --git a/src/batch/src/executor/row_seq_scan.rs b/src/batch/src/executor/row_seq_scan.rs index b897dbd813787..7c7244d954764 100644 --- a/src/batch/src/executor/row_seq_scan.rs +++ b/src/batch/src/executor/row_seq_scan.rs @@ -21,6 +21,7 @@ use prometheus::Histogram; use risingwave_common::array::DataChunk; use risingwave_common::bitmap::Bitmap; use risingwave_common::catalog::{ColumnId, Schema}; +use risingwave_common::hash::VirtualNode; use risingwave_common::row::{OwnedRow, Row}; use risingwave_common::types::{DataType, Datum}; use risingwave_common::util::chunk_coalesce::DataChunkBuilder; @@ -32,7 +33,6 @@ use risingwave_pb::plan_common::as_of::AsOfType; use risingwave_pb::plan_common::{as_of, PbAsOf, StorageTableDesc}; use risingwave_storage::store::PrefetchOptions; use risingwave_storage::table::batch_table::storage_table::StorageTable; -use risingwave_storage::table::TableDistribution; use risingwave_storage::{dispatch_state_store, StateStore}; use crate::error::{BatchError, Result}; @@ -210,7 +210,8 @@ impl BoxedExecutorBuilder for RowSeqScanExecutorBuilder { Some(vnodes) => Some(Bitmap::from(vnodes).into()), // This is possible for dml. vnode_bitmap is not filled by scheduler. // Or it's single distribution, e.g., distinct agg. We scan in a single executor. - None => Some(TableDistribution::all_vnodes()), + // TODO(var-vnode): use vnode count from table desc + None => Some(Bitmap::ones(VirtualNode::COUNT).into()), }; let scan_ranges = { diff --git a/src/common/src/hash/consistent_hash/bitmap.rs b/src/common/src/hash/consistent_hash/bitmap.rs index 773231ba36a89..eee6a64a2b42c 100644 --- a/src/common/src/hash/consistent_hash/bitmap.rs +++ b/src/common/src/hash/consistent_hash/bitmap.rs @@ -15,6 +15,7 @@ use std::ops::RangeInclusive; use crate::bitmap::Bitmap; +use crate::hash::table_distribution::SINGLETON_VNODE; use crate::hash::VirtualNode; /// An extension trait for `Bitmap` to support virtual node operations. @@ -36,4 +37,17 @@ impl Bitmap { self.high_ranges() .map(|r| (VirtualNode::from_index(*r.start())..=VirtualNode::from_index(*r.end()))) } + + /// Returns whether only the [`SINGLETON_VNODE`] is set in the bitmap. + /// + /// Note that this method returning `true` does not imply that the bitmap was created by + /// [`VnodeBitmapExt::singleton`], or that the bitmap has length 1. + pub fn is_singleton(&self) -> bool { + self.count_ones() == 1 && self.iter_vnodes().next().unwrap() == SINGLETON_VNODE + } + + /// Creates a bitmap with length 1 and the single bit set. + pub fn singleton() -> Self { + Self::ones(1) + } } diff --git a/src/common/src/hash/consistent_hash/vnode.rs b/src/common/src/hash/consistent_hash/vnode.rs index f528544689f31..dd4095535fdf3 100644 --- a/src/common/src/hash/consistent_hash/vnode.rs +++ b/src/common/src/hash/consistent_hash/vnode.rs @@ -114,6 +114,11 @@ impl VirtualNode { } } +impl VirtualNode { + pub const COUNT_FOR_TEST: usize = Self::COUNT; + pub const MAX_FOR_TEST: VirtualNode = Self::MAX; +} + impl VirtualNode { // `compute_chunk` is used to calculate the `VirtualNode` for the columns in the // chunk. 
When only one column is provided and its type is `Serial`, we consider the column to diff --git a/src/common/src/hash/table_distribution.rs b/src/common/src/hash/table_distribution.rs index 9be9cd2abafb2..480483bc96a5d 100644 --- a/src/common/src/hash/table_distribution.rs +++ b/src/common/src/hash/table_distribution.rs @@ -13,30 +13,34 @@ // limitations under the License. use std::mem::replace; -use std::ops::Deref; use std::sync::{Arc, LazyLock}; use itertools::Itertools; use risingwave_pb::plan_common::StorageTableDesc; -use tracing::warn; use crate::array::{Array, DataChunk, PrimitiveArray}; -use crate::bitmap::{Bitmap, BitmapBuilder}; +use crate::bitmap::Bitmap; use crate::hash::VirtualNode; use crate::row::Row; use crate::util::iter_util::ZipEqFast; -/// For tables without distribution (singleton), the `DEFAULT_VNODE` is encoded. -pub const DEFAULT_VNODE: VirtualNode = VirtualNode::ZERO; +/// For tables without distribution (singleton), the `SINGLETON_VNODE` is encoded. +pub const SINGLETON_VNODE: VirtualNode = VirtualNode::ZERO; + +use super::VnodeBitmapExt; #[derive(Debug, Clone)] enum ComputeVnode { Singleton, DistKeyIndices { + /// Virtual nodes that the table is partitioned into. + vnodes: Arc, /// Indices of distribution key for computing vnode, based on the pk columns of the table. dist_key_in_pk_indices: Vec, }, VnodeColumnIndex { + /// Virtual nodes that the table is partitioned into. + vnodes: Arc, /// Index of vnode column. vnode_col_idx_in_pk: usize, }, @@ -47,13 +51,8 @@ enum ComputeVnode { pub struct TableDistribution { /// The way to compute vnode provided primary key compute_vnode: ComputeVnode, - - /// Virtual nodes that the table is partitioned into. - vnodes: Arc, } -pub const SINGLETON_VNODE: VirtualNode = DEFAULT_VNODE; - impl TableDistribution { pub fn new_from_storage_table_desc( vnodes: Option>, @@ -75,69 +74,32 @@ impl TableDistribution { ) -> Self { let compute_vnode = if let Some(vnode_col_idx_in_pk) = vnode_col_idx_in_pk { ComputeVnode::VnodeColumnIndex { + vnodes: vnodes.unwrap_or_else(|| Bitmap::singleton().into()), vnode_col_idx_in_pk, } } else if !dist_key_in_pk_indices.is_empty() { ComputeVnode::DistKeyIndices { + vnodes: vnodes.expect("vnodes must be `Some` as dist key indices are set"), dist_key_in_pk_indices, } } else { ComputeVnode::Singleton }; - let vnodes = vnodes.unwrap_or_else(Self::singleton_vnode_bitmap); - if let ComputeVnode::Singleton = &compute_vnode { - if &vnodes != Self::singleton_vnode_bitmap_ref() && &vnodes != Self::all_vnodes_ref() { - warn!( - ?vnodes, - "singleton distribution get non-singleton vnode bitmap" - ); - } - } - - Self { - compute_vnode, - vnodes, - } + Self { compute_vnode } } pub fn is_singleton(&self) -> bool { matches!(&self.compute_vnode, ComputeVnode::Singleton) } - pub fn singleton_vnode_bitmap_ref() -> &'static Arc { - /// A bitmap that only the default vnode is set. - static SINGLETON_VNODES: LazyLock> = LazyLock::new(|| { - let mut vnodes = BitmapBuilder::zeroed(VirtualNode::COUNT); - vnodes.set(SINGLETON_VNODE.to_index(), true); - vnodes.finish().into() - }); - - SINGLETON_VNODES.deref() - } - - pub fn singleton_vnode_bitmap() -> Arc { - Self::singleton_vnode_bitmap_ref().clone() - } - - pub fn all_vnodes_ref() -> &'static Arc { - /// A bitmap that all vnodes are set. 
- static ALL_VNODES: LazyLock> = - LazyLock::new(|| Bitmap::ones(VirtualNode::COUNT).into()); - &ALL_VNODES - } - - pub fn all_vnodes() -> Arc { - Self::all_vnodes_ref().clone() - } - /// Distribution that accesses all vnodes, mainly used for tests. - pub fn all(dist_key_in_pk_indices: Vec) -> Self { + pub fn all(dist_key_in_pk_indices: Vec, vnode_count: usize) -> Self { Self { compute_vnode: ComputeVnode::DistKeyIndices { + vnodes: Bitmap::ones(vnode_count).into(), dist_key_in_pk_indices, }, - vnodes: Self::all_vnodes(), } } @@ -145,20 +107,39 @@ impl TableDistribution { pub fn singleton() -> Self { Self { compute_vnode: ComputeVnode::Singleton, - vnodes: Self::singleton_vnode_bitmap(), } } pub fn update_vnode_bitmap(&mut self, new_vnodes: Arc) -> Arc { - if self.is_singleton() && &new_vnodes != Self::singleton_vnode_bitmap_ref() { - warn!(?new_vnodes, "update vnode on singleton distribution"); + match &mut self.compute_vnode { + ComputeVnode::Singleton => { + if !new_vnodes.is_singleton() { + panic!( + "update vnode bitmap on singleton distribution to non-singleton: {:?}", + new_vnodes + ); + } + self.vnodes().clone() // not updated + } + + ComputeVnode::DistKeyIndices { vnodes, .. } + | ComputeVnode::VnodeColumnIndex { vnodes, .. } => { + assert_eq!(vnodes.len(), new_vnodes.len()); + replace(vnodes, new_vnodes) + } } - assert_eq!(self.vnodes.len(), new_vnodes.len()); - replace(&mut self.vnodes, new_vnodes) } + /// Get vnode bitmap if distributed, or a dummy [`Bitmap::singleton()`] if singleton. pub fn vnodes(&self) -> &Arc { - &self.vnodes + static SINGLETON_VNODES: LazyLock> = + LazyLock::new(|| Bitmap::singleton().into()); + + match &self.compute_vnode { + ComputeVnode::DistKeyIndices { vnodes, .. } => vnodes, + ComputeVnode::VnodeColumnIndex { vnodes, .. } => vnodes, + ComputeVnode::Singleton => &SINGLETON_VNODES, + } } /// Get vnode value with given primary key. @@ -166,11 +147,13 @@ impl TableDistribution { match &self.compute_vnode { ComputeVnode::Singleton => SINGLETON_VNODE, ComputeVnode::DistKeyIndices { + vnodes, dist_key_in_pk_indices, - } => compute_vnode(pk, dist_key_in_pk_indices, &self.vnodes), + } => compute_vnode(pk, dist_key_in_pk_indices, vnodes), ComputeVnode::VnodeColumnIndex { + vnodes, vnode_col_idx_in_pk, - } => get_vnode_from_row(pk, *vnode_col_idx_in_pk, &self.vnodes), + } => get_vnode_from_row(pk, *vnode_col_idx_in_pk, vnodes), } } @@ -178,22 +161,20 @@ impl TableDistribution { match &self.compute_vnode { ComputeVnode::Singleton => Some(SINGLETON_VNODE), ComputeVnode::DistKeyIndices { + vnodes, dist_key_in_pk_indices, } => dist_key_in_pk_indices .iter() .all(|&d| d < pk_prefix.len()) - .then(|| compute_vnode(pk_prefix, dist_key_in_pk_indices, &self.vnodes)), + .then(|| compute_vnode(pk_prefix, dist_key_in_pk_indices, vnodes)), ComputeVnode::VnodeColumnIndex { + vnodes, vnode_col_idx_in_pk, } => { if *vnode_col_idx_in_pk >= pk_prefix.len() { None } else { - Some(get_vnode_from_row( - pk_prefix, - *vnode_col_idx_in_pk, - &self.vnodes, - )) + Some(get_vnode_from_row(pk_prefix, *vnode_col_idx_in_pk, vnodes)) } } } @@ -230,6 +211,7 @@ impl TableDistribution { vec![SINGLETON_VNODE; chunk.capacity()] } ComputeVnode::DistKeyIndices { + vnodes, dist_key_in_pk_indices, } => { let dist_key_indices = dist_key_in_pk_indices @@ -243,13 +225,14 @@ impl TableDistribution { .map(|(vnode, vis)| { // Ignore the invisible rows. 
if vis { - check_vnode_is_set(vnode, &self.vnodes); + check_vnode_is_set(vnode, vnodes); } vnode }) .collect() } ComputeVnode::VnodeColumnIndex { + vnodes, vnode_col_idx_in_pk, } => { let array: &PrimitiveArray = @@ -262,7 +245,7 @@ impl TableDistribution { let vnode = VirtualNode::from_scalar(vnode); if vis { assert!(exist); - check_vnode_is_set(vnode, &self.vnodes); + check_vnode_is_set(vnode, vnodes); } vnode }) diff --git a/src/common/src/util/scan_range.rs b/src/common/src/util/scan_range.rs index fd056f1790444..5d5e84ed32085 100644 --- a/src/common/src/util/scan_range.rs +++ b/src/common/src/util/scan_range.rs @@ -159,7 +159,7 @@ mod tests { let pk = vec![1, 3, 2]; let dist_key_idx_in_pk = crate::catalog::get_dist_key_in_pk_indices(&dist_key, &pk).unwrap(); - let dist = TableDistribution::all(dist_key_idx_in_pk); + let dist = TableDistribution::all(dist_key_idx_in_pk, VirtualNode::COUNT_FOR_TEST); let mut scan_range = ScanRange::full_table_scan(); assert!(scan_range.try_compute_vnode(&dist).is_none()); @@ -185,7 +185,7 @@ mod tests { let pk = vec![1, 3, 2]; let dist_key_idx_in_pk = crate::catalog::get_dist_key_in_pk_indices(&dist_key, &pk).unwrap(); - let dist = TableDistribution::all(dist_key_idx_in_pk); + let dist = TableDistribution::all(dist_key_idx_in_pk, VirtualNode::COUNT_FOR_TEST); let mut scan_range = ScanRange::full_table_scan(); assert!(scan_range.try_compute_vnode(&dist).is_none()); diff --git a/src/ctl/src/cmd_impl/table/scan.rs b/src/ctl/src/cmd_impl/table/scan.rs index e5bba170bf97a..f5cee710a40fc 100644 --- a/src/ctl/src/cmd_impl/table/scan.rs +++ b/src/ctl/src/cmd_impl/table/scan.rs @@ -14,6 +14,8 @@ use anyhow::{anyhow, Result}; use futures::{pin_mut, StreamExt}; +use risingwave_common::bitmap::Bitmap; +use risingwave_common::hash::VirtualNode; use risingwave_frontend::TableCatalog; use risingwave_hummock_sdk::HummockReadEpoch; use risingwave_rpc_client::MetaClient; @@ -63,7 +65,8 @@ pub async fn make_state_table(hummock: S, table: &TableCatalog) - .collect(), table.pk().iter().map(|x| x.order_type).collect(), table.pk().iter().map(|x| x.column_index).collect(), - TableDistribution::all(table.distribution_key().to_vec()), // scan all vnodes + // TODO(var-vnode): use vnode count from table desc + TableDistribution::all(table.distribution_key().to_vec(), VirtualNode::COUNT), // scan all vnodes Some(table.value_indices.clone()), ) .await @@ -81,7 +84,8 @@ pub fn make_storage_table( Ok(StorageTable::new_partial( hummock, output_columns_ids, - Some(TableDistribution::all_vnodes()), + // TODO(var-vnode): use vnode count from table desc + Some(Bitmap::ones(VirtualNode::COUNT).into()), &table.table_desc().try_to_protobuf()?, )) } diff --git a/src/frontend/src/scheduler/plan_fragmenter.rs b/src/frontend/src/scheduler/plan_fragmenter.rs index 09e4cbc0bfa03..2ecae1d7f7642 100644 --- a/src/frontend/src/scheduler/plan_fragmenter.rs +++ b/src/frontend/src/scheduler/plan_fragmenter.rs @@ -1250,7 +1250,8 @@ fn derive_partitions( } let table_distribution = TableDistribution::new_from_storage_table_desc( - Some(TableDistribution::all_vnodes()), + // TODO(var-vnode): use vnode count from table desc + Some(Bitmap::ones(VirtualNode::COUNT).into()), &table_desc.try_to_protobuf()?, ); diff --git a/src/storage/hummock_test/src/state_store_tests.rs b/src/storage/hummock_test/src/state_store_tests.rs index 35f3d08a9ed8a..67da2150735af 100644 --- a/src/storage/hummock_test/src/state_store_tests.rs +++ b/src/storage/hummock_test/src/state_store_tests.rs @@ -24,7 +24,6 @@ use futures::{pin_mut, 
StreamExt}; use itertools::Itertools; use risingwave_common::bitmap::Bitmap; use risingwave_common::catalog::{TableId, TableOption}; -use risingwave_common::hash::table_distribution::TableDistribution; use risingwave_common::hash::VirtualNode; use risingwave_common::util::epoch::{test_epoch, EpochExt, MAX_EPOCH}; use risingwave_hummock_sdk::key::{prefixed_range_with_vnode, TableKeyRange}; @@ -1565,7 +1564,7 @@ async fn test_iter_log() { }, table_option: Default::default(), is_replicated: false, - vnodes: TableDistribution::all_vnodes(), + vnodes: Bitmap::ones(VirtualNode::COUNT_FOR_TEST).into(), }) .await; @@ -1580,7 +1579,7 @@ async fn test_iter_log() { }, table_option: Default::default(), is_replicated: false, - vnodes: TableDistribution::all_vnodes(), + vnodes: Bitmap::ones(VirtualNode::COUNT_FOR_TEST).into(), }) .await; // flush for about 10 times per epoch diff --git a/src/storage/src/hummock/iterator/change_log.rs b/src/storage/src/hummock/iterator/change_log.rs index 6fc99f29a80f3..ae8061c37b07d 100644 --- a/src/storage/src/hummock/iterator/change_log.rs +++ b/src/storage/src/hummock/iterator/change_log.rs @@ -527,8 +527,9 @@ mod tests { use bytes::Bytes; use itertools::Itertools; + use risingwave_common::bitmap::Bitmap; use risingwave_common::catalog::TableId; - use risingwave_common::hash::table_distribution::TableDistribution; + use risingwave_common::hash::VirtualNode; use risingwave_common::util::epoch::test_epoch; use risingwave_hummock_sdk::key::{TableKey, UserKey}; use risingwave_hummock_sdk::EpochWithGap; @@ -699,7 +700,7 @@ mod tests { }, table_option: Default::default(), is_replicated: false, - vnodes: TableDistribution::all_vnodes(), + vnodes: Bitmap::ones(VirtualNode::COUNT_FOR_TEST).into(), }) .await; let logs = gen_test_data(epoch_count, 10000, 0.05, 0.2); diff --git a/src/stream/src/common/log_store_impl/kv_log_store/serde.rs b/src/stream/src/common/log_store_impl/kv_log_store/serde.rs index 92a3caf4cd2e3..17ab103d758b4 100644 --- a/src/stream/src/common/log_store_impl/kv_log_store/serde.rs +++ b/src/stream/src/common/log_store_impl/kv_log_store/serde.rs @@ -25,7 +25,7 @@ use itertools::Itertools; use risingwave_common::array::{Op, StreamChunk}; use risingwave_common::bitmap::Bitmap; use risingwave_common::catalog::ColumnDesc; -use risingwave_common::hash::VirtualNode; +use risingwave_common::hash::{VirtualNode, VnodeBitmapExt}; use risingwave_common::row::{OwnedRow, Row, RowExt}; use risingwave_common::types::{DataType, ScalarImpl}; use risingwave_common::util::chunk_coalesce::DataChunkBuilder; @@ -42,7 +42,7 @@ use risingwave_storage::error::StorageResult; use risingwave_storage::row_serde::row_serde_util::{serialize_pk, serialize_pk_with_vnode}; use risingwave_storage::row_serde::value_serde::ValueRowSerdeNew; use risingwave_storage::store::{StateStoreIterExt, StateStoreReadIter}; -use risingwave_storage::table::{compute_vnode, TableDistribution, SINGLETON_VNODE}; +use risingwave_storage::table::{compute_vnode, SINGLETON_VNODE}; use rw_futures_util::select_all; use crate::common::log_store_impl::kv_log_store::{ @@ -201,8 +201,7 @@ impl LogStoreRowSerde { let vnodes = match vnodes { Some(vnodes) => vnodes, - - None => TableDistribution::singleton_vnode_bitmap(), + None => Bitmap::singleton().into(), }; // epoch and seq_id. 
The seq_id of barrier is set null, and therefore the second order type @@ -216,7 +215,7 @@ impl LogStoreRowSerde { ); let dist_key_indices = if dist_key_indices.is_empty() { - if &vnodes != TableDistribution::singleton_vnode_bitmap_ref() { + if !vnodes.is_singleton() { warn!( ?vnodes, "singleton log store gets non-singleton vnode bitmap" @@ -946,7 +945,7 @@ mod tests { use risingwave_storage::store::{ FromStreamStateStoreIter, StateStoreIterItem, StateStoreReadIter, }; - use risingwave_storage::table::DEFAULT_VNODE; + use risingwave_storage::table::SINGLETON_VNODE; use tokio::sync::oneshot; use tokio::sync::oneshot::Sender; @@ -1024,7 +1023,7 @@ mod tests { seq_id += 1; } - let (key, encoded_barrier) = serde.serialize_barrier(epoch, DEFAULT_VNODE, false); + let (key, encoded_barrier) = serde.serialize_barrier(epoch, SINGLETON_VNODE, false); let key = remove_vnode_prefix(&key.0); match serde.deserialize(&encoded_barrier).unwrap() { (decoded_epoch, LogStoreRowOp::Barrier { is_checkpoint }) => { @@ -1062,7 +1061,8 @@ mod tests { seq_id += 1; } - let (key, encoded_checkpoint_barrier) = serde.serialize_barrier(epoch, DEFAULT_VNODE, true); + let (key, encoded_checkpoint_barrier) = + serde.serialize_barrier(epoch, SINGLETON_VNODE, true); let key = remove_vnode_prefix(&key.0); match serde.deserialize(&encoded_checkpoint_barrier).unwrap() { (decoded_epoch, LogStoreRowOp::Barrier { is_checkpoint }) => { @@ -1200,7 +1200,7 @@ mod tests { ) { let (ops, rows) = gen_test_data(base); let first_barrier = { - let (key, value) = serde.serialize_barrier(EPOCH0, DEFAULT_VNODE, true); + let (key, value) = serde.serialize_barrier(EPOCH0, SINGLETON_VNODE, true); Ok((FullKey::new(TEST_TABLE_ID, key, EPOCH0), value)) }; let stream = stream::once(async move { first_barrier }); @@ -1210,7 +1210,7 @@ mod tests { let stream = stream.chain(stream::once({ let serde = serde.clone(); async move { - let (key, value) = serde.serialize_barrier(EPOCH1, DEFAULT_VNODE, false); + let (key, value) = serde.serialize_barrier(EPOCH1, SINGLETON_VNODE, false); Ok((FullKey::new(TEST_TABLE_ID, key, EPOCH1), value)) } })); @@ -1218,7 +1218,7 @@ mod tests { gen_row_stream(serde.clone(), ops.clone(), rows.clone(), EPOCH2, seq_id); let stream = stream.chain(row_stream).chain(stream::once({ async move { - let (key, value) = serde.serialize_barrier(EPOCH2, DEFAULT_VNODE, true); + let (key, value) = serde.serialize_barrier(EPOCH2, SINGLETON_VNODE, true); Ok((FullKey::new(TEST_TABLE_ID, key, EPOCH2), value)) } })); diff --git a/src/stream/src/common/table/test_state_table.rs b/src/stream/src/common/table/test_state_table.rs index 098548c21ac93..dde0d8a581406 100644 --- a/src/stream/src/common/table/test_state_table.rs +++ b/src/stream/src/common/table/test_state_table.rs @@ -27,7 +27,7 @@ use risingwave_common::util::value_encoding::BasicSerde; use risingwave_hummock_test::test_utils::prepare_hummock_test_env; use risingwave_storage::hummock::HummockStorage; use risingwave_storage::store::PrefetchOptions; -use risingwave_storage::table::DEFAULT_VNODE; +use risingwave_storage::table::SINGLETON_VNODE; use crate::common::table::state_table::{ ReplicatedStateTable, StateTable, WatermarkCacheStateTable, @@ -445,7 +445,7 @@ async fn test_state_table_iter_with_pk_range() { std::ops::Bound::Included(OwnedRow::new(vec![Some(4_i32.into())])), ); let iter = state_table - .iter_with_vnode(DEFAULT_VNODE, &pk_range, Default::default()) + .iter_with_vnode(SINGLETON_VNODE, &pk_range, Default::default()) .await .unwrap(); pin_mut!(iter); @@ -470,7 +470,7 @@ 
async fn test_state_table_iter_with_pk_range() { std::ops::Bound::::Unbounded, ); let iter = state_table - .iter_with_vnode(DEFAULT_VNODE, &pk_range, Default::default()) + .iter_with_vnode(SINGLETON_VNODE, &pk_range, Default::default()) .await .unwrap(); pin_mut!(iter); @@ -1976,11 +1976,11 @@ async fn test_replicated_state_table_replication() { std::ops::Bound::Included(OwnedRow::new(vec![Some(2_i32.into())])), ); let iter = state_table - .iter_with_vnode(DEFAULT_VNODE, &range_bounds, Default::default()) + .iter_with_vnode(SINGLETON_VNODE, &range_bounds, Default::default()) .await .unwrap(); let replicated_iter = replicated_state_table - .iter_with_vnode_and_output_indices(DEFAULT_VNODE, &range_bounds, Default::default()) + .iter_with_vnode_and_output_indices(SINGLETON_VNODE, &range_bounds, Default::default()) .await .unwrap(); pin_mut!(iter); @@ -2039,7 +2039,7 @@ async fn test_replicated_state_table_replication() { ); let iter = state_table - .iter_with_vnode(DEFAULT_VNODE, &range_bounds, Default::default()) + .iter_with_vnode(SINGLETON_VNODE, &range_bounds, Default::default()) .await .unwrap(); @@ -2048,7 +2048,7 @@ async fn test_replicated_state_table_replication() { std::ops::Bound::Unbounded, ); let replicated_iter = replicated_state_table - .iter_with_vnode_and_output_indices(DEFAULT_VNODE, &range_bounds, Default::default()) + .iter_with_vnode_and_output_indices(SINGLETON_VNODE, &range_bounds, Default::default()) .await .unwrap(); pin_mut!(iter); @@ -2079,7 +2079,7 @@ async fn test_replicated_state_table_replication() { let range_bounds: (Bound, Bound) = (std::ops::Bound::Unbounded, std::ops::Bound::Unbounded); let replicated_iter = replicated_state_table - .iter_with_vnode_and_output_indices(DEFAULT_VNODE, &range_bounds, Default::default()) + .iter_with_vnode_and_output_indices(SINGLETON_VNODE, &range_bounds, Default::default()) .await .unwrap(); pin_mut!(replicated_iter); diff --git a/src/stream/src/executor/watermark_filter.rs b/src/stream/src/executor/watermark_filter.rs index 8f8b166626d21..01497c37fdab5 100644 --- a/src/stream/src/executor/watermark_filter.rs +++ b/src/stream/src/executor/watermark_filter.rs @@ -13,7 +13,6 @@ // limitations under the License. use std::cmp; -use std::ops::Deref; use futures::future::{try_join, try_join_all}; use risingwave_common::hash::VnodeBitmapExt; @@ -27,7 +26,6 @@ use risingwave_expr::Result as ExprResult; use risingwave_hummock_sdk::HummockReadEpoch; use risingwave_pb::expr::expr_node::Type; use risingwave_storage::table::batch_table::storage_table::StorageTable; -use risingwave_storage::table::TableDistribution; use super::filter::FilterExecutor; use crate::executor::prelude::*; @@ -219,10 +217,7 @@ impl WatermarkFilterExecutor { let mut need_update_global_max_watermark = false; // Update the vnode bitmap for state tables of all agg calls if asked. 
if let Some(vnode_bitmap) = barrier.as_update_vnode_bitmap(ctx.id) { - let other_vnodes_bitmap = Arc::new( - (!(*vnode_bitmap).clone()) - & TableDistribution::all_vnodes_ref().deref(), - ); + let other_vnodes_bitmap = Arc::new(!(*vnode_bitmap).clone()); let _ = global_watermark_table.update_vnode_bitmap(other_vnodes_bitmap); let (previous_vnode_bitmap, _cache_may_stale) = table.update_vnode_bitmap(vnode_bitmap.clone()); @@ -373,7 +368,9 @@ impl WatermarkFilterExecutor { #[cfg(test)] mod tests { use itertools::Itertools; + use risingwave_common::bitmap::Bitmap; use risingwave_common::catalog::{ColumnDesc, ColumnId, Field, TableDesc}; + use risingwave_common::hash::VirtualNode; use risingwave_common::test_prelude::StreamChunkTestExt; use risingwave_common::types::Date; use risingwave_common::util::epoch::test_epoch; @@ -431,7 +428,7 @@ mod tests { let state_table = StateTable::from_table_catalog_inconsistent_op( &table, mem_state.clone(), - Some(TableDistribution::all_vnodes()), + Some(Bitmap::ones(VirtualNode::COUNT_FOR_TEST).into()), ) .await; @@ -440,7 +437,7 @@ mod tests { let storage_table = StorageTable::new_partial( mem_state, val_indices.iter().map(|i| ColumnId::new(*i as _)).collect(), - Some(TableDistribution::all_vnodes()), + Some(Bitmap::ones(VirtualNode::COUNT_FOR_TEST).into()), &desc, ); (storage_table, state_table) diff --git a/src/stream/src/from_proto/mview.rs b/src/stream/src/from_proto/mview.rs index 41fc47609fba7..43fc929edf455 100644 --- a/src/stream/src/from_proto/mview.rs +++ b/src/stream/src/from_proto/mview.rs @@ -100,7 +100,7 @@ impl ExecutorBuilder for ArrangeExecutorBuilder { let table = node.get_table()?; // FIXME: Lookup is now implemented without cell-based table API and relies on all vnodes - // being `DEFAULT_VNODE`, so we need to make the Arrange a singleton. + // being `SINGLETON_VNODE`, so we need to make the Arrange a singleton. let vnodes = params.vnode_bitmap.map(Arc::new); let conflict_behavior = ConflictBehavior::from_protobuf(&table.handle_pk_conflict_behavior()); diff --git a/src/stream/src/from_proto/watermark_filter.rs b/src/stream/src/from_proto/watermark_filter.rs index 0081f00cc39e6..4e3147d10853e 100644 --- a/src/stream/src/from_proto/watermark_filter.rs +++ b/src/stream/src/from_proto/watermark_filter.rs @@ -12,14 +12,12 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-use std::ops::Deref; use std::sync::Arc; use risingwave_common::catalog::{ColumnId, TableDesc}; use risingwave_expr::expr::build_non_strict_from_prost; use risingwave_pb::stream_plan::WatermarkFilterNode; use risingwave_storage::table::batch_table::storage_table::StorageTable; -use risingwave_storage::table::TableDistribution; use super::*; use crate::common::table::state_table::StateTable; @@ -57,8 +55,7 @@ impl ExecutorBuilder for WatermarkFilterBuilder { .iter() .map(|i| ColumnId::new(*i as _)) .collect_vec(); - let other_vnodes = - Arc::new((!(*vnodes).clone()) & TableDistribution::all_vnodes_ref().deref()); + let other_vnodes = Arc::new(!(*vnodes).clone()); let global_watermark_table = StorageTable::new_partial(store.clone(), column_ids, Some(other_vnodes), &desc); From 3b98b71f27525ad936820c264198c0f02334d252 Mon Sep 17 00:00:00 2001 From: xiangjinwu <17769960+xiangjinwu@users.noreply.github.com> Date: Thu, 5 Sep 2024 14:28:27 +0800 Subject: [PATCH 24/26] fix(source): Protobuf `Any` as canonical JSON (#18380) --- Cargo.lock | 3 + src/connector/Cargo.toml | 2 +- src/connector/codec/src/decoder/mod.rs | 3 + src/connector/src/parser/protobuf/parser.rs | 310 ++++--------------- src/connector/src/parser/unified/mod.rs | 4 +- src/connector/src/parser/unified/protobuf.rs | 16 +- 6 files changed, 79 insertions(+), 259 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c1b1ec57fdece..4e648e08a3fea 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -9216,9 +9216,12 @@ version = "0.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "55a6a9143ae25c25fa7b6a48d6cc08b10785372060009c25140a4e7c340e95af" dependencies = [ + "base64 0.22.0", "once_cell", "prost 0.13.1", "prost-types 0.13.1", + "serde", + "serde-value", ] [[package]] diff --git a/src/connector/Cargo.toml b/src/connector/Cargo.toml index d87e89c1cf65d..a77e9cb929d17 100644 --- a/src/connector/Cargo.toml +++ b/src/connector/Cargo.toml @@ -103,7 +103,7 @@ pg_bigdecimal = { git = "https://github.com/risingwavelabs/rust-pg_bigdecimal", postgres-openssl = "0.5.0" prometheus = { version = "0.13", features = ["process"] } prost = { workspace = true, features = ["no-recursion-limit"] } -prost-reflect = "0.14" +prost-reflect = { version = "0.14", features = ["serde"] } prost-types = "0.13" protobuf-native = "0.2.2" pulsar = { version = "6.3", default-features = false, features = [ diff --git a/src/connector/codec/src/decoder/mod.rs b/src/connector/codec/src/decoder/mod.rs index 814e06a166c6c..bbfdbf0a90d79 100644 --- a/src/connector/codec/src/decoder/mod.rs +++ b/src/connector/codec/src/decoder/mod.rs @@ -38,6 +38,9 @@ pub enum AccessError { #[error("Unsupported additional column `{name}`")] UnsupportedAdditionalColumn { name: String }, + #[error("Fail to convert protobuf Any into jsonb: {0}")] + ProtobufAnyToJson(#[source] serde_json::Error), + /// Errors that are not categorized into variants above. #[error("{message}")] Uncategorized { message: String }, diff --git a/src/connector/src/parser/protobuf/parser.rs b/src/connector/src/parser/protobuf/parser.rs index 8be25074f6295..ec8c747cafd5a 100644 --- a/src/connector/src/parser/protobuf/parser.rs +++ b/src/connector/src/parser/protobuf/parser.rs @@ -12,8 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-use std::sync::Arc; - use anyhow::Context; use itertools::Itertools; use prost_reflect::{ @@ -22,8 +20,7 @@ use prost_reflect::{ }; use risingwave_common::array::{ListValue, StructValue}; use risingwave_common::types::{ - DataType, Datum, DatumCow, Decimal, JsonbRef, JsonbVal, ScalarImpl, ScalarRefImpl, ToDatumRef, - ToOwnedDatum, F32, F64, + DataType, DatumCow, Decimal, JsonbVal, ScalarImpl, ToOwnedDatum, F32, F64, }; use risingwave_common::{bail, try_match_expand}; use risingwave_pb::plan_common::{AdditionalColumn, ColumnDesc, ColumnDescVersion}; @@ -32,9 +29,7 @@ use thiserror_ext::{AsReport, Macro}; use crate::error::ConnectorResult; use crate::parser::unified::protobuf::ProtobufAccess; -use crate::parser::unified::{ - bail_uncategorized, uncategorized, AccessError, AccessImpl, AccessResult, -}; +use crate::parser::unified::{uncategorized, AccessError, AccessImpl, AccessResult}; use crate::parser::util::bytes_from_url; use crate::parser::{AccessBuilder, EncodingProperties}; use crate::schema::schema_registry::{extract_schema_id, handle_sr_list, Client, WireFormatError}; @@ -44,7 +39,6 @@ use crate::schema::SchemaLoader; pub struct ProtobufAccessBuilder { confluent_wire_type: bool, message_descriptor: MessageDescriptor, - descriptor_pool: Arc, } impl AccessBuilder for ProtobufAccessBuilder { @@ -59,10 +53,7 @@ impl AccessBuilder for ProtobufAccessBuilder { let message = DynamicMessage::decode(self.message_descriptor.clone(), payload) .context("failed to parse message")?; - Ok(AccessImpl::Protobuf(ProtobufAccess::new( - message, - Arc::clone(&self.descriptor_pool), - ))) + Ok(AccessImpl::Protobuf(ProtobufAccess::new(message))) } } @@ -71,13 +62,11 @@ impl ProtobufAccessBuilder { let ProtobufParserConfig { confluent_wire_type, message_descriptor, - descriptor_pool, } = config; Ok(Self { confluent_wire_type, message_descriptor, - descriptor_pool, }) } } @@ -86,8 +75,6 @@ impl ProtobufAccessBuilder { pub struct ProtobufParserConfig { confluent_wire_type: bool, pub(crate) message_descriptor: MessageDescriptor, - /// Note that the pub(crate) here is merely for testing - pub(crate) descriptor_pool: Arc, } impl ProtobufParserConfig { @@ -132,7 +119,6 @@ impl ProtobufParserConfig { Ok(Self { message_descriptor, confluent_wire_type: protobuf_config.use_schema_registry, - descriptor_pool: Arc::new(pool), }) } @@ -216,141 +202,9 @@ fn detect_loop_and_push( Ok(()) } -fn extract_any_info(dyn_msg: &DynamicMessage) -> (String, Value) { - debug_assert!( - dyn_msg.fields().count() == 2, - "Expected only two fields for Any Type MessageDescriptor" - ); - - let type_url = dyn_msg - .get_field_by_name("type_url") - .expect("Expect type_url in dyn_msg") - .to_string() - .split('/') - .nth(1) - .map(|part| part[..part.len() - 1].to_string()) - .unwrap_or_default(); - - let payload = dyn_msg - .get_field_by_name("value") - .expect("Expect value (payload) in dyn_msg") - .as_ref() - .clone(); - - (type_url, payload) -} - -/// TODO: Resolve the potential naming conflict in the map -/// i.e., If the two anonymous type shares the same key (e.g., "Int32"), -/// the latter will overwrite the former one in `serde_json::Map`. -/// Possible solution, maintaining a global id map, for the same types -/// In the same level of fields, add the unique id at the tail of the name. 
-/// e.g., "Int32.1" & "Int32.2" in the above example -fn recursive_parse_json( - fields: &[Datum], - full_name_vec: Option>, - full_name: Option, -) -> serde_json::Value { - // Note that the key is of no order - let mut ret: serde_json::Map = serde_json::Map::new(); - - // The hidden type hint for user's convenience - // i.e., `"_type": message.full_name()` - if let Some(full_name) = full_name { - ret.insert("_type".to_string(), serde_json::Value::String(full_name)); - } - - for (idx, field) in fields.iter().enumerate() { - let mut key; - if let Some(k) = full_name_vec.as_ref() { - key = k[idx].to_string(); - } else { - key = "".to_string(); - } - - match field.clone() { - Some(ScalarImpl::Int16(v)) => { - if key.is_empty() { - key = "Int16".to_string(); - } - ret.insert(key, serde_json::Value::Number(serde_json::Number::from(v))); - } - Some(ScalarImpl::Int32(v)) => { - if key.is_empty() { - key = "Int32".to_string(); - } - ret.insert(key, serde_json::Value::Number(serde_json::Number::from(v))); - } - Some(ScalarImpl::Int64(v)) => { - if key.is_empty() { - key = "Int64".to_string(); - } - ret.insert(key, serde_json::Value::Number(serde_json::Number::from(v))); - } - Some(ScalarImpl::Bool(v)) => { - if key.is_empty() { - key = "Bool".to_string(); - } - ret.insert(key, serde_json::Value::Bool(v)); - } - Some(ScalarImpl::Bytea(v)) => { - if key.is_empty() { - key = "Bytea".to_string(); - } - let s = String::from_utf8(v.to_vec()).unwrap(); - ret.insert(key, serde_json::Value::String(s)); - } - Some(ScalarImpl::Float32(v)) => { - if key.is_empty() { - key = "Int16".to_string(); - } - ret.insert( - key, - serde_json::Value::Number( - serde_json::Number::from_f64(v.into_inner() as f64).unwrap(), - ), - ); - } - Some(ScalarImpl::Float64(v)) => { - if key.is_empty() { - key = "Float64".to_string(); - } - ret.insert( - key, - serde_json::Value::Number( - serde_json::Number::from_f64(v.into_inner()).unwrap(), - ), - ); - } - Some(ScalarImpl::Utf8(v)) => { - if key.is_empty() { - key = "Utf8".to_string(); - } - ret.insert(key, serde_json::Value::String(v.to_string())); - } - Some(ScalarImpl::Struct(v)) => { - if key.is_empty() { - key = "Struct".to_string(); - } - ret.insert(key, recursive_parse_json(v.fields(), None, None)); - } - Some(ScalarImpl::Jsonb(v)) => { - if key.is_empty() { - key = "Jsonb".to_string(); - } - ret.insert(key, v.take()); - } - r#type => panic!("Not yet support ScalarImpl type: {:?}", r#type), - } - } - - serde_json::Value::Object(ret) -} - pub fn from_protobuf_value<'a>( field_desc: &FieldDescriptor, value: &'a Value, - descriptor_pool: &Arc, ) -> AccessResult> { let kind = field_desc.kind(); @@ -382,62 +236,9 @@ pub fn from_protobuf_value<'a>( } Value::Message(dyn_msg) => { if dyn_msg.descriptor().full_name() == "google.protobuf.Any" { - // If the fields are not presented, default value is an empty string - if !dyn_msg.has_field_by_name("type_url") || !dyn_msg.has_field_by_name("value") { - borrowed!(JsonbRef::empty_string()); - } - - // Sanity check - debug_assert!( - dyn_msg.has_field_by_name("type_url") && dyn_msg.has_field_by_name("value"), - "`type_url` & `value` must exist in fields of `dyn_msg`" - ); - - // The message is of type `Any` - let (type_url, payload) = extract_any_info(dyn_msg); - - let payload_field_desc = dyn_msg.descriptor().get_field_by_name("value").unwrap(); - - let payload = from_protobuf_value(&payload_field_desc, &payload, descriptor_pool)?; - let Some(ScalarRefImpl::Bytea(payload)) = payload.to_datum_ref() else { - bail_uncategorized!("expected 
bytes for dynamic message payload"); - }; - - // Get the corresponding schema from the descriptor pool - let msg_desc = descriptor_pool - .get_message_by_name(&type_url) - .ok_or_else(|| { - uncategorized!("message `{type_url}` not found in descriptor pool") - })?; - - let f = msg_desc - .clone() - .fields() - .map(|f| f.name().to_string()) - .collect::>(); - - let full_name = msg_desc.clone().full_name().to_string(); - - // Decode the payload based on the `msg_desc` - let decoded_value = DynamicMessage::decode(msg_desc, payload).unwrap(); - let decoded_value = from_protobuf_value( - field_desc, - &Value::Message(decoded_value), - descriptor_pool, - )? - .to_owned_datum() - .unwrap(); - - // Extract the struct value - let ScalarImpl::Struct(v) = decoded_value else { - panic!("Expect ScalarImpl::Struct"); - }; - - ScalarImpl::Jsonb(JsonbVal::from(serde_json::json!(recursive_parse_json( - v.fields(), - Some(f), - Some(full_name), - )))) + ScalarImpl::Jsonb(JsonbVal::from( + serde_json::to_value(dyn_msg).map_err(AccessError::ProtobufAnyToJson)?, + )) } else { let mut rw_values = Vec::with_capacity(dyn_msg.descriptor().fields().len()); // fields is a btree map in descriptor @@ -454,9 +255,7 @@ pub fn from_protobuf_value<'a>( } // use default value if dyn_msg doesn't has this field let value = dyn_msg.get_field(&field_desc); - rw_values.push( - from_protobuf_value(&field_desc, &value, descriptor_pool)?.to_owned_datum(), - ); + rw_values.push(from_protobuf_value(&field_desc, &value)?.to_owned_datum()); } ScalarImpl::Struct(StructValue::new(rw_values)) } @@ -466,7 +265,7 @@ pub fn from_protobuf_value<'a>( .map_err(|e| uncategorized!("{}", e.to_report_string()))?; let mut builder = data_type.as_list().create_array_builder(values.len()); for value in values { - builder.append(from_protobuf_value(field_desc, value, descriptor_pool)?); + builder.append(from_protobuf_value(field_desc, value)?); } ScalarImpl::List(ListValue::new(builder.finish())) } @@ -498,25 +297,18 @@ fn protobuf_type_mapping( } Kind::Uint64 | Kind::Fixed64 => DataType::Decimal, Kind::String => DataType::Varchar, - Kind::Message(m) => { - let fields = m - .fields() - .map(|f| protobuf_type_mapping(&f, parse_trace)) - .try_collect()?; - let field_names = m.fields().map(|f| f.name().to_string()).collect_vec(); - - // Note that this part is useful for actual parsing - // Since RisingWave will parse message to `ScalarImpl::Jsonb` - // Please do NOT modify it - if field_names.len() == 2 - && field_names.contains(&"value".to_string()) - && field_names.contains(&"type_url".to_string()) - { - DataType::Jsonb - } else { + Kind::Message(m) => match m.full_name() { + // Well-Known Types are identified by their full name + "google.protobuf.Any" => DataType::Jsonb, + _ => { + let fields = m + .fields() + .map(|f| protobuf_type_mapping(&f, parse_trace)) + .try_collect()?; + let field_names = m.fields().map(|f| f.name().to_string()).collect_vec(); DataType::new_struct(fields, field_names) } - } + }, Kind::Enum(_) => DataType::Varchar, Kind::Bytes => DataType::Bytea, }; @@ -973,10 +765,9 @@ mod test { // This is of no use let field = value.fields().next().unwrap().0; - if let Some(ret) = - from_protobuf_value(&field, &Value::Message(value), &conf.descriptor_pool) - .unwrap() - .to_owned_datum() + if let Some(ret) = from_protobuf_value(&field, &Value::Message(value)) + .unwrap() + .to_owned_datum() { println!("Decoded Value for ANY_GEN_PROTO_DATA: {:#?}", ret); println!("---------------------------"); @@ -1000,7 +791,7 @@ mod test { assert_eq!( jv, 
JsonbVal::from(json!({ - "_type": "test.StringValue", + "@type": "type.googleapis.com/test.StringValue", "value": "John Doe" })) ); @@ -1036,10 +827,9 @@ mod test { // This is of no use let field = value.fields().next().unwrap().0; - if let Some(ret) = - from_protobuf_value(&field, &Value::Message(value), &conf.descriptor_pool) - .unwrap() - .to_owned_datum() + if let Some(ret) = from_protobuf_value(&field, &Value::Message(value)) + .unwrap() + .to_owned_datum() { println!("Decoded Value for ANY_GEN_PROTO_DATA: {:#?}", ret); println!("---------------------------"); @@ -1063,7 +853,7 @@ mod test { assert_eq!( jv, JsonbVal::from(json!({ - "_type": "test.Int32Value", + "@type": "type.googleapis.com/test.Int32Value", "value": 114514 })) ); @@ -1110,10 +900,9 @@ mod test { // This is of no use let field = value.fields().next().unwrap().0; - if let Some(ret) = - from_protobuf_value(&field, &Value::Message(value), &conf.descriptor_pool) - .unwrap() - .to_owned_datum() + if let Some(ret) = from_protobuf_value(&field, &Value::Message(value)) + .unwrap() + .to_owned_datum() { println!("Decoded Value for ANY_RECURSIVE_GEN_PROTO_DATA: {:#?}", ret); println!("---------------------------"); @@ -1137,13 +926,13 @@ mod test { assert_eq!( jv, JsonbVal::from(json!({ - "_type": "test.AnyValue", - "any_value_1": { - "_type": "test.StringValue", + "@type": "type.googleapis.com/test.AnyValue", + "anyValue1": { + "@type": "type.googleapis.com/test.StringValue", "value": "114514", }, - "any_value_2": { - "_type": "test.Int32Value", + "anyValue2": { + "@type": "type.googleapis.com/test.Int32Value", "value": 114514, } })) @@ -1156,6 +945,37 @@ mod test { Ok(()) } + // id: 12345 + // any_value: { + // type_url: "type.googleapis.com/test.StringXalue" + // value: "\n\010John Doe" + // } + static ANY_GEN_PROTO_DATA_INVALID: &[u8] = b"\x08\xb9\x60\x12\x32\x0a\x24\x74\x79\x70\x65\x2e\x67\x6f\x6f\x67\x6c\x65\x61\x70\x69\x73\x2e\x63\x6f\x6d\x2f\x74\x65\x73\x74\x2e\x53\x74\x72\x69\x6e\x67\x58\x61\x6c\x75\x65\x12\x0a\x0a\x08\x4a\x6f\x68\x6e\x20\x44\x6f\x65"; + + #[tokio::test] + async fn test_any_invalid() -> crate::error::ConnectorResult<()> { + let conf = create_recursive_pb_parser_config("/any-schema.pb", "test.TestAny").await; + + let value = + DynamicMessage::decode(conf.message_descriptor.clone(), ANY_GEN_PROTO_DATA_INVALID) + .unwrap(); + + // The top-level `Value` is not a proto field, but we need a dummy one. + let field = value.fields().next().unwrap().0; + + let err = from_protobuf_value(&field, &Value::Message(value)).unwrap_err(); + + let expected = expect_test::expect![[r#" + Fail to convert protobuf Any into jsonb + + Caused by: + message 'test.StringXalue' not found + "#]]; + expected.assert_eq(err.to_report_string_pretty().as_str()); + + Ok(()) + } + #[test] fn test_decode_varint_zigzag() { // 1. 
Positive number diff --git a/src/connector/src/parser/unified/mod.rs b/src/connector/src/parser/unified/mod.rs index 8045ce0132401..fdfe3aae6aaee 100644 --- a/src/connector/src/parser/unified/mod.rs +++ b/src/connector/src/parser/unified/mod.rs @@ -17,9 +17,7 @@ use auto_impl::auto_impl; use risingwave_common::types::{DataType, DatumCow}; use risingwave_connector_codec::decoder::avro::AvroAccess; -pub use risingwave_connector_codec::decoder::{ - bail_uncategorized, uncategorized, Access, AccessError, AccessResult, -}; +pub use risingwave_connector_codec::decoder::{uncategorized, Access, AccessError, AccessResult}; use self::bytes::BytesAccess; use self::json::JsonAccess; diff --git a/src/connector/src/parser/unified/protobuf.rs b/src/connector/src/parser/unified/protobuf.rs index 02febc22db247..b1d34746b5029 100644 --- a/src/connector/src/parser/unified/protobuf.rs +++ b/src/connector/src/parser/unified/protobuf.rs @@ -13,9 +13,9 @@ // limitations under the License. use std::borrow::Cow; -use std::sync::{Arc, LazyLock}; +use std::sync::LazyLock; -use prost_reflect::{DescriptorPool, DynamicMessage, ReflectMessage}; +use prost_reflect::{DynamicMessage, ReflectMessage}; use risingwave_common::log::LogSuppresser; use risingwave_common::types::{DataType, DatumCow, ToOwnedDatum}; use thiserror_ext::AsReport; @@ -26,15 +26,11 @@ use crate::parser::unified::uncategorized; pub struct ProtobufAccess { message: DynamicMessage, - descriptor_pool: Arc, } impl ProtobufAccess { - pub fn new(message: DynamicMessage, descriptor_pool: Arc) -> Self { - Self { - message, - descriptor_pool, - } + pub fn new(message: DynamicMessage) -> Self { + Self { message } } } @@ -59,10 +55,10 @@ impl Access for ProtobufAccess { })?; match self.message.get_field(&field_desc) { - Cow::Borrowed(value) => from_protobuf_value(&field_desc, value, &self.descriptor_pool), + Cow::Borrowed(value) => from_protobuf_value(&field_desc, value), // `Owned` variant occurs only if there's no such field and the default value is returned. 
- Cow::Owned(value) => from_protobuf_value(&field_desc, &value, &self.descriptor_pool) + Cow::Owned(value) => from_protobuf_value(&field_desc, &value) // enforce `Owned` variant to avoid returning a reference to a temporary value .map(|d| d.to_owned_datum().into()), } From f2f58272cc51a118a83c1fce02bd42afeb98c80b Mon Sep 17 00:00:00 2001 From: zwang28 <70626450+zwang28@users.noreply.github.com> Date: Thu, 5 Sep 2024 15:02:08 +0800 Subject: [PATCH 25/26] fix(storage): fix assertion (#18413) --- src/storage/hummock_sdk/src/sstable_info.rs | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/storage/hummock_sdk/src/sstable_info.rs b/src/storage/hummock_sdk/src/sstable_info.rs index 2f64508e57314..9970c60f506c8 100644 --- a/src/storage/hummock_sdk/src/sstable_info.rs +++ b/src/storage/hummock_sdk/src/sstable_info.rs @@ -136,7 +136,7 @@ impl From<&PbSstableInfo> for SstableInfo { impl From for PbSstableInfo { fn from(sstable_info: SstableInfo) -> Self { - assert_ne!(0, sstable_info.sst_size); + assert!(sstable_info.sst_size > 0 || sstable_info.is_stripped()); PbSstableInfo { object_id: sstable_info.object_id, sst_id: sstable_info.sst_id, @@ -174,7 +174,7 @@ impl From for PbSstableInfo { impl From<&SstableInfo> for PbSstableInfo { fn from(sstable_info: &SstableInfo) -> Self { - assert_ne!(0, sstable_info.sst_size); + assert!(sstable_info.sst_size > 0 || sstable_info.is_stripped()); PbSstableInfo { object_id: sstable_info.object_id, sst_id: sstable_info.sst_id, @@ -212,3 +212,10 @@ impl SstableInfo { self.key_range = KeyRange::default(); } } + +// Time travel +impl SstableInfo { + pub fn is_stripped(&self) -> bool { + self.object_id == 0 + } +} From cb29fe0a3221638b5dc62c3ea543b08dd567135d Mon Sep 17 00:00:00 2001 From: lmatz Date: Thu, 5 Sep 2024 15:23:40 +0800 Subject: [PATCH 26/26] feat: support more SSL related configurations in Kafka connector (#18361) --- src/connector/src/connector_common/common.rs | 21 ++++++++++++++++++++ src/connector/with_options_sink.yaml | 12 +++++++++++ src/connector/with_options_source.yaml | 12 +++++++++++ 3 files changed, 45 insertions(+) diff --git a/src/connector/src/connector_common/common.rs b/src/connector/src/connector_common/common.rs index b522ae2eda560..9f4211aedd4d9 100644 --- a/src/connector/src/connector_common/common.rs +++ b/src/connector/src/connector_common/common.rs @@ -192,14 +192,26 @@ pub struct KafkaCommon { #[serde(rename = "properties.ssl.ca.location")] ssl_ca_location: Option, + /// CA certificate string (PEM format) for verifying the broker's key. + #[serde(rename = "properties.ssl.ca.pem")] + ssl_ca_pem: Option, + /// Path to client's certificate file (PEM). #[serde(rename = "properties.ssl.certificate.location")] ssl_certificate_location: Option, + /// Client's public key string (PEM format) used for authentication. + #[serde(rename = "properties.ssl.certificate.pem")] + ssl_certificate_pem: Option, + /// Path to client's private key file (PEM). #[serde(rename = "properties.ssl.key.location")] ssl_key_location: Option, + /// Client's private key string (PEM format) used for authentication. + #[serde(rename = "properties.ssl.key.pem")] + ssl_key_pem: Option, + /// Passphrase of client's private key. 
#[serde(rename = "properties.ssl.key.password")] ssl_key_password: Option, @@ -325,12 +337,21 @@ impl KafkaCommon { if let Some(ssl_ca_location) = self.ssl_ca_location.as_ref() { config.set("ssl.ca.location", ssl_ca_location); } + if let Some(ssl_ca_pem) = self.ssl_ca_pem.as_ref() { + config.set("ssl.ca.pem", ssl_ca_pem); + } if let Some(ssl_certificate_location) = self.ssl_certificate_location.as_ref() { config.set("ssl.certificate.location", ssl_certificate_location); } + if let Some(ssl_certificate_pem) = self.ssl_certificate_pem.as_ref() { + config.set("ssl.certificate.pem", ssl_certificate_pem); + } if let Some(ssl_key_location) = self.ssl_key_location.as_ref() { config.set("ssl.key.location", ssl_key_location); } + if let Some(ssl_key_pem) = self.ssl_key_pem.as_ref() { + config.set("ssl.key.pem", ssl_key_pem); + } if let Some(ssl_key_password) = self.ssl_key_password.as_ref() { config.set("ssl.key.password", ssl_key_password); } diff --git a/src/connector/with_options_sink.yaml b/src/connector/with_options_sink.yaml index cc92f9a0a664a..e8a8efff68801 100644 --- a/src/connector/with_options_sink.yaml +++ b/src/connector/with_options_sink.yaml @@ -373,14 +373,26 @@ KafkaConfig: field_type: String comments: Path to CA certificate file for verifying the broker's key. required: false + - name: properties.ssl.ca.pem + field_type: String + comments: CA certificate string (PEM format) for verifying the broker's key. + required: false - name: properties.ssl.certificate.location field_type: String comments: Path to client's certificate file (PEM). required: false + - name: properties.ssl.certificate.pem + field_type: String + comments: Client's public key string (PEM format) used for authentication. + required: false - name: properties.ssl.key.location field_type: String comments: Path to client's private key file (PEM). required: false + - name: properties.ssl.key.pem + field_type: String + comments: Client's private key string (PEM format) used for authentication. + required: false - name: properties.ssl.key.password field_type: String comments: Passphrase of client's private key. diff --git a/src/connector/with_options_source.yaml b/src/connector/with_options_source.yaml index 4eaf1e0d3db4b..a6a19e80c89a3 100644 --- a/src/connector/with_options_source.yaml +++ b/src/connector/with_options_source.yaml @@ -199,14 +199,26 @@ KafkaProperties: field_type: String comments: Path to CA certificate file for verifying the broker's key. required: false + - name: properties.ssl.ca.pem + field_type: String + comments: CA certificate string (PEM format) for verifying the broker's key. + required: false - name: properties.ssl.certificate.location field_type: String comments: Path to client's certificate file (PEM). required: false + - name: properties.ssl.certificate.pem + field_type: String + comments: Client's public key string (PEM format) used for authentication. + required: false - name: properties.ssl.key.location field_type: String comments: Path to client's private key file (PEM). required: false + - name: properties.ssl.key.pem + field_type: String + comments: Client's private key string (PEM format) used for authentication. + required: false - name: properties.ssl.key.password field_type: String comments: Passphrase of client's private key.