From fbc389a21783f224487b1f10e918f9c876e37393 Mon Sep 17 00:00:00 2001
From: TCeason
Date: Fri, 6 Dec 2024 20:54:26 +0800
Subject: [PATCH] refactor: revert pr #16869

Metadata query performance has regressed in some cases. Revert to resolve
the regression quickly; a proper fix will follow next week.

---
 src/query/storages/system/Cargo.toml           |   1 -
 src/query/storages/system/src/tables_table.rs  | 395 +++++++-----------
 2 files changed, 160 insertions(+), 236 deletions(-)

diff --git a/src/query/storages/system/Cargo.toml b/src/query/storages/system/Cargo.toml
index 3b31c6c3966f..d36f47e0c688 100644
--- a/src/query/storages/system/Cargo.toml
+++ b/src/query/storages/system/Cargo.toml
@@ -25,7 +25,6 @@ databend-common-config = { workspace = true }
 databend-common-exception = { workspace = true }
 databend-common-expression = { workspace = true }
 databend-common-functions = { workspace = true }
-databend-common-management = { workspace = true }
 databend-common-meta-api = { workspace = true }
 databend-common-meta-app = { workspace = true }
 databend-common-meta-types = { workspace = true }
diff --git a/src/query/storages/system/src/tables_table.rs b/src/query/storages/system/src/tables_table.rs
index f092b4c22b62..9d427bc696d3 100644
--- a/src/query/storages/system/src/tables_table.rs
+++ b/src/query/storages/system/src/tables_table.rs
@@ -38,7 +38,6 @@ use databend_common_expression::TableField;
 use databend_common_expression::TableSchemaRef;
 use databend_common_expression::TableSchemaRefExt;
 use databend_common_functions::BUILTIN_FUNCTIONS;
-use databend_common_management::RoleApi;
 use databend_common_meta_app::principal::OwnershipObject;
 use databend_common_meta_app::schema::database_name_ident::DatabaseNameIdent;
 use databend_common_meta_app::schema::TableIdent;
@@ -47,6 +46,7 @@ use databend_common_meta_app::schema::TableMeta;
 use databend_common_meta_app::tenant::Tenant;
 use databend_common_storages_fuse::FuseTable;
 use databend_common_storages_view::view_table::QUERY;
+use databend_common_users::GrantObjectVisibilityChecker;
 use databend_common_users::UserApiProvider;
 use log::warn;
 
@@ -129,7 +129,9 @@ where TablesTable<WITH_HISTORY, WITHOUT_VIEW>: HistoryAware
             .into_iter()
             .map(|cat| cat.disable_table_info_refresh())
             .collect::<Result<Vec<_>>>()?;
-        self.get_full_data_from_catalogs(ctx, push_downs, catalogs)
+        let visibility_checker = ctx.get_visibility_checker(false).await?;
+
+        self.get_full_data_from_catalogs(ctx, push_downs, catalogs, visibility_checker)
             .await
     }
 }
@@ -235,6 +237,7 @@ where TablesTable<WITH_HISTORY, WITHOUT_VIEW>: HistoryAware
         ctx: Arc<dyn TableContext>,
         push_downs: Option<PushDownInfo>,
         catalogs: Vec<Arc<dyn Catalog>>,
+        visibility_checker: GrantObjectVisibilityChecker,
     ) -> Result<DataBlock> {
         let tenant = ctx.get_tenant();
 
@@ -325,277 +328,199 @@ where TablesTable<WITH_HISTORY, WITHOUT_VIEW>: HistoryAware
                 );
             }
         }
-
-        // from system.tables where database = 'db' and name = 'name'
-        // from system.tables where database = 'db' and table_id = 123
-        if db_name.len() == 1
-            && !invalid_optimize
-            && tables_names.len() + tables_ids.len() == 1
-            && !invalid_tables_ids
-            && !WITH_HISTORY
-        {
-            let visibility_checker = ctx.get_visibility_checker(true).await?;
-            for (ctl_name, ctl) in ctls.iter() {
-                for db in &db_name {
-                    match ctl.get_database(&tenant, db.as_str()).await {
-                        Ok(database) => dbs.push(database),
-                        Err(err) => {
-                            let msg = format!("Failed to get database: {}, {}", db, err);
-                            warn!("{}", msg);
-                        }
-                    }
-                }
-                if let Err(err) = ctl.mget_table_names_by_ids(&tenant, &tables_ids).await {
-                    warn!("Failed to get tables: {}, {}", ctl.name(), err);
-                } else {
-                    let new_tables_names = ctl
-                        .mget_table_names_by_ids(&tenant, &tables_ids)
-                        .await?
-                        .into_iter()
-                        .flatten()
-                        .filter(|table| !tables_names.contains(table))
-                        .collect::<Vec<_>>();
-                    tables_names.extend(new_tables_names);
-                }
-                for table_name in &tables_names {
-                    for db in &dbs {
-                        match ctl.get_table(&tenant, db.name(), table_name).await {
-                            Ok(t) => {
-                                let db_id = db.get_db_info().database_id.db_id;
-                                let table_id = t.get_id();
-                                let role = user_api
-                                    .role_api(&tenant)
-                                    .get_ownership(&OwnershipObject::Table {
-                                        catalog_name: ctl_name.to_string(),
-                                        db_id,
-                                        table_id,
-                                    })
-                                    .await?
-                                    .map(|o| o.role);
-                                if visibility_checker.check_table_visibility(
-                                    ctl_name,
-                                    db.name(),
-                                    table_name,
-                                    db_id,
-                                    t.get_id(),
-                                ) {
-                                    catalogs.push(ctl_name.as_str());
-                                    databases.push(db.name().to_owned());
-                                    database_tables.push(t);
-                                    owner.push(role);
-                                } else if let Some(role) = role {
-                                    let roles = ctx.get_all_effective_roles().await?;
-                                    if roles.iter().any(|r| r.name == role) {
-                                        catalogs.push(ctl_name.as_str());
-                                        databases.push(db.name().to_owned());
-                                        database_tables.push(t);
-                                        owner.push(Some(role));
-                                    }
-                                }
-                            }
-                            Err(err) => {
-                                let msg = format!(
-                                    "Failed to get table in database: {}, {}",
-                                    db.name(),
-                                    err
-                                );
-                                // warn no need to pad in ctx
-                                warn!("{}", msg);
-                                continue;
-                            }
-                        }
-                    }
-                }
-            }
-        } else {
-            let visibility_checker = ctx.get_visibility_checker(false).await?;
+        let catalog_dbs = visibility_checker.get_visibility_database();
+
+        for (ctl_name, ctl) in ctls.iter() {
+            if let Some(push_downs) = &push_downs {
+                if push_downs.filters.as_ref().map(|f| &f.filter).is_some() {
+                    for db in &db_name {
+                        match 
ctl.get_database(&tenant, db.as_str()).await { + Ok(database) => dbs.push(database), + Err(err) => { + let msg = format!("Failed to get database: {}, {}", db, err); + warn!("{}", msg); + } } } - } - if let Err(err) = ctl.mget_table_names_by_ids(&tenant, &tables_ids).await { - warn!("Failed to get tables: {}, {}", ctl.name(), err); - } else { - let new_tables_names = ctl - .mget_table_names_by_ids(&tenant, &tables_ids) - .await? - .into_iter() - .flatten() - .filter(|table| !tables_names.contains(table)) - .collect::>(); - tables_names.extend(new_tables_names); - } - for table_name in &tables_names { - for db in &dbs { - match ctl.get_table(&tenant, db.name(), table_name).await { - Ok(t) => { - let db_id = db.get_db_info().database_id.db_id; - let table_id = t.get_id(); - let role = user_api - .role_api(&tenant) - .get_ownership(&OwnershipObject::Table { - catalog_name: ctl_name.to_string(), - db_id, - table_id, - }) - .await? - .map(|o| o.role); - if visibility_checker.check_table_visibility( - ctl_name, - db.name(), - table_name, - db_id, - t.get_id(), - ) { - catalogs.push(ctl_name.as_str()); - databases.push(db.name().to_owned()); - database_tables.push(t); - owner.push(role); - } else if let Some(role) = role { - let roles = ctx.get_all_effective_roles().await?; - if roles.iter().any(|r| r.name == role) { - catalogs.push(ctl_name.as_str()); - databases.push(db.name().to_owned()); - database_tables.push(t); - owner.push(Some(role)); + if !WITH_HISTORY { + match ctl.mget_table_names_by_ids(&tenant, &tables_ids).await { + Ok(tables) => { + for table in tables.into_iter().flatten() { + if !tables_names.contains(&table) { + tables_names.push(table.clone()); } } } Err(err) => { - let msg = format!( - "Failed to get table in database: {}, {}", - db.name(), - err - ); - // warn no need to pad in ctx + let msg = format!("Failed to get tables: {}, {}", ctl.name(), err); warn!("{}", msg); - continue; } } } } } - } else { - let visibility_checker = ctx.get_visibility_checker(false).await?; - let catalog_dbs = visibility_checker.get_visibility_database(); - - for (ctl_name, ctl) in ctls.iter() { - if let Some(push_downs) = &push_downs { - if push_downs.filters.as_ref().map(|f| &f.filter).is_some() { - for db in &db_name { - match ctl.get_database(&tenant, db.as_str()).await { - Ok(database) => dbs.push(database), - Err(err) => { - let msg = format!("Failed to get database: {}, {}", db, err); - warn!("{}", msg); - } + + if dbs.is_empty() || invalid_optimize { + // None means has global level privileges + dbs = if let Some(catalog_dbs) = &catalog_dbs { + let mut final_dbs = vec![]; + for (catalog_name, dbs) in catalog_dbs { + if ctl.name() == catalog_name.to_string() { + let mut catalog_db_ids = vec![]; + let mut catalog_db_names = vec![]; + catalog_db_names.extend( + dbs.iter() + .filter_map(|(db_name, _)| *db_name) + .map(|db_name| db_name.to_string()), + ); + catalog_db_ids.extend(dbs.iter().filter_map(|(_, db_id)| *db_id)); + if let Ok(databases) = ctl + .mget_database_names_by_ids(&tenant, &catalog_db_ids) + .await + { + catalog_db_names.extend(databases.into_iter().flatten()); + } else { + let msg = + format!("Failed to get database name by id: {}", ctl.name()); + warn!("{}", msg); } + let db_idents = catalog_db_names + .iter() + .map(|name| DatabaseNameIdent::new(&tenant, name)) + .collect::>(); + let dbs = ctl.mget_databases(&tenant, &db_idents).await?; + final_dbs.extend(dbs); } + } + final_dbs + } else { + match ctl.list_databases(&tenant).await { + Ok(dbs) => dbs, + Err(err) => { + let msg 
= + format!("List databases failed on catalog {}: {}", ctl.name(), err); + warn!("{}", msg); + ctx.push_warning(msg); - if !WITH_HISTORY { - match ctl.mget_table_names_by_ids(&tenant, &tables_ids).await { - Ok(tables) => { - for table in tables.into_iter().flatten() { - if !tables_names.contains(&table) { - tables_names.push(table.clone()); - } - } - } - Err(err) => { - let msg = - format!("Failed to get tables: {}, {}", ctl.name(), err); - warn!("{}", msg); - } - } + vec![] } } } + } - if dbs.is_empty() || invalid_optimize { - // None means has global level privileges - dbs = if let Some(catalog_dbs) = &catalog_dbs { - let mut final_dbs = vec![]; - for (catalog_name, dbs) in catalog_dbs { - if ctl.name() == catalog_name.to_string() { - let mut catalog_db_ids = vec![]; - let mut catalog_db_names = vec![]; - catalog_db_names.extend( - dbs.iter() - .filter_map(|(db_name, _)| *db_name) - .map(|db_name| db_name.to_string()), - ); - catalog_db_ids.extend(dbs.iter().filter_map(|(_, db_id)| *db_id)); - if let Ok(databases) = ctl - .mget_database_names_by_ids(&tenant, &catalog_db_ids) - .await - { - catalog_db_names.extend(databases.into_iter().flatten()); - } else { - let msg = format!( - "Failed to get database name by id: {}", - ctl.name() - ); - warn!("{}", msg); - } - let db_idents = catalog_db_names - .iter() - .map(|name| DatabaseNameIdent::new(&tenant, name)) - .collect::>(); - let dbs = ctl.mget_databases(&tenant, &db_idents).await?; - final_dbs.extend(dbs); - } + let final_dbs = dbs + .clone() + .into_iter() + .filter(|db| { + visibility_checker.check_database_visibility( + ctl_name, + db.name(), + db.get_db_info().database_id.db_id, + ) + }) + .collect::>(); + + let ownership = if get_ownership { + user_api.get_ownerships(&tenant).await.unwrap_or_default() + } else { + HashMap::new() + }; + for db in final_dbs { + let db_id = db.get_db_info().database_id.db_id; + let db_name = db.name(); + let tables = if tables_names.is_empty() + || tables_names.len() > 10 + || invalid_tables_ids + || invalid_optimize + { + match Self::list_tables(ctl, &tenant, db_name, WITH_HISTORY, WITHOUT_VIEW).await + { + Ok(tables) => tables, + Err(err) => { + // swallow the errors related with remote database or tables, avoid ANY of bad table config corrupt ALL of the results. + // these databases might be: + // - sharing database + // - hive database + // - iceberg database + // - others + // TODO(liyz): return the warnings in the HTTP query protocol. 
+ let msg = + format!("Failed to list tables in database: {}, {}", db_name, err); + warn!("{}", msg); + ctx.push_warning(msg); + + continue; } - final_dbs - } else { - match ctl.list_databases(&tenant).await { - Ok(dbs) => dbs, + } + } else if WITH_HISTORY { + // Only can call get_table + let mut tables = Vec::new(); + for table_name in &tables_names { + match ctl.get_table_history(&tenant, db_name, table_name).await { + Ok(t) => tables.extend(t), Err(err) => { let msg = format!( - "List databases failed on catalog {}: {}", - ctl.name(), - err + "Failed to get_table_history tables in database: {}, {}", + db_name, err ); + // warn no need to pad in ctx warn!("{}", msg); - ctx.push_warning(msg); - - vec![] + continue; } } } - } - - let final_dbs = dbs - .clone() - .into_iter() - .filter(|db| { - visibility_checker.check_database_visibility( - ctl_name, - db.name(), - db.get_db_info().database_id.db_id, - ) - }) - .collect::>(); - - let ownership = if get_ownership { - user_api.get_ownerships(&tenant).await.unwrap_or_default() + tables } else { - HashMap::new() - }; - for db in final_dbs { - let db_id = db.get_db_info().database_id.db_id; - let db_name = db.name(); - let tables = if tables_names.is_empty() - || tables_names.len() > 10 - || invalid_tables_ids - || invalid_optimize - { - match Self::list_tables(ctl, &tenant, db_name, WITH_HISTORY, WITHOUT_VIEW) - .await - { - Ok(tables) => tables, + // Only can call get_table + let mut tables = Vec::new(); + for table_name in &tables_names { + match ctl.get_table(&tenant, db_name, table_name).await { + Ok(t) => tables.push(t), Err(err) => { - // swallow the errors related with remote database or tables, avoid ANY of bad table config corrupt ALL of the results. - // these databases might be: - // - sharing database - // - hive database - // - iceberg database - // - others - // TODO(liyz): return the warnings in the HTTP query protocol. let msg = format!( - "Failed to list tables in database: {}, {}", + "Failed to get table in database: {}, {}", db_name, err ); + // warn no need to pad in ctx warn!("{}", msg); - ctx.push_warning(msg); - continue; } } - } else if WITH_HISTORY { - // Only can call get_table - let mut tables = Vec::new(); - for table_name in &tables_names { - match ctl.get_table_history(&tenant, db_name, table_name).await { - Ok(t) => tables.extend(t), - Err(err) => { - let msg = format!( - "Failed to get_table_history tables in database: {}, {}", - db_name, err - ); - // warn no need to pad in ctx - warn!("{}", msg); - continue; - } - } - } - tables - } else { - // Only can call get_table - let mut tables = Vec::new(); - for table_name in &tables_names { - match ctl.get_table(&tenant, db_name, table_name).await { - Ok(t) => tables.push(t), - Err(err) => { - let msg = format!( - "Failed to get table in database: {}, {}", - db_name, err - ); - // warn no need to pad in ctx - warn!("{}", msg); - continue; - } + } + tables + }; + + for table in tables { + let table_id = table.get_id(); + // If db1 is visible, do not mean db1.table1 is visible. A user may have a grant about db1.table2, so db1 is visible + // for her, but db1.table1 may be not visible. So we need an extra check about table here after db visibility check. 
+ if visibility_checker.check_table_visibility( + ctl_name, + db_name, + table.name(), + db_id, + table_id, + ) && !table.is_stream() + { + if !WITHOUT_VIEW && table.get_table_info().engine() == "VIEW" { + catalogs.push(ctl_name.as_str()); + databases.push(db_name.to_owned()); + database_tables.push(table); + if ownership.is_empty() { + owner.push(None); + } else { + owner.push( + ownership + .get(&OwnershipObject::Table { + catalog_name: ctl_name.to_string(), + db_id, + table_id, + }) + .map(|role| role.to_string()), + ); } - } - tables - }; - - for table in tables { - let table_id = table.get_id(); - // If db1 is visible, do not mean db1.table1 is visible. A user may have a grant about db1.table2, so db1 is visible - // for her, but db1.table1 may be not visible. So we need an extra check about table here after db visibility check. - if (table.get_table_info().engine() == "VIEW" || WITHOUT_VIEW) - && !table.is_stream() - && visibility_checker.check_table_visibility( - ctl_name, - db_name, - table.name(), - db_id, - table_id, - ) - { + } else if WITHOUT_VIEW { // system.tables store view name but not store view query // decrease information_schema.tables union. catalogs.push(ctl_name.as_str());