diff --git a/nexus/external-api/output/nexus_tags.txt b/nexus/external-api/output/nexus_tags.txt index 8102ebce08..a979a9804b 100644 --- a/nexus/external-api/output/nexus_tags.txt +++ b/nexus/external-api/output/nexus_tags.txt @@ -30,6 +30,7 @@ probe_create POST /experimental/v1/probes probe_delete DELETE /experimental/v1/probes/{probe} probe_list GET /experimental/v1/probes probe_view GET /experimental/v1/probes/{probe} +timeseries_query POST /v1/timeseries/query API operations found with tag "images" OPERATION ID METHOD URL PATH diff --git a/nexus/external-api/src/lib.rs b/nexus/external-api/src/lib.rs index 1c5c7c1d2d..e2b53a7e6f 100644 --- a/nexus/external-api/src/lib.rs +++ b/nexus/external-api/src/lib.rs @@ -2567,6 +2567,26 @@ pub trait NexusExternalApi { body: TypedBody, ) -> Result, HttpError>; + // TODO: list endpoint for project-scoped schemas is blocked on + // https://github.com/oxidecomputer/omicron/issues/5942: the authz scope for + // each schema is not stored in Clickhouse yet. + + /// Run project-scoped timeseries query + /// + /// Queries are written in OxQL. Project must be specified by name or ID in + /// URL query parameter. The OxQL query will only return timeseries data + /// from the specified project. + #[endpoint { + method = POST, + path = "/v1/timeseries/query", + tags = ["hidden"], + }] + async fn timeseries_query( + rqctx: RequestContext, + query_params: Query, + body: TypedBody, + ) -> Result, HttpError>; + // Updates /// Upload TUF repository diff --git a/nexus/src/app/metrics.rs b/nexus/src/app/metrics.rs index 40f7882281..5b77e681b1 100644 --- a/nexus/src/app/metrics.rs +++ b/nexus/src/app/metrics.rs @@ -140,28 +140,52 @@ impl super::Nexus { self.timeseries_client .oxql_query(query) .await - .map(|result| { - // TODO-observability: The query method returns information - // about the duration of the OxQL query and the database - // resource usage for each contained SQL query. We should - // publish this as a timeseries itself, so that we can track - // improvements to query processing. - // - // For now, simply return the tables alone. - result.tables - }) - .map_err(|e| match e { - oximeter_db::Error::DatabaseUnavailable(_) - | oximeter_db::Error::Connection(_) => { - Error::ServiceUnavailable { - internal_message: e.to_string(), - } - } - oximeter_db::Error::Oxql(_) - | oximeter_db::Error::TimeseriesNotFound(_) => { - Error::invalid_request(e.to_string()) - } - _ => Error::InternalError { internal_message: e.to_string() }, - }) + // TODO-observability: The query method returns information + // about the duration of the OxQL query and the database + // resource usage for each contained SQL query. We should + // publish this as a timeseries itself, so that we can track + // improvements to query processing. + // + // For now, simply return the tables alone. + .map(|result| result.tables) + .map_err(map_timeseries_err) + } + + /// Run an OxQL query against the timeseries database, scoped to a specific project. + pub(crate) async fn timeseries_query_project( + &self, + _opctx: &OpContext, + project_lookup: &lookup::Project<'_>, + query: impl AsRef, + ) -> Result, Error> { + // Ensure the user has read access to the project + let (authz_silo, authz_project) = + project_lookup.lookup_for(authz::Action::Read).await?; + + // Ensure the query only refers to the project + let filtered_query = format!( + "{} | filter silo_id == \"{}\" && project_id == \"{}\"", + query.as_ref(), + authz_silo.id(), + authz_project.id() + ); + + self.timeseries_client + .oxql_query(filtered_query) + .await + .map(|result| result.tables) + .map_err(map_timeseries_err) + } +} + +fn map_timeseries_err(e: oximeter_db::Error) -> Error { + match e { + oximeter_db::Error::DatabaseUnavailable(_) + | oximeter_db::Error::Connection(_) => Error::unavail(&e.to_string()), + oximeter_db::Error::Oxql(_) + | oximeter_db::Error::TimeseriesNotFound(_) => { + Error::invalid_request(e.to_string()) + } + _ => Error::internal_error(&e.to_string()), } } diff --git a/nexus/src/external_api/http_entrypoints.rs b/nexus/src/external_api/http_entrypoints.rs index a285542442..740895b7e4 100644 --- a/nexus/src/external_api/http_entrypoints.rs +++ b/nexus/src/external_api/http_entrypoints.rs @@ -5544,6 +5544,33 @@ impl NexusExternalApi for NexusExternalApiImpl { .await } + async fn timeseries_query( + rqctx: RequestContext, + query_params: Query, + body: TypedBody, + ) -> Result, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let nexus = &apictx.context.nexus; + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let project_selector = query_params.into_inner(); + let query = body.into_inner().query; + let project_lookup = + nexus.project_lookup(&opctx, project_selector)?; + nexus + .timeseries_query_project(&opctx, &project_lookup, &query) + .await + .map(|tables| HttpResponseOk(views::OxqlQueryResult { tables })) + .map_err(HttpError::from) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } + // Updates async fn system_update_put_repository( diff --git a/nexus/tests/integration_tests/endpoints.rs b/nexus/tests/integration_tests/endpoints.rs index 2e7b68eaca..466cae17a8 100644 --- a/nexus/tests/integration_tests/endpoints.rs +++ b/nexus/tests/integration_tests/endpoints.rs @@ -948,10 +948,14 @@ pub static DEMO_SILO_METRICS_URL: Lazy = Lazy::new(|| { ) }); -pub static TIMESERIES_LIST_URL: Lazy = +pub static TIMESERIES_QUERY_URL: Lazy = Lazy::new(|| { + format!("/v1/timeseries/query?project={}", *DEMO_PROJECT_NAME) +}); + +pub static SYSTEM_TIMESERIES_LIST_URL: Lazy = Lazy::new(|| String::from("/v1/system/timeseries/schemas")); -pub static TIMESERIES_QUERY_URL: Lazy = +pub static SYSTEM_TIMESERIES_QUERY_URL: Lazy = Lazy::new(|| String::from("/v1/system/timeseries/query")); pub static DEMO_TIMESERIES_QUERY: Lazy = @@ -2208,7 +2212,18 @@ pub static VERIFY_ENDPOINTS: Lazy> = Lazy::new(|| { }, VerifyEndpoint { - url: &TIMESERIES_LIST_URL, + url: &TIMESERIES_QUERY_URL, + visibility: Visibility::Protected, + unprivileged_access: UnprivilegedAccess::None, + allowed_methods: vec![ + AllowedMethod::Post( + serde_json::to_value(&*DEMO_TIMESERIES_QUERY).unwrap() + ), + ], + }, + + VerifyEndpoint { + url: &SYSTEM_TIMESERIES_LIST_URL, visibility: Visibility::Public, unprivileged_access: UnprivilegedAccess::None, allowed_methods: vec![ @@ -2217,7 +2232,7 @@ pub static VERIFY_ENDPOINTS: Lazy> = Lazy::new(|| { }, VerifyEndpoint { - url: &TIMESERIES_QUERY_URL, + url: &SYSTEM_TIMESERIES_QUERY_URL, visibility: Visibility::Public, unprivileged_access: UnprivilegedAccess::None, allowed_methods: vec![ diff --git a/nexus/tests/integration_tests/metrics.rs b/nexus/tests/integration_tests/metrics.rs index 33cf7e2073..7e5441c16a 100644 --- a/nexus/tests/integration_tests/metrics.rs +++ b/nexus/tests/integration_tests/metrics.rs @@ -9,16 +9,20 @@ use crate::integration_tests::instances::{ }; use chrono::Utc; use dropshot::test_util::ClientTestContext; -use dropshot::ResultsPage; +use dropshot::{HttpErrorResponseBody, ResultsPage}; use http::{Method, StatusCode}; +use nexus_auth::authn::USER_TEST_UNPRIVILEGED; +use nexus_db_queries::db::identity::Asset; +use nexus_test_utils::background::activate_background_task; use nexus_test_utils::http_testing::{AuthnMode, NexusRequest, RequestBuilder}; use nexus_test_utils::resource_helpers::{ create_default_ip_pool, create_disk, create_instance, create_project, - objects_list_page_authz, DiskTest, + grant_iam, object_create_error, objects_list_page_authz, DiskTest, }; use nexus_test_utils::wait_for_producer; use nexus_test_utils::ControlPlaneTestContext; use nexus_test_utils_macros::nexus_test; +use nexus_types::external_api::shared::ProjectRole; use nexus_types::external_api::views::OxqlQueryResult; use nexus_types::silo::DEFAULT_SILO_ID; use omicron_test_utils::dev::poll::{wait_for_condition, CondCheckError}; @@ -266,7 +270,7 @@ async fn test_metrics( /// Test that we can correctly list some timeseries schema. #[nexus_test] -async fn test_timeseries_schema_list( +async fn test_system_timeseries_schema_list( cptestctx: &ControlPlaneTestContext, ) { // Nexus registers itself as a metric producer on startup, with its own UUID @@ -298,16 +302,44 @@ async fn test_timeseries_schema_list( } /// Run an OxQL query until it succeeds or panics. -pub async fn timeseries_query_until_success( +pub async fn system_timeseries_query( cptestctx: &ControlPlaneTestContext, query: impl ToString, +) -> Vec { + timeseries_query_until_success( + cptestctx, + "/v1/system/timeseries/query", + query, + ) + .await +} + +/// Run a project-scoped OxQL query until it succeeds or panics. +pub async fn project_timeseries_query( + cptestctx: &ControlPlaneTestContext, + project: &str, + query: impl ToString, +) -> Vec { + timeseries_query_until_success( + cptestctx, + &format!("/v1/timeseries/query?project={}", project), + query, + ) + .await +} + +/// Run an OxQL query until it succeeds or panics. +async fn timeseries_query_until_success( + cptestctx: &ControlPlaneTestContext, + endpoint: &str, + query: impl ToString, ) -> Vec { const POLL_INTERVAL: Duration = Duration::from_secs(1); const POLL_MAX: Duration = Duration::from_secs(30); let query_ = query.to_string(); wait_for_condition( || async { - match timeseries_query(cptestctx, &query_).await { + match execute_timeseries_query(cptestctx, endpoint, &query_).await { Some(r) => Ok(r), None => Err(CondCheckError::<()>::NotYet), } @@ -331,8 +363,9 @@ pub async fn timeseries_query_until_success( /// This returns `None` if the query resulted in client error and the body /// indicates that a timeseries named in the query could not be found. In all /// other cases, it either succeeds or panics. -pub async fn timeseries_query( +pub async fn execute_timeseries_query( cptestctx: &ControlPlaneTestContext, + endpoint: &str, query: impl ToString, ) -> Option> { // first, make sure the latest timeseries have been collected. @@ -351,7 +384,7 @@ pub async fn timeseries_query( nexus_test_utils::http_testing::RequestBuilder::new( &cptestctx.external_client, http::Method::POST, - "/v1/system/timeseries/query", + endpoint, ) .body(Some(&body)), ) @@ -490,7 +523,7 @@ async fn test_instance_watcher_metrics( // activate the instance watcher background task. activate_instance_watcher().await; - let metrics = timeseries_query_until_success(&cptestctx, OXQL_QUERY).await; + let metrics = system_timeseries_query(&cptestctx, OXQL_QUERY).await; let checks = metrics .iter() .find(|t| t.name() == "virtual_machine:check") @@ -506,7 +539,7 @@ async fn test_instance_watcher_metrics( // activate the instance watcher background task. activate_instance_watcher().await; - let metrics = timeseries_query_until_success(&cptestctx, OXQL_QUERY).await; + let metrics = system_timeseries_query(&cptestctx, OXQL_QUERY).await; let checks = metrics .iter() .find(|t| t.name() == "virtual_machine:check") @@ -523,7 +556,7 @@ async fn test_instance_watcher_metrics( // activate the instance watcher background task. activate_instance_watcher().await; - let metrics = timeseries_query_until_success(&cptestctx, OXQL_QUERY).await; + let metrics = system_timeseries_query(&cptestctx, OXQL_QUERY).await; let checks = metrics .iter() .find(|t| t.name() == "virtual_machine:check") @@ -548,7 +581,7 @@ async fn test_instance_watcher_metrics( // activate the instance watcher background task. activate_instance_watcher().await; - let metrics = timeseries_query_until_success(&cptestctx, OXQL_QUERY).await; + let metrics = system_timeseries_query(&cptestctx, OXQL_QUERY).await; let checks = metrics .iter() .find(|t| t.name() == "virtual_machine:check") @@ -577,7 +610,7 @@ async fn test_instance_watcher_metrics( // activate the instance watcher background task. activate_instance_watcher().await; - let metrics = timeseries_query_until_success(&cptestctx, OXQL_QUERY).await; + let metrics = system_timeseries_query(&cptestctx, OXQL_QUERY).await; let checks = metrics .iter() .find(|t| t.name() == "virtual_machine:check") @@ -597,6 +630,134 @@ async fn test_instance_watcher_metrics( assert_gte!(ts2_running, 2); } +#[nexus_test] +async fn test_project_timeseries_query( + cptestctx: &ControlPlaneTestContext, +) { + let client = &cptestctx.external_client; + + create_default_ip_pool(&client).await; // needed for instance create to work + + // Create two projects + let p1 = create_project(&client, "project1").await; + let _p2 = create_project(&client, "project2").await; + + // Create resources in each project + let i1 = create_instance(&client, "project1", "instance1").await; + let _i2 = create_instance(&client, "project2", "instance2").await; + + let internal_client = &cptestctx.internal_client; + + // get the instance metrics to show up + let _ = + activate_background_task(&internal_client, "instance_watcher").await; + + // Query with no project specified + let q1 = "get virtual_machine:check"; + + let result = project_timeseries_query(&cptestctx, "project1", q1).await; + assert_eq!(result.len(), 1); + assert!(result[0].timeseries().len() > 0); + + // also works with project ID + let result = + project_timeseries_query(&cptestctx, &p1.identity.id.to_string(), q1) + .await; + assert_eq!(result.len(), 1); + assert!(result[0].timeseries().len() > 0); + + let result = project_timeseries_query(&cptestctx, "project2", q1).await; + assert_eq!(result.len(), 1); + assert!(result[0].timeseries().len() > 0); + + // with project specified + let q2 = &format!("{} | filter project_id == \"{}\"", q1, p1.identity.id); + + let result = project_timeseries_query(&cptestctx, "project1", q2).await; + assert_eq!(result.len(), 1); + assert!(result[0].timeseries().len() > 0); + + let result = project_timeseries_query(&cptestctx, "project2", q2).await; + assert_eq!(result.len(), 1); + assert_eq!(result[0].timeseries().len(), 0); + + // with instance specified + let q3 = &format!("{} | filter instance_id == \"{}\"", q1, i1.identity.id); + + // project containing instance gives me something + let result = project_timeseries_query(&cptestctx, "project1", q3).await; + assert_eq!(result.len(), 1); + assert_eq!(result[0].timeseries().len(), 1); + + // should be empty or error + let result = project_timeseries_query(&cptestctx, "project2", q3).await; + assert_eq!(result.len(), 1); + assert_eq!(result[0].timeseries().len(), 0); + + // expect error when querying a metric that has no project_id on it + let q4 = "get integration_target:integration_metric"; + let url = "/v1/timeseries/query?project=project1"; + let body = nexus_types::external_api::params::TimeseriesQuery { + query: q4.to_string(), + }; + let result = + object_create_error(client, url, &body, StatusCode::BAD_REQUEST).await; + assert_eq!(result.error_code.unwrap(), "InvalidRequest"); + // Notable that the error confirms that the metric exists and says what the + // fields are. This is helpful generally, but here it would be better if + // we could say something more like "you can't query this timeseries from + // this endpoint" + assert_eq!(result.message, "The filter expression contains identifiers that are not valid for its input timeseries. Invalid identifiers: [\"project_id\", \"silo_id\"], timeseries fields: {\"datum\", \"metric_name\", \"target_name\", \"timestamp\"}"); + + // nonexistent project + let url = "/v1/timeseries/query?project=nonexistent"; + let body = nexus_types::external_api::params::TimeseriesQuery { + query: q4.to_string(), + }; + let result = + object_create_error(client, url, &body, StatusCode::NOT_FOUND).await; + assert_eq!(result.message, "not found: project with name \"nonexistent\""); + + // unprivileged user gets 404 on project that exists, but which they can't read + let url = "/v1/timeseries/query?project=project1"; + let body = nexus_types::external_api::params::TimeseriesQuery { + query: q1.to_string(), + }; + + let request = RequestBuilder::new(client, Method::POST, url) + .body(Some(&body)) + .expect_status(Some(StatusCode::NOT_FOUND)); + let result = NexusRequest::new(request) + .authn_as(AuthnMode::UnprivilegedUser) + .execute() + .await + .unwrap() + .parsed_body::() + .unwrap(); + assert_eq!(result.message, "not found: project with name \"project1\""); + + // now grant the user access to that project only + grant_iam( + client, + "/v1/projects/project1", + ProjectRole::Viewer, + USER_TEST_UNPRIVILEGED.id(), + AuthnMode::PrivilegedUser, + ) + .await; + + // now they can access the timeseries. how cool is that + let request = RequestBuilder::new(client, Method::POST, url) + .body(Some(&body)) + .expect_status(Some(StatusCode::OK)); + let result = NexusRequest::new(request) + .authn_as(AuthnMode::UnprivilegedUser) + .execute_and_parse_unwrap::() + .await; + assert_eq!(result.tables.len(), 1); + assert_eq!(result.tables[0].timeseries().len(), 1); +} + #[nexus_test] async fn test_mgs_metrics( cptestctx: &ControlPlaneTestContext, @@ -763,7 +924,7 @@ async fn test_mgs_metrics( .try_force_collect() .await .expect("Could not force oximeter collection"); - let table = timeseries_query_until_success(&cptestctx, &query) + let table = system_timeseries_query(&cptestctx, &query) .await .into_iter() .find(|t| t.name() == name) diff --git a/openapi/nexus.json b/openapi/nexus.json index 79186e379a..c0b6a96fcf 100644 --- a/openapi/nexus.json +++ b/openapi/nexus.json @@ -8890,6 +8890,55 @@ } } }, + "/v1/timeseries/query": { + "post": { + "tags": [ + "hidden" + ], + "summary": "Run project-scoped timeseries query", + "description": "Queries are written in OxQL. Project must be specified by name or ID in URL query parameter. The OxQL query will only return timeseries data from the specified project.", + "operationId": "timeseries_query", + "parameters": [ + { + "in": "query", + "name": "project", + "description": "Name or ID of the project", + "required": true, + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/TimeseriesQuery" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/OxqlQueryResult" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, "/v1/users": { "get": { "tags": [