Skip to content

Commit

Permalink
Move oxql from oxdb to mainline omdb (#5988)
Browse files Browse the repository at this point in the history
## Why?:

Concerning [network observability
work](https://github.com/orgs/oxidecomputer/projects/55/views/1?filterQuery=&pane=issue&itemId=68336554),
this makes the [`oxql`](https://rfd.shared.oxide.computer/rfd/0463)
interactive query repl accessible via omdb, as we start to give users
and ourselves the ability to query timeseries and metrics more easily.
Additionally, in the "now", this aids in debugging through our metrics
set and makes it available, via omdb, throughout our ecosystem/a4x2.

## Includes:
* Moves `oxql_shell` into the oximeter_db lib for use by both omdb and
oxdb.
  * If no URL is given to `omdb oxql`, it will leverage internal DNS.
* Update the oximeter omdb call (for listing producers) to leverage
internal.
    DNS if no URL is given.
* Update command/output tests/generations and collector-specific tests
for list producers.

## Notes:
  * The oxql client still expects an socket address as liked it typed
specifically v.s. a String. Instead, upon running the `omdb oxql`
command,
we take in a URL String and parse it into the socket address directly.

---------

Co-authored-by: Benjamin Naecker <[email protected]>
  • Loading branch information
zeeshanlakhani and bnaecker authored Jul 9, 2024
1 parent d7a5c1c commit dcfac83
Show file tree
Hide file tree
Showing 23 changed files with 879 additions and 469 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,4 @@ tags
.falcon/*
.img/*
connectivity-report.json
*.local
2 changes: 2 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -384,7 +384,7 @@ oso = "0.27"
owo-colors = "4.0.0"
oximeter = { path = "oximeter/oximeter" }
oximeter-client = { path = "clients/oximeter-client" }
oximeter-db = { path = "oximeter/db/" }
oximeter-db = { path = "oximeter/db/", default-features = false }
oximeter-collector = { path = "oximeter/collector" }
oximeter-impl = { path = "oximeter/impl" }
oximeter-instruments = { path = "oximeter/instruments" }
Expand Down
2 changes: 2 additions & 0 deletions dev-tools/omdb/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ nexus-types.workspace = true
omicron-common.workspace = true
omicron-uuid-kinds.workspace = true
oximeter-client.workspace = true
oximeter-db = { workspace = true, default-features = false, features = [ "oxql" ] }
# See omicron-rpaths for more about the "pq-sys" dependency.
pq-sys = "*"
ratatui.workspace = true
Expand All @@ -51,6 +52,7 @@ tabled.workspace = true
textwrap.workspace = true
tokio = { workspace = true, features = [ "full" ] }
unicode-width.workspace = true
url.workspace = true
uuid.workspace = true
ipnetwork.workspace = true
omicron-workspace-hack.workspace = true
Expand Down
6 changes: 5 additions & 1 deletion dev-tools/omdb/src/bin/omdb/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ mod helpers;
mod mgs;
mod nexus;
mod oximeter;
mod oxql;
mod sled_agent;

#[tokio::main]
Expand All @@ -66,7 +67,8 @@ async fn main() -> Result<(), anyhow::Error> {
OmdbCommands::Db(db) => db.run_cmd(&args, &log).await,
OmdbCommands::Mgs(mgs) => mgs.run_cmd(&args, &log).await,
OmdbCommands::Nexus(nexus) => nexus.run_cmd(&args, &log).await,
OmdbCommands::Oximeter(oximeter) => oximeter.run_cmd(&log).await,
OmdbCommands::Oximeter(oximeter) => oximeter.run_cmd(&args, &log).await,
OmdbCommands::Oxql(oxql) => oxql.run_cmd(&args, &log).await,
OmdbCommands::SledAgent(sled) => sled.run_cmd(&args, &log).await,
OmdbCommands::CrucibleAgent(crucible) => crucible.run_cmd(&args).await,
}
Expand Down Expand Up @@ -269,6 +271,8 @@ enum OmdbCommands {
Nexus(nexus::NexusArgs),
/// Query oximeter collector state
Oximeter(oximeter::OximeterArgs),
/// Enter the Oximeter Query Language shell for interactive querying.
Oxql(oxql::OxqlArgs),
/// Debug a specific Sled
SledAgent(sled_agent::SledAgentArgs),
}
Expand Down
49 changes: 38 additions & 11 deletions dev-tools/omdb/src/bin/omdb/oximeter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
//! omdb commands that query oximeter
use crate::helpers::CONNECTION_OPTIONS_HEADING;
use crate::Omdb;
use anyhow::Context;
use clap::Args;
use clap::Subcommand;
Expand All @@ -18,18 +19,17 @@ use tabled::Table;
use tabled::Tabled;
use uuid::Uuid;

/// Arguments for the oximeter subcommand.
#[derive(Debug, Args)]
pub struct OximeterArgs {
/// URL of the oximeter collector to query
#[arg(
long,
env = "OMDB_OXIMETER_URL",
// This can't be global = true (i.e. passed in later in the
// command-line) because global options can't be required. If this
// changes to being optional, we should set global = true.
global = true,
help_heading = CONNECTION_OPTIONS_HEADING,
)]
oximeter_url: String,
oximeter_url: Option<String>,

#[command(subcommand)]
command: OximeterCommands,
Expand All @@ -38,20 +38,47 @@ pub struct OximeterArgs {
/// Subcommands that query oximeter collector state
#[derive(Debug, Subcommand)]
enum OximeterCommands {
/// List the producers the collector is assigned to poll
/// List the producers the collector is assigned to poll.
ListProducers,
}

impl OximeterArgs {
fn client(&self, log: &Logger) -> Client {
Client::new(
&self.oximeter_url,
async fn client(
&self,
omdb: &Omdb,
log: &Logger,
) -> Result<Client, anyhow::Error> {
let oximeter_url = match &self.oximeter_url {
Some(cli_or_env_url) => cli_or_env_url.clone(),
None => {
eprintln!(
"note: Oximeter URL not specified. Will pick one from DNS."
);
let addr = omdb
.dns_lookup_one(
log.clone(),
internal_dns::ServiceName::Oximeter,
)
.await?;
format!("http://{}", addr)
}
};
eprintln!("note: using Oximeter URL {}", &oximeter_url);

let client = Client::new(
&oximeter_url,
log.new(slog::o!("component" => "oximeter-client")),
)
);
Ok(client)
}

pub async fn run_cmd(&self, log: &Logger) -> anyhow::Result<()> {
let client = self.client(log);
/// Run the command.
pub async fn run_cmd(
&self,
omdb: &Omdb,
log: &Logger,
) -> anyhow::Result<()> {
let client = self.client(omdb, log).await?;
match self.command {
OximeterCommands::ListProducers => {
self.list_producers(client).await
Expand Down
97 changes: 97 additions & 0 deletions dev-tools/omdb/src/bin/omdb/oxql.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at https://mozilla.org/MPL/2.0/.

//! omdb OxQL shell for interactive queries on metrics/timeseries.
// Copyright 2024 Oxide Computer

use crate::helpers::CONNECTION_OPTIONS_HEADING;
use crate::Omdb;
use anyhow::Context;
use clap::Args;
use oximeter_db::{
self,
shells::oxql::{self, ShellOptions},
};
use slog::Logger;
use std::net::SocketAddr;
use url::Url;

/// Command-line arguments for the OxQL shell.
#[derive(Debug, Args)]
pub struct OxqlArgs {
/// URL of the ClickHouse server to connect to.
#[arg(
long,
env = "OMDB_CLICKHOUSE_URL",
global = true,
help_heading = CONNECTION_OPTIONS_HEADING,
)]
clickhouse_url: Option<String>,

/// Print summaries of each SQL query run against the database.
#[clap(long = "summaries")]
print_summaries: bool,

/// Print the total elapsed query duration.
#[clap(long = "elapsed")]
print_elapsed: bool,
}

impl OxqlArgs {
/// Run the OxQL shell via the `omdb oxql` subcommand.
pub async fn run_cmd(
&self,
omdb: &Omdb,
log: &Logger,
) -> anyhow::Result<()> {
let addr = self.addr(omdb, log).await?;

let opts = ShellOptions {
print_summaries: self.print_summaries,
print_elapsed: self.print_elapsed,
};

oxql::shell(
addr.ip(),
addr.port(),
log.new(slog::o!("component" => "clickhouse-client")),
opts,
)
.await
}

/// Resolve the ClickHouse URL to a socket address.
async fn addr(
&self,
omdb: &Omdb,
log: &Logger,
) -> anyhow::Result<SocketAddr> {
match &self.clickhouse_url {
Some(cli_or_env_url) => Url::parse(&cli_or_env_url)
.context(
"failed parsing URL from command-line or environment variable",
)?
.socket_addrs(|| None)
.context("failed resolving socket addresses")?
.into_iter()
.next()
.context("failed resolving socket addresses"),
None => {
eprintln!(
"note: ClickHouse URL not specified. Will pick one from DNS."
);

Ok(SocketAddr::V6(
omdb.dns_lookup_one(
log.clone(),
internal_dns::ServiceName::Clickhouse,
)
.await
.context("failed looking up ClickHouse internal DNS entry")?,
))
}
}
}
}
25 changes: 25 additions & 0 deletions dev-tools/omdb/tests/env.out
Original file line number Diff line number Diff line change
Expand Up @@ -433,3 +433,28 @@ note: using database URL postgresql://root@[::1]:REDACTED_PORT/omicron?sslmode=d
note: database schema version matches expected (<redacted database version>)
note: listing all commissioned sleds (use -F to filter, e.g. -F in-service)
=============================================
EXECUTING COMMAND: omdb ["oximeter", "--oximeter-url", "junk", "list-producers"]
termination: Exited(1)
---------------------------------------------
stdout:
---------------------------------------------
stderr:
note: using Oximeter URL junk
Error: failed to fetch collector info

Caused by:
0: Communication Error: builder error: relative URL without a base
1: builder error: relative URL without a base
2: relative URL without a base
=============================================
EXECUTING COMMAND: omdb ["oxql", "--clickhouse-url", "junk"]
termination: Exited(1)
---------------------------------------------
stdout:
---------------------------------------------
stderr:
Error: failed parsing URL from command-line or environment variable

Caused by:
relative URL without a base
=============================================
24 changes: 12 additions & 12 deletions dev-tools/omdb/tests/successes.out
Original file line number Diff line number Diff line change
Expand Up @@ -405,14 +405,14 @@ task: "dns_propagation_external"


task: "nat_v4_garbage_collector"
configured period: every 30s
configured period: every <REDACTED_DURATION>s
currently executing: no
last completed activation: <REDACTED ITERATIONS>, triggered by a periodic timer firing
started at <REDACTED TIMESTAMP> (<REDACTED DURATION>s ago) and ran for <REDACTED DURATION>ms
last completion reported error: failed to resolve addresses for Dendrite services: no record found for Query { name: Name("_dendrite._tcp.control-plane.oxide.internal."), query_type: SRV, query_class: IN }

task: "blueprint_loader"
configured period: every 1m 40s
configured period: every 1m <REDACTED_DURATION>s
currently executing: no
last completed activation: <REDACTED ITERATIONS>, triggered by a periodic timer firing
started at <REDACTED TIMESTAMP> (<REDACTED DURATION>s ago) and ran for <REDACTED DURATION>ms
Expand All @@ -436,7 +436,7 @@ task: "abandoned_vmm_reaper"
sled resource reservations deleted: 0

task: "bfd_manager"
configured period: every 30s
configured period: every <REDACTED_DURATION>s
currently executing: no
last completed activation: <REDACTED ITERATIONS>, triggered by a periodic timer firing
started at <REDACTED TIMESTAMP> (<REDACTED DURATION>s ago) and ran for <REDACTED DURATION>ms
Expand Down Expand Up @@ -467,7 +467,7 @@ task: "external_endpoints"
TLS certificates: 0

task: "instance_watcher"
configured period: every 30s
configured period: every <REDACTED_DURATION>s
currently executing: no
last completed activation: <REDACTED ITERATIONS>, triggered by a periodic timer firing
started at <REDACTED TIMESTAMP> (<REDACTED DURATION>s ago) and ran for <REDACTED DURATION>ms
Expand Down Expand Up @@ -503,30 +503,30 @@ task: "metrics_producer_gc"
warning: unknown background task: "metrics_producer_gc" (don't know how to interpret details: Object {"expiration": String("<REDACTED TIMESTAMP>"), "pruned": Array []})

task: "phantom_disks"
configured period: every 30s
configured period: every <REDACTED_DURATION>s
currently executing: no
last completed activation: <REDACTED ITERATIONS>, triggered by a periodic timer firing
started at <REDACTED TIMESTAMP> (<REDACTED DURATION>s ago) and ran for <REDACTED DURATION>ms
number of phantom disks deleted: 0
number of phantom disk delete errors: 0

task: "physical_disk_adoption"
configured period: every 30s
configured period: every <REDACTED_DURATION>s
currently executing: no
last completed activation: <REDACTED ITERATIONS>, triggered by a dependent task completing
started at <REDACTED TIMESTAMP> (<REDACTED DURATION>s ago) and ran for <REDACTED DURATION>ms
last completion reported error: task disabled

task: "region_replacement"
configured period: every 30s
configured period: every <REDACTED_DURATION>s
currently executing: no
last completed activation: <REDACTED ITERATIONS>, triggered by a periodic timer firing
started at <REDACTED TIMESTAMP> (<REDACTED DURATION>s ago) and ran for <REDACTED DURATION>ms
number of region replacements started ok: 0
number of region replacement start errors: 0

task: "region_replacement_driver"
configured period: every 30s
configured period: every <REDACTED_DURATION>s
currently executing: no
last completed activation: <REDACTED ITERATIONS>, triggered by a periodic timer firing
started at <REDACTED TIMESTAMP> (<REDACTED DURATION>s ago) and ran for <REDACTED DURATION>ms
Expand All @@ -541,28 +541,28 @@ task: "service_firewall_rule_propagation"
started at <REDACTED TIMESTAMP> (<REDACTED DURATION>s ago) and ran for <REDACTED DURATION>ms

task: "service_zone_nat_tracker"
configured period: every 30s
configured period: every <REDACTED_DURATION>s
currently executing: no
last completed activation: <REDACTED ITERATIONS>, triggered by a periodic timer firing
started at <REDACTED TIMESTAMP> (<REDACTED DURATION>s ago) and ran for <REDACTED DURATION>ms
last completion reported error: inventory collection is None

task: "switch_port_config_manager"
configured period: every 30s
configured period: every <REDACTED_DURATION>s
currently executing: no
last completed activation: <REDACTED ITERATIONS>, triggered by a periodic timer firing
started at <REDACTED TIMESTAMP> (<REDACTED DURATION>s ago) and ran for <REDACTED DURATION>ms
warning: unknown background task: "switch_port_config_manager" (don't know how to interpret details: Object {})

task: "v2p_manager"
configured period: every 30s
configured period: every <REDACTED_DURATION>s
currently executing: no
last completed activation: <REDACTED ITERATIONS>, triggered by a periodic timer firing
started at <REDACTED TIMESTAMP> (<REDACTED DURATION>s ago) and ran for <REDACTED DURATION>ms
warning: unknown background task: "v2p_manager" (don't know how to interpret details: Object {})

task: "vpc_route_manager"
configured period: every 30s
configured period: every <REDACTED_DURATION>s
currently executing: no
last completed activation: <REDACTED ITERATIONS>, triggered by a periodic timer firing
started at <REDACTED TIMESTAMP> (<REDACTED DURATION>s ago) and ran for <REDACTED DURATION>ms
Expand Down
Loading

0 comments on commit dcfac83

Please sign in to comment.