Skip to content

Commit

Permalink
background task
Browse files Browse the repository at this point in the history
  • Loading branch information
iliana committed Nov 18, 2024
1 parent 8a6182a commit ba36c79
Show file tree
Hide file tree
Showing 20 changed files with 652 additions and 17 deletions.
28 changes: 28 additions & 0 deletions dev-tools/omdb/src/bin/omdb/nexus.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ use nexus_types::internal_api::background::RegionSnapshotReplacementFinishStatus
use nexus_types::internal_api::background::RegionSnapshotReplacementGarbageCollectStatus;
use nexus_types::internal_api::background::RegionSnapshotReplacementStartStatus;
use nexus_types::internal_api::background::RegionSnapshotReplacementStepStatus;
use nexus_types::internal_api::background::TufArtifactReplicationStatus;
use nexus_types::inventory::BaseboardId;
use omicron_uuid_kinds::CollectionUuid;
use omicron_uuid_kinds::DemoSagaUuid;
Expand Down Expand Up @@ -1928,6 +1929,33 @@ fn print_task_details(bgtask: &BackgroundTask, details: &serde_json::Value) {
}
}
};
} else if name == "tuf_artifact_replication" {
match serde_json::from_value::<TufArtifactReplicationStatus>(
details.clone(),
) {
Err(error) => eprintln!(
"warning: failed to interpret task details: {:?}: {:?}",
error, details
),
Ok(status) => {
const ROWS: &[&str] = &[
"requests ok:",
"requests errored:",
"requests outstanding:",
"local repos:",
];
const WIDTH: usize = const_max_len(ROWS);
println!(" last execution:");
for (label, value) in ROWS.iter().zip([
status.requests_ok,
status.requests_err,
status.requests_outstanding,
status.local_repos,
]) {
println!(" {label:<WIDTH$} {value:>3}");
}
}
}
} else {
println!(
"warning: unknown background task: {:?} \
Expand Down
12 changes: 12 additions & 0 deletions dev-tools/omdb/tests/env.out
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,10 @@ task: "switch_port_config_manager"
manages switch port settings for rack switches


task: "tuf_artifact_replication"
replicate update repo artifacts across sleds


task: "v2p_manager"
manages opte v2p mappings for vpc networking

Expand Down Expand Up @@ -337,6 +341,10 @@ task: "switch_port_config_manager"
manages switch port settings for rack switches


task: "tuf_artifact_replication"
replicate update repo artifacts across sleds


task: "v2p_manager"
manages opte v2p mappings for vpc networking

Expand Down Expand Up @@ -495,6 +503,10 @@ task: "switch_port_config_manager"
manages switch port settings for rack switches


task: "tuf_artifact_replication"
replicate update repo artifacts across sleds


task: "v2p_manager"
manages opte v2p mappings for vpc networking

Expand Down
26 changes: 26 additions & 0 deletions dev-tools/omdb/tests/successes.out
Original file line number Diff line number Diff line change
Expand Up @@ -384,6 +384,10 @@ task: "switch_port_config_manager"
manages switch port settings for rack switches


task: "tuf_artifact_replication"
replicate update repo artifacts across sleds


task: "v2p_manager"
manages opte v2p mappings for vpc networking

Expand Down Expand Up @@ -698,6 +702,17 @@ task: "switch_port_config_manager"
started at <REDACTED_TIMESTAMP> (<REDACTED DURATION>s ago) and ran for <REDACTED DURATION>ms
warning: unknown background task: "switch_port_config_manager" (don't know how to interpret details: Object {})

task: "tuf_artifact_replication"
configured period: every <REDACTED_DURATION>m
currently executing: no
last completed activation: <REDACTED ITERATIONS>, triggered by a periodic timer firing
started at <REDACTED_TIMESTAMP> (<REDACTED DURATION>s ago) and ran for <REDACTED DURATION>ms
last execution:
requests ok: 0
requests errored: 0
requests outstanding: 0
local repos: 0

task: "v2p_manager"
configured period: every <REDACTED_DURATION>s
currently executing: no
Expand Down Expand Up @@ -1141,6 +1156,17 @@ task: "switch_port_config_manager"
started at <REDACTED_TIMESTAMP> (<REDACTED DURATION>s ago) and ran for <REDACTED DURATION>ms
warning: unknown background task: "switch_port_config_manager" (don't know how to interpret details: Object {})

task: "tuf_artifact_replication"
configured period: every <REDACTED_DURATION>m
currently executing: no
last completed activation: <REDACTED ITERATIONS>, triggered by a periodic timer firing
started at <REDACTED_TIMESTAMP> (<REDACTED DURATION>s ago) and ran for <REDACTED DURATION>ms
last execution:
requests ok: 0
requests errored: 0
requests outstanding: 0
local repos: 0

task: "v2p_manager"
configured period: every <REDACTED_DURATION>s
currently executing: no
Expand Down
16 changes: 16 additions & 0 deletions nexus-config/src/nexus_config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -418,6 +418,8 @@ pub struct BackgroundTaskConfig {
/// configuration for region snapshot replacement finisher task
pub region_snapshot_replacement_finish:
RegionSnapshotReplacementFinishConfig,
/// configuration for TUF artifact replication task
pub tuf_artifact_replication: TufArtifactReplicationConfig,
}

#[serde_as]
Expand Down Expand Up @@ -703,6 +705,14 @@ pub struct RegionSnapshotReplacementFinishConfig {
pub period_secs: Duration,
}

/// Configuration for the TUF artifact replication background task.
///
/// Stored in `BackgroundTaskConfig::tuf_artifact_replication` and set in the
/// config file via the `tuf_artifact_replication.period_secs` key.
#[serde_as]
#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
pub struct TufArtifactReplicationConfig {
/// period (in seconds) for periodic activations of this background task
///
/// Written in the config file as an integer count of seconds (e.g. `60`)
/// and exposed here as a `Duration`.
#[serde_as(as = "DurationSeconds<u64>")]
pub period_secs: Duration,
}

/// Configuration for a nexus server
#[derive(Clone, Debug, Deserialize, PartialEq, Serialize)]
pub struct PackageConfig {
Expand Down Expand Up @@ -958,6 +968,7 @@ mod test {
region_snapshot_replacement_garbage_collection.period_secs = 30
region_snapshot_replacement_step.period_secs = 30
region_snapshot_replacement_finish.period_secs = 30
tuf_artifact_replication.period_secs = 60
[default_region_allocation_strategy]
type = "random"
seed = 0
Expand Down Expand Up @@ -1156,6 +1167,10 @@ mod test {
RegionSnapshotReplacementFinishConfig {
period_secs: Duration::from_secs(30),
},
tuf_artifact_replication:
TufArtifactReplicationConfig {
period_secs: Duration::from_secs(60)
},
},
default_region_allocation_strategy:
crate::nexus_config::RegionAllocationStrategy::Random {
Expand Down Expand Up @@ -1237,6 +1252,7 @@ mod test {
region_snapshot_replacement_garbage_collection.period_secs = 30
region_snapshot_replacement_step.period_secs = 30
region_snapshot_replacement_finish.period_secs = 30
tuf_artifact_replication.period_secs = 60
[default_region_allocation_strategy]
type = "random"
"##,
Expand Down
3 changes: 2 additions & 1 deletion nexus/db-model/src/schema_versions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ use std::collections::BTreeMap;
///
/// This must be updated when you change the database schema. Refer to
/// schema/crdb/README.adoc in the root of this repository for details.
pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(114, 0, 0);
pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(115, 0, 0);

/// List of all past database schema versions, in *reverse* order
///
Expand All @@ -29,6 +29,7 @@ static KNOWN_VERSIONS: Lazy<Vec<KnownVersion>> = Lazy::new(|| {
// | leaving the first copy as an example for the next person.
// v
// KnownVersion::new(next_int, "unique-dirname-with-the-sql-files"),
KnownVersion::new(115, "tuf-artifact-replication"),
KnownVersion::new(114, "crucible-ref-count-records"),
KnownVersion::new(113, "add-tx-eq"),
KnownVersion::new(112, "blueprint-dataset"),
Expand Down
22 changes: 20 additions & 2 deletions nexus/db-queries/src/db/datastore/update.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,15 @@ use crate::context::OpContext;
use crate::db;
use crate::db::error::{public_error_from_diesel, ErrorHandler};
use crate::db::model::SemverVersion;
use crate::db::pagination::paginated;
use crate::transaction_retry::OptionalError;
use async_bb8_diesel::AsyncRunQueryDsl;
use diesel::prelude::*;
use diesel::result::Error as DieselError;
use nexus_db_model::{ArtifactHash, TufArtifact, TufRepo, TufRepoDescription};
use omicron_common::api::external::{
self, CreateResult, LookupResult, LookupType, ResourceType,
TufRepoInsertStatus,
self, CreateResult, DataPageParams, ListResultVec, LookupResult,
LookupType, ResourceType, TufRepoInsertStatus,
};
use omicron_uuid_kinds::TufRepoKind;
use omicron_uuid_kinds::TypedUuid;
Expand Down Expand Up @@ -147,6 +148,23 @@ impl DataStore {
.map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?;
Ok(TufRepoDescription { repo, artifacts })
}

/// Lists one page of the TUF repo artifacts recorded in the database.
///
/// Results are paginated over the `sha256` column of the `tuf_artifact`
/// table according to `pagparams`, so a single call returns at most one
/// page; callers wanting every artifact must keep requesting pages.
///
/// The caller must be authorized for `Read` on the fleet. Authorization
/// and connection failures are propagated as-is; any Diesel error from
/// running the query is converted with `ErrorHandler::Server`.
pub async fn update_tuf_artifact_list(
    &self,
    opctx: &OpContext,
    pagparams: &DataPageParams<'_, ArtifactHash>,
) -> ListResultVec<TufArtifact> {
    use db::schema::tuf_artifact::dsl;

    // Refuse early if the caller may not read fleet-wide state.
    opctx.authorize(authz::Action::Read, &authz::FLEET).await?;

    let conn = self.pool_connection_authorized(opctx).await?;
    let page_query = paginated(dsl::tuf_artifact, dsl::sha256, pagparams)
        .select(TufArtifact::as_select());

    let rows = page_query.load_async(&*conn).await;
    rows.map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))
}
}

// This is a separate method mostly to make rustfmt not bail out on long lines
Expand Down
1 change: 1 addition & 0 deletions nexus/examples/config-second.toml
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,7 @@ region_snapshot_replacement_start.period_secs = 30
region_snapshot_replacement_garbage_collection.period_secs = 30
region_snapshot_replacement_step.period_secs = 30
region_snapshot_replacement_finish.period_secs = 30
tuf_artifact_replication.period_secs = 60

[default_region_allocation_strategy]
# allocate region on 3 random distinct zpools, on 3 random distinct sleds.
Expand Down
1 change: 1 addition & 0 deletions nexus/examples/config.toml
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,7 @@ region_snapshot_replacement_start.period_secs = 30
region_snapshot_replacement_garbage_collection.period_secs = 30
region_snapshot_replacement_step.period_secs = 30
region_snapshot_replacement_finish.period_secs = 30
tuf_artifact_replication.period_secs = 60

[default_region_allocation_strategy]
# allocate region on 3 random distinct zpools, on 3 random distinct sleds.
Expand Down
25 changes: 24 additions & 1 deletion nexus/src/app/background/init.rs
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@ use super::tasks::saga_recovery;
use super::tasks::service_firewall_rules;
use super::tasks::sync_service_zone_nat::ServiceZoneNatTracker;
use super::tasks::sync_switch_configuration::SwitchPortSettingsManager;
use super::tasks::tuf_artifact_replication;
use super::tasks::v2p_mappings::V2PManager;
use super::tasks::vpc_routes;
use super::Activator;
Expand All @@ -133,7 +134,9 @@ use omicron_uuid_kinds::OmicronZoneUuid;
use oximeter::types::ProducerRegistry;
use std::collections::BTreeMap;
use std::sync::Arc;
use tokio::sync::mpsc;
use tokio::sync::watch;
use update_common::artifacts::ArtifactsWithPlan;
use uuid::Uuid;

/// Interface for activating various background tasks and read data that they
Expand Down Expand Up @@ -172,6 +175,7 @@ pub struct BackgroundTasks {
pub task_region_snapshot_replacement_garbage_collection: Activator,
pub task_region_snapshot_replacement_step: Activator,
pub task_region_snapshot_replacement_finish: Activator,
pub task_tuf_artifact_replication: Activator,

// Handles to activate background tasks that do not get used by Nexus
// at-large. These background tasks are implementation details as far as
Expand Down Expand Up @@ -259,6 +263,7 @@ impl BackgroundTasksInitializer {
),
task_region_snapshot_replacement_step: Activator::new(),
task_region_snapshot_replacement_finish: Activator::new(),
task_tuf_artifact_replication: Activator::new(),

task_internal_dns_propagation: Activator::new(),
task_external_dns_propagation: Activator::new(),
Expand Down Expand Up @@ -325,6 +330,7 @@ impl BackgroundTasksInitializer {
task_region_snapshot_replacement_garbage_collection,
task_region_snapshot_replacement_step,
task_region_snapshot_replacement_finish,
task_tuf_artifact_replication,
// Add new background tasks here. Be sure to use this binding in a
// call to `Driver::register()` below. That's what actually wires
// up the Activator to the corresponding background task.
Expand Down Expand Up @@ -825,13 +831,28 @@ impl BackgroundTasksInitializer {
done",
period: config.region_snapshot_replacement_finish.period_secs,
task_impl: Box::new(RegionSnapshotReplacementFinishDetector::new(
datastore,
datastore.clone(),
)),
opctx: opctx.child(BTreeMap::new()),
watchers: vec![],
activator: task_region_snapshot_replacement_finish,
});

driver.register(TaskDefinition {
name: "tuf_artifact_replication",
description: "replicate update repo artifacts across sleds",
period: config.tuf_artifact_replication.period_secs,
task_impl: Box::new(
tuf_artifact_replication::ArtifactReplication::new(
datastore.clone(),
args.tuf_artifact_replication_rx,
),
),
opctx: opctx.child(BTreeMap::new()),
watchers: vec![],
activator: task_tuf_artifact_replication,
});

driver
}
}
Expand All @@ -856,6 +877,8 @@ pub struct BackgroundTasksData {
pub producer_registry: ProducerRegistry,
/// Helpers for saga recovery
pub saga_recovery: saga_recovery::SagaRecoveryHelpers<Arc<Nexus>>,
/// Channel for TUF repository artifacts to be replicated out to sleds
pub tuf_artifact_replication_rx: mpsc::Receiver<ArtifactsWithPlan>,
}

/// Starts the three DNS-propagation-related background tasks for either
Expand Down
1 change: 1 addition & 0 deletions nexus/src/app/background/tasks/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,5 +34,6 @@ pub mod saga_recovery;
pub mod service_firewall_rules;
pub mod sync_service_zone_nat;
pub mod sync_switch_configuration;
pub mod tuf_artifact_replication;
pub mod v2p_mappings;
pub mod vpc_routes;
Loading

0 comments on commit ba36c79

Please sign in to comment.