Skip to content

Commit

Permalink
[gateway] Separate enum for SP lookup errors (#6413)
Browse files Browse the repository at this point in the history
Currently, MGS has a `SpCommsError` enum that represents pretty much
every error that may occur, including failures to look up a SP's address
and communication failures while talking to that SP. This means that
almost all of the `ManagementSwitch` and `SingleSp` functions return all
of the possible errors of any of those operations. Perhaps more
importantly, this means that the *authors* of code that calls these
functions cannot easily determine which errors will actually be returned
by the functions they're calling.

For example, while working on #6354, I had incorrectly believed that the
`ManagementSwitch::sp` function, which returns a `SingleSp` for a
`SpIdentifier`, could fail if there were transient network issues, and
that my code would need to handle this. In fact, it turns out that this
function only returns an error if discovery hasn't completed yet, or if
the `SpIdentifier` is invalid, both of which are fatal errors (and
should all never be returned at the point where my code calls that
function).

Therefore, this branch refactors the error types a bit to separate a
`SpLookupError` from `SpCommsError`, and change the `ManagementSwitch`
functions that only return errors if discovery hasn't completed
successfully or an `SpIdentifier` is invalid to return a `SpLookupError`
instead. The `SpCommsError` has a `Discovery` variant which is produced
from a `SpLookupError`, so this is actually a fairly minimal change
overall --- functions returning a `SpCommsError` via the `?` operator
can continue doing so identically to how they do today.

I'd like to merge this before #6354, so that I can use it to clean up
some error handling code in that branch.
  • Loading branch information
hawkw authored Aug 22, 2024
1 parent f1e1aff commit 7c8c2c3
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 29 deletions.
56 changes: 36 additions & 20 deletions gateway/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,8 @@ pub enum StartupError {

#[derive(Debug, Error, SlogInlineError)]
pub enum SpCommsError {
#[error("discovery process not yet complete")]
DiscoveryNotYetComplete,
#[error("location discovery failed: {reason}")]
DiscoveryFailed { reason: String },
#[error("nonexistent SP {0:?}")]
SpDoesNotExist(SpIdentifier),
#[error(transparent)]
Discovery(#[from] SpLookupError),
#[error("unknown socket address for SP {0:?}")]
SpAddressUnknown(SpIdentifier),
#[error(
Expand All @@ -52,13 +48,22 @@ pub enum SpCommsError {
},
}

/// Errors returned by attempts to look up a SP in the management switch's
/// discovery map.
#[derive(Debug, Error, SlogInlineError)]
pub enum SpLookupError {
#[error("discovery process not yet complete")]
DiscoveryNotYetComplete,
#[error("location discovery failed: {reason}")]
DiscoveryFailed { reason: String },
#[error("nonexistent SP {0:?}")]
SpDoesNotExist(SpIdentifier),
}

impl From<SpCommsError> for HttpError {
fn from(error: SpCommsError) -> Self {
match error {
SpCommsError::SpDoesNotExist(_) => HttpError::for_bad_request(
Some("InvalidSp".to_string()),
InlineErrorChain::new(&error).to_string(),
),
SpCommsError::Discovery(err) => HttpError::from(err),
SpCommsError::SpCommunicationFailed {
err:
CommunicationError::SpError(
Expand Down Expand Up @@ -124,21 +129,11 @@ impl From<SpCommsError> for HttpError {
"UpdateInProgress",
InlineErrorChain::new(&error).to_string(),
),
SpCommsError::DiscoveryNotYetComplete => http_err_with_message(
http::StatusCode::SERVICE_UNAVAILABLE,
"DiscoveryNotYetComplete",
InlineErrorChain::new(&error).to_string(),
),
SpCommsError::SpAddressUnknown(_) => http_err_with_message(
http::StatusCode::SERVICE_UNAVAILABLE,
"SpAddressUnknown",
InlineErrorChain::new(&error).to_string(),
),
SpCommsError::DiscoveryFailed { .. } => http_err_with_message(
http::StatusCode::SERVICE_UNAVAILABLE,
"DiscoveryFailed ",
InlineErrorChain::new(&error).to_string(),
),
SpCommsError::Timeout { .. } => http_err_with_message(
http::StatusCode::SERVICE_UNAVAILABLE,
"Timeout ",
Expand All @@ -160,6 +155,27 @@ impl From<SpCommsError> for HttpError {
}
}

impl From<SpLookupError> for HttpError {
fn from(error: SpLookupError) -> Self {
match error {
SpLookupError::SpDoesNotExist(_) => HttpError::for_bad_request(
Some("InvalidSp".to_string()),
InlineErrorChain::new(&error).to_string(),
),
SpLookupError::DiscoveryNotYetComplete => http_err_with_message(
http::StatusCode::SERVICE_UNAVAILABLE,
"DiscoveryNotYetComplete",
InlineErrorChain::new(&error).to_string(),
),
SpLookupError::DiscoveryFailed { .. } => http_err_with_message(
http::StatusCode::SERVICE_UNAVAILABLE,
"DiscoveryFailed ",
InlineErrorChain::new(&error).to_string(),
),
}
}
}

// Helper function to return an `HttpError` with the same internal and external
// message. MGS is an "internal" service - even when we return a 500-level
// status code, we want to give our caller some information about what is going
Expand Down
19 changes: 10 additions & 9 deletions gateway/src/management_switch.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ pub use self::location_map::SwitchPortConfig;
pub use self::location_map::SwitchPortDescription;
use self::location_map::ValidatedLocationConfig;
use crate::error::SpCommsError;
use crate::error::SpLookupError;
use crate::error::StartupError;
use gateway_messages::IgnitionState;
use gateway_sp_comms::default_discovery_addr;
Expand Down Expand Up @@ -316,18 +317,18 @@ impl ManagementSwitch {
self.location_map.get().is_some()
}

fn location_map(&self) -> Result<&LocationMap, SpCommsError> {
fn location_map(&self) -> Result<&LocationMap, SpLookupError> {
let discovery_result = self
.location_map
.get()
.ok_or(SpCommsError::DiscoveryNotYetComplete)?;
.ok_or(SpLookupError::DiscoveryNotYetComplete)?;
discovery_result
.as_ref()
.map_err(|s| SpCommsError::DiscoveryFailed { reason: s.clone() })
.map_err(|s| SpLookupError::DiscoveryFailed { reason: s.clone() })
}

/// Get the identifier of our local switch.
pub fn local_switch(&self) -> Result<SpIdentifier, SpCommsError> {
pub fn local_switch(&self) -> Result<SpIdentifier, SpLookupError> {
let location_map = self.location_map()?;
Ok(location_map.port_to_id(self.local_ignition_controller_port))
}
Expand All @@ -347,11 +348,11 @@ impl ManagementSwitch {
/// This method will fail if discovery is not yet complete (i.e., we don't
/// know the logical identifiers of any SP yet!) or if `id` specifies an SP
/// that doesn't exist in our discovered location map.
fn get_port(&self, id: SpIdentifier) -> Result<SwitchPort, SpCommsError> {
fn get_port(&self, id: SpIdentifier) -> Result<SwitchPort, SpLookupError> {
let location_map = self.location_map()?;
let port = location_map
.id_to_port(id)
.ok_or(SpCommsError::SpDoesNotExist(id))?;
.ok_or(SpLookupError::SpDoesNotExist(id))?;
Ok(port)
}

Expand All @@ -362,7 +363,7 @@ impl ManagementSwitch {
/// This method will fail if discovery is not yet complete (i.e., we don't
/// know the logical identifiers of any SP yet!) or if `id` specifies an SP
/// that doesn't exist in our discovered location map.
pub fn sp(&self, id: SpIdentifier) -> Result<&SingleSp, SpCommsError> {
pub fn sp(&self, id: SpIdentifier) -> Result<&SingleSp, SpLookupError> {
let port = self.get_port(id)?;
Ok(self.port_to_sp(port))
}
Expand All @@ -377,7 +378,7 @@ impl ManagementSwitch {
pub fn ignition_target(
&self,
id: SpIdentifier,
) -> Result<u8, SpCommsError> {
) -> Result<u8, SpLookupError> {
let port = self.get_port(id)?;
Ok(self.port_to_ignition_target[port.0])
}
Expand All @@ -389,7 +390,7 @@ impl ManagementSwitch {
/// therefore can't map our switch ports to SP identities).
pub(crate) fn all_sps(
&self,
) -> Result<impl Iterator<Item = (SpIdentifier, &SingleSp)>, SpCommsError>
) -> Result<impl Iterator<Item = (SpIdentifier, &SingleSp)>, SpLookupError>
{
let location_map = self.location_map()?;
Ok(location_map
Expand Down

0 comments on commit 7c8c2c3

Please sign in to comment.