Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: fail deployment when it's aborted #115

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions core/src/mender-api.c
Original file line number Diff line number Diff line change
Expand Up @@ -486,6 +486,10 @@ mender_api_publish_deployment_status(const char *id, mender_deployment_status_t
if (204 == status) {
/* No response expected */
ret = MENDER_OK;
} else if (409 == status) {
/* Deployment aborted */
mender_api_print_response_error(response, status);
ret = MENDER_ABORTED;
} else {
mender_api_print_response_error(response, status);
ret = MENDER_FAIL;
Expand Down
33 changes: 24 additions & 9 deletions core/src/mender-client.c
Original file line number Diff line number Diff line change
Expand Up @@ -857,6 +857,15 @@ set_and_store_state(const mender_update_state_t state) {
return ret;
}

/**
 * @brief Publish deployment status and check if deployment is aborted
 * @param ret Variable that is set to MENDER_FAIL when the deployment is aborted; left untouched otherwise
 * @param aborted_deployment Variable that is set to true if publishing the status returned MENDER_ABORTED, false otherwise
 * @param id Deployment ID forwarded to mender_client_publish_deployment_status
 * @param deployment_status Deployment status forwarded to mender_client_publish_deployment_status
 * @note Any return value other than MENDER_ABORTED from the publish call is ignored here.
 * @note The body is wrapped in do { } while (0) so that the macro expands to exactly one
 *       statement and stays correct as the body of an unbraced if/else or loop.
 */
#define PUBLISH_DEPLOYMENT_STATUS_CHECK_ABORTED(ret, aborted_deployment, id, deployment_status) \
    do { \
        (aborted_deployment) = (MENDER_ABORTED == mender_client_publish_deployment_status((id), (deployment_status))); \
        if (aborted_deployment) { \
            (ret) = MENDER_FAIL; \
        } \
    } while (0)

static mender_err_t
mender_client_update_work_function(void) {
mender_err_t ret = MENDER_OK;
Expand All @@ -865,9 +874,10 @@ mender_client_update_work_function(void) {
mender_artifact_ctx_t *mender_artifact_ctx = NULL;

/* Check for deployment */
mender_api_deployment_data_t *deployment = NULL;
mender_update_state_t update_state = MENDER_UPDATE_STATE_DOWNLOAD;
const char *deployment_id = NULL;
mender_api_deployment_data_t *deployment = NULL;
mender_update_state_t update_state = MENDER_UPDATE_STATE_DOWNLOAD;
const char *deployment_id = NULL;
bool aborted_deployment = false;

/* reset the currently used update module */
mender_update_module = NULL;
Expand Down Expand Up @@ -943,12 +953,13 @@ mender_client_update_work_function(void) {
mender_log_info("Downloading artifact with id '%s', name '%s', uri '%s'", deployment->id, deployment->artifact_name, deployment->uri);
}
#endif
mender_client_publish_deployment_status(deployment->id, MENDER_DEPLOYMENT_STATUS_DOWNLOADING);

/* Set deployment_id */
deployment_id = deployment->id;

if (MENDER_OK == (ret = mender_download_artifact(deployment->uri, mender_client_deployment_data, &mender_update_module))) {
PUBLISH_DEPLOYMENT_STATUS_CHECK_ABORTED(ret, aborted_deployment, deployment->id, MENDER_DEPLOYMENT_STATUS_DOWNLOADING);

if ((!aborted_deployment)
&& (MENDER_OK == (ret = mender_download_artifact(deployment->uri, mender_client_deployment_data, &mender_update_module)))) {
assert(NULL != mender_update_module);

/* Get artifact context if artifact download succeeded */
Expand Down Expand Up @@ -990,8 +1001,8 @@ mender_client_update_work_function(void) {

case MENDER_UPDATE_STATE_INSTALL:
mender_log_info("Download done, installing artifact");
mender_client_publish_deployment_status(deployment_id, MENDER_DEPLOYMENT_STATUS_INSTALLING);
if (NULL != mender_update_module->callbacks[update_state]) {
PUBLISH_DEPLOYMENT_STATUS_CHECK_ABORTED(ret, aborted_deployment, deployment_id, MENDER_DEPLOYMENT_STATUS_INSTALLING);
if ((MENDER_OK == ret) && NULL != mender_update_module->callbacks[update_state]) {
ret = mender_update_module->callbacks[update_state](update_state, (mender_update_state_data_t)NULL);
}
if ((MENDER_OK == ret) && !mender_update_module->requires_reboot) {
Expand All @@ -1007,7 +1018,7 @@ mender_client_update_work_function(void) {
case MENDER_UPDATE_STATE_REBOOT:
assert(mender_update_module->requires_reboot);
mender_log_info("Artifact installation done, rebooting");
mender_client_publish_deployment_status(deployment_id, MENDER_DEPLOYMENT_STATUS_REBOOTING);
PUBLISH_DEPLOYMENT_STATUS_CHECK_ABORTED(ret, aborted_deployment, deployment_id, MENDER_DEPLOYMENT_STATUS_REBOOTING);
if ((MENDER_OK == ret) && (NULL != mender_update_module->callbacks[update_state])) {
/* Save the next state before running the reboot callback --
* if there is an interrupt (power, crash,...) right after,
Expand Down Expand Up @@ -1052,6 +1063,7 @@ mender_client_update_work_function(void) {
ret = mender_update_module->callbacks[update_state](update_state, (mender_update_state_data_t)NULL);
}
if (MENDER_OK == ret) {
/* We don't check for MENDER_ABORTED, as it's too late if it has reached this far */
mender_client_publish_deployment_status(deployment_id, MENDER_DEPLOYMENT_STATUS_SUCCESS);
}
NEXT_STATE;
Expand All @@ -1069,6 +1081,9 @@ mender_client_update_work_function(void) {
if (!mender_update_module->supports_rollback) {
mender_log_warning("Rollback not supported for artifacts of type '%s'", mender_update_module->artifact_type);
ret = MENDER_FAIL;
} else if (aborted_deployment) {
/* Don't rollback if deployment is aborted */
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wouldn't you as a user expect a rollback if you abort a deployment that isn't committed yet?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right - but the code doesn't support an abort after a reboot: nothing publishes the status between reboot and commit, and on commit we've said it's too late. My idea is that before we have rebooted, we should not roll back into the other partition, but just go to failure. Otherwise we'll end up in the wrong partition if, for example, the deployment is aborted after the install state has published its status and set the pending image.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't understand, sorry. This code is in the ROLLBACK state, i.e. we are supposed to roll back, and yet here we don't roll back if the deployment was aborted. To me that's wrong. Either we should not get into the ROLLBACK state when there's nothing to roll back, or we should do the rollback. So, if I understand correctly what you describe, the proper behavior is to not end up in this state in such a case. IOW, either there's something to roll back and we should roll back if we get here and the deployment is aborted, or there's nothing to roll back and we should not end up here. Or am I still missing something? If so, please describe the sequence of states that leads to the problematic case.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Assume we have MENDER_UPDATE_STATE_DOWNLOAD -> MENDER_UPDATE_STATE_INSTALL -> MENDER_UPDATE_STATE_REBOOT.
On each of these states we publish the status and check if the deployment is aborted. If it's aborted when we enter MENDER_UPDATE_STATE_REBOOT, then what do we want to do? We haven't rebooted yet, as this is caught before the reboot callback is called. So we can either go directly to failure - which deviates from the normal flow, but makes sense since we haven't actually rebooted, and is what I originally did - or follow the normal state transitions and take the failure route, which transitions to rollback. But then we indeed have nothing to roll back, which is why I check for an aborted deployment and set ret = MENDER_FAIL in MENDER_UPDATE_STATE_ROLLBACK.

If we checked whether the status is aborted in MENDER_UPDATE_STATE_COMMIT (which Lluis said was too late, though I don't quite know why it's too late), then it would make sense to do a proper rollback: by then we have rebooted into the new partition, so a rollback would actually be needed.

ret = MENDER_FAIL;
} else if (NULL != mender_update_module->callbacks[update_state]) {
ret = mender_update_module->callbacks[update_state](update_state, (mender_update_state_data_t)NULL);
}
Expand Down
4 changes: 2 additions & 2 deletions core/src/mender-zephyr-image-update-module.c
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ mender_zephyr_image_set_pending_image(MENDER_NDEBUG_UNUSED mender_update_state_t
return MENDER_FAIL;
}

if (MENDER_OK != (ret = mender_flash_set_pending_image(mcu_boot_flash_handle))) {
if (MENDER_OK != (ret = mender_flash_set_pending_image(&mcu_boot_flash_handle))) {
mender_log_error("Unable to set boot partition");
return ret;
}
Expand All @@ -153,7 +153,7 @@ mender_zephyr_image_abort_deployment(MENDER_NDEBUG_UNUSED mender_update_state_t
assert(MENDER_UPDATE_STATE_FAILURE == state);
mender_err_t ret;

if (MENDER_OK != (ret = mender_flash_abort_deployment(mcu_boot_flash_handle))) {
if (MENDER_OK != (ret = mender_flash_abort_deployment(&mcu_boot_flash_handle))) {
mender_log_error("Unable to abort deployment");
return ret;
}
Expand Down
4 changes: 2 additions & 2 deletions include/mender-flash.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,14 +57,14 @@ mender_err_t mender_flash_close(void *handle);
* @param handle Handle from mender_flash_open
* @return MENDER_OK if the function succeeds, error code otherwise
*/
mender_err_t mender_flash_set_pending_image(void *handle);
mender_err_t mender_flash_set_pending_image(void **handle);

/**
* @brief Abort current deployment
* @param handle Handle from mender_flash_open
* @return MENDER_OK if the function succeeds, error code otherwise
*/
mender_err_t mender_flash_abort_deployment(void *handle);
mender_err_t mender_flash_abort_deployment(void **handle);

/**
* @brief Mark image valid and cancel rollback if this is still pending
Expand Down
1 change: 1 addition & 0 deletions include/mender-utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ typedef enum {
MENDER_NOT_FOUND = -2, /**< Not found */
MENDER_NOT_IMPLEMENTED = -3, /**< Not implemented */
MENDER_LOOP_DETECTED = -4, /**< Loop detected */
MENDER_ABORTED = -5, /**< Aborted */
} mender_err_t;

/**
Expand Down
4 changes: 2 additions & 2 deletions platform/flash/generic/weak/src/mender-flash.c
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ mender_flash_close(void *handle) {
}

__attribute__((weak)) mender_err_t
mender_flash_set_pending_image(void *handle) {
mender_flash_set_pending_image(void **handle) {

(void)handle;

Expand All @@ -62,7 +62,7 @@ mender_flash_set_pending_image(void *handle) {
}

__attribute__((weak)) mender_err_t
mender_flash_abort_deployment(void *handle) {
mender_flash_abort_deployment(void **handle) {

(void)handle;

Expand Down
10 changes: 5 additions & 5 deletions platform/flash/posix/src/mender-flash.c
Original file line number Diff line number Diff line change
Expand Up @@ -101,13 +101,13 @@ mender_flash_close(void *handle) {
}

mender_err_t
mender_flash_set_pending_image(void *handle) {
mender_flash_set_pending_image(void **handle) {

FILE *file;
mender_err_t ret = MENDER_OK;

/* Check flash handle */
if (NULL != handle) {
if (NULL != *handle) {

/* Write request update file */
if (NULL == (file = fopen(MENDER_FLASH_REQUEST_UPGRADE, "wb"))) {
Expand All @@ -122,13 +122,13 @@ mender_flash_set_pending_image(void *handle) {
}

mender_err_t
mender_flash_abort_deployment(void *handle) {
mender_flash_abort_deployment(void **handle) {

/* Check flash handle */
if (NULL != handle) {
if (NULL != *handle) {

/* Release memory */
fclose(handle);
fclose(*handle);
}

return MENDER_OK;
Expand Down
10 changes: 5 additions & 5 deletions platform/flash/zephyr/src/mender-flash.c
Original file line number Diff line number Diff line change
Expand Up @@ -90,12 +90,12 @@ mender_flash_close(void *handle) {
}

mender_err_t
mender_flash_set_pending_image(void *handle) {
mender_flash_set_pending_image(void **handle) {

int result;

/* Check flash handle */
if (NULL != handle) {
if (NULL != *handle) {

/* Set new boot partition */
if (0 != (result = boot_request_upgrade(BOOT_UPGRADE_TEST))) {
Expand All @@ -104,7 +104,7 @@ mender_flash_set_pending_image(void *handle) {
}

/* Release memory */
free(handle);
FREE_AND_NULL(*handle);
} else {

/* This should not happen! */
Expand All @@ -116,10 +116,10 @@ mender_flash_set_pending_image(void *handle) {
}

mender_err_t
mender_flash_abort_deployment(void *handle) {
mender_flash_abort_deployment(void **handle) {

/* Release memory */
free(handle);
FREE_AND_NULL(*handle);

return MENDER_OK;
}
Expand Down
3 changes: 3 additions & 0 deletions platform/tls/generic/mbedtls/src/mender-tls.c
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,9 @@ mender_tls_init_authentication_keys(mender_err_t (*get_user_provided_keys)(char
case MENDER_LOOP_DETECTED:
assert(false && "Unexpected return value");
/* fallthrough */
case MENDER_ABORTED:
assert(false && "Unexpected return value");
/* fallthrough */
case MENDER_FAIL:
mender_log_error("Unable to get authentication keys from store");
return MENDER_FAIL;
Expand Down