From ad2d0a5d61cc5799005756712144f915b5b2f70d Mon Sep 17 00:00:00 2001 From: "Alexander V. Nikolaev" Date: Tue, 10 Sep 2024 18:01:11 +0300 Subject: [PATCH 1/3] Remove unneeded .clone() Signed-off-by: Alexander V. Nikolaev --- src/admin/server.rs | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/admin/server.rs b/src/admin/server.rs index 6ccd5ee..3dd9d4c 100644 --- a/src/admin/server.rs +++ b/src/admin/server.rs @@ -184,11 +184,7 @@ impl AdminServiceImpl { debug!("Monitoring {}...", &entry.name); match self.get_remote_status(&entry).await { Err(err) => { - error!( - "could not get status of unit {}: {}", - entry.name.clone(), - err - ); + error!("could not get status of unit {}: {}", &entry.name, err); self.handle_error(entry) .await .with_context(|| "during handle error")? From 05716095b121db74ea190c36bafb298db1fc5d18 Mon Sep 17 00:00:00 2001 From: "Alexander V. Nikolaev" Date: Tue, 10 Sep 2024 18:04:25 +0300 Subject: [PATCH 2/3] Fix bug with deregistering failed application Signed-off-by: Alexander V. 
Nikolaev --- common/src/types.rs | 7 ++++++- src/admin/server.rs | 4 +++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/common/src/types.rs b/common/src/types.rs index f9ea79e..84ccdae 100644 --- a/common/src/types.rs +++ b/common/src/types.rs @@ -149,7 +149,12 @@ pub struct UnitStatus { impl UnitStatus { pub fn is_running(&self) -> bool { - self.active_state == "active" + self.active_state == "active" && self.load_state == "loaded" && self.sub_state == "running" + } + pub fn is_exitted(&self) -> bool { + self.active_state == "inactive" + && self.load_state == "not-found" + && self.sub_state == "dead" } } diff --git a/src/admin/server.rs b/src/admin/server.rs index 3dd9d4c..0b23bb1 100644 --- a/src/admin/server.rs +++ b/src/admin/server.rs @@ -160,7 +160,9 @@ impl AdminServiceImpl { pub async fn handle_error(&self, entry: RegistryEntry) -> anyhow::Result<()> { match (entry.r#type.vm, entry.r#type.service) { (VmType::AppVM, ServiceType::App) => { - self.registry.deregister(&entry.name)?; + if entry.status.is_exitted() { + self.registry.deregister(&entry.name)?; + } Ok(()) } (VmType::AppVM, ServiceType::Mgr) | (VmType::SysVM, ServiceType::Mgr) => { From 06f5849800870265bcc32bec0b926ae7c849db83 Mon Sep 17 00:00:00 2001 From: "Alexander V. Nikolaev" Date: Tue, 10 Sep 2024 18:04:43 +0300 Subject: [PATCH 3/3] Improve test coverage for restarting applications Signed-off-by: Alexander V. 
Nikolaev --- nixos/tests/admin.nix | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/nixos/tests/admin.nix b/nixos/tests/admin.nix index b783882..8ba7825 100644 --- a/nixos/tests/admin.nix +++ b/nixos/tests/admin.nix @@ -200,7 +200,10 @@ in admin = adminSettings; tls = mkTls "chromium-vm"; applications = lib.mkForce ( - builtins.toJSON { "foot" = "/run/current-system/sw/bin/run-waypipe ${pkgs.foot}/bin/foot"; } + builtins.toJSON { + "foot" = "/run/current-system/sw/bin/run-waypipe ${pkgs.foot}/bin/foot"; + "clearexit" = "/run/current-system/sw/bin/sleep 5"; + } ); }; }; @@ -288,10 +291,23 @@ in swaymsg("exec ssh -R /tmp/vsock:/tmp/vsock -f -N ${addrs.appvm}") time.sleep(5) # Give ssh some time to setup remote socket - #swaymsg("exec run-waypipe foot") - print(hostvm.succeed("${cli} --addr ${nodes.adminvm.config.givc.admin.addr} --port ${nodes.adminvm.config.givc.admin.port} --cacert ${nodes.hostvm.givc.host.tls.caCertPath} --cert ${nodes.hostvm.givc.host.tls.certPath} --key ${nodes.hostvm.givc.host.tls.keyPath} ${if tls then "" else "--notls"} --name ${nodes.adminvm.config.givc.admin.name} start foot")) - time.sleep(10) # Give few seconds to application to spin up - wait_for_window("ghaf@appvm") + with subtest("Clean run"): + print(hostvm.succeed("${cli} --addr ${nodes.adminvm.config.givc.admin.addr} --port ${nodes.adminvm.config.givc.admin.port} --cacert ${nodes.hostvm.givc.host.tls.caCertPath} --cert ${nodes.hostvm.givc.host.tls.certPath} --key ${nodes.hostvm.givc.host.tls.keyPath} ${if tls then "" else "--notls"} --name ${nodes.adminvm.config.givc.admin.name} start foot")) + time.sleep(10) # Give few seconds to application to spin up + wait_for_window("ghaf@appvm") + + with subtest("crash and restart"): + # Crash application + appvm.succeed("pkill foot") + time.sleep(10) + # .. 
then ask to restart + print(hostvm.succeed("${cli} --addr ${nodes.adminvm.config.givc.admin.addr} --port ${nodes.adminvm.config.givc.admin.port} --cacert ${nodes.hostvm.givc.host.tls.caCertPath} --cert ${nodes.hostvm.givc.host.tls.certPath} --key ${nodes.hostvm.givc.host.tls.keyPath} ${if tls then "" else "--notls"} --name ${nodes.adminvm.config.givc.admin.name} start foot")) + wait_for_window("ghaf@appvm") + + with subtest("clear exit and restart"): + print(hostvm.succeed("${cli} --addr ${nodes.adminvm.config.givc.admin.addr} --port ${nodes.adminvm.config.givc.admin.port} --cacert ${nodes.hostvm.givc.host.tls.caCertPath} --cert ${nodes.hostvm.givc.host.tls.certPath} --key ${nodes.hostvm.givc.host.tls.keyPath} ${if tls then "" else "--notls"} --name ${nodes.adminvm.config.givc.admin.name} start --vm foot-vm clearexit")) + time.sleep(20) # Give few seconds to application to spin up, exit, then start it again + print(hostvm.succeed("${cli} --addr ${nodes.adminvm.config.givc.admin.addr} --port ${nodes.adminvm.config.givc.admin.port} --cacert ${nodes.hostvm.givc.host.tls.caCertPath} --cert ${nodes.hostvm.givc.host.tls.certPath} --key ${nodes.hostvm.givc.host.tls.keyPath} ${if tls then "" else "--notls"} --name ${nodes.adminvm.config.givc.admin.name} start --vm foot-vm clearexit")) ''; }; };