From fe3c819c0beb5e26ff39fb5a5a15a6a83b2bbf56 Mon Sep 17 00:00:00 2001 From: Guillaume Demonet Date: Tue, 26 Jul 2022 10:30:36 +0200 Subject: [PATCH] salt: Handle duplicates in `cri.wait_pod` It usually happens that when kubelet replaces a static pod, there will be a time when two instances of this pod coexist in CRI representation. --- CHANGELOG.md | 7 +++++++ salt/_modules/cri.py | 9 +++++++-- salt/tests/unit/modules/files/test_cri.yaml | 22 +++++++++++++++------ salt/tests/unit/modules/test_cri.py | 1 + 4 files changed, 31 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4858800400..26806e40d2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,13 @@ # CHANGELOG ## Release 123.0.1 (in development) +### Bug fixes + +- [#3827](https://github.com/scality/metalk8s/issues/3827) + Handle an issue with duplicate pods in CRI during a static pod update, + preventing upgrades to 123.0.0 when using an inconsistent registry HA setup + (PR[#3828](https://github.com/scality/metalk8s/pull/3828)) + ## Release 123.0.0 ### Additions diff --git a/salt/_modules/cri.py b/salt/_modules/cri.py index 5594e75fe3..ecffadf971 100644 --- a/salt/_modules/cri.py +++ b/salt/_modules/cri.py @@ -340,8 +340,13 @@ def wait_pod( start_time = time.time() while time.time() - start_time < timeout: - current_id = get_pod_id(name=name, state=state, ignore_not_found=True) - if current_id and current_id != last_id: + current_ids = get_pod_id( + name=name, + state=state, + ignore_not_found=True, + multiple=True, # We may have two during a replacement + ) + if current_ids and last_id not in current_ids: return True remaining = timeout + start_time - time.time() if remaining < sleep: # Don't sleep if we know it's going to time out diff --git a/salt/tests/unit/modules/files/test_cri.yaml b/salt/tests/unit/modules/files/test_cri.yaml index b68b412259..210f23d7a6 100644 --- a/salt/tests/unit/modules/files/test_cri.yaml +++ b/salt/tests/unit/modules/files/test_cri.yaml @@ -129,17 +129,17 @@ wait_pod: pod_ids: - null - null - - abc123 + - [abc123] result: True - # 1. Pod was updated + # 1. Pod was updated (simple delete then create) - name: example timeout: 5 sleep: 1 last_id: abc123 pod_ids: - - abc123 + - [abc123] - null - - def456 + - [def456] result: True # 2. Some crictl error (raise) - name: example @@ -152,7 +152,7 @@ wait_pod: sleep: 1 last_id: abc123 pod_ids: - - abc123 + - [abc123] - null - null raises: True @@ -174,7 +174,17 @@ wait_pod: last_id: abc123 raise_on_timeout: False pod_ids: - - abc123 + - [abc123] - null - null result: False + # 6. Pod was updated (create then delete) + - name: example + timeout: 5 + sleep: 1 + last_id: abc123 + pod_ids: + - [abc123] + - [abc123, def456] + - [def456] + result: True diff --git a/salt/tests/unit/modules/test_cri.py b/salt/tests/unit/modules/test_cri.py index 2bc163747b..3ec7253906 100644 --- a/salt/tests/unit/modules/test_cri.py +++ b/salt/tests/unit/modules/test_cri.py @@ -374,6 +374,7 @@ def pod_ids_mock(*a, **k): name=kwargs.get("name"), state=kwargs.get("state", "ready"), ignore_not_found=True, + multiple=True, ), ) if pod_ids_raise: