Skip to content

Commit

Permalink
Merge pull request #10721 from dahorak/enhance-ai-ocp-deployment-reboot-not-connected-server
Browse files Browse the repository at this point in the history

reboot server if not properly discovered by Assisted Installer
  • Loading branch information
petr-balogh authored Oct 29, 2024
2 parents ec14e1f + 5e441dc commit 41548df
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 2 deletions.
7 changes: 7 additions & 0 deletions ocs_ci/deployment/assisted_installer.py
Original file line number Diff line number Diff line change
Expand Up @@ -319,6 +319,13 @@ def wait_for_discovered_nodes(self, expected_nodes):
)
break

def get_infra_env_hosts(self):
    """
    Fetch the hosts registered in this cluster's Infrastructure Environment.

    The lookup is delegated to the API client held in ``self.api`` and is
    scoped to the Infrastructure Environment identified by ``self.infra_id``.

    Returns:
        list: list of discovered hosts in the Infrastructure Environment

    """
    infra_env_hosts = self.api.get_infra_env_hosts(infra_env_id=self.infra_id)
    return infra_env_hosts

@retry(HostValidationFailed, tries=5, delay=60, backoff=1)
def verify_validations_info_for_discovered_nodes(self):
"""
Expand Down
14 changes: 12 additions & 2 deletions ocs_ci/deployment/baremetal.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
from ocs_ci.deployment.ocp import OCPDeployment as BaseOCPDeployment
from ocs_ci.deployment import assisted_installer
from ocs_ci.ocs import constants, ocp, exceptions
from ocs_ci.ocs.exceptions import CommandFailed, RhcosImageNotFound
from ocs_ci.ocs.exceptions import CommandFailed, RhcosImageNotFound, TimeoutExpiredError
from ocs_ci.ocs.node import get_nodes
from ocs_ci.ocs.openshift_ops import OCP
from ocs_ci.utility import ibmcloud_bm
Expand Down Expand Up @@ -1054,7 +1054,17 @@ def deploy(self, log_cli_level="DEBUG"):
expected_node_num = (
config.ENV_DATA["master_replicas"] + config.ENV_DATA["worker_replicas"]
)
self.ai_cluster.wait_for_discovered_nodes(expected_node_num)
try:
self.ai_cluster.wait_for_discovered_nodes(expected_node_num)
except TimeoutExpiredError:
discovered_hosts = [
host["requested_hostname"]
for host in self.ai_cluster.get_infra_env_hosts()
]
for machine in master_nodes + worker_nodes:
if machine not in discovered_hosts:
self.set_pxe_boot_and_reboot(machine)
self.ai_cluster.wait_for_discovered_nodes(expected_node_num)

# verify validations info
self.ai_cluster.verify_validations_info_for_discovered_nodes()
Expand Down

0 comments on commit 41548df

Please sign in to comment.