Skip to content

Commit

Permalink
Fix host image builds on Arc runners
Browse files Browse the repository at this point in the history
Arc runners are kubernetes-orchestrated github runners. Host image
builds do not work on these runners, so this commit adapts the host
image build workflow to spin up a worker VM which executes the build.
  • Loading branch information
markgoddard authored and Alex-Welsh committed Mar 26, 2024
1 parent 6b347d2 commit 6ce7d97
Show file tree
Hide file tree
Showing 13 changed files with 476 additions and 186 deletions.
402 changes: 239 additions & 163 deletions .github/workflows/overcloud-host-image-build.yml

Large diffs are not rendered by default.

20 changes: 20 additions & 0 deletions .github/workflows/stackhpc-ci-cleanup.yml
Original file line number Diff line number Diff line change
Expand Up @@ -55,3 +55,23 @@ jobs:
OS_CLOUD: openstack
OS_APPLICATION_CREDENTIAL_ID: ${{ secrets.OS_APPLICATION_CREDENTIAL_ID }}
OS_APPLICATION_CREDENTIAL_SECRET: ${{ secrets.OS_APPLICATION_CREDENTIAL_SECRET }}

# Sweep for leftover host image builder instances (tagged
# skc-host-image-build) that have not changed in the last 5 hours and delete
# them. Deletion failures are remembered in `result` and reported through the
# exit status only after every instance has been attempted.
- name: Clean up host image builder instances over 5 hours old
  run: |
    result=0
    changes_before=$(date -Imin -d -5hours)
    for status in ACTIVE BUILD ERROR SHUTOFF; do
      # The command substitution is deliberately left unquoted: it yields one
      # server ID per word for the loop to iterate over.
      for instance in $(openstack server list --tags skc-host-image-build --os-compute-api-version 2.66 --format value --column ID --changes-before "$changes_before" --status "$status"); do
        echo "Cleaning up $status instance $instance"
        # Informational only. Run steps execute under `bash -e` by default, so
        # a failing `show` must not abort the sweep before the delete below
        # has been attempted and recorded.
        openstack server show "$instance" || true
        if ! openstack server delete "$instance"; then
          echo "Failed to delete $status instance $instance"
          result=1
        fi
      done
    done
    exit "$result"
  env:
    OS_CLOUD: openstack
    OS_APPLICATION_CREDENTIAL_ID: ${{ secrets.OS_APPLICATION_CREDENTIAL_ID }}
    OS_APPLICATION_CREDENTIAL_SECRET: ${{ secrets.OS_APPLICATION_CREDENTIAL_SECRET }}
42 changes: 42 additions & 0 deletions etc/kayobe/ansible/openstack-host-image-upload.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
---
# This playbook is designed to be used by the overcloud-host-image-build.yml
# GitHub workflow to upload newly-built images to a development cloud for
# testing and use in CI.
#
# os_distribution and os_release are not defined in this playbook and must be
# supplied by the caller. CLOUDS_YAML, OS_APPLICATION_CREDENTIAL_ID and
# OS_APPLICATION_CREDENTIAL_SECRET are read from the environment through the
# env lookup.
- name: Upload an OS image to Glance
  hosts: seed
  vars:
    local_image_path: "/opt/kayobe/images/overcloud-{{ os_distribution }}-{{ os_release }}/overcloud-{{ os_distribution }}-{{ os_release }}.qcow2"
    image_name: "overcloud-{{ os_distribution }}-{{ os_release }}"
  tasks:
    - name: Write out clouds.yml
      copy:
        content: "{{ lookup('ansible.builtin.env', 'CLOUDS_YAML') }}"
        dest: clouds.yaml
        # Quoted so the mode is always handled as an octal string.
        mode: "0664"

    - name: Write out secure.yml
      no_log: true
      vars:
        os_secrets:
          clouds:
            openstack:
              auth:
                application_credential_id: "{{ lookup('ansible.builtin.env', 'OS_APPLICATION_CREDENTIAL_ID') }}"
                application_credential_secret: "{{ lookup('ansible.builtin.env', 'OS_APPLICATION_CREDENTIAL_SECRET') }}"
      copy:
        content: "{{ os_secrets | to_nice_yaml }}"
        dest: secure.yaml
        # This file contains the application credential secret, so it is
        # restricted to the owning user instead of being group/world readable.
        mode: "0600"

    - name: Ensure dependencies are installed
      pip:
        name: openstacksdk

    - name: Upload an image to Glance
      openstack.cloud.image:
        cloud: openstack
        name: "{{ image_name }}"
        container_format: bare
        disk_format: qcow2
        state: present
        filename: "{{ local_image_path }}"
16 changes: 8 additions & 8 deletions etc/kayobe/ansible/pulp-host-image-upload.yml
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
---
- name: Upload and create a distribution for an image
hosts: localhost
hosts: seed
vars:
remote_pulp_url: "{{ stackhpc_release_pulp_url }}"
remote_pulp_username: "{{ stackhpc_image_repository_username }}"
remote_pulp_password: "{{ stackhpc_image_repository_password }}"
repository_name: "kayobe-images-{{ openstack_release }}-{{ os_distribution }}-{{ os_release }}"
base_path: "kayobe-images/{{ openstack_release }}/{{ os_distribution }}/{{ os_release }}"
pulp_base_path: "kayobe-images/{{ openstack_release }}/{{ os_distribution }}/{{ os_release }}"
tasks:
- name: Print image tag
debug:
Expand Down Expand Up @@ -74,7 +74,7 @@
username: "{{ remote_pulp_username }}"
password: "{{ remote_pulp_password }}"
name: "{{ repository_name }}_latest"
base_path: "{{ base_path }}/latest"
base_path: "{{ pulp_base_path }}/latest"
publication: "{{ publication_details.publication.pulp_href }}"
content_guard: development
state: present
Expand All @@ -86,7 +86,7 @@
username: "{{ remote_pulp_username }}"
password: "{{ remote_pulp_password }}"
name: "{{ repository_name }}_{{ host_image_tag }}"
base_path: "{{ base_path }}/{{ host_image_tag }}"
base_path: "{{ pulp_base_path }}/{{ host_image_tag }}"
publication: "{{ publication_details.publication.pulp_href }}"
content_guard: development
state: present
Expand All @@ -95,26 +95,26 @@
- name: Update new images file with versioned path
lineinfile:
path: /tmp/updated_images.txt
line: "{{ remote_pulp_url }}/pulp/content/{{ base_path }}/\
line: "{{ remote_pulp_url }}/pulp/content/{{ pulp_base_path }}/\
{{ host_image_tag }}/{{ found_files.files[0].path | basename }}"
create: true

- name: Update new images file with latest path
lineinfile:
path: /tmp/updated_images.txt
line: "{{ remote_pulp_url }}/pulp/content/{{ base_path }}/\
line: "{{ remote_pulp_url }}/pulp/content/{{ pulp_base_path }}/\
latest/{{ found_files.files[0].path | basename }}"
when: latest_distribution_details.changed

- name: Print versioned path
debug:
msg: "New versioned path: {{ remote_pulp_url }}/pulp/content/{{ base_path }}/\
msg: "New versioned path: {{ remote_pulp_url }}/pulp/content/{{ pulp_base_path }}/\
{{ host_image_tag }}/{{ found_files.files[0].path | basename }}"
when: latest_distribution_details.changed

- name: Print latest path
debug:
msg: "New latest path: {{ remote_pulp_url }}/pulp/content/{{ base_path }}/\
msg: "New latest path: {{ remote_pulp_url }}/pulp/content/{{ pulp_base_path }}/\
latest/{{ found_files.files[0].path | basename }}"
when: latest_distribution_details.changed

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,5 @@
###############################################################################
# Network interface definitions for the controller group.

# Controller interface on all-in-one network.
aio_interface: breth1
# Use dummy1 if it exists, otherwise the bridge will have no ports.
aio_bridge_ports: "{{ ['dummy1'] if 'dummy1' in hostvars[inventory_hostname].ansible_facts else [] }}"

###############################################################################
# Dummy variable to allow Ansible to accept this file.
workaround_ansible_issue_8743: yes

# Seed interface on all-in-one network.
aio_interface: eth0
2 changes: 1 addition & 1 deletion etc/kayobe/overcloud-dib.yml
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ overcloud_dib_host_packages_extra:
overcloud_dib_git_elements_extra:
- repo: "https://github.com/stackhpc/stackhpc-image-elements"
local: "{{ source_checkout_path }}/stackhpc-image-elements"
version: "v1.6.0"
version: "v1.6.1"
elements_path: "elements"

# List of git repositories containing Diskimage Builder (DIB) elements. See
Expand Down
10 changes: 5 additions & 5 deletions etc/kayobe/pulp-host-image-versions.yml
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
---
# Overcloud host image versioning tags
# These images must be in SMS, since they are used by our AIO CI runners
stackhpc_centos_8_stream_overcloud_host_image_version: "yoga-20230525T095243"
stackhpc_rocky_8_overcloud_host_image_version: "yoga-20230629T135322"
stackhpc_rocky_9_overcloud_host_image_version: "yoga-20240124T094316"
stackhpc_ubuntu_focal_overcloud_host_image_version: "yoga-20230609T120720"
stackhpc_ubuntu_jammy_overcloud_host_image_version: "yoga-20231012T121552"
stackhpc_centos_8_stream_overcloud_host_image_version: "yoga-20240326T091511"
stackhpc_rocky_8_overcloud_host_image_version: "yoga-20240326T091511"
stackhpc_rocky_9_overcloud_host_image_version: "yoga-20240326T091511"
stackhpc_ubuntu_focal_overcloud_host_image_version: "yoga-20240326T091511"
stackhpc_ubuntu_jammy_overcloud_host_image_version: "yoga-20240326T091511"
9 changes: 9 additions & 0 deletions terraform/host-image-builder/README.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
============================
Terraform Host Image Builder
============================

This Terraform configuration deploys a single VM on an OpenStack cloud, to
build overcloud host images.

This configuration is used in the GitHub Actions overcloud-host-image-build.yml
workflow.
7 changes: 7 additions & 0 deletions terraform/host-image-builder/outputs.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Values exported by the host image builder configuration.

output "access_ip_v4" {
  description = "IPv4 access address reported for the host image builder instance."
  value       = openstack_compute_instance_v2.kayobe-host-image-builder.access_ip_v4
}

output "access_interface" {
  description = "Interface name passed in through host_image_builder_interface."
  value       = var.host_image_builder_interface
}
14 changes: 14 additions & 0 deletions terraform/host-image-builder/provider.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# No provider "openstack" block is declared: the provider is left to use
# environment variables.

terraform {
  required_version = ">= 0.14"

  required_providers {
    openstack = {
      source = "terraform-provider-openstack/openstack"
    }
  }

  # Local backend with no extra settings.
  backend "local" {}
}
21 changes: 21 additions & 0 deletions terraform/host-image-builder/templates/userdata.cfg.tpl
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#cloud-config
# Keep the ephemeral disk from being mounted automatically.
mounts:
  - [/dev/vdb, null]

# WORKAROUND (currently unused): internal DNS is missing from the SMS lab, so
# the required names are appended to /etc/hosts instead.
runcmd:
  - 'echo "10.0.0.34 pelican pelican.service.compute.sms-lab.cloud" >> /etc/hosts'
  - 'echo "10.205.3.187 pulp-server pulp-server.internal.sms-cloud" >> /etc/hosts'

# The SSH key is configured here rather than through an ephemeral keypair, so
# that a failed destroy leaves only the instance to clean up.
ssh_authorized_keys:
  - ${ssh_public_key}

write_files:
  # WORKAROUND: https://bugs.launchpad.net/kolla-ansible/+bug/1995409
  # Wrapper that forwards ovs-vsctl invocations into the openvswitch_vswitchd
  # container.
  - path: /usr/bin/ovs-vsctl
    owner: root:root
    permissions: '0755'
    content: |
      #!/bin/bash
      docker exec openvswitch_vswitchd ovs-vsctl "$@"
10 changes: 10 additions & 0 deletions terraform/host-image-builder/terraform.tfvars
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
ssh_public_key = "id_rsa.pub"
ssh_username = "rocky"
host_image_builder_name = "skc-ci-host-image-builder"
# Must be a Rocky Linux 9 host to successfully build all images
# This MUST NOT be an LVM image. It can cause confusing conflicts with the built image.
host_image_builder_image = "Rocky-9-GenericCloud-Base-9.3-20231113.0.x86_64.qcow2"
host_image_builder_flavor = "en1.medium"
host_image_builder_network = "stackhpc-ci"
host_image_builder_subnet = "stackhpc-ci"
host_image_builder_interface = "eth0"
98 changes: 98 additions & 0 deletions terraform/host-image-builder/vm.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
# Input variables for the host image builder VM.

variable "ssh_public_key" {
  description = "Path to the SSH public key file whose contents are placed in the instance user data."
  type        = string
}

variable "ssh_username" {
  description = "User for the SSH connection to the instance; also used to build the remote script path under /home."
  type        = string
}

variable "host_image_builder_name" {
  description = "Name given to the builder instance."
  type        = string
  default     = "kayobe-host-image-builder"
}

variable "host_image_builder_image" {
  description = "Name or UUID of the image the builder boots from. A value shaped like a UUID is used as-is; anything else is looked up by name."
  type        = string
  default     = "Rocky-9"
}

variable "host_image_builder_interface" {
  description = "Interface name exposed unchanged through the access_interface output."
  type        = string
  default     = "eth0"
}

variable "host_image_builder_flavor" {
  description = "Name of the flavor intended for the builder instance."
  type        = string
}

variable "host_image_builder_network" {
  description = "Name of the network the builder instance is attached to."
  type        = string
}

variable "host_image_builder_subnet" {
  description = "Name of the subnet looked up by the openstack_networking_subnet_v2 data source."
  type        = string
}

variable "host_image_builder_volume_size" {
  description = "Size of the boot volume created from the image (gigabytes, per the OpenStack provider's block_device volume_size)."
  type        = number
  default     = 150
}

variable "host_image_builder_tags" {
  description = "Tags applied to the builder instance."
  type        = list(string)
  default     = []
}

locals {
  # True when the configured image has the 8-4-4-4-12 lowercase-hex shape of a
  # UUID, in which case no lookup by name is needed.
  image_is_uuid = can(regex("^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$", var.host_image_builder_image))
}

# Resolve the image by name, preferring the most recent match. No instance of
# this data source is created when a UUID was supplied.
data "openstack_images_image_v2" "image" {
  count       = local.image_is_uuid ? 0 : 1
  name        = var.host_image_builder_image
  most_recent = true
}

# Look up the subnet by name.
# NOTE(review): nothing in the visible configuration references this data
# source - confirm whether it is still required.
data "openstack_networking_subnet_v2" "network" {
  name = var.host_image_builder_subnet
}

# The builder VM. It boots from a volume created from the selected image and
# receives its SSH key through the rendered cloud-init user data.
resource "openstack_compute_instance_v2" "kayobe-host-image-builder" {
  name = var.host_image_builder_name
  # Take the flavor from the variable instead of a hard-coded name, so that
  # host_image_builder_flavor (set in terraform.tfvars) actually takes effect.
  flavor_name  = var.host_image_builder_flavor
  config_drive = true
  user_data    = templatefile("templates/userdata.cfg.tpl", { ssh_public_key = file(var.ssh_public_key) })

  network {
    name = var.host_image_builder_network
  }

  block_device {
    # A UUID is used directly; otherwise use the ID found by the name lookup.
    uuid                  = local.image_is_uuid ? var.host_image_builder_image : data.openstack_images_image_v2.image[0].id
    source_type           = "image"
    volume_size           = var.host_image_builder_volume_size
    boot_index            = 0
    destination_type      = "volume"
    delete_on_termination = true
  }

  tags = var.host_image_builder_tags
}

# Hold further progress until the instance accepts an SSH connection: the
# provisioner only succeeds once a trivial remote script has run.
resource "null_resource" "kayobe-host-image-builder" {
  provisioner "remote-exec" {
    inline = [
      "#!/bin/sh",
      "echo 'connected!'",
    ]

    connection {
      host        = openstack_compute_instance_v2.kayobe-host-image-builder.access_ip_v4
      user        = var.ssh_username
      private_key = file("id_rsa")
      # By default Terraform would run the start script from /tmp, which is
      # noexec on the current images, so the script goes in the user's home
      # directory instead.
      script_path = "/home/${var.ssh_username}/start.sh"
    }
  }
}

0 comments on commit 6ce7d97

Please sign in to comment.