diff --git a/.github/workflows/overcloud-host-image-build.yml b/.github/workflows/overcloud-host-image-build.yml index 13bf071f7..9577b042f 100644 --- a/.github/workflows/overcloud-host-image-build.yml +++ b/.github/workflows/overcloud-host-image-build.yml @@ -130,7 +130,7 @@ jobs: - name: Install OpenStack client run: | source venvs/kayobe/bin/activate && - pip install python-openstackclient + pip install python-openstackclient -c https://opendev.org/openstack/requirements/raw/branch/stable/${{ steps.openstack_release.outputs.openstack_release }}/upper-constraints.txt - name: Build a Rocky Linux 9 overcloud host image id: build_rocky_9 diff --git a/.github/workflows/stackhpc-container-image-build.yml b/.github/workflows/stackhpc-container-image-build.yml index 4258fe868..ce230dc1f 100644 --- a/.github/workflows/stackhpc-container-image-build.yml +++ b/.github/workflows/stackhpc-container-image-build.yml @@ -145,6 +145,10 @@ jobs: env: KAYOBE_VAULT_PASSWORD: ${{ secrets.KAYOBE_VAULT_PASSWORD }} + - name: Prune local Kolla container images over 1 week old + run: | + sudo docker image prune --all --force --filter until=168h --filter="label=kolla_version" + - name: Build and push kolla overcloud images run: | args="${{ github.event.inputs.regexes }}" @@ -180,6 +184,9 @@ jobs: run: | sudo docker image ls --filter "reference=ark.stackhpc.com/stackhpc-dev/*:*${{ matrix.distro }}*${{ needs.generate-tag.outputs.datetime_tag }}" > ${{ matrix.distro }}-container-images + - name: Fail if no images have been built + run: if [ $(wc -l < ${{ matrix.distro }}-container-images) -le 1 ]; then exit 1; fi + - name: Upload container images artifact uses: actions/upload-artifact@v3 with: @@ -187,10 +194,6 @@ jobs: path: ${{ matrix.distro }}-container-images retention-days: 7 - - name: Prune local Kolla container images over 1 week old - run: | - sudo docker image prune --all --force --filter until=168h --filter="label=kolla_version" - sync-container-repositories: name: Trigger 
container image repository sync needs: diff --git a/doc/source/configuration/release-train.rst b/doc/source/configuration/release-train.rst index 88079d8dd..318e60307 100644 --- a/doc/source/configuration/release-train.rst +++ b/doc/source/configuration/release-train.rst @@ -186,6 +186,16 @@ promoted to production: kayobe playbook run $KAYOBE_CONFIG_PATH/ansible/pulp-repo-promote-production.yml +Synchronising all Kolla container images can take a long time. A limited list +of images can be synchronised using the ``stackhpc_pulp_images_kolla_filter`` +variable, which accepts a whitespace-separated list of regular expressions +matching Kolla image names. Usage is similar to ``kolla-build`` CLI arguments. +For example: + +.. code-block:: console + + kayobe playbook run $KAYOBE_CONFIG_PATH/ansible/pulp-container-sync.yml -e stackhpc_pulp_images_kolla_filter='"^glance nova-compute$"' + Initial seed deployment ----------------------- diff --git a/doc/source/configuration/vault.rst b/doc/source/configuration/vault.rst index cacd12e96..93dcaf7d7 100644 --- a/doc/source/configuration/vault.rst +++ b/doc/source/configuration/vault.rst @@ -229,6 +229,18 @@ Enable the required TLS variables in kayobe and kolla kayobe overcloud service deploy + If VM provisioning fails with an error with this format: + + .. code-block:: + + Unable to establish connection to http://:9696/v2.0/ports/some-sort-of-uuid: Connection aborted + + Restart the nova-compute container on all hypervisors: + + .. 
code-block:: + + kayobe overcloud host command run --command "docker restart nova_compute" --become --show-output -l compute + Barbican integration ==================== diff --git a/doc/source/configuration/walled-garden.rst b/doc/source/configuration/walled-garden.rst index 9a45ea4db..937619a5a 100644 --- a/doc/source/configuration/walled-garden.rst +++ b/doc/source/configuration/walled-garden.rst @@ -77,7 +77,8 @@ proxy: - "127.0.0.1" - "localhost" - "{{ ('http://' ~ docker_registry) | urlsplit('hostname') if docker_registry else '' }}" - - "{{ admin_oc_net_name | net_ip(inventory_hostname=groups['seed'][0]) }}" + - "{{ lookup('vars', admin_oc_net_name ~ '_ips')[groups.seed.0] }}" + - "{{ lookup('vars', admin_oc_net_name ~ '_ips')[inventory_hostname] }}" - "{{ kolla_external_fqdn }}" - "{{ kolla_internal_fqdn }}" diff --git a/doc/source/configuration/wazuh.rst b/doc/source/configuration/wazuh.rst index ee8999339..ef6216580 100644 --- a/doc/source/configuration/wazuh.rst +++ b/doc/source/configuration/wazuh.rst @@ -17,8 +17,8 @@ The short version #. Deploy the Wazuh agents: ``kayobe playbook run $KAYOBE_CONFIG_PATH/ansible/wazuh-agent.yml`` -Wazuh Manager -============= +Wazuh Manager Host +================== Provision using infra-vms ------------------------- @@ -303,7 +303,7 @@ Encrypt the keys (and remember to commit to git): ``ansible-vault encrypt --vault-password-file ~/vault.pass $KAYOBE_CONFIG_PATH/ansible/wazuh/certificates/certs/*.key`` Verification -============== +------------ The Wazuh portal should be accessible on port 443 of the Wazuh manager’s IPs (using HTTPS, with the root CA cert in ``etc/kayobe/ansible/wazuh/certificates/wazuh-certificates/root-ca.pem``). @@ -315,11 +315,9 @@ Troubleshooting Logs are in ``/var/log/wazuh-indexer/wazuh.log``. There are also logs in the journal. -============ Wazuh agents ============ - Wazuh agent playbook is located in ``etc/kayobe/ansible/wazuh-agent.yml``. 
Wazuh agent variables file is located in ``etc/kayobe/inventory/group_vars/wazuh-agent/wazuh-agent``. @@ -333,13 +331,13 @@ Deploy the Wazuh agents: ``kayobe playbook run $KAYOBE_CONFIG_PATH/ansible/wazuh-agent.yml`` Verification -============= +------------ The Wazuh agents should register with the Wazuh manager. This can be verified via the agents page in Wazuh Portal. Check CIS benchmark output in agent section. -Additional resources: -===================== +Additional resources +-------------------- For times when you need to upgrade wazuh with elasticsearch to version with opensearch or you just need to deinstall all wazuh components: Wazuh purge script: https://github.com/stackhpc/wazuh-server-purge diff --git a/etc/kayobe/ansible/ovn-fix-chassis-priorities.yml b/etc/kayobe/ansible/ovn-fix-chassis-priorities.yml new file mode 100644 index 000000000..20542df88 --- /dev/null +++ b/etc/kayobe/ansible/ovn-fix-chassis-priorities.yml @@ -0,0 +1,69 @@ +--- +# Sometimes, typically after restarting OVN services, the priorities of entries +# in the ha_chassis and gateway_chassis tables in the OVN northbound database +# can become misaligned. This results in broken routing for external (bare +# metal/SR-IOV) ports. + +# This playbook can be used to fix the issue by realigning the priorities of +# the table entries. It does so by assigning the highest priority to the +# "first" (sorted alphabetically) OVN NB DB host. This results in all gateways +# being scheduled to a single host, but is less complicated than trying to +# balance them (and it's also not clear to me how to map between individual +# ha_chassis and gateway_chassis entries). 
+ +# The playbook can be run as follows: +# kayobe playbook run $KAYOBE_CONFIG_PATH/ansible/ovn-fix-chassis-priorities.yml + +# If the 'controllers' group does not align with the group used to deploy the +# OVN NB DB, this can be overridden by passing the following: +# '-e ovn_nb_db_group=some_other_group' + +- name: Find OVN NB DB leader + hosts: "{{ ovn_nb_db_group | default('controllers') }}" + tasks: + - name: Find the OVN NB DB leader + command: docker exec -it ovn_nb_db ovn-nbctl get-connection + changed_when: false + failed_when: false + register: ovn_check_result + check_mode: no + + - name: Group hosts by leader/follower role + group_by: + key: "ovn_nb_{{ 'leader' if ovn_check_result.rc == 0 else 'follower' }}" + changed_when: false + + - name: Assert one leader exists + assert: + that: + - groups['ovn_nb_leader'] | default([]) | length == 1 + +- name: Fix OVN chassis priorities + hosts: ovn_nb_leader + vars: + ovn_nb_db_group: controllers + ovn_nb_db_hosts_sorted: "{{ query('inventory_hostnames', ovn_nb_db_group) | sort | list }}" + ha_chassis_max_priority: 32767 + gateway_chassis_max_priority: "{{ ovn_nb_db_hosts_sorted | length }}" + tasks: + - name: Fix ha_chassis priorities + command: >- + docker exec -it ovn_nb_db + bash -c ' + ovn-nbctl find ha_chassis chassis_name={{ item }} | + awk '\''$1 == "_uuid" { print $3 }'\'' | + while read uuid; do ovn-nbctl set ha_chassis $uuid priority={{ priority }}; done' + loop: "{{ ovn_nb_db_hosts_sorted }}" + vars: + priority: "{{ ha_chassis_max_priority | int - ovn_nb_db_hosts_sorted.index(item) }}" + + - name: Fix gateway_chassis priorities + command: >- + docker exec -it ovn_nb_db + bash -c ' + ovn-nbctl find gateway_chassis chassis_name={{ item }} | + awk '\''$1 == "_uuid" { print $3 }'\'' | + while read uuid; do ovn-nbctl set gateway_chassis $uuid priority={{ priority }}; done' + loop: "{{ ovn_nb_db_hosts_sorted }}" + vars: + priority: "{{ gateway_chassis_max_priority | int - ovn_nb_db_hosts_sorted.index(item)
}}" diff --git a/etc/kayobe/ansible/reboot.yml b/etc/kayobe/ansible/reboot.yml index a284dd425..8810afd7f 100644 --- a/etc/kayobe/ansible/reboot.yml +++ b/etc/kayobe/ansible/reboot.yml @@ -1,7 +1,7 @@ --- - name: Reboot the host hosts: seed-hypervisor:seed:overcloud:infra-vms - serial: "{{ lookup('env', 'ANSIBLE_SERIAL') | default(0, true) }}" + serial: "{{ lookup('env', 'ANSIBLE_SERIAL') | default(1, true) }}" tags: - reboot tasks: diff --git a/etc/kayobe/kolla/config/bifrost.yml b/etc/kayobe/kolla/config/bifrost/bifrost.yml similarity index 100% rename from etc/kayobe/kolla/config/bifrost.yml rename to etc/kayobe/kolla/config/bifrost/bifrost.yml diff --git a/etc/kayobe/pulp-host-image-versions.yml b/etc/kayobe/pulp-host-image-versions.yml index aa4082005..7be308a6f 100644 --- a/etc/kayobe/pulp-host-image-versions.yml +++ b/etc/kayobe/pulp-host-image-versions.yml @@ -1,5 +1,5 @@ --- # Overcloud host image versioning tags # These images must be in SMS, since they are used by our AIO CI runners -stackhpc_rocky_9_overcloud_host_image_version: "yoga-20230515T145140" +stackhpc_rocky_9_overcloud_host_image_version: "yoga-20230929T133006" stackhpc_ubuntu_jammy_overcloud_host_image_version: "yoga-20230609T120720" diff --git a/etc/kayobe/pulp-repo-versions.yml b/etc/kayobe/pulp-repo-versions.yml index f43493d9f..641fa83c7 100644 --- a/etc/kayobe/pulp-repo-versions.yml +++ b/etc/kayobe/pulp-repo-versions.yml @@ -7,7 +7,7 @@ stackhpc_pulp_repo_centos_stream_9_opstools_version: 20230615T071742 stackhpc_pulp_repo_centos_stream_9_storage_ceph_quincy_version: 20230712T025152 stackhpc_pulp_repo_docker_ce_ubuntu_version: 20230921T005001 stackhpc_pulp_repo_elrepo_9_version: 20230907T075311 -stackhpc_pulp_repo_epel_9_version: 20230921T005001 +stackhpc_pulp_repo_epel_9_version: 20230929T005202 stackhpc_pulp_repo_grafana_version: 20230921T005001 stackhpc_pulp_repo_opensearch_2_x_version: 20230725T013015 stackhpc_pulp_repo_opensearch_dashboards_2_x_version: 20230725T013015 @@ -21,11 
+21,11 @@ stackhpc_pulp_repo_rocky_9_1_baseos_version: 20230921T005001 stackhpc_pulp_repo_rocky_9_1_crb_version: 20230921T005001 stackhpc_pulp_repo_rocky_9_1_extras_version: 20230921T005001 stackhpc_pulp_repo_rocky_9_1_highavailability_version: 20230921T005001 -stackhpc_pulp_repo_rocky_9_2_appstream_version: 20230825T131407 -stackhpc_pulp_repo_rocky_9_2_baseos_version: 20230825T131407 -stackhpc_pulp_repo_rocky_9_2_crb_version: 20230825T131407 -stackhpc_pulp_repo_rocky_9_2_extras_version: 20230825T131407 -stackhpc_pulp_repo_rocky_9_2_highavailability_version: 20230805T012805 +stackhpc_pulp_repo_rocky_9_2_appstream_version: 20230928T024829 +stackhpc_pulp_repo_rocky_9_2_baseos_version: 20230928T024829 +stackhpc_pulp_repo_rocky_9_2_crb_version: 20230928T024829 +stackhpc_pulp_repo_rocky_9_2_extras_version: 20230915T001040 +stackhpc_pulp_repo_rocky_9_2_highavailability_version: 20230918T015928 stackhpc_pulp_repo_ubuntu_jammy_security_version: 20230908T053616 stackhpc_pulp_repo_ubuntu_jammy_version: 20230908T053616 stackhpc_pulp_repo_ubuntu_cloud_archive_version: 20230908T112533 diff --git a/etc/kayobe/stackhpc-overcloud-dib.yml b/etc/kayobe/stackhpc-overcloud-dib.yml index 881f2f674..6157a2671 100644 --- a/etc/kayobe/stackhpc-overcloud-dib.yml +++ b/etc/kayobe/stackhpc-overcloud-dib.yml @@ -67,12 +67,15 @@ stackhpc_overcloud_dib_packages: - "vim" - "git" - "less" + - "python3" - "{% if os_distribution == 'ubuntu' %}netbase{% endif %}" - "{% if os_distribution == 'ubuntu' %}iputils-ping{% endif %}" - "{% if os_distribution == 'ubuntu' %}curl{% endif %}" - "{% if os_distribution == 'ubuntu' %}apt-utils{% endif %}" - "{% if os_distribution == 'rocky' %}NetworkManager-config-server{% endif %}" - "{% if os_distribution == 'rocky' %}linux-firmware{% endif %}" + - "{% if os_distribution == 'rocky' %}cloud-utils-growpart{% endif %}" + - "{% if os_distribution == 'ubuntu' %}cloud-guest-utils{% endif %}" # StackHPC overcloud DIB image block device configuration. 
# This image layout conforms to the CIS partition benchmarks. diff --git a/releasenotes/notes/bump-rocky8-snapshots-2023-09-29-e115427edd3334c7.yaml b/releasenotes/notes/bump-rocky8-snapshots-2023-09-29-e115427edd3334c7.yaml new file mode 100644 index 000000000..f44c44d98 --- /dev/null +++ b/releasenotes/notes/bump-rocky8-snapshots-2023-09-29-e115427edd3334c7.yaml @@ -0,0 +1,7 @@ +--- +security: + - | + The Rocky 8 minor version has been bumped to 8.8 and new snapshots have + been created to include fixes for Zenbleed (CVE-2023-20593) and Downfall + (CVE-2022-40982). It is recommended that you update your OS packages and + reboot into the new kernel as soon as possible. diff --git a/releasenotes/notes/bump-rocky9-snapshots-2023-09-29-c736c3d37afd7e5c.yaml b/releasenotes/notes/bump-rocky9-snapshots-2023-09-29-c736c3d37afd7e5c.yaml new file mode 100644 index 000000000..83a9f5565 --- /dev/null +++ b/releasenotes/notes/bump-rocky9-snapshots-2023-09-29-c736c3d37afd7e5c.yaml @@ -0,0 +1,7 @@ +--- +security: + - | + The snapshots for Rocky 9.2 have been refreshed to include fixes for + Zenbleed (CVE-2023-20593) and Downfall (CVE-2022-40982). It is recommended + that you update your OS packages and reboot into the new kernel as soon as + possible. diff --git a/releasenotes/notes/reboot-default-serial-5944a2a648da71c7.yaml b/releasenotes/notes/reboot-default-serial-5944a2a648da71c7.yaml new file mode 100644 index 000000000..7eb2e28cd --- /dev/null +++ b/releasenotes/notes/reboot-default-serial-5944a2a648da71c7.yaml @@ -0,0 +1,6 @@ +--- +upgrade: + - | + The ``reboot.yml`` custom Ansible playbook now defaults to rebooting only + one host at a time. Existing behaviour can be retained by setting + ``ANSIBLE_SERIAL=0``.