diff --git a/roles/grubcmdline/tasks/main.yml b/roles/grubcmdline/tasks/main.yml
index 8a329e7..f2f12d8 100644
--- a/roles/grubcmdline/tasks/main.yml
+++ b/roles/grubcmdline/tasks/main.yml
@@ -83,7 +83,7 @@
     - ansible_facts.os_family == "RedHat"
     - ansible_facts.distribution_version is version('8.0', '<=')
 
-- name: Generate new grub config (Ubuntu/Debian) # noqa: no-changed-when
+- name: Generate new grub config (Ubuntu/Debian)  # noqa: no-changed-when
   ansible.builtin.command: /usr/sbin/update-grub
   become: true
   when: ansible_facts.os_family == "Debian"
diff --git a/roles/iommu/tasks/main.yml b/roles/iommu/tasks/main.yml
index 03c8688..4ca9872 100644
--- a/roles/iommu/tasks/main.yml
+++ b/roles/iommu/tasks/main.yml
@@ -3,7 +3,7 @@
   ansible.builtin.include_role:
     name: stackhpc.linux.grubcmdline
   vars:
-    kernel_cmdline: # noqa: var-naming[no-role-prefix]
+    kernel_cmdline:  # noqa: var-naming[no-role-prefix]
       - intel_iommu=on
     kernel_cmdline_remove:  # noqa: var-naming[no-role-prefix]
       - ^intel_iommu=
diff --git a/roles/sriov/README.md b/roles/sriov/README.md
new file mode 100644
index 0000000..600f4eb
--- /dev/null
+++ b/roles/sriov/README.md
@@ -0,0 +1,54 @@
+stackhpc.sriov
+==============
+
+[![Build Status](https://travis-ci.com/stackhpc/ansible-role-sriov.svg?branch=master)](https://travis-ci.com/stackhpc/ansible-role-sriov)
+
+Ansible role to enable SR-IOV on network devices.
+
+Requirements
+------------
+None
+
+Role Variables
+--------------
+
+See `defaults/main.yml`
+
+Dependencies
+------------
+
+- `stackhpc.grubcmdline`
+
+Example Playbook
+----------------
+
+```
+- name: configure sr-iov
+  hosts: compute
+  vars:
+    sriov_devices:
+      - name: p4p1
+        numvfs: 63
+      - name: p3p1
+        numvfs: 8
+        # Don't add a udev rule to set numvfs. This can be useful if you use an alternative method
+        # to set the number of virtual functions e.g some custom scripts to enable VFLAG, but want
+        # to use the role to set firmware parameters.
+        on_boot_configuration_enabled: false
+  tasks:
+    - include_role:
+        name: sriov
+  handlers:
+    - name: reboot
+      include_tasks: tasks/reboot.yml
+```
+
+License
+-------
+
+Apache2
+
+Author Information
+------------------
+
+Will Szumski
diff --git a/roles/sriov/defaults/main.yml b/roles/sriov/defaults/main.yml
new file mode 100644
index 0000000..17376ce
--- /dev/null
+++ b/roles/sriov/defaults/main.yml
@@ -0,0 +1,34 @@
+---
+# List of NICs to configure
+sriov_devices: []
+sriov_workdir: "{{ ansible_facts.env.HOME }}/.sriov"
+
+sriov_mft_url: https://www.mellanox.com/downloads/MFT/mft-4.23.0-104-x86_64-rpm.tgz
+
+sriov_mft_tarball: "{{ sriov_mft_url | urlsplit('path') | basename }}"
+
+sriov_os_pkgs:
+  - gcc
+  - rpm-build
+  # -E- Could not find lspci, you may need to install "pciutils" package
+  - pciutils
+  # NOTE(wszumski): Assumes running latest kernel. We could include package version with
+  # output of uname -r, but the packages aren't always available.
+  - kernel-devel
+  - make
+
+sriov_restart_handler: reboot
+
+sriov_numvfs: 8
+
+sriov_udev_rule_path: /etc/udev/rules.d/70-sriov.rules
+
+sriov_mellanox_vendor_ids:
+  - "0x15b3"
+
+# Flag to skip installing any software dependencies. This may be useful if you
+# are providing them by some other means.
+sriov_install_enabled: true
+
+# Set iommu=pt on the kernel cmdline
+sriov_iommu_pt_enabled: true
diff --git a/roles/sriov/handlers/main.yml b/roles/sriov/handlers/main.yml
new file mode 100644
index 0000000..ed97d53
--- /dev/null
+++ b/roles/sriov/handlers/main.yml
@@ -0,0 +1 @@
+---
diff --git a/roles/sriov/molecule/default/Dockerfile.j2 b/roles/sriov/molecule/default/Dockerfile.j2
new file mode 100644
index 0000000..e6aa95d
--- /dev/null
+++ b/roles/sriov/molecule/default/Dockerfile.j2
@@ -0,0 +1,14 @@
+# Molecule managed
+
+{% if item.registry is defined %}
+FROM {{ item.registry.url }}/{{ item.image }}
+{% else %}
+FROM {{ item.image }}
+{% endif %}
+
+RUN if [ $(command -v apt-get) ]; then apt-get update && apt-get install -y python sudo bash ca-certificates && apt-get clean; \
+    elif [ $(command -v dnf) ]; then dnf makecache && dnf --assumeyes install python sudo python-devel python*-dnf bash && dnf clean all; \
+    elif [ $(command -v yum) ]; then yum makecache fast && yum install -y python sudo yum-plugin-ovl bash && sed -i 's/plugins=0/plugins=1/g' /etc/yum.conf && yum clean all; \
+    elif [ $(command -v zypper) ]; then zypper refresh && zypper install -y python sudo bash python-xml && zypper clean -a; \
+    elif [ $(command -v apk) ]; then apk update && apk add --no-cache python sudo bash ca-certificates; \
+    elif [ $(command -v xbps-install) ]; then xbps-install -Syu && xbps-install -y python sudo bash ca-certificates && xbps-remove -O; fi
diff --git a/roles/sriov/molecule/default/INSTALL.rst b/roles/sriov/molecule/default/INSTALL.rst
new file mode 100644
index 0000000..6a44bde
--- /dev/null
+++ b/roles/sriov/molecule/default/INSTALL.rst
@@ -0,0 +1,22 @@
+********************************
+Docker driver installation guide
+********************************
+
+Requirements
+============
+
+* Docker Engine
+
+Install
+=======
+
+Please refer to the `Virtual environment`_ documentation for installation best
+practices. If not using a virtual environment, please consider passing the
+widely recommended `'--user' flag`_ when invoking ``pip``.
+
+.. _Virtual environment: https://virtualenv.pypa.io/en/latest/
+.. _'--user' flag: https://packaging.python.org/tutorials/installing-packages/#installing-to-the-user-site
+
+.. code-block:: bash
+
+    $ pip install 'molecule[docker]'
diff --git a/roles/sriov/molecule/default/files/uname b/roles/sriov/molecule/default/files/uname
new file mode 100644
index 0000000..b4aa130
--- /dev/null
+++ b/roles/sriov/molecule/default/files/uname
@@ -0,0 +1,15 @@
+#!/bin/sh
+
+# MFT compile depends on detection of kernel version and architecture
+
+# "$1" is quoted so that a bare `uname` call (no arguments) is a clean
+# string comparison instead of a malformed test expression.
+if [ "$1" = "-m" ]; then
+    echo "x86_64"
+else
+    # Print the name of the first entry under /usr/src/kernels, then stop.
+    for f in /usr/src/kernels/*; do
+        basename "$f"
+        break
+    done
+fi
diff --git a/roles/sriov/molecule/default/molecule.yml b/roles/sriov/molecule/default/molecule.yml
new file mode 100644
index 0000000..037d97b
--- /dev/null
+++ b/roles/sriov/molecule/default/molecule.yml
@@ -0,0 +1,33 @@
+---
+dependency:
+  name: galaxy
+driver:
+  name: docker
+lint:
+  name: yamllint
+platforms:
+  - name: centos7-docker
+    image: mplachter/docker-centos7-molecule
+    command: /usr/sbin/init
+    volumes:
+      - /sys/fs/cgroup:/sys/fs/cgroup:ro
+    privileged: true
+provisioner:
+  name: ansible
+  ansible_args:
+    - --skip-tags=skip_when_testing
+  options:
+    become: true
+  lint:
+    name: ansible-lint
+    options:
+      R: true
+scenario:
+  name: default
+verifier:
+  name: testinfra
+  directory: ../tests
+  options:
+    verbose: true
+  lint:
+    name: flake8
diff --git a/roles/sriov/molecule/default/playbook.yml b/roles/sriov/molecule/default/playbook.yml
new file mode 100644
index 0000000..8392fa6
--- /dev/null
+++ b/roles/sriov/molecule/default/playbook.yml
@@ -0,0 +1,10 @@
+---
+- name: Converge
+  hosts: all
+  roles:
+    - role: sriov
+  handlers:
+    - name: Simulate reboot
+      listen: reboot
+      ansible.builtin.debug:
+        msg: Simulating reboot
diff --git a/roles/sriov/molecule/default/prepare.yml b/roles/sriov/molecule/default/prepare.yml
new file mode 100644
index 0000000..b66fbef
--- /dev/null
+++ b/roles/sriov/molecule/default/prepare.yml
@@ -0,0 +1,17 @@
+---
+- name: Install kernel packages and replace uname
+  hosts: all
+  tasks:
+    - name: Install kernel packages
+      ansible.builtin.package:
+        name:
+          - kernel
+          - kernel-devel
+        state: present
+    - name: Replace uname to fix detection of kernel and architecture in docker
+      ansible.builtin.copy:
+        src: files/uname
+        dest: /bin/uname
+        owner: root
+        group: root
+        mode: "0755"
diff --git a/roles/sriov/molecule/default/requirements.yml b/roles/sriov/molecule/default/requirements.yml
new file mode 100644
index 0000000..2223bbd
--- /dev/null
+++ b/roles/sriov/molecule/default/requirements.yml
@@ -0,0 +1,3 @@
+---
+- src: git+https://github.com/stackhpc/ansible-role-grubcmdline
+  name: stackhpc.grubcmdline
diff --git a/roles/sriov/molecule/tests/test_default.py b/roles/sriov/molecule/tests/test_default.py
new file mode 100644
index 0000000..046223e
--- /dev/null
+++ b/roles/sriov/molecule/tests/test_default.py
@@ -0,0 +1,11 @@
+import os
+
+import testinfra.utils.ansible_runner
+
+testinfra_hosts = testinfra.utils.ansible_runner.AnsibleRunner(
+    os.environ['MOLECULE_INVENTORY_FILE']).get_hosts('all')
+
+
+def test_stub(host):
+    # TODO: add some tests
+    pass
diff --git a/roles/sriov/molecule/vagrant/INSTALL.rst b/roles/sriov/molecule/vagrant/INSTALL.rst
new file mode 100644
index 0000000..4f44b67
--- /dev/null
+++ b/roles/sriov/molecule/vagrant/INSTALL.rst
@@ -0,0 +1,23 @@
+*********************************
+Vagrant driver installation guide
+*********************************
+
+Requirements
+============
+
+* Vagrant
+* Virtualbox, Parallels, VMware Fusion, VMware Workstation or VMware Desktop
+
+Install
+=======
+
+Please refer to the `Virtual environment`_ documentation for installation best
+practices. If not using a virtual environment, please consider passing the
+widely recommended `'--user' flag`_ when invoking ``pip``.
+
+.. _Virtual environment: https://virtualenv.pypa.io/en/latest/
+.. _'--user' flag: https://packaging.python.org/tutorials/installing-packages/#installing-to-the-user-site
+
+.. code-block:: bash
+
+    $ pip install 'molecule[vagrant]'
diff --git a/roles/sriov/molecule/vagrant/molecule.yml b/roles/sriov/molecule/vagrant/molecule.yml
new file mode 100644
index 0000000..486fd8b
--- /dev/null
+++ b/roles/sriov/molecule/vagrant/molecule.yml
@@ -0,0 +1,31 @@
+---
+dependency:
+  name: galaxy
+driver:
+  name: vagrant
+  provider:
+    name: virtualbox
+lint:
+  name: yamllint
+platforms:
+  - name: bento-centos-7-vagrant
+    box: bento/centos-7
+    cpus: 1
+    memory: 1024
+provisioner:
+  name: ansible
+  options:
+    become: true
+  lint:
+    name: ansible-lint
+    options:
+      R: true
+scenario:
+  name: vagrant
+verifier:
+  name: testinfra
+  directory: ../tests
+  options:
+    verbose: true
+  lint:
+    name: flake8
diff --git a/roles/sriov/molecule/vagrant/playbook.yml b/roles/sriov/molecule/vagrant/playbook.yml
new file mode 100644
index 0000000..618c1b0
--- /dev/null
+++ b/roles/sriov/molecule/vagrant/playbook.yml
@@ -0,0 +1,5 @@
+---
+- name: Converge
+  hosts: all
+  roles:
+    - role: sriov
diff --git a/roles/sriov/molecule/vagrant/prepare.yml b/roles/sriov/molecule/vagrant/prepare.yml
new file mode 100644
index 0000000..cd7e21a
--- /dev/null
+++ b/roles/sriov/molecule/vagrant/prepare.yml
@@ -0,0 +1,9 @@
+---
+- name: Prepare
+  hosts: all
+  gather_facts: false
+  tasks:
+    - name: Install python for Ansible
+      ansible.builtin.raw: test -e /usr/bin/python || (apt -y update && apt install -y python-minimal)
+      become: true
+      changed_when: false
diff --git a/roles/sriov/tasks/all.yml b/roles/sriov/tasks/all.yml
new file mode 100644
index 0000000..d3cd8fe
--- /dev/null
+++ b/roles/sriov/tasks/all.yml
@@ -0,0 +1,6 @@
+---
+- name: Include config tasks
+  ansible.builtin.include_tasks: config.yml
+
+- name: Include mellanox tasks
+  ansible.builtin.include_tasks: mellanox.yml
diff --git a/roles/sriov/tasks/config.yml b/roles/sriov/tasks/config.yml
new file mode 100644
index 0000000..f6539a0
--- /dev/null
+++ b/roles/sriov/tasks/config.yml
@@ -0,0 +1,46 @@
+---
+# Writes one udev rule per device that sets sriov_numvfs when the interface is
+# added; devices with on_boot_configuration_enabled set to false are skipped.
+- name: Persist sriov_numvfs with udev rule
+  ansible.builtin.blockinfile:
+    path: "{{ sriov_udev_rule_path }}"
+    block: |
+      {% for device in sriov_devices %}
+      {% if device.on_boot_configuration_enabled | default(true) | bool %}
+      SUBSYSTEM=="net", ACTION=="add", KERNEL=="{{ device.name }}", RUN+="/usr/bin/sh -c 'echo {{ device.numvfs | default(sriov_numvfs) }} > /sys/class/net/{{ device.name }}/device/sriov_numvfs'"
+      {% endif %}
+      {% endfor %}
+    marker_begin: BEGIN SRIOV
+    mode: "0644"
+    owner: root
+    group: root
+    create: true
+  become: true
+  notify: "{{ sriov_restart_handler }}"
+
+- name: Add iommu to kernel command line (Intel)
+  ansible.builtin.include_role:
+    name: stackhpc.grubcmdline
+  tags: skip_when_testing
+  vars:
+    kernel_cmdline:  # noqa: var-naming[no-role-prefix]
+      - intel_iommu=on
+    kernel_cmdline_remove:  # noqa: var-naming[no-role-prefix]
+      - ^intel_iommu=
+    kernel_restart_handler: "{{ sriov_restart_handler }}"
+  # The processor fact can start with the CPU index rather than the vendor
+  # string, so search every entry instead of only the first one.
+  when: "'Intel' in (ansible_facts.processor | join(' '))"
+
+- name: Set iommu=pt
+  ansible.builtin.include_role:
+    name: stackhpc.grubcmdline
+  tags: skip_when_testing
+  vars:
+    kernel_cmdline:  # noqa: var-naming[no-role-prefix]
+      - iommu=pt
+    kernel_cmdline_remove:  # noqa: var-naming[no-role-prefix]
+      - ^iommu=
+    kernel_restart_handler: "{{ sriov_restart_handler }}"
+  # Honour the sriov_iommu_pt_enabled default so this step can be switched off.
+  when: sriov_iommu_pt_enabled | bool
diff --git a/roles/sriov/tasks/main.yml b/roles/sriov/tasks/main.yml
new file mode 100644
index 0000000..4115556
--- /dev/null
+++ b/roles/sriov/tasks/main.yml
@@ -0,0 +1,4 @@
+---
+# sriov_action selects which task file to run; it defaults to all.yml.
+- name: Include action tasks
+  ansible.builtin.include_tasks: "{{ sriov_action | default('all') }}.yml"
diff --git a/roles/sriov/tasks/mellanox.yml b/roles/sriov/tasks/mellanox.yml
new file mode 100644
index 0000000..91a9bb7
--- /dev/null
+++ b/roles/sriov/tasks/mellanox.yml
@@ -0,0 +1,49 @@
+---
+- name: Determine list of pci addresses
+  ansible.builtin.command: grep PCI_SLOT_NAME /sys/class/net/{{ item }}/device/uevent
+  become: true
+  with_items: "{{ sriov_devices | map(attribute='name') | list }}"
+  register: network_devices
+  changed_when: false
+
+- name: Set default value for sriov_pci_addrs
+  ansible.builtin.set_fact:
+    sriov_pci_addrs: []
+
+- name: Add set_fact containing list of pci_address
+  ansible.builtin.set_fact:
+    sriov_pci_addrs: "{{ sriov_pci_addrs + [pci_addr] }}"
+  vars:
+    pci_addr: "{{ item.stdout | regex_replace('^PCI_SLOT_NAME=', '') }}"
+  with_items: "{{ network_devices.results }}"
+
+- name: Display sriov_pci_addrs value
+  ansible.builtin.debug:
+    var: sriov_pci_addrs
+
+- name: Determine list of vendor ids
+  ansible.builtin.slurp:
+    src: /sys/class/net/{{ item }}/device/vendor
+  become: true
+  with_items: "{{ sriov_devices | map(attribute='name') | list }}"
+  register: vendors
+  changed_when: false
+
+# Each device_meta entry is [name, vendor id, pci address, numvfs], in that
+# order; the include at the end of this file indexes into it by position.
+- name: Build device_meta from names, vendor ids, pci addresses and numvfs
+  ansible.builtin.set_fact:
+    device_meta: "{{ sriov_devices | map(attribute='name') | zip(vendors.results | map(attribute='content') | map('b64decode') | map('trim'), sriov_pci_addrs, sriov_devices
+      | map(attribute='numvfs', default=sriov_numvfs)) | list }}"
+
+- name: Display device_meta variable
+  ansible.builtin.debug:
+    var: device_meta
+
+- name: Include tasks in mlxconfig.yml
+  ansible.builtin.include_tasks: mlxconfig.yml
+  vars:
+    pci_addr: "{{ item[2] }}"
+    sriov_numvfs: "{{ item[3] }}"
+  loop: "{{ device_meta }}"
+  when: item[1] in sriov_mellanox_vendor_ids
diff --git a/roles/sriov/tasks/mlxconfig.yml b/roles/sriov/tasks/mlxconfig.yml
new file mode 100644
index 0000000..32d2581
--- /dev/null
+++ b/roles/sriov/tasks/mlxconfig.yml
@@ -0,0 +1,78 @@
+---
+- name: Install MFT dependencies
+  ansible.builtin.yum:
+    name: "{{ sriov_os_pkgs }}"
+  become: true
+  when: sriov_install_enabled
+
+- name: Make working directory
+  ansible.builtin.file:
+    state: directory
+    path: "{{ sriov_workdir }}"
+    mode: "0700"
+  when: sriov_install_enabled
+
+- name: Download MFT
+  vars:
+    # Strip the file extension
+    no_ext: "{{ (sriov_mft_tarball | splitext)[0] }}"
+  ansible.builtin.unarchive:
+    src: "{{ sriov_mft_url }}"
+    dest: "{{ sriov_workdir }}"
+    remote_src: true
+    creates: "{{ sriov_workdir }}/{{ no_ext }}"
+  when: sriov_install_enabled
+
+- name: Determine if we can run mst_status
+  ansible.builtin.command: mst status
+  become: true
+  failed_when: false
+  changed_when: false
+  register: mst_status_result
+
+- name: Install MFT
+  ansible.builtin.command: "{{ sriov_workdir }}/{{ no_ext }}/install.sh"
+  vars:
+    # Strip the file extension
+    no_ext: "{{ (sriov_mft_tarball | splitext)[0] }}"
+  become: true
+  register: mft_install_result
+  changed_when: true
+  when:
+    - sriov_install_enabled
+    - mst_status_result.rc != 0
+
+- name: "Run: mst start"
+  ansible.builtin.command: mst start
+  become: true
+  tags:
+    - skip_ansible_lint
+    - skip_when_testing
+  when: mft_install_result is changed
+
+- name: Check current settings
+  ansible.builtin.command: mlxconfig -d {{ pci_addr }} q
+  register: mlxconfig_result
+  become: true
+  changed_when: false
+
+- name: Enable SR-IOV with mlxconfig
+  ansible.builtin.command: mlxconfig -y -d {{ pci_addr }} set SRIOV_EN=1
+  become: true
+  vars:
+    sriov_enabled: "{{ mlxconfig_result.stdout | regex_search('^.*SRIOV_EN.*True.*$', multiline=True) }}"
+  when: not sriov_enabled
+  notify: "{{ sriov_restart_handler }}"
+  changed_when: true
+
+- name: Set numvfs with mlxconfig
+  ansible.builtin.command: mlxconfig -y -d {{ pci_addr }} set NUM_OF_VFS={{ sriov_numvfs }}
+  become: true
+  vars:
+    # \D* and \b keep the match exact, so a current value of 80 or 18 is not
+    # mistaken for a requested value of 8.
+    regex: ^.*NUM_OF_VFS\D*\b{{ sriov_numvfs }}\b.*$
+    num_of_vfs: "{{ mlxconfig_result.stdout | regex_search(regex, multiline=True) }}"
+  when: not num_of_vfs
+  notify: "{{ sriov_restart_handler }}"
+  changed_when: true
diff --git a/roles/sriov/vars/main.yml b/roles/sriov/vars/main.yml
new file mode 100644
index 0000000..4a8359d
--- /dev/null
+++ b/roles/sriov/vars/main.yml
@@ -0,0 +1,2 @@
+---
+# vars file for stackhpc.sriov