From 551ad9204a430481b7c199ee14004d4a7105436f Mon Sep 17 00:00:00 2001 From: greg pereira Date: Wed, 1 May 2024 19:09:50 -0700 Subject: [PATCH 1/5] test I still have access to the box Signed-off-by: greg pereira --- .github/workflows/training-e2e.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/training-e2e.yaml b/.github/workflows/training-e2e.yaml index a9dd25d4..31307248 100644 --- a/.github/workflows/training-e2e.yaml +++ b/.github/workflows/training-e2e.yaml @@ -90,6 +90,10 @@ jobs: env: ANSIBLE_HOST_KEY_CHECKING: false + - name: log some networking information + run: | + ifconfig + - name: Destroy Test Environment id: down if: always() From 07027382b0a7c18218bc726218ae5d9d23e4700d Mon Sep 17 00:00:00 2001 From: greg pereira Date: Wed, 1 May 2024 20:48:16 -0700 Subject: [PATCH 2/5] check access to ilab Signed-off-by: greg pereira --- .github/workflows/training-e2e.yaml | 20 ++++-- training/Makefile | 1 + training/provision/playbook.yml | 94 ++++++++++++++++++++++------- 3 files changed, 87 insertions(+), 28 deletions(-) diff --git a/.github/workflows/training-e2e.yaml b/.github/workflows/training-e2e.yaml index 31307248..e69b5cc5 100644 --- a/.github/workflows/training-e2e.yaml +++ b/.github/workflows/training-e2e.yaml @@ -15,9 +15,9 @@ on: env: TF_VAR_aws_region: "eu-west-2" - TF_VAR_aws_ami_owners: '["125523088429"]' - TF_VAR_aws_ami_name: '["Fedora-Cloud-Base-*"]' - TF_VAR_aws_volume_size: 128 + TF_VAR_aws_ami_owners: '["309956199498"]' + TF_VAR_aws_ami_name: '["*RHEL-9.4*"]' + TF_VAR_aws_volume_size: 500 TF_VAR_aws_access_key: ${{ secrets.AWS_ACCESS_KEY_ID }} TF_VAR_aws_secret_key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} @@ -74,6 +74,7 @@ jobs: run: | echo "id=$(terraform output id | xargs)" >> $GITHUB_OUTPUT echo "url=$(terraform output host | xargs)" >> $GITHUB_OUTPUT + echo "ssh_public_key=$(terraform output ssh_public_key | xargs)" >> $GITHUB_OUTPUT echo "pem_filename=$(terraform output pem_filename | xargs)" >> $GITHUB_OUTPUT working-directory: terraform-test-environment-module @@ -83,16 +84,23 @@ jobs: - name: Provision run: | + ssh_public_key=$(printf '%s\n' "${{ steps.terraform-output.outputs.ssh_public_key }}" | sed -e 's/[\/&]/\\&/g') ansible-playbook ./main/training/provision/playbook.yml \ + -vvv \ -i terraform-test-environment-module/hosts.ini \ --private-key=terraform-test-environment-module/${{ steps.terraform-output.outputs.pem_filename }} \ - --extra-vars "image_name=${{ matrix.image_name }}" + --extra-vars "image_name=${{ matrix.image_name }}" \ + --extra-vars "ssh_public_key=$ssh_public_key" \ + --extra-vars "registry_user=${{ secrets.REGISTRY_USER }}" \ + --extra-vars "registry_password=${{ secrets.REGISTRY_PASSWORD }}" env: ANSIBLE_HOST_KEY_CHECKING: false - - name: log some networking information + - name: run the e2e tests run: | - ifconfig + ssh -i terraform-test-environment-module/${{ steps.terraform-output.outputs.pem_filename }} \ + ${{ steps.terraform-output.outputs.url }} \ + curl -sLO /tmp https://raw.githubusercontent.com/stefwalter/instructlab/container-testing/scripts/basic-workflow-tests.sh - name: Destroy Test Environment id: down diff --git a/training/Makefile b/training/Makefile index 20f98ee9..6710838f 100644 --- a/training/Makefile +++ b/training/Makefile @@ -108,5 +108,6 @@ disk-intel: disk-nvidia: make -C nvidia-bootc/ bootc-image-builder +.PHONY: clean clean: rm -rf build diff --git a/training/provision/playbook.yml b/training/provision/playbook.yml index 566afc6d..bd2ab9af 100644 --- a/training/provision/playbook.yml +++ b/training/provision/playbook.yml @@ -1,7 +1,7 @@ --- - name: Test Environment Provisioning hosts: test_environments - remote_user: fedora + remote_user: ec2-user become: true gather_facts: false @@ -20,24 +20,74 @@ name: podman state: present - - name: Bootc install - ignore_unreachable: true - containers.podman.podman_container: - name: "{{ image_name }}" - image: "quay.io/ai-lab/{{ image_name }}:latest" - state: started - auto_remove: yes - cap_add: - - sys_admin - security_opt: - - "label=type:unconfined_t" - volumes: - - "/:/target" - - "/var/lib/containers:/var/lib/containers" - privileged: yes - pid_mode: host - command: "bootc install to-filesystem --karg=console=ttyS0,115200n8 --replace=alongside /target" - - - name: Restart instance - ansible.builtin.reboot: - test_command: pwd + - name: Get size of root filesystem + ansible.builtin.shell: + cmd: | + df -h /var/tmp/ + podman system info + + - name: Login to default registry and create ${XDG_RUNTIME_DIR}/containers/auth.json + containers.podman.podman_login: + username: "{{ registry_user }}" + password: "{{ registry_password }}" + registry: quay.io + + # - name: Building an image with ssh key + # ignore_unreachable: true + # containers.podman.podman_container: + # name: localhost/temp_image + # image: "quay.io/ai-lab/{{ image_name }}:latest" + # command: + # - mkdir /usr/etc-system + # - test -n "{{ ssh_public_key }}" + # - echo 'AuthorizedKeysFile /usr/etc-system/%u.keys' >> /etc/ssh/sshd_config.d/30-auth-system.conf + # - echo "{{ ssh_public_key }}"" > /usr/etc-system/ec2-user.keys && chmod 0600 /usr/etc-system/ec2-user.keys + # state: present + # recreate: true + + - name: Download the dockerfile for SSH wrapper container + ansible.builtin.get_url: + url: "https://gist.githubusercontent.com/Gregory-Pereira/235943787f8fd1586852debe11725fc4/raw/c0aa0a09e55def3b3f42e8130e328b0170a22141/Containerfile" + dest: "/tmp/Containerfile" + + - name: Sed SSH key into Containerfile + ansible.builtin.command: + cmd: | + sed -i 's|REPLACE_ME|{{ ssh_public_key }}|g' /tmp/Containerfile && \ + sed -i 's|BASE_IMAGE|{{ image_name }}|g' /tmp/Containerfile && \ + cat /tmp/Containerfile && \ + whoami + environment: + ssh_public_key: "{{ ssh_public_key }}" + + - name: Building an image with ssh key + ansible.builtin.command: + cmd: | + podman build -t localhost/temp_image:latest -f /tmp/Containerfile + + # - name: check podman images for sanity + # ansible.builtin.command: + # cmd: | + # podman images + + # - name: Bootc install + # ignore_unreachable: true + # containers.podman.podman_container: + # name: "{{ image_name }}" + # image: localhost/temp_image:latest + # state: started + # auto_remove: yes + # cap_add: + # - sys_admin + # security_opt: + # - "label=type:unconfined_t" + # volumes: + # - "/:/target" + # - "/var/lib/containers:/var/lib/containers" + # privileged: yes + # pid_mode: host + # command: "bootc install to-filesystem --karg=console=ttyS0,115200n8 --replace=alongside /target" + + # - name: Restart instance + # ansible.builtin.reboot: + # test_command: which ilab From 16f383305d75d17f4a1cc023ff0c9716e4a521bc Mon Sep 17 00:00:00 2001 From: greg pereira Date: Wed, 1 May 2024 20:48:16 -0700 Subject: [PATCH 3/5] check access to ilab Signed-off-by: greg pereira --- .github/workflows/training-e2e.yaml | 5 +- training/provision/ansible.cfg | 2 + training/provision/playbook.yml | 90 +++++++++++-------- training/provision/templates/Containerfile.j2 | 9 ++ 4 files changed, 67 insertions(+), 39 deletions(-) create mode 100644 training/provision/ansible.cfg create mode 100644 training/provision/templates/Containerfile.j2 diff --git a/.github/workflows/training-e2e.yaml b/.github/workflows/training-e2e.yaml index e69b5cc5..cc75fcd0 100644 --- a/.github/workflows/training-e2e.yaml +++ b/.github/workflows/training-e2e.yaml @@ -84,13 +84,14 @@ jobs: - name: Provision run: | - ssh_public_key=$(printf '%s\n' "${{ steps.terraform-output.outputs.ssh_public_key }}" | sed -e 's/[\/&]/\\&/g') + cp ./main/training/provision/ansible.cfg ./ && \ ansible-playbook ./main/training/provision/playbook.yml \ -vvv \ -i terraform-test-environment-module/hosts.ini \ --private-key=terraform-test-environment-module/${{ steps.terraform-output.outputs.pem_filename }} \ + --extra-vars "ansible_ssh_timeout=1800" \ --extra-vars "image_name=${{ matrix.image_name }}" \ - --extra-vars "ssh_public_key=$ssh_public_key" \ + --extra-vars "ssh_public_key='${{ steps.terraform-output.outputs.ssh_public_key }}'" \ --extra-vars "registry_user=${{ secrets.REGISTRY_USER }}" \ --extra-vars "registry_password=${{ secrets.REGISTRY_PASSWORD }}" env: diff --git a/training/provision/ansible.cfg b/training/provision/ansible.cfg new file mode 100644 index 00000000..d6570108 --- /dev/null +++ b/training/provision/ansible.cfg @@ -0,0 +1,2 @@ +[ssh_connection] +ssh_args = -o ControlMaster=auto -o ControlPersist=1800 diff --git a/training/provision/playbook.yml b/training/provision/playbook.yml index bd2ab9af..83de071a 100644 --- a/training/provision/playbook.yml +++ b/training/provision/playbook.yml @@ -15,55 +15,71 @@ - name: Gather facts for first time ansible.builtin.setup: - - name: Check Podman Present - ansible.builtin.package: - name: podman + - name: Required packages + ansible.builtin.dnf: + name: + - podman state: present + disable_gpg_check: true - - name: Get size of root filesystem - ansible.builtin.shell: - cmd: | - df -h /var/tmp/ - podman system info + - name: Temp Image Containerfile + ansible.builtin.template: + src: ./templates/Containerfile.j2 + dest: /tmp/Containerfile - - name: Login to default registry and create ${XDG_RUNTIME_DIR}/containers/auth.json + - name: Login to default registry containers.podman.podman_login: username: "{{ registry_user }}" password: "{{ registry_password }}" registry: quay.io + authfile: /etc/containers/auth.json - # - name: Building an image with ssh key - # ignore_unreachable: true - # containers.podman.podman_container: - # name: localhost/temp_image - # image: "quay.io/ai-lab/{{ image_name }}:latest" - # command: - # - mkdir /usr/etc-system - # - test -n "{{ ssh_public_key }}" - # - echo 'AuthorizedKeysFile /usr/etc-system/%u.keys' >> /etc/ssh/sshd_config.d/30-auth-system.conf - # - echo "{{ ssh_public_key }}"" > /usr/etc-system/ec2-user.keys && chmod 0600 /usr/etc-system/ec2-user.keys - # state: present - # recreate: true - - - name: Download the dockerfile for SSH wrapper container - ansible.builtin.get_url: - url: "https://gist.githubusercontent.com/Gregory-Pereira/235943787f8fd1586852debe11725fc4/raw/c0aa0a09e55def3b3f42e8130e328b0170a22141/Containerfile" - dest: "/tmp/Containerfile" + - name: Pull the parent image + async: 1000 + poll: 0 + register: pull_result + containers.podman.podman_image: + name: "quay.io/ai-lab/{{ image_name }}:latest" + pull: true + auth_file: /etc/containers/auth.json + arch: amd64 + state: present - - name: Sed SSH key into Containerfile - ansible.builtin.command: - cmd: | - sed -i 's|REPLACE_ME|{{ ssh_public_key }}|g' /tmp/Containerfile && \ - sed -i 's|BASE_IMAGE|{{ image_name }}|g' /tmp/Containerfile && \ - cat /tmp/Containerfile && \ - whoami - environment: - ssh_public_key: "{{ ssh_public_key }}" + - name: Check on parent image pull + async_status: + jid: "{{ pull_result.ansible_job_id }}" + register: job_result + until: job_result.finished + retries: 100 + delay: 10 - - name: Building an image with ssh key + - name: Check the images was pulled ansible.builtin.command: cmd: | - podman build -t localhost/temp_image:latest -f /tmp/Containerfile + podman images -a + + # - name: Simulate long running op (15 sec), wait for up to 45 sec, poll every 5 sec + # ansible.builtin.command: /bin/sleep 15 + # async: 45 + # poll: 5 + + # - name: Build Temp image + # containers.podman.podman_image: + # name: "quay.io/ai-lab/{{ image_name }}:latest" + # build: + # target: temp_image + # file: /tmp/Containerfile + # state: build + # auth_file: /etc/containers/auth.json + + # - name: Pause for 5 minutes + # ansible.builtin.pause: + # minutes: 5 + + # - name: Building an image with ssh key + # ansible.builtin.command: + # cmd: | + # podman build -t localhost/temp_image:latest -f /tmp/Containerfile # - name: check podman images for sanity # ansible.builtin.command: diff --git a/training/provision/templates/Containerfile.j2 b/training/provision/templates/Containerfile.j2 new file mode 100644 index 00000000..061a0b92 --- /dev/null +++ b/training/provision/templates/Containerfile.j2 @@ -0,0 +1,9 @@ +FROM quay.io/ai-lab/{{ image_name }}:latest + +USER root + +RUN mkdir /usr/etc-system && \ + chown -R root:root /usr/etc-system && \ + echo 'AuthorizedKeysFile /usr/etc-system/root.keys' >> /etc/ssh/sshd_config.d/30-auth-system.conf && \ + echo {{ ssh_public_key }} > /usr/etc-system/root.keys && \ + chmod 0600 /usr/etc-system/root.keys From 0eba1abe57915985b22fe8ef5b1cf80699220b87 Mon Sep 17 00:00:00 2001 From: greg pereira Date: Fri, 3 May 2024 11:58:36 -0700 Subject: [PATCH 4/5] removing ssh timeout and ansible.cfg Signed-off-by: greg pereira --- .github/workflows/training-e2e.yaml | 1 - training/provision/ansible.cfg | 2 -- 2 files changed, 3 deletions(-) delete mode 100644 training/provision/ansible.cfg diff --git a/.github/workflows/training-e2e.yaml b/.github/workflows/training-e2e.yaml index cc75fcd0..686c7261 100644 --- a/.github/workflows/training-e2e.yaml +++ b/.github/workflows/training-e2e.yaml @@ -89,7 +89,6 @@ jobs: -vvv \ -i terraform-test-environment-module/hosts.ini \ --private-key=terraform-test-environment-module/${{ steps.terraform-output.outputs.pem_filename }} \ - --extra-vars "ansible_ssh_timeout=1800" \ --extra-vars "image_name=${{ matrix.image_name }}" \ --extra-vars "ssh_public_key='${{ steps.terraform-output.outputs.ssh_public_key }}'" \ --extra-vars "registry_user=${{ secrets.REGISTRY_USER }}" \ diff --git a/training/provision/ansible.cfg b/training/provision/ansible.cfg deleted file mode 100644 index d6570108..00000000 --- a/training/provision/ansible.cfg +++ /dev/null @@ -1,2 +0,0 @@ -[ssh_connection] -ssh_args = -o ControlMaster=auto -o ControlPersist=1800 From bcf522f80cd4881d13574ee24d81c5791d8ce27a Mon Sep 17 00:00:00 2001 From: greg pereira Date: Fri, 3 May 2024 11:58:36 -0700 Subject: [PATCH 5/5] removing ssh timeout and ansible.cfg Signed-off-by: greg pereira --- .github/workflows/training-e2e.yaml | 26 +++++----- training/provision/playbook.yml | 79 +++++------------------------ 2 files changed, 25 insertions(+), 80 deletions(-) diff --git a/.github/workflows/training-e2e.yaml b/.github/workflows/training-e2e.yaml index 686c7261..6f003872 100644 --- a/.github/workflows/training-e2e.yaml +++ b/.github/workflows/training-e2e.yaml @@ -84,9 +84,7 @@ jobs: - name: Provision run: | - cp ./main/training/provision/ansible.cfg ./ && \ ansible-playbook ./main/training/provision/playbook.yml \ - -vvv \ -i terraform-test-environment-module/hosts.ini \ --private-key=terraform-test-environment-module/${{ steps.terraform-output.outputs.pem_filename }} \ --extra-vars "image_name=${{ matrix.image_name }}" \ @@ -99,7 +97,7 @@ jobs: - name: run the e2e tests run: | ssh -i terraform-test-environment-module/${{ steps.terraform-output.outputs.pem_filename }} \ - ${{ steps.terraform-output.outputs.url }} \ + root@${{ steps.terraform-output.outputs.url }} \ curl -sLO /tmp https://raw.githubusercontent.com/stefwalter/instructlab/container-testing/scripts/basic-workflow-tests.sh - name: Destroy Test Environment @@ -111,14 +109,14 @@ jobs: TF_VAR_aws_instance_type: ${{ matrix.aws_image_type }} TF_VAR_aws_ami_architecture: ${{ matrix.aws_ami_architecture }} - - name: Publish Job Results to Slack - id: slack - if: always() - uses: slackapi/slack-github-action@v1.26.0 - with: - payload: | - { - "text": "${{ github.workflow }} workflow status: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}" - } - env: - SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }} + # - name: Publish Job Results to Slack + # id: slack + # if: always() + # uses: slackapi/slack-github-action@v1.26.0 + # with: + # payload: | + # { + # "text": "${{ github.workflow }} workflow status: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}" + # } + # env: + # SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }} diff --git a/training/provision/playbook.yml b/training/provision/playbook.yml index 83de071a..c40be8e6 100644 --- a/training/provision/playbook.yml +++ b/training/provision/playbook.yml @@ -18,11 +18,12 @@ - name: Required packages ansible.builtin.dnf: name: + - https://s3.eu-west-2.amazonaws.com/amazon-ssm-eu-west-2/latest/linux_amd64/amazon-ssm-agent.rpm - podman state: present disable_gpg_check: true - - name: Temp Image Containerfile + - name: Derived Image Containerfile ansible.builtin.template: src: ./templates/Containerfile.j2 dest: /tmp/Containerfile @@ -34,76 +35,22 @@ registry: quay.io authfile: /etc/containers/auth.json - - name: Pull the parent image + - name: Build and Install the Bootc Image async: 1000 poll: 0 - register: pull_result - containers.podman.podman_image: - name: "quay.io/ai-lab/{{ image_name }}:latest" - pull: true - auth_file: /etc/containers/auth.json - arch: amd64 - state: present - - - name: Check on parent image pull + register: build_result + ansible.builtin.shell: | + podman build -t derived_image:latest -f /tmp/Containerfile --authfile=/etc/containers/auth.json . + podman run --rm --privileged --pid=host --security-opt label=type:unconfined_t -v /:/target -v /var/lib/containers:/var/lib/containers derived_image:latest bootc install to-filesystem --karg=console=ttyS0,115200n8 --replace=alongside /target + + - name: Check on Build and Install Bootc Image async_status: - jid: "{{ pull_result.ansible_job_id }}" + jid: "{{ build_result.ansible_job_id }}" register: job_result until: job_result.finished retries: 100 delay: 10 - - name: Check the images was pulled - ansible.builtin.command: - cmd: | - podman images -a - - # - name: Simulate long running op (15 sec), wait for up to 45 sec, poll every 5 sec - # ansible.builtin.command: /bin/sleep 15 - # async: 45 - # poll: 5 - - # - name: Build Temp image - # containers.podman.podman_image: - # name: "quay.io/ai-lab/{{ image_name }}:latest" - # build: - # target: temp_image - # file: /tmp/Containerfile - # state: build - # auth_file: /etc/containers/auth.json - - # - name: Pause for 5 minutes - # ansible.builtin.pause: - # minutes: 5 - - # - name: Building an image with ssh key - # ansible.builtin.command: - # cmd: | - # podman build -t localhost/temp_image:latest -f /tmp/Containerfile - - # - name: check podman images for sanity - # ansible.builtin.command: - # cmd: | - # podman images - - # - name: Bootc install - # ignore_unreachable: true - # containers.podman.podman_container: - # name: "{{ image_name }}" - # image: localhost/temp_image:latest - # state: started - # auto_remove: yes - # cap_add: - # - sys_admin - # security_opt: - # - "label=type:unconfined_t" - # volumes: - # - "/:/target" - # - "/var/lib/containers:/var/lib/containers" - # privileged: yes - # pid_mode: host - # command: "bootc install to-filesystem --karg=console=ttyS0,115200n8 --replace=alongside /target" - - # - name: Restart instance - # ansible.builtin.reboot: - # test_command: which ilab + - name: Restart instance + ansible.builtin.reboot: + test_command: which ilab