From fb1c062bc8f7e5df9a9ef167d776100f7819d338 Mon Sep 17 00:00:00 2001 From: greg pereira Date: Wed, 1 May 2024 10:47:32 -0700 Subject: [PATCH] upping worker and running bootc install Signed-off-by: greg pereira --- .github/workflows/README.md | 30 +++++++++++++++++++ .../workflows/bootc_testing_framework.yaml | 11 +++---- training/provision/playbook.yml | 20 ++++++++++++- 3 files changed, 53 insertions(+), 8 deletions(-) create mode 100644 .github/workflows/README.md diff --git a/.github/workflows/README.md b/.github/workflows/README.md new file mode 100644 index 00000000..cda5accf --- /dev/null +++ b/.github/workflows/README.md @@ -0,0 +1,30 @@ +# AI-Lab Recipes Infrastructure Documentation + +## Standard Workflows + +Our standard workflows deal with building components and pushing their images to `quay.io/ai-lab`. These components include: + - recipe applications: + - Chatbot + - Codegen + - Summarizer + - RAG + - model_servers + - models + - instructlab workflows + - training bootc workflows + +For a full list of the images we build check out our [quay organization](https://quay.io/organization/ai-lab). These standard workflows should all be run against our standard repo `containers/ai-lab-recipes` rather than the mirror repo. + +## Testing frameworks + +Our testing frameworks are a bit different from our standard workflows. In terms of compute, some of these jobs run on either AWS machines provisioned via terraform using secrets in the github repository, or customized github hosted action runners, as well as the standard ubuntu-22.04 github runners for jobs not requiring additional resources. + +These workflows start by checking out the [terraform-test-environment-module](https://github.com/containers/terraform-test-environment-module) repo, as well as the code in `containers/ai-lab-recipes` at the `main` branch. Then it will provision the terraform instance, install the correct ansible playbook requirements, and run a corresponding playbook. 
Additional actions may also be taken depending on the testing framework in question. + +Finally, all of our testing framework workflows will call `terraform destroy` to remove the AWS instance we have provisioned and publish the results of the workflow to slack. + +IMPORTANT: If you are doing development and testing, please make sure that instances in AWS are spun down before leaving if you have access to the AWS account. + +### testing_bootc specific pieces + +This workflow is based off of `Fedora 40`. It provisions a `g5.8xlarge` AWS EC2 instance. In the ansible playbook for this workflow, it will first check that the only requirement -- podman -- is installed before running the bootc install. \ No newline at end of file diff --git a/.github/workflows/bootc_testing_framework.yaml b/.github/workflows/bootc_testing_framework.yaml index d669371b..29125edb 100644 --- a/.github/workflows/bootc_testing_framework.yaml +++ b/.github/workflows/bootc_testing_framework.yaml @@ -17,12 +17,9 @@ env: TF_VAR_aws_region: "eu-west-2" TF_VAR_aws_ami_owners: '["125523088429"]' TF_VAR_aws_ami_name: '["Fedora-Cloud-Base-*"]' - TF_VAR_aws_volume_size: 25 - # TF_VAR_aws_volume_size: 128 + TF_VAR_aws_volume_size: 128 TF_VAR_aws_access_key: ${{ secrets.AWS_ACCESS_KEY_ID }} TF_VAR_aws_secret_key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} - # AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} - # AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} concurrency: group: ${{ github.workflow }} @@ -37,7 +34,7 @@ jobs: matrix: include: - arch: amd64 - aws_image_type: t2.micro + aws_image_type: g5.8xlarge image_name: nvidia-bootc aws_ami_architecture: x86_64 steps: @@ -88,10 +85,10 @@ jobs: run: | ansible-playbook ./main/training/provision/playbook.yml \ -i terraform-test-environment-module/hosts.ini \ - --private-key=terraform-test-environment-module/${{ steps.terraform-output.outputs.pem_filename }} + --private-key=terraform-test-environment-module/${{ 
steps.terraform-output.outputs.pem_filename }} \ + --extra-vars "image_name=${{ matrix.image_name }}" env: ANSIBLE_HOST_KEY_CHECKING: false - image_name: ${{ matrix.image_name }} - name: Destroy Test Environment id: down diff --git a/training/provision/playbook.yml b/training/provision/playbook.yml index b1e820d6..746e39a2 100644 --- a/training/provision/playbook.yml +++ b/training/provision/playbook.yml @@ -15,7 +15,25 @@ - name: Gather facts for first time ansible.builtin.setup: - - name: Required Packages + - name: Check Podman Present ansible.builtin.package: name: podman state: present + + - name: Bootc install + ignore_unreachable: true + containers.podman.podman_container: + name: "{{ image_name }}" + image: "quay.io/ai-lab/{{ image_name }}:latest" + state: started + auto_remove: yes + cap_add: + - sys_admin + security_opt: + - "label=type:unconfined_t" + volumes: + - "/:/target" + - "/var/lib/containers:/var/lib/containers" + privileged: yes + pid_mode: host + command: "bootc install to-filesystem --karg=console=ttyS0,115200n8 --replace=alongside /target"