diff --git a/.github/workflows/README.md b/.github/workflows/README.md new file mode 100644 index 000000000..cda5accf5 --- /dev/null +++ b/.github/workflows/README.md @@ -0,0 +1,30 @@ +# AI-Lab Recipes Infrastructure Documentation + +## Standard Workflows + +Our standard workflows deal with building components and pushing their images to `quay.io/ai-lab`. These components include: + - recipe applications: + - Chatbot + - Codegen + - Summarizer + - RAG + - model_servers + - models + - instructlab workflows + - training bootc workflows + +For a full list of the images we build, check out our [quay organization](https://quay.io/organization/ai-lab). These standard workflows should all be run against our standard repo `containers/ai-lab-recipes` rather than the mirror repo. + +## Testing frameworks + +Our testing frameworks are a bit different from our standard workflows. In terms of compute, some of these jobs run on either AWS machines provisioned via Terraform using secrets in the GitHub repository, or customized GitHub-hosted action runners, as well as the standard ubuntu-22.04 GitHub runners for jobs not requiring additional resources. + +These workflows start by checking out the [terraform-test-environment-module](https://github.com/containers/terraform-test-environment-module) repo, as well as the code in `containers/ai-lab-recipes` at the `main` branch. Then they provision the Terraform instance, install the correct Ansible playbook requirements, and run the corresponding playbook. Additional actions may also be taken depending on the testing framework in question. + +Finally, all of our testing framework workflows will call `terraform destroy` to remove the AWS instance we have provisioned and publish the results of the workflow to Slack. + +IMPORTANT: If you are doing development and testing, please make sure that instances in AWS are spun down before leaving if you have access to the AWS account. 
+ +### testing_bootc specific pieces + +This workflow is based on `Fedora 40`. It provisions a `g5.8xlarge` AWS EC2 instance. The Ansible playbook for this workflow first checks that the only requirement -- podman -- is installed before running the bootc install. \ No newline at end of file diff --git a/.github/workflows/bootc_testing_framework.yaml b/.github/workflows/bootc_testing_framework.yaml index d669371b8..29125edb5 100644 --- a/.github/workflows/bootc_testing_framework.yaml +++ b/.github/workflows/bootc_testing_framework.yaml @@ -17,12 +17,9 @@ env: TF_VAR_aws_region: "eu-west-2" TF_VAR_aws_ami_owners: '["125523088429"]' TF_VAR_aws_ami_name: '["Fedora-Cloud-Base-*"]' - TF_VAR_aws_volume_size: 25 - # TF_VAR_aws_volume_size: 128 + TF_VAR_aws_volume_size: 128 TF_VAR_aws_access_key: ${{ secrets.AWS_ACCESS_KEY_ID }} TF_VAR_aws_secret_key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} - # AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} - # AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} concurrency: group: ${{ github.workflow }} @@ -37,7 +34,7 @@ jobs: matrix: include: - arch: amd64 - aws_image_type: t2.micro + aws_image_type: g5.8xlarge image_name: nvidia-bootc aws_ami_architecture: x86_64 steps: @@ -88,10 +85,10 @@ jobs: run: | ansible-playbook ./main/training/provision/playbook.yml \ -i terraform-test-environment-module/hosts.ini \ - --private-key=terraform-test-environment-module/${{ steps.terraform-output.outputs.pem_filename }} + --private-key=terraform-test-environment-module/${{ steps.terraform-output.outputs.pem_filename }} \ + --extra-vars "image_name=${{ matrix.image_name }}" env: ANSIBLE_HOST_KEY_CHECKING: false - image_name: ${{ matrix.image_name }} - name: Destroy Test Environment id: down diff --git a/training/provision/playbook.yml b/training/provision/playbook.yml index b1e820d6c..746e39a24 100644 --- a/training/provision/playbook.yml +++ b/training/provision/playbook.yml @@ -15,7 +15,25 @@ - name: Gather facts for first 
time ansible.builtin.setup: - - name: Required Packages + - name: Check Podman Present ansible.builtin.package: name: podman state: present + + - name: Bootc install + ignore_unreachable: true + containers.podman.podman_container: + name: "{{ image_name }}" + image: "quay.io/ai-lab/{{ image_name }}:latest" + state: started + auto_remove: true + cap_add: + - sys_admin + security_opt: + - "label=type:unconfined_t" + volumes: + - "/:/target" + - "/var/lib/containers:/var/lib/containers" + privileged: true + pid_mode: host + command: "bootc install to-filesystem --karg=console=ttyS0,115200n8 --replace=alongside /target"