diff --git a/.github/workflows/training-e2e.yaml b/.github/workflows/training-e2e.yaml index 34c59598..cc505ed3 100644 --- a/.github/workflows/training-e2e.yaml +++ b/.github/workflows/training-e2e.yaml @@ -10,7 +10,7 @@ on: paths: - .github/workflows/training-e2e.yaml - ./training/** - + workflow_dispatch: env: @@ -20,6 +20,7 @@ env: TF_VAR_aws_volume_size: 500 TF_VAR_aws_access_key: ${{ secrets.AWS_ACCESS_KEY_ID }} TF_VAR_aws_secret_key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + HF_TOKEN: ${{ secrets.HF_TOKEN }} concurrency: group: ${{ github.workflow }} @@ -69,7 +70,7 @@ jobs: env: TF_VAR_aws_instance_type: ${{ matrix.aws_image_type }} TF_VAR_aws_ami_architecture: ${{ matrix.aws_ami_architecture }} - + - name: Terraform Output id: terraform-output run: | @@ -95,16 +96,13 @@ jobs: env: ANSIBLE_CONFIG: ./main/training/tests/ansible.cfg - - name: Wait for 4 minutes - run: sleep 240 - # - name: Setup tmate session # uses: mxschmitt/action-tmate@v3.18 # timeout-minutes: 20 # with: # detached: true # limit-access-to-actor: false - + - name: Setup tmate session uses: mxschmitt/action-tmate@v3.18 timeout-minutes: 60 @@ -114,9 +112,10 @@ jobs: - name: Run tests run: | - ansible-playbook ./main/training/tests/e2e-tests/playbook.yml \ + ansible-playbook ./main/training/tests/e2e-tests/playbook.yml -vvv \ -i terraform-test-environment-module/hosts.ini \ --private-key=terraform-test-environment-module/${{ steps.terraform-output.outputs.pem_filename }} \ + --extra-vars "HF_TOKEN=${{ secrets.HF_TOKEN }}" \ --extra-vars "image_name=${{ matrix.image_name }}" \ --extra-vars "ssh_public_key='${{ steps.terraform-output.outputs.ssh_public_key }}'" \ --extra-vars "registry_user=${{ secrets.REGISTRY_USER }}" \ @@ -141,7 +140,7 @@ jobs: env: TF_VAR_aws_instance_type: ${{ matrix.aws_image_type }} TF_VAR_aws_ami_architecture: ${{ matrix.aws_ami_architecture }} - + - name: Publish Job Results to Slack id: slack if: always() diff --git a/training/tests/e2e-tests/playbook.yml b/training/tests/e2e-tests/playbook.yml index dd07b765..4d2102d1 100644 --- a/training/tests/e2e-tests/playbook.yml +++ b/training/tests/e2e-tests/playbook.yml @@ -6,12 +6,12 @@ gather_facts: false tasks: - + - name: Wait until the instance is ready ansible.builtin.wait_for_connection: delay: 15 timeout: 180 - + - name: Gather facts for first time ansible.builtin.setup: @@ -20,6 +20,8 @@ url: https://raw.githubusercontent.com/instructlab/instructlab/main/scripts/basic-workflow-tests.sh dest: /tmp/basic-workflow-tests.sh mode: 755 + environment: + HF_TOKEN: "{{ HF_TOKEN }}" # Allow for debugging with tmate # - name: Wait for 15 minutes diff --git a/training/tests/provision/playbook.yml b/training/tests/provision/playbook.yml index 1181e34c..66426cf2 100644 --- a/training/tests/provision/playbook.yml +++ b/training/tests/provision/playbook.yml @@ -6,12 +6,12 @@ gather_facts: false tasks: - + - name: Wait until the instance is ready ansible.builtin.wait_for_connection: delay: 15 timeout: 180 - + - name: Gather facts for first time ansible.builtin.setup: @@ -42,7 +42,7 @@ ansible.builtin.shell: | podman pull "quay.io/ai-lab/{{ image_name }}:latest" \ --authfile=/etc/containers/auth.json \ - --arch amd64 + --arch amd64 # --retry=3 \ # --retry-delay=15 \ @@ -112,6 +112,5 @@ delegate_to: localhost - name: Reboot - ansible.builtin.shell: systemctl reboot - ignore_errors: true - ignore_unreachable: true + ansible.builtin.reboot: + reboot_timeout: 300