Skip to content

Training Workflow

Training Workflow #124

Workflow file for this run

# Manually triggered workflow that receives the full text of a training
# script as an input, writes it to train.py inside a CUDA container,
# installs the packages the script appears to import, runs it, and uploads
# the script together with its captured output as an artifact.
name: NVIDIA PyTorch Job

on:
  workflow_dispatch:
    inputs:
      script_content:
        description: 'Content of train.py'
        required: true
        type: string

jobs:
  train:
    runs-on: [gpumode-nvidia-arc]
    container:
      image: nvidia/cuda:12.4.0-devel-ubuntu22.04
    # NOTE(review): the original indentation was lost; env is placed at job
    # level so it applies to every step, including the training run.
    env:
      # Make sure only one GPU is used for testing. Quoted so the value is
      # a string rather than a YAML integer.
      CUDA_VISIBLE_DEVICES: "0"
    steps:
      - name: Setup Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.10'

      - name: Create training script
        shell: python
        env:
          # Hand the input over as an environment variable instead of
          # expanding it inside the Python source: an inline expansion
          # breaks on ''' in the script and interprets backslash escapes,
          # so the written file would not match what was submitted.
          SCRIPT_CONTENT: ${{ github.event.inputs.script_content }}
        run: |
          import os

          with open('train.py', 'w', encoding='utf-8') as f:
              f.write(os.environ['SCRIPT_CONTENT'])

      - name: Install dependencies
        run: |
          # Install numpy only if train.py imports it
          if grep -E "(import numpy|from numpy)" train.py; then
            echo "Numpy detected, installing numpy"
            pip install numpy
          fi
          # Install torch only if train.py imports it
          if grep -E "(import torch|from torch)" train.py; then
            echo "PyTorch detected, installing torch"
            pip install torch
          fi
          # Install triton only if train.py imports it
          if grep -E "(import triton|from triton)" train.py; then
            echo "Triton detected, installing triton"
            pip install triton
          fi

      - name: Run training script
        run: |
          # stdout and stderr both go to training.log, which is uploaded below
          python train.py > training.log 2>&1

      - name: Upload artifacts
        # v3 of this action has been retired by GitHub; v4 is the supported
        # major version.
        uses: actions/upload-artifact@v4
        # Upload even when an earlier step failed so the log can be inspected.
        if: always()
        with:
          name: training-artifacts
          path: |
            training.log
            train.py