# nvidia_workflow.yml
# Runs a caller-supplied Python (.py) or CUDA (.cu) script.
# Triggered manually (workflow_dispatch); both inputs are required.
name: NVIDIA PyTorch/CUDA Job

on:
  workflow_dispatch:
    inputs:
      # Full source text of the script; the job writes it to `filename`.
      script_content:
        description: 'Content of Python/CUDA script (.py or .cu file)'
        required: true
        type: string
      # File name to save the script under; a `.cu` suffix selects the
      # nvcc compile-and-run path, anything else is run with python3.
      filename:
        description: 'Name of script (supports .py or .cu)'
        required: true
        type: string

jobs:
  # Writes the dispatched script to disk, installs the Python packages it
  # imports (torch / triton), runs it, and uploads the log and the script.
  train:
    runs-on: [gpumode-nvidia-arc]
    timeout-minutes: 10
    container:
      image: nvidia/cuda:12.4.0-devel-ubuntu22.04
    env:
      # Quoted so the value is the string "0", not a YAML integer.
      # NOTE(review): placed at job level so it reaches the run step;
      # confirm this matches the intended nesting.
      CUDA_VISIBLE_DEVICES: "0"
      # The filename input is exposed as an environment variable and
      # always referenced as "$SCRIPT_FILENAME" in shell code. Expanding
      # `${{ ... }}` directly inside `run:` would paste untrusted text
      # into the script before the shell parses it.
      SCRIPT_FILENAME: ${{ github.event.inputs.filename }}
    steps:
      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.10'

      - name: Install uv
        uses: astral-sh/setup-uv@v3
        with:
          version: "latest"

      # Create a virtualenv and make it the default for later steps.
      - name: Setup Python environment
        run: |
          uv venv .venv
          echo "VIRTUAL_ENV=$PWD/.venv" >> "$GITHUB_ENV"
          echo "$PWD/.venv/bin" >> "$GITHUB_PATH"

      - name: Create script file
        env:
          SCRIPT_CONTENT: ${{ github.event.inputs.script_content }}
        run: |
          # printf writes the content verbatim plus one trailing newline.
          # Unlike a heredoc, no line inside the script (e.g. "EOL") can
          # terminate the write early and be executed as shell.
          printf '%s\n' "$SCRIPT_CONTENT" > "$SCRIPT_FILENAME"
          cat -- "$SCRIPT_FILENAME"  # Debug: show file contents

      # Install only the packages the script appears to import.
      - name: Install dependencies
        run: |
          if grep -E "(import torch|from torch)" -- "$SCRIPT_FILENAME"; then
            echo "PyTorch detected, installing torch"
            uv pip install numpy torch
          fi
          if grep -E "(import triton|from triton)" -- "$SCRIPT_FILENAME"; then
            echo "Triton detected, installing triton"
            uv pip install triton
          fi

      - name: Run script
        shell: bash
        run: |
          # `shell: bash` runs with `-eo pipefail`, so a failing program
          # still fails the step even though its output goes through tee.
          # tee streams the output to the job log and to training.log,
          # so the log is visible even when the program exits non-zero.
          if [[ "$SCRIPT_FILENAME" == *.cu ]]; then
            echo "Compiling and running CUDA file..."
            nvcc "$SCRIPT_FILENAME" -o cuda_program
            ./cuda_program 2>&1 | tee training.log
          else
            echo "Running Python file..."
            python3 "$SCRIPT_FILENAME" 2>&1 | tee training.log
          fi

      # always(): keep the log and script even when an earlier step failed.
      - name: Upload training artifacts
        uses: actions/upload-artifact@v4
        if: always()
        with:
          name: training-artifacts
          path: |
            training.log
            ${{ github.event.inputs.filename }}