From e5e549d0128e4b59185d96b8eace60bfd8a3d45d Mon Sep 17 00:00:00 2001 From: alexzhang13 Date: Mon, 11 Nov 2024 21:39:42 -0500 Subject: [PATCH] Add Numpy / Torch / Triton differentiation (#18) * Add Flags for installing different deps * Handle the "from ..." case for imports" --------- Co-authored-by: Alex Zhang Co-authored-by: Mark Saroufim --- .github/workflows/train_workflow.yml | 20 ++++++++++++++++++-- train.py | 28 ++++++++++++++++++++++++++-- 2 files changed, 44 insertions(+), 4 deletions(-) diff --git a/.github/workflows/train_workflow.yml b/.github/workflows/train_workflow.yml index be54465..afb96b7 100644 --- a/.github/workflows/train_workflow.yml +++ b/.github/workflows/train_workflow.yml @@ -21,8 +21,24 @@ jobs: - name: Install dependencies run: | - pip install numpy - pip install torch + # Install numpy only if train.py contains 'import numpy' or 'from numpy' + if grep -rE "(import numpy|from numpy)" train.py; then + echo "Numpy detected, installing numpy" + pip install numpy + fi + + # Install torch only if train.py contains 'import torch' or 'from torch' + if grep -rE "(import torch|from torch)" train.py; then + echo "PyTorch detected, installing torch" + pip install torch + fi + + # Install triton only if train.py contains 'import triton' or 'from triton' + if grep -rE "(import triton|from triton)" train.py; then + echo "Triton detected, installing triton" + pip install triton + + fi - name: Create training script shell: python diff --git a/train.py b/train.py index 3a79f98..99a07f8 100644 --- a/train.py +++ b/train.py @@ -1,8 +1,32 @@ +import triton.language as tl +import triton import torch + +@triton.jit +def vector_add_kernel(A, B, C, N, BLOCK_SIZE: tl.constexpr): + # Get the unique program ID for each block + pid = tl.program_id(0) + + # Calculate the start index for each block + start = pid * BLOCK_SIZE + + # Compute this block's element offsets, then load A and B; offsets >= N are masked off + offset = start + tl.arange(0, BLOCK_SIZE) + a = tl.load(A + offset, mask=offset < N) # Load elements from A + b = tl.load(B + offset, mask=offset < N) # 
Load elements from B + + # Perform element-wise addition + c = a + b + + # Store the result back into C + tl.store(C + offset, c, mask=offset < N) + + a = torch.Tensor([1, 2, 3, 4, 5]).cuda() -b= torch.Tensor([1, 2, 3, 4, 5]).cuda() +b = torch.Tensor([1, 2, 3, 4, 5]).cuda() print(a) print(b) -print(a + b) \ No newline at end of file +print(a + b) +