Set up NSight profiling permissions and stream ncu output to the job log.

NOTE(review): the original patch text had lost its line breaks and leading
whitespace. Indentation below is reconstructed assuming steps at 4 spaces and
script bodies at 8 -- confirm against the target file before applying.
NOTE(review): the post-image blob id on the "index" line predates the review
edits (env-based input passing, added comments) and is informational only.

diff --git a/.github/workflows/nvidia_workflow.yml b/.github/workflows/nvidia_workflow.yml
index 6cdc913..8fd6744 100644
--- a/.github/workflows/nvidia_workflow.yml
+++ b/.github/workflows/nvidia_workflow.yml
@@ -45,13 +45,29 @@ jobs:
           echo "Triton detected, installing triton"
           pip install triton
         fi
+
+    # Relax profiling restrictions on the runner before the profiling step.
+    - name: Set up NSight permissions
+      run: |
+        # Lift the kernel's perf_event access restrictions (-1 is the most permissive level).
+        sudo sh -c 'echo -1 >/proc/sys/kernel/perf_event_paranoid'
+        # Driver option that stops restricting GPU profiling to admin users.
+        sudo sh -c 'echo "options nvidia NVreg_RestrictProfilingToAdminUsers=0" > /etc/modprobe.d/nvidia.conf'
+        # Reload the nvidia kernel modules so the option written above is picked up.
+        sudo modprobe -r nvidia_uvm nvidia_drm nvidia_modeset nvidia
+        sudo modprobe nvidia
+
     - name: Run script with NSight Compute profiling
+      env:
+        # Hand the workflow input to the shell as data; interpolating it into
+        # the script text would let a crafted value inject shell commands.
+        SCRIPT_FILE: ${{ github.event.inputs.filename }}
       run: |
         # First run normally and capture output
-        python "${{ github.event.inputs.filename }}" > training.log 2>&1
+        python "$SCRIPT_FILE" > training.log 2>&1
 
         # Then run with NSight Compute profiling
-        ncu --set full --export ncu_profile $(which python) "${{ github.event.inputs.filename }}" > ncu.log 2>&1
+        ncu --set full --export ncu_profile $(which python) "$SCRIPT_FILE"
 
     - name: Upload training artifacts
       uses: actions/upload-artifact@v3