From 76953d484b5e191e0254a1e425ea6a7bb379e605 Mon Sep 17 00:00:00 2001 From: FindHao Date: Thu, 12 Dec 2024 13:44:44 -0800 Subject: [PATCH] use nvtx.range_start --- tritonbench/components/ncu/__init__.py | 6 +++--- tritonbench/utils/triton_op.py | 3 ++- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/tritonbench/components/ncu/__init__.py b/tritonbench/components/ncu/__init__.py index f3b6a8c4..2d64aa6c 100644 --- a/tritonbench/components/ncu/__init__.py +++ b/tritonbench/components/ncu/__init__.py @@ -65,7 +65,7 @@ def do_bench_in_task( x.grad = None # we clear the L2 cache before run cache.zero_() - with cuda_profiler_range(use_cuda_profiler_range), torch.cuda.nvtx.range( - range_name - ): + with cuda_profiler_range(use_cuda_profiler_range): + nvtx_range_id = torch.cuda.nvtx.range_start(range_name) fn() + torch.cuda.nvtx.range_end(nvtx_range_id) diff --git a/tritonbench/utils/triton_op.py b/tritonbench/utils/triton_op.py index 78fef3b7..b0bc5022 100644 --- a/tritonbench/utils/triton_op.py +++ b/tritonbench/utils/triton_op.py @@ -1489,7 +1489,8 @@ def service_exists(service_name): "ncu", "--nvtx", "--nvtx-include", - f"{_RANGE_NAME}/", + # This range is created with range_start/range_end, so the name has no trailing '/'. + f"{_RANGE_NAME}", "--target-processes", "all", "--import-source",