Skip to content

Commit

Permalink
add nsys_gpu_speedup; fix kernel_durations
Browse files Browse the repository at this point in the history
  • Loading branch information
FindHao committed Nov 27, 2024
1 parent 69a463f commit 5cb7c02
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 3 deletions.
4 changes: 3 additions & 1 deletion tritonbench/components/ncu/nsys_analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,9 @@ def read_nsys_report(

# Define mapping of metrics to their values. The keys must be in nsys_bench_metrics.
metrics_map = {
"nsys_kernel_durations": kernel_duration,
# tritonbench reduces numeric metric values to their median, so each duration is
# converted to a string to keep the full per-kernel list from being aggregated.
"nsys_kernel_durations": [str(duration) for duration in kernel_duration],
"nsys_kernel_names": kernel_names,
"nsys_gpu_kernel_sum": sum_kernel_duration,
"nsys_nvtx_range_duration": nvtx_range_duration,
Expand Down
19 changes: 17 additions & 2 deletions tritonbench/utils/triton_op.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,12 @@ class BenchmarkOperatorBackend:
REGISTERED_METRICS: Dict[str, List[str]] = {}
REGISTERED_X_VALS: Dict[str, str] = {}
BASELINE_BENCHMARKS: Dict[str, str] = {}
BASELINE_SKIP_METRICS = {"speedup", "accuracy", "mem_footprint_compression_ratio"}
BASELINE_SKIP_METRICS = {
"speedup",
"accuracy",
"mem_footprint_compression_ratio",
"nsys_gpu_speedup",
}
X_ONLY_METRICS = set(["hw_roofline"])
PRECISION_DTYPE_MAPPING = {
"fp32": torch.float32,
Expand Down Expand Up @@ -1094,7 +1099,17 @@ def _init_extra_metrics() -> Dict[str, Any]:
)
for metric_name, metric_value in nsys_analyzer_results.items():
metrics.extra_metrics[metric_name] = metric_value

if "nsys_gpu_speedup" in self.required_metrics:
metrics.nsys_gpu_speedup = (
self.baseline_metrics.nsys_gpu_kernel_sum
/ metrics.nsys_gpu_kernel_sum
if (
self.baseline_metrics
and self.baseline_metrics.nsys_gpu_kernel_sum
)
and metrics.nsys_gpu_kernel_sum
else None
)
if "kineto_trace" in self.required_metrics:
metrics.kineto_trace = self.kineto_trace(input_id, fn)
if "best_config" in self.required_metrics:
Expand Down

0 comments on commit 5cb7c02

Please sign in to comment.