diff --git a/test/test_gpu/skip_tests_h100_pytorch.yaml b/test/test_gpu/skip_tests_h100_pytorch.yaml
index 6a221d97..017c899a 100644
--- a/test/test_gpu/skip_tests_h100_pytorch.yaml
+++ b/test/test_gpu/skip_tests_h100_pytorch.yaml
@@ -1,9 +1,11 @@
 # Tests we skip in OSS CI
 # This file is regarding to the Triton version bundled with pytorch
-# Use to skip an entire operator
-# Use to skip an impl
+# Use to skip an entire operator
+# Use to skip an impl
 bf16xint16_gemm:
   - bf16xint16
+flash_attention:
+  - triton_tutorial_flash_v2_tma
 fp8_attention:
   - colfax_fmha
 fp8_fused_quant_gemm_rowwise:
diff --git a/tritonbench/operators/ragged_attention/operator.py b/tritonbench/operators/ragged_attention/operator.py
index 770dc59d..72838636 100644
--- a/tritonbench/operators/ragged_attention/operator.py
+++ b/tritonbench/operators/ragged_attention/operator.py
@@ -29,7 +29,7 @@ def __init__(
         self.max_seq_len = 2**args.max_seq_len_log2
         self.num_buckets = args.num_buckets
         # set a default number of inputs
-        self._num_inputs = 10
+        self._num_inputs = 10 if self._num_inputs is None else self._num_inputs

     @register_benchmark()
     def hstu_triton_ragged_attention(self, qkv, seq_offsets, timestamps):
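
Note: the operator.py hunk above turns the hard-coded default into a fallback that only applies when nothing set _num_inputs earlier (for example, the benchmark harness or a command-line option handled upstream). Below is a minimal sketch of that pattern; the base class here is a stand-in for illustration, not the real tritonbench BenchmarkOperator.

# sketch_num_inputs_fallback.py
class StandInBenchmarkBase:
    def __init__(self, num_inputs=None):
        # Upstream code (CLI flag, test harness, etc.) may have already
        # chosen how many inputs to run; None means "not specified".
        self._num_inputs = num_inputs


class RaggedAttentionLike(StandInBenchmarkBase):
    def __init__(self, num_inputs=None):
        super().__init__(num_inputs)
        # Only fall back to the default of 10 when nothing was set upstream,
        # instead of unconditionally overwriting a user-provided value.
        self._num_inputs = 10 if self._num_inputs is None else self._num_inputs


if __name__ == "__main__":
    print(RaggedAttentionLike()._num_inputs)              # 10: default applied
    print(RaggedAttentionLike(num_inputs=3)._num_inputs)  # 3: user value preserved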