Skip to content

Commit

Permalink
Use the GenISA for the 2D matrix load of the variant 32b 8x8x2c. (#1427
Browse files Browse the repository at this point in the history
)

The OCL interface is incorrectly lowered to GenISA.

Use the GenISA directly to work around the issue.

#1426
  • Loading branch information
chengjunlu authored Jun 21, 2024
1 parent 9132108 commit 535a3f2
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 3 deletions.
3 changes: 1 addition & 2 deletions test/TritonGEN/tritongen-2Dblockload-to-llvm.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -174,8 +174,7 @@ llvm.func @triton_gen.2Dblockload(%ptr : !llvm.ptr<1>, %base_width : i32, %base_
// -----

// Test case: lowering of a triton_gen.2Dblockload with elem_size_in_bits=32,
// tile_width=8, tile_height=8, v_blocks=2 (transpose and vnni_transform both
// false), producing a vector<8xi32> result.
// NOTE(review): this span is a rendered diff without +/- markers. The two
// CHECK lines naming the intel_sub_group_2d_block_read_32b_8r8x2c builtin
// appear to be the removed expectations, and the GenISA.LSC2DBlockRead.v8i32
// CHECK the added one -- confirm against the actual test file.
llvm.func @triton_gen.2Dblockload(%ptr : !llvm.ptr<1>, %base_width : i32, %base_height : i32, %base_pitch : i32, %x : i32, %y : i32) {
// CHECK: llvm.call spir_funccc @_Z40intel_sub_group_2d_block_read_32b_8r8x2cPU3AS1viiiDv2_iPj(%arg0, %arg1, %arg2, %arg3, {{.*}}, [[DEST:%.*]]) {{.*}} : (!llvm.ptr<1>, i32, i32, i32, vector<2xi32>, !llvm.ptr) -> ()
// CHECK-NEXT: llvm.load [[DEST]] : !llvm.ptr -> vector<8xi32>
// CHECK: llvm.call spir_funccc @llvm.genx.GenISA.LSC2DBlockRead.v8i32
%0 = triton_gen.2Dblockload %ptr, %base_width, %base_height, %base_pitch, %x, %y {elem_size_in_bits=32, tile_width=8, tile_height=8, v_blocks=2, transpose=false, vnni_transform=false, cache_control=Default} : (!llvm.ptr<1>, i32, i32, i32, i32, i32) -> vector<8xi32>
llvm.return
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -216,8 +216,11 @@ static bool isOCLBuiltinAvailable(TritonGEN::Matrix2DBlockLoadOp op) {
// intel_sub_group_2d_block_read_32b_8r8x1c is expected to be lowered to
// llvm.genx.GenISA.LSC2DBlockRead.v4i32, but it is incorrectly lowered to
// llvm.genx.GenISA.LSC2DBlockRead.v8i32.
// intel_sub_group_2d_block_read_32b_8r8x2c is expected to be lowered to
// llvm.genx.GenISA.LSC2DBlockRead.v8i32, but it is incorrectly lowered to
// llvm.genx.GenISA.LSC2DBlockRead.v16i32.
if (op.getElemSizeInBits() == 32 && op.getTileHeight() == 8 &&
op.getTileWidth() == 8 && op.getVBlocks() == 1)
op.getTileWidth() == 8)
return false;

// Missing intel_sub_group_2d_block_read_32b_8r16x1c and
Expand Down

0 comments on commit 535a3f2

Please sign in to comment.