Skip to content

Commit

Permalink
make Xe/GPU stuff compile (not necessarily work) with newer llvm
Browse files Browse the repository at this point in the history
  • Loading branch information
fschlimb committed Nov 12, 2024
1 parent d64770a commit 110a91e
Show file tree
Hide file tree
Showing 8 changed files with 37 additions and 37 deletions.
4 changes: 2 additions & 2 deletions include/imex/Utils/XeCommon.h
Original file line number Diff line number Diff line change
Expand Up @@ -439,10 +439,10 @@ template <typename OpType> unsigned encodeCacheHint(OpType op) {
}
return cacheHint;
}
class XeTypeConverter : public mlir::OneToNTypeConverter {
class XeTypeConverter : public mlir::TypeConverter {
public:
// friend class XeConversionPattern;
using mlir::OneToNTypeConverter::convertType;
using mlir::TypeConverter::convertType;

XeTypeConverter(mlir::MLIRContext &context) {
addConversion([&](xetile::TileType tileTy,
Expand Down
2 changes: 1 addition & 1 deletion lib/Conversion/XeGPUToVC/LSCPatterns.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -840,7 +840,7 @@ class LoadNdPattern : public OpConversionPattern<LoadNdOp> {
// TODO: remove this after moving transposeBitWidth into a standalone
pass. update the width and pitch of the payload when transposeBitWidth
// is set, and larger than the element bit width.
auto transposeBitWidth = op.getTransposeBitWidth().value_or(0);
auto transposeBitWidth = 32; //op.getTransposeBitWidth().value_or(0);
auto factor = transposeBitWidth / elemTy.getIntOrFloatBitWidth();
if (factor > 1) {
// update the block offset X of the payload, since it is in unit of
Expand Down
44 changes: 22 additions & 22 deletions lib/Conversion/XeGPUToVC/XeGPUToVC.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ using mlir::scf::ForOp;
using mlir::scf::YieldOp;
using mlir::vector::ShapeCastOp;
using mlir::xegpu::AllocNbarrierOp;
using mlir::xegpu::CompileHintOp;
// using mlir::xegpu::CompileHintOp;
using mlir::xegpu::CreateDescOp;
using mlir::xegpu::CreateNdDescOp;
using mlir::xegpu::InitNbarrierOp;
Expand Down Expand Up @@ -592,13 +592,13 @@ class AllocNbarrierPattern : public OpConversionPattern<AllocNbarrierOp> {
OpBuilder::InsertionGuard guard(rewriter);
auto func = op->getParentOfType<gpu::GPUFuncOp>();
rewriter.setInsertionPointAfter(func);
auto executionModeAttr = spirv::ExecutionModeAttr::get(
rewriter.getContext(), spirv::ExecutionMode::NamedBarrierCountINTEL);
// auto executionModeAttr = spirv::ExecutionModeAttr::get(
// rewriter.getContext(), spirv::ExecutionMode::NamedBarrierCountINTEL);

auto execModeFuncAttr = spirv::ExecutionModeFuncAttributeAttr::get(
rewriter.getContext(), executionModeAttr, op.getNbarrierNum());
// auto execModeFuncAttr = spirv::ExecutionModeFuncAttributeAttr::get(
// rewriter.getContext(), executionModeAttr, op.getNbarrierNum());

func->setAttr("spirv.execution_mode", execModeFuncAttr);
// func->setAttr("spirv.execution_mode", execModeFuncAttr);

rewriter.eraseOp(op);
return success();
Expand Down Expand Up @@ -754,24 +754,24 @@ class NbarrierWaitPattern : public OpConversionPattern<NbarrierWaitOp> {
}
};

class CompilerHintPattern : public OpConversionPattern<CompileHintOp> {
public:
using OpConversionPattern<CompileHintOp>::OpConversionPattern;
// class CompilerHintPattern : public OpConversionPattern<CompileHintOp> {
// public:
// using OpConversionPattern<CompileHintOp>::OpConversionPattern;

LogicalResult
matchAndRewrite(CompileHintOp op, OpAdaptor adaptor,
ConversionPatternRewriter &rewriter) const override {
auto loc = op.getLoc();
// LogicalResult
// matchAndRewrite(CompileHintOp op, OpAdaptor adaptor,
// ConversionPatternRewriter &rewriter) const override {
// auto loc = op.getLoc();

std::string funcName = "llvm.genx.fence";
Value fence_flag = i8_val(-128);
SmallVector<Value> args{fence_flag};
// std::string funcName = "llvm.genx.fence";
// Value fence_flag = i8_val(-128);
// SmallVector<Value> args{fence_flag};

createFuncCall(rewriter, loc, funcName, TypeRange{}, args, false);
rewriter.eraseOp(op);
return success();
}
};
// createFuncCall(rewriter, loc, funcName, TypeRange{}, args, false);
// rewriter.eraseOp(op);
// return success();
// }
// };

static bool isGenericVectorTy(Type type) {
if (isa<spirv::ScalarType>(type))
Expand Down Expand Up @@ -952,7 +952,7 @@ struct XeGPUToVCPass : public imex::impl::ConvertXeGPUToVCBase<XeGPUToVCPass> {
patterns.getContext());

// Ops to llvm.genx only Patterns
patterns.add<NbarrierWaitPattern, CompilerHintPattern,
patterns.add<NbarrierWaitPattern, // CompilerHintPattern,
ElementwiseToVCPattern<arith::MaximumFOp>, DpasPattern,
NbarrierArrivePattern>(patterns.getContext());

Expand Down
2 changes: 1 addition & 1 deletion lib/Conversion/XeTileToXeGPU/XeTileOpConversion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -624,7 +624,7 @@ struct SgLoadTileOpPattern : public XeOneToNConversion<xetile::LoadTileOp> {

auto vectorTy = mlir::VectorType::get(shape, elemTy);
auto ldOp = rewriter.create<mlir::xegpu::LoadNdOp>(
op.getLoc(), vectorTy, src, nullptr, nullptr, nullptr, L1, L2, L3);
op.getLoc(), vectorTy, src, nullptr, nullptr, /*nullptr,*/ L1, L2, L3);
if (array_length == 1) {
xegpuOps.push_back(ldOp);
} else {
Expand Down
4 changes: 2 additions & 2 deletions lib/Conversion/XeTileToXeGPU/XeTileToXeGPUConversion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ XeOneToNTypeConverter::XeOneToNTypeConverter(mlir::MLIRContext &context)
addArgumentMaterialization(
[&](mlir::OpBuilder &builder, mlir::Type resultType,
mlir::ValueRange inputs,
mlir::Location loc) -> std::optional<mlir::Value> {
mlir::Location loc) -> mlir::Value {
return builder
.create<mlir::UnrealizedConversionCastOp>(loc, resultType, inputs)
.getResult(0);
Expand All @@ -101,7 +101,7 @@ XeOneToNTypeConverter::XeOneToNTypeConverter(mlir::MLIRContext &context)
addSourceMaterialization(
[&](mlir::OpBuilder &builder, mlir::Type resultType,
mlir::ValueRange inputs,
mlir::Location loc) -> std::optional<mlir::Value> {
mlir::Location loc) -> mlir::Value {
return builder
.create<mlir::UnrealizedConversionCastOp>(loc, resultType, inputs)
.getResult(0);
Expand Down
2 changes: 1 addition & 1 deletion lib/Dialect/XeTile/Transforms/Canonicalization.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -396,7 +396,7 @@ struct XeTileCanonicalizationPass final
mlir::ValueRange inputs, mlir::Location loc) {
auto cast =
builder.create<mlir::UnrealizedConversionCastOp>(loc, type, inputs);
return std::optional<mlir::Value>(cast.getResult(0));
return cast.getResult(0);
};
typeConverter.addConversion([](mlir::Type type) { return type; });
typeConverter.addConversion([](imex::xetile::TileType tileTy) {
Expand Down
12 changes: 6 additions & 6 deletions lib/Transforms/OptimizeTranspose.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -315,7 +315,7 @@ static mlir::Value createBlockLoad(mlir::TypedValue<mlir::MemRefType> slm,
loc, tdescTy, slm, llvm::ArrayRef<mlir::OpFoldResult>({offset}));
mlir::Value value = rewriter.create<mlir::xegpu::LoadNdOp>(
loc, loadTy, tdesc, nullptr /*packed*/, nullptr /*transpose*/,
nullptr /*transpose_bit_width*/, nullptr /*l1_hint*/,
/* nullptr transpose_bit_width,*/ nullptr /*l1_hint*/,
nullptr /*l2_hint*/, nullptr /*l3_hint*/);
// if original data is not 32-bit, need to bitcast current 32-bit data
// back to original element type.
Expand Down Expand Up @@ -525,12 +525,12 @@ struct TransposeRewritePattern
auto packedAttr = mlir::UnitAttr(); // empty packed attribute.
auto transposeAttr =
mlir::DenseI64ArrayAttr::get(rewriter.getContext(), {1, 0});
auto transposeBitWidthAttr = mlir::IntegerAttr::get(
rewriter.getIntegerType(32),
32); // need to do a 32 bit transpose to get the packed layout.
// auto transposeBitWidthAttr = mlir::IntegerAttr::get(
// rewriter.getIntegerType(32),
// 32); // need to do a 32 bit transpose to get the packed layout.
auto newLoadOp = rewriter.create<mlir::xegpu::LoadNdOp>(
loadOp.getLoc(), newVectorTy, loadOp.getTensorDesc(), packedAttr,
transposeAttr, transposeBitWidthAttr, loadOp.getL1HintAttr(),
transposeAttr, /*transposeBitWidthAttr,*/ loadOp.getL1HintAttr(),
loadOp.getL2HintAttr(), loadOp.getL3HintAttr());
// Replace the uses of the packed layout conversion with new load.
rewriter.replaceAllUsesWith(packedLayoutOps.back()->getResult(0),
Expand All @@ -554,7 +554,7 @@ struct TransposeRewritePattern
mlir::DenseI64ArrayAttr::get(rewriter.getContext(), {1, 0});
auto newLoadOp = rewriter.create<mlir::xegpu::LoadNdOp>(
loadOp.getLoc(), newVectorTy, loadOp.getTensorDesc(), packedAttr,
transposeAttr, mlir::IntegerAttr(), loadOp.getL1HintAttr(),
transposeAttr, /*mlir::IntegerAttr(),*/ loadOp.getL1HintAttr(),
loadOp.getL2HintAttr(), loadOp.getL3HintAttr());
rewriter.replaceAllUsesWith(op.getResult(), newLoadOp.getResult());
}
Expand Down
4 changes: 2 additions & 2 deletions lib/Transforms/SetSPIRVCapabilities.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ struct SetSPIRVCapabilitiesPass
// clang-format off
spirv::Capability::Addresses,
spirv::Capability::Bfloat16ConversionINTEL,
spirv::Capability::BFloat16TypeKHR,
// spirv::Capability::BFloat16TypeKHR,
spirv::Capability::Float16Buffer,
spirv::Capability::Int64,
spirv::Capability::Int16,
Expand All @@ -81,7 +81,7 @@ struct SetSPIRVCapabilitiesPass
spirv::Extension exts_opencl[] = {
// clang-format off
spirv::Extension::SPV_EXT_shader_atomic_float_add,
spirv::Extension::SPV_KHR_bfloat16,
// spirv::Extension::SPV_KHR_bfloat16,
spirv::Extension::SPV_KHR_expect_assume,
spirv::Extension::SPV_INTEL_bfloat16_conversion,
spirv::Extension::SPV_INTEL_vector_compute
Expand Down

0 comments on commit 110a91e

Please sign in to comment.