[MLIR][Transform] Hoist Pad generates linalg.transpose (llvm#109669)
For readability purposes, generate linalg named ops when possible.
For maintainability purposes, get rid of duplicated code.
nujaa authored Sep 26, 2024
1 parent c808e66 commit 2803905
Showing 8 changed files with 27 additions and 69 deletions.
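
For context on the readability point: with a transpose vector of [1, 0], the old path emitted a linalg.generic whose permutation is hidden in its indexing maps, while the new path emits a linalg.transpose that states the permutation directly. A minimal before/after sketch; the shapes and SSA names are illustrative, not taken from this commit:

  // Before: transpose written as a generic op; the permutation lives in
  // the indexing maps and the body just forwards the element.
  #map = affine_map<(d0, d1) -> (d1, d0)>
  #id  = affine_map<(d0, d1) -> (d0, d1)>
  %0 = linalg.generic {indexing_maps = [#map, #id],
                       iterator_types = ["parallel", "parallel"]}
      ins(%in : tensor<5x12xf32>) outs(%out : tensor<12x5xf32>) {
    ^bb0(%a: f32, %b: f32):
      linalg.yield %a : f32
  } -> tensor<12x5xf32>

  // After: the named op states the permutation explicitly.
  %1 = linalg.transpose ins(%in : tensor<5x12xf32>)
         outs(%out : tensor<12x5xf32>) permutation = [1, 0]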
12 changes: 6 additions & 6 deletions mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
@@ -549,7 +549,7 @@ namespace detail {
 struct PackingResult {
   SmallVector<OpFoldResult> offsets, sizes, strides;
   SmallVector<Value> clonedLoopIvs, leadingPackedTensorIndexings;
-  GenericOp maybeTransposeOp;
+  TransposeOp maybeTransposeOp;
   tensor::PadOp hoistedPadOp;
 };

@@ -568,9 +568,9 @@ buildPackingLoopNest(RewriterBase &rewriter, tensor::PadOp opToHoist,
 /// a larger tensor. On success, `opToHoist` is replaced by the cloned version
 /// in the packing loop so the caller can continue reasoning about the padding
 /// operation. If `transposeVector` is non-empty, hoist padding introduces a
-/// GenericOp to transpose the padded tensor before inserting it into the packed
-/// tensor. A `transposeVector` can change the storage order of the padded
-/// tensor but does not change the order of the pack or compute loops.
+/// TransposeOp to transpose the padded tensor before inserting it into the
+/// packed tensor. A `transposeVector` can change the storage order of the
+/// padded tensor but does not change the order of the pack or compute loops.
 ///
 /// TODO: In the future, we should consider rewriting as a tensor.pack after
 /// hoisting since this abstraction is now available.
@@ -615,13 +615,13 @@ FailureOr<Value>
 hoistPaddingOnTensors(RewriterBase &rewriter, tensor::PadOp opToHoist,
                       int64_t numLoops, ArrayRef<int64_t> transposeVector,
                       tensor::PadOp &hoistedOp,
-                      SmallVectorImpl<GenericOp> &transposeOps);
+                      SmallVectorImpl<TransposeOp> &transposeOps);
 /// Calls into `hoistPaddingOnTensors` with a local IRRewriter.
 FailureOr<Value>
 hoistPaddingOnTensors(tensor::PadOp opToHoist, int64_t numLoops,
                       ArrayRef<int64_t> transposeVector,
                       tensor::PadOp &hoistedOp,
-                      SmallVectorImpl<GenericOp> &transposeOps);
+                      SmallVectorImpl<TransposeOp> &transposeOps);

 /// Apply padding and hoisting to `linalgOp` according to the configuration
 /// specified in `options`.
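To make the `transposeVector` semantics above concrete, the following sketch reconstructs the packing loop produced for a [1, 0] transpose from the CHECK lines of the tests updated below; the loop bounds, pad amounts, and SSA names are illustrative assumptions:

  %packed = scf.for %i = %c0 to %c5 step %c1
      iter_args(%iter = %init) -> (tensor<5x12x5xf32>) {
    // Pad the tile to a static shape.
    %padded = tensor.pad %tile low[0, 0] high[%h, 0] {
    ^bb0(%a: index, %b: index):
      tensor.yield %cst : f32
    } : tensor<?x12xf32> to tensor<5x12xf32>
    // Store the tile transposed; the pack/compute loop order is unchanged.
    %dest = tensor.extract_slice %iter[%i, 0, 0] [1, 12, 5] [1, 1, 1]
        : tensor<5x12x5xf32> to tensor<12x5xf32>
    %t = linalg.transpose ins(%padded : tensor<5x12xf32>)
        outs(%dest : tensor<12x5xf32>) permutation = [1, 0]
    %r = tensor.insert_slice %t into %iter[%i, 0, 0] [1, 12, 5] [1, 1, 1]
        : tensor<12x5xf32> into tensor<5x12x5xf32>
    scf.yield %r : tensor<5x12x5xf32>
  }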
6 changes: 0 additions & 6 deletions mlir/include/mlir/Dialect/Linalg/Utils/Utils.h
@@ -75,12 +75,6 @@ bool isReductionIterator(utils::IteratorType iteratorType);
 Value makeComposedPadHighOp(OpBuilder &b, Location loc, RankedTensorType type,
                             Value source, Value pad, bool nofold);

-/// Returns a GenericOp that transposes `inputTensor` into `outputTensor`
-/// using `transposeVector` to permute the `inputTensor` dimensions.
-GenericOp makeTransposeOp(OpBuilder &b, Location loc, Value inputTensor,
-                          Value outputTensor,
-                          ArrayRef<int64_t> transposeVector);
-
 /// Returns GenericOp that copies an n-D memref. Unlike the current
 /// implementation of memref::CopyOp, this op can further tile, lower to loops
 /// or vectorize.
@@ -2000,7 +2000,7 @@ transform::HoistPadOp::applyToOne(transform::TransformRewriter &rewriter,
                                   transform::ApplyToEachResultList &results,
                                   transform::TransformState &state) {
   tensor::PadOp hoistedPadOp;
-  SmallVector<GenericOp> transposeOps;
+  SmallVector<TransposeOp> transposeOps;
   FailureOr<Value> result =
       hoistPaddingOnTensors(rewriter, target, getNumLoops(), getTranspose(),
                             hoistedPadOp, transposeOps);
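The applyToOne hook above is driven from the transform dialect. A usage sketch follows; the assembly is modeled on the in-tree hoist_pad tests and should be read as an assumption rather than as syntax verified against this commit:

  // Hoist the pad out of one enclosing loop and store the padded tile
  // transposed in the packed tensor (assumed syntax).
  transform.structured.hoist_pad %pad by 1 loops, transpose by [1, 0]
      : (!transform.any_op) -> !transform.any_op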
25 changes: 12 additions & 13 deletions mlir/lib/Dialect/Linalg/Transforms/HoistPadding.cpp
@@ -633,15 +633,15 @@ static FailureOr<PackingResult> buildPackingLoopNestImpl(
       rewriter.getIndexAttr(1));

   // Step 3. Optionally transpose the padded tensor.
-  GenericOp maybeTransposeOp;
+  TransposeOp maybeTransposeOp;
   Value paddedTensor = bvm.lookup(opToHoist.getResult());
   if (!transposeVector.empty()) {
     Value outputTensor = rewriter.create<tensor::ExtractSliceOp>(
         loc, transposedTensorType, hoistedPackedTensor, offsets, sizes,
         strides);
-    maybeTransposeOp = makeTransposeOp(rewriter, loc, paddedTensor,
-                                       outputTensor, transposeVector);
-    paddedTensor = maybeTransposeOp.getResult(0);
+    maybeTransposeOp = rewriter.create<linalg::TransposeOp>(
+        loc, paddedTensor, outputTensor, transposeVector);
+    paddedTensor = maybeTransposeOp.getResult()[0];
   }

   // Innermost tensor.insert_slice and yields are optional / need loops.
@@ -938,7 +938,7 @@ static Value replaceByPackingResult(RewriterBase &rewriter,
 FailureOr<Value> mlir::linalg::hoistPaddingOnTensors(
     RewriterBase &rewriter, tensor::PadOp opToHoist, int64_t numLoops,
     ArrayRef<int64_t> transposeVector, tensor::PadOp &hoistedOp,
-    SmallVectorImpl<GenericOp> &transposeOps) {
+    SmallVectorImpl<TransposeOp> &transposeOps) {
   LLVM_DEBUG(DBGS() << "\n"; DBGS() << " Try to hoist " << *(opToHoist) << "\n";
              DBGS() << " by " << numLoops << " loops\n");

@@ -980,9 +980,9 @@ FailureOr<Value> mlir::linalg::hoistPaddingOnTensors(
     // Transpose the packed tensor back to the original storage order.
     Value emptyTensor = rewriter.create<tensor::EmptyOp>(
         loc, paddedTensorType.getShape(), paddedTensorType.getElementType());
-    GenericOp unTransposeOp =
-        makeTransposeOp(rewriter, loc, newResult, emptyTensor, transposeVector);
-    newResult = unTransposeOp.getResult(0);
+    TransposeOp unTransposeOp = rewriter.create<linalg::TransposeOp>(
+        loc, newResult, emptyTensor, transposeVector);
+    newResult = unTransposeOp.getResult()[0];
     transposeOps.push_back(unTransposeOp);
   }

@@ -999,11 +999,10 @@ FailureOr<Value> mlir::linalg::hoistPaddingOnTensors(
   return newResult;
 }

-FailureOr<Value>
-mlir::linalg::hoistPaddingOnTensors(tensor::PadOp opToHoist, int64_t numLoops,
-                                    ArrayRef<int64_t> transposeVector,
-                                    tensor::PadOp &hoistedOp,
-                                    SmallVectorImpl<GenericOp> &transposeOps) {
+FailureOr<Value> mlir::linalg::hoistPaddingOnTensors(
+    tensor::PadOp opToHoist, int64_t numLoops,
+    ArrayRef<int64_t> transposeVector, tensor::PadOp &hoistedOp,
+    SmallVectorImpl<TransposeOp> &transposeOps) {
   IRRewriter rewriter(opToHoist.getContext());
   return hoistPaddingOnTensors(rewriter, opToHoist, numLoops, transposeVector,
                                hoistedOp, transposeOps);
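On the consumer side, `hoistPaddingOnTensors` now emits a linalg.transpose to restore the original storage order of each packed tile, matching the CHECK lines of the tests below. A sketch with illustrative SSA names:

  // Extract the transposed tile from the packed tensor and undo the
  // transpose before handing it to the consumer (here a matmul LHS).
  %padded = tensor.extract_slice %packed[%i, 0, 0] [1, 12, 5] [1, 1, 1]
      : tensor<5x12x5xf32> to tensor<12x5xf32>
  %empty = tensor.empty() : tensor<5x12xf32>
  %lhs = linalg.transpose ins(%padded : tensor<12x5xf32>)
      outs(%empty : tensor<5x12xf32>) permutation = [1, 0]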
2 changes: 1 addition & 1 deletion mlir/lib/Dialect/Linalg/Transforms/Padding.cpp
@@ -299,7 +299,7 @@ mlir::linalg::padAndHoistLinalgOp(RewriterBase &rewriter, LinalgOp linalgOp,
   }

   tensor::PadOp hoistedOp;
-  SmallVector<GenericOp> transposeOps;
+  SmallVector<TransposeOp> transposeOps;
   SmallVector<int64_t> transposeVector =
       en.index() < options.transposePaddings.size()
           ? options.transposePaddings[en.index()]
35 changes: 0 additions & 35 deletions mlir/lib/Dialect/Linalg/Utils/Utils.cpp
@@ -249,41 +249,6 @@ Value makeComposedPadHighOp(OpBuilder &b, Location loc, RankedTensorType type,
   return sliceOp.getSource();
 }

-GenericOp makeTransposeOp(OpBuilder &b, Location loc, Value inputTensor,
-                          Value outputTensor,
-                          ArrayRef<int64_t> transposeVector) {
-  auto resultTensorType = cast<RankedTensorType>(outputTensor.getType());
-  Type elementType = resultTensorType.getElementType();
-
-  assert(isPermutationVector(transposeVector) &&
-         "expect transpose vector to be a permutation");
-  assert(transposeVector.size() ==
-             static_cast<size_t>(resultTensorType.getRank()) &&
-         "expect transpose vector size to match result tensor rank");
-
-  // Compute the transpose and the indentity indexing maps.
-  SmallVector<AffineMap> indexingMaps = {
-      inversePermutation(AffineMap::getPermutationMap(
-          SmallVector<unsigned>(transposeVector), b.getContext())),
-      AffineMap::getMultiDimIdentityMap(transposeVector.size(),
-                                        b.getContext())};
-  SmallVector<utils::IteratorType> iteratorTypes(transposeVector.size(),
-                                                 utils::IteratorType::parallel);
-
-  // Create a GenericOp to transpose `inputTensor` into `outputTensor`.
-  auto transposeOp =
-      b.create<GenericOp>(loc, resultTensorType, inputTensor, outputTensor,
-                          indexingMaps, iteratorTypes);
-
-  // Create the body of the transpose operation.
-  OpBuilder::InsertionGuard g(b);
-  Region &body = transposeOp.getRegion();
-  Block *bodyBlock = b.createBlock(&body, /*insertPt=*/{},
-                                   {elementType, elementType}, {loc, loc});
-  b.create<YieldOp>(loc, bodyBlock->getArgument(0));
-  return transposeOp;
-}
-
 GenericOp makeMemRefCopyOp(OpBuilder &b, Location loc, Value from, Value to) {
   auto memrefTypeTo = cast<MemRefType>(to.getType());
 #ifndef NDEBUG
@@ -115,8 +115,8 @@ func.func @pad_and_hoist_lhs_transpose(
 // BUILD-PACKING-LOOP-NEST: %[[PACKED:.*]] = scf.for %{{.*}} -> (tensor<?x12x5xf32>) {
 // BUILD-PACKING-LOOP-NEST:   tensor.pad %{{.*}}
 // BUILD-PACKING-LOOP-NEST:     : tensor<?x12xf32> to tensor<5x12xf32>
-// BUILD-PACKING-LOOP-NEST:   linalg.generic
-// BUILD-PACKING-LOOP-NEST:   -> tensor<12x5xf32>
+// BUILD-PACKING-LOOP-NEST:   linalg.transpose
+// BUILD-PACKING-LOOP-NEST:     ins({{.*}} : tensor<5x12xf32>) outs({{.*}} : tensor<12x5xf32>)
 // BUILD-PACKING-LOOP-NEST: tensor.insert_slice %{{.*}} into %{{.*}}[%{{.*}}, 0, 0] [1, 12, 5] [1, 1, 1]
 // BUILD-PACKING-LOOP-NEST-SAME: : tensor<12x5xf32> into tensor<?x12x5xf32>
 // BUILD-PACKING-LOOP-NEST: scf.for %{{.*}} -> (tensor<24x25xf32>)
10 changes: 5 additions & 5 deletions mlir/test/Dialect/Linalg/transform-op-hoist-pad.mlir
@@ -123,17 +123,17 @@ func.func @pad_and_hoist_lhs_transpose(
     -> tensor<24x25xf32>
 {
   //      CHECK: %[[PACKED:.*]] = scf.for %{{.*}} -> (tensor<5x12x5xf32>) {
-  //      CHECK:   tensor.pad %{{.*}}
+  //      CHECK:   %[[PAD:.*]] = tensor.pad %{{.*}}
   //      CHECK:     : tensor<?x12xf32> to tensor<5x12xf32>
-  //      CHECK:   linalg.generic
-  //      CHECK:   -> tensor<12x5xf32>
+  //      CHECK:   linalg.transpose
+  //      CHECK:     ins(%[[PAD]] : tensor<5x12xf32>) outs(%{{.*}} : tensor<12x5xf32>)
   //      CHECK:   tensor.insert_slice %{{.*}} into %{{.*}}[%{{.*}}, 0, 0] [1, 12, 5] [1, 1, 1]
   // CHECK-SAME:     : tensor<12x5xf32> into tensor<5x12x5xf32>
   //      CHECK: scf.for %{{.*}} -> (tensor<24x25xf32>) {
   //      CHECK:   %[[PADDED:.*]] = tensor.extract_slice %[[PACKED]][%{{.*}}, 0, 0] [1, 12, 5] [1, 1, 1]
   // CHECK-SAME:     : tensor<5x12x5xf32> to tensor<12x5xf32>
-  //      CHECK:   %[[TRANSPOSED:.*]] = linalg.generic
-  //      CHECK:   -> tensor<5x12xf32>
+  //      CHECK:   %[[TRANSPOSED:.*]] = linalg.transpose ins(%[[PADDED]] : tensor<12x5xf32>)
+  //      CHECK:     outs(%{{.*}} : tensor<5x12xf32>
   //      CHECK:   linalg.matmul ins(%[[TRANSPOSED]]
   %0 = linalg.matmul ins(%arg0, %arg1 : tensor<24x12xf32>, tensor<12x25xf32>) outs(%arg2 : tensor<24x25xf32>) -> tensor<24x25xf32>
   func.return %0 : tensor<24x25xf32>
