Skip to content

Commit

Permalink
[XLA:GPU] Introduce EmitterLocOpBuilder that could annotate the mlir …
Browse files Browse the repository at this point in the history
…with the file:line annotations that are visible in the triton dump

During troubleshooting sessions it is sometimes hard to find the emitter code that emitted a particular instruction. It makes sense to instrument the emitter code and annotate the generated code with file:line info. The code that emits and dumps the annotations is guarded by the --xla_gpu_unsupported_annotate_with_emitter_loc flag.

PiperOrigin-RevId: 697525160
  • Loading branch information
loislo authored and Google-ML-Automation committed Dec 11, 2024
1 parent 03ee8e3 commit 27dae48
Show file tree
Hide file tree
Showing 19 changed files with 773 additions and 163 deletions.
10 changes: 10 additions & 0 deletions xla/debug_options_flags.cc
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ DebugOptions DefaultDebugOptionsIgnoringFlags() {
opts.set_xla_dump_hlo_as_long_text(false);
opts.set_xla_dump_large_constants(false);
opts.set_xla_dump_enable_mlir_pretty_form(true);
opts.set_xla_gpu_unsupported_annotate_with_emitter_loc(false);
opts.set_xla_debug_buffer_assignment_show_max(15);
#ifdef ENABLE_MKL
opts.set_xla_cpu_use_mkl_dnn(true);
Expand Down Expand Up @@ -994,6 +995,15 @@ void MakeDebugOptionsFlags(std::vector<tsl::Flag>* flag_list,
"and \"test_undeclared_outputs_dir\" have a special meaning: They cause "
"us to dump into the directory specified by the environment variable "
"TEST_UNDECLARED_OUTPUTS_DIR."));
flag_list->push_back(tsl::Flag(
"xla_gpu_unsupported_annotate_with_emitter_loc",
bool_setter_for(
&DebugOptions::set_xla_gpu_unsupported_annotate_with_emitter_loc),
debug_options->xla_gpu_unsupported_annotate_with_emitter_loc(),
"Forces emitters that use MLIR to annotate all the created MLIR "
"instructions with the emitter's C++ source file and line number. The "
"annotations should appear in the MLIR dumps. The emitters should use "
"EmitterLocOpBuilder for that."));
flag_list->push_back(tsl::Flag(
"xla_dump_hlo_as_text",
bool_setter_for(&DebugOptions::set_xla_dump_hlo_as_text),
Expand Down
32 changes: 32 additions & 0 deletions xla/service/gpu/fusions/BUILD
Original file line number Diff line number Diff line change
@@ -1,13 +1,45 @@
load("@local_config_rocm//rocm:build_defs.bzl", "if_rocm_is_configured")
load("//xla:xla.bzl", "xla_cc_test")
load("//xla/tests:build_defs.bzl", "xla_test")
load("//xla/tsl:tsl.bzl", "if_google")
load("//xla/tsl/platform/default:cuda_build_defs.bzl", "if_cuda_is_configured")

package(
# copybara:uncomment default_applicable_licenses = ["//tensorflow:license"],
licenses = ["notice"],
)

# Library target for EmitterLocOpBuilder, built from emitter_loc_op_builder.cc
# and emitter_loc_op_builder.h.
# Visibility is limited to //xla/service/gpu/fusions and its subpackages.
# The absl source_location dependency is appended through if_google();
# presumably that list is only used for Google-internal builds -- confirm
# against the if_google definition in //xla/tsl:tsl.bzl.
cc_library(
name = "emitter_loc_op_builder",
srcs = ["emitter_loc_op_builder.cc"],
hdrs = ["emitter_loc_op_builder.h"],
visibility = ["//xla/service/gpu/fusions:__subpackages__"],
deps = [
"@com_google_absl//absl/strings",
"@llvm-project//mlir:IR",
"@llvm-project//mlir:Support",
"@tsl//tsl/platform",
] + if_google(["@com_google_absl//absl/types:source_location"]),
)

# Test target for :emitter_loc_op_builder, built from
# emitter_loc_op_builder_test.cc. The only backend listed is "gpu".
xla_test(
name = "emitter_loc_op_builder_test",
srcs = ["emitter_loc_op_builder_test.cc"],
backends = ["gpu"],
deps = [
":emitter_loc_op_builder",
"//xla/hlo/testlib:filecheck",
"//xla/service/gpu/fusions/triton:triton_fusion_emitter",
"//xla/service/llvm_ir:llvm_util",
"//xla/tests:xla_internal_test_main",
"@com_google_absl//absl/strings:string_view",
"@llvm-project//mlir:ArithDialect",
"@llvm-project//mlir:IR",
"@tsl//tsl/platform:status_matchers",
"@tsl//tsl/platform:test",
],
)

cc_library(
name = "in_place_dynamic_update_slice_mlir",
srcs = ["in_place_dynamic_update_slice_mlir.cc"],
Expand Down
77 changes: 77 additions & 0 deletions xla/service/gpu/fusions/emitter_loc_op_builder.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
/* Copyright 2024 The OpenXLA Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "xla/service/gpu/fusions/emitter_loc_op_builder.h"

#include <algorithm>
#include <cstddef>
#include <string>
#include <vector>

#include "absl/strings/str_cat.h"
#include "absl/strings/str_join.h"
#include "absl/strings/str_split.h"
#include "absl/strings/string_view.h"
#include "mlir/IR/BuiltinAttributes.h"
#include "mlir/IR/Location.h"
#include "mlir/Support/LLVM.h"

namespace xla::gpu {

// Aligns the annotations to the Nth character of the lines.
constexpr size_t kAnnotationPadding = 100ul;

// Re-formats an MLIR dump so that the quoted annotation of every annotated
// line starts at column kAnnotationPadding.
//
// A line is treated as annotated when splitting it on '"' yields exactly three
// pieces, i.e. it has the shape
//   x = instruction(y, z) "annotation"
// Every other line is copied to the output unchanged. Text that is already
// wider than kAnnotationPadding gets no extra padding.
/* static */ std::string EmitterLocOpBuilder::FormatTritonIrWithAnnotations(
    absl::string_view mlir_ir) {
  constexpr size_t kAnnotatedLinePieces = 3;
  std::vector<std::string> output_lines;
  for (const absl::string_view ir_line : absl::StrSplit(mlir_ir, '\n')) {
    const std::vector<std::string> pieces = absl::StrSplit(ir_line, '"');
    if (pieces.size() == kAnnotatedLinePieces) {
      const std::string& instruction = pieces[0];
      // Number of spaces needed to push the annotation out to the target
      // column; zero when the instruction text already reaches past it.
      const size_t fill = instruction.size() < kAnnotationPadding
                              ? kAnnotationPadding - instruction.size()
                              : 0;
      output_lines.push_back(absl::StrCat(instruction, std::string(fill, ' '),
                                          "\"", pieces[1], "\"", pieces[2]));
    } else {
      // Not an annotated instruction line: keep it as is.
      output_lines.emplace_back(ir_line);
    }
  }
  return absl::StrJoin(output_lines, "\n");
}

// Builds the MLIR location for an operation created at `location`.
//
// When annotation is disabled, or the source location carries no line number
// (line() == 0), the builder's current location is returned untouched.
// Otherwise the result is a NameLoc whose name is
//   "<name of current_loc_ if it is a NameLoc> -> <file basename>:<line>".
mlir::Location EmitterLocOpBuilder::Loc(
    EmitterLocOpBuilder::SourceLocation location) const {
  const bool can_annotate = annotate_loc_ && location.line() != 0;
  if (!can_annotate) {
    return current_loc_;
  }

  // Start from the name accumulated so far (empty unless current_loc_ is a
  // NameLoc), then append this call site.
  std::string annotation;
  if (auto named_loc = mlir::dyn_cast<mlir::NameLoc>(current_loc_)) {
    annotation = named_loc.getName().str();
  }

  // Only the last path component of the file name is kept.
  const std::vector<std::string> path_components =
      absl::StrSplit(location.file_name(), '/');
  absl::StrAppend(&annotation, " -> ", path_components.back(), ":",
                  location.line());
  return mlir::NameLoc::get(mlir::StringAttr::get(getContext(), annotation));
}

} // namespace xla::gpu
210 changes: 210 additions & 0 deletions xla/service/gpu/fusions/emitter_loc_op_builder.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,210 @@
/* Copyright 2024 The OpenXLA Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef XLA_SERVICE_GPU_FUSIONS_EMITTER_LOC_OP_BUILDER_H_
#define XLA_SERVICE_GPU_FUSIONS_EMITTER_LOC_OP_BUILDER_H_

#include <string>

#include "absl/strings/string_view.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/ImplicitLocOpBuilder.h"
#include "mlir/IR/Location.h"
#include "mlir/IR/MLIRContext.h"
#include "tsl/platform/platform.h"

#if defined(PLATFORM_GOOGLE)
// The source_location.h is not available in open source.
#include "absl/types/source_location.h"
#else
#include <string_view>
#endif

namespace xla::gpu {

// The builder that could add the NameLoc attribute to the newly created
// operations and fills this attribute with the SourceLocation(file:line) of the
// create<OpTy>(...) calls. The location info will be added to the current_loc_
// location that the builder got through the constructor. The copy constructor
// also remembers the source location where the copy was created.
//
// Why: it is useful for tracking down the emitter file and line from the
// generated MLIR.
//
// How:
// 1. create<OpTy>(...) functions have absl::SourceLocation as the last
// argument with the default value of SourceLocation::current(). Every time they
// construct a new NameLoc attribute that contains the string from the
// current_loc_ and file:line from the source location parameter.
//
// 2. The copy constructor also gets the source location as the argument and
// remembers it in the current_loc_ as a join of the original current_loc_ and
// the place where the copy was created.
class EmitterLocOpBuilder : public mlir::ImplicitLocOpBuilder {
 public:
  // TODO(b/382419919): Remove ifdefs once we have absl::SourceLocation in absl
  // OSS builds.
#if defined(PLATFORM_GOOGLE)
  using SourceLocation = absl::SourceLocation;
  constexpr static bool kSourceLocationSupported = true;
#else
  // Mimicking absl::SourceLocation and doing nothing. It always reports an
  // empty file name and line 0; Loc() treats line 0 as "no source location"
  // and returns the current location unchanged.
  class FakeSourceLocation {
   public:
    static FakeSourceLocation current() { return FakeSourceLocation(); }
    std::string_view file_name() const { return ""; }
    int line() const { return 0; }
  };
  using SourceLocation = FakeSourceLocation;
  constexpr static bool kSourceLocationSupported = false;
#endif

  // Constructor that takes the op builder and a flag indicating whether to
  // annotate the location of the operations. The builder state and its
  // location are copied from `op_builder`.
  EmitterLocOpBuilder(mlir::ImplicitLocOpBuilder& op_builder, bool annotate_loc)
      : mlir::ImplicitLocOpBuilder(op_builder),
        current_loc_(op_builder.getLoc()),
        annotate_loc_(annotate_loc) {}

  // A few constructors below that could be used when we replace the
  // mlir::ImplicitLocOpBuilder and mlir::OpBuilder one by one.
  // The intent is to use EmitterLocOpBuilder everywhere in the emitters.

  // The constructor that should be used instead of mlir::ImplicitLocOpBuilder
  // when an mlir::OpBuilder is available.
  EmitterLocOpBuilder(mlir::Location loc, mlir::OpBuilder& op_builder,
                      bool annotate_loc = false)
      : mlir::ImplicitLocOpBuilder(loc, op_builder),
        current_loc_(loc),
        annotate_loc_(annotate_loc) {}

  // The constructor that should be used instead of mlir::ImplicitLocOpBuilder
  // when only an MLIRContext is available.
  EmitterLocOpBuilder(mlir::Location loc, mlir::MLIRContext* mlir_context,
                      bool annotate_loc = false)
      : mlir::ImplicitLocOpBuilder(loc, mlir_context),
        current_loc_(loc),
        annotate_loc_(annotate_loc) {}

  // Constructor that should be used instead of mlir::OpBuilder.
  //
  // No location is supplied by the caller, so the builder starts from an
  // unknown location; when annotation is enabled and a real source location is
  // available, the starting location is the NameLoc of the construction site.
  //
  // The starting location is computed by the static InitialLoc() helper rather
  // than by calling Loc() directly: Loc() reads current_loc_ and annotate_loc_,
  // which are not initialized yet while the base class is being constructed.
  explicit EmitterLocOpBuilder(
      mlir::MLIRContext* mlir_context, bool annotate_loc = false,
      SourceLocation location = SourceLocation::current())
      : mlir::ImplicitLocOpBuilder(
            InitialLoc(mlir_context, annotate_loc, location), mlir_context),
        // The base class is fully initialized here, so getLoc() is safe and
        // returns the location computed above.
        current_loc_(getLoc()),
        annotate_loc_(annotate_loc) {}

  EmitterLocOpBuilder& operator=(const EmitterLocOpBuilder&) = delete;

  // Copy constructor that also remembers the source location where the copy
  // was created. If the helper functions that gets the builder as the argument
  // receives the argument by value then the current location points to the
  // place where the copy was created.
  EmitterLocOpBuilder(const EmitterLocOpBuilder& builder,
                      SourceLocation location = SourceLocation::current())
      : mlir::ImplicitLocOpBuilder(builder),
        current_loc_(builder.Loc(location)),
        annotate_loc_(builder.annotate_loc_) {}

  // Helper function to create a location from a source location. Returns
  // current_loc() unchanged when annotation is disabled or `location` has no
  // line information.
  mlir::Location Loc(SourceLocation location) const;

  // Formats the MLIR IR with annotations to make it easier to read.
  static std::string FormatTritonIrWithAnnotations(absl::string_view mlir_ir);

  // Below is the set of create() methods that are used to create operations.
  // These are all templated to allow for the creation of operations with
  // different numbers of arguments. Each overload forwards its arguments to
  // OpBuilder::create<OpTy>() and passes Loc(location) as the operation
  // location, where `location` defaults to the call site.
  //
  // For some reason the version of create that accepts the variadic arguments
  // and a source location with the default value does not work.

  // Creates an operation with the given type and no arguments.
  template <typename OpTy>
  OpTy create(SourceLocation location = SourceLocation::current()) {
    return OpBuilder::create<OpTy>(Loc(location));
  }

  // Creates an operation with the given type and one argument.
  template <typename OpTy, typename Arg0>
  OpTy create(Arg0&& arg, SourceLocation location = SourceLocation::current()) {
    return OpBuilder::create<OpTy>(Loc(location), std::forward<Arg0>(arg));
  }

  // Creates an operation with the given type and two arguments.
  template <typename OpTy, typename Arg0, typename Arg1>
  OpTy create(Arg0&& arg0, Arg1&& arg1,
              SourceLocation location = SourceLocation::current()) {
    return OpBuilder::create<OpTy>(Loc(location), std::forward<Arg0>(arg0),
                                   std::forward<Arg1>(arg1));
  }

  // Creates an operation with the given type and three arguments.
  template <typename OpTy, typename Arg0, typename Arg1, typename Arg2>
  OpTy create(Arg0&& arg0, Arg1&& arg1, Arg2&& arg2,
              SourceLocation location = SourceLocation::current()) {
    return OpBuilder::create<OpTy>(Loc(location), std::forward<Arg0>(arg0),
                                   std::forward<Arg1>(arg1),
                                   std::forward<Arg2>(arg2));
  }

  // Creates an operation with the given type and four arguments.
  template <typename OpTy, typename Arg0, typename Arg1, typename Arg2,
            typename Arg3>
  OpTy create(Arg0&& arg0, Arg1&& arg1, Arg2&& arg2, Arg3&& arg3,
              SourceLocation location = SourceLocation::current()) {
    return OpBuilder::create<OpTy>(
        Loc(location), std::forward<Arg0>(arg0), std::forward<Arg1>(arg1),
        std::forward<Arg2>(arg2), std::forward<Arg3>(arg3));
  }

  // Creates an operation with the given type and five arguments.
  template <typename OpTy, typename Arg0, typename Arg1, typename Arg2,
            typename Arg3, typename Arg4>
  OpTy create(Arg0&& arg0, Arg1&& arg1, Arg2&& arg2, Arg3&& arg3, Arg4&& arg4,
              SourceLocation location = SourceLocation::current()) {
    return OpBuilder::create<OpTy>(
        Loc(location), std::forward<Arg0>(arg0), std::forward<Arg1>(arg1),
        std::forward<Arg2>(arg2), std::forward<Arg3>(arg3),
        std::forward<Arg4>(arg4));
  }

  // Creates an operation with the given type and six arguments.
  template <typename OpTy, typename Arg0, typename Arg1, typename Arg2,
            typename Arg3, typename Arg4, typename Arg5>
  OpTy create(Arg0&& arg0, Arg1&& arg1, Arg2&& arg2, Arg3&& arg3, Arg4&& arg4,
              Arg5&& arg5,
              SourceLocation location = SourceLocation::current()) {
    return OpBuilder::create<OpTy>(
        Loc(location), std::forward<Arg0>(arg0), std::forward<Arg1>(arg1),
        std::forward<Arg2>(arg2), std::forward<Arg3>(arg3),
        std::forward<Arg4>(arg4), std::forward<Arg5>(arg5));
  }

  // Creates an operation with the given type and seven arguments.
  template <typename OpTy, typename Arg0, typename Arg1, typename Arg2,
            typename Arg3, typename Arg4, typename Arg5, typename Arg6>
  OpTy create(Arg0&& arg0, Arg1&& arg1, Arg2&& arg2, Arg3&& arg3, Arg4&& arg4,
              Arg5&& arg5, Arg6&& arg6,
              SourceLocation location = SourceLocation::current()) {
    return OpBuilder::create<OpTy>(
        Loc(location), std::forward<Arg0>(arg0), std::forward<Arg1>(arg1),
        std::forward<Arg2>(arg2), std::forward<Arg3>(arg3),
        std::forward<Arg4>(arg4), std::forward<Arg5>(arg5),
        std::forward<Arg6>(arg6));
  }

  // The location that annotations created by this builder are appended to.
  mlir::Location current_loc() const { return current_loc_; }

  // Whether this builder annotates created operations with source locations.
  bool annotate_loc() const { return annotate_loc_; }

 private:
  // Computes the starting location for a builder constructed from a bare
  // MLIRContext. It is static on purpose so that it can be called from a
  // constructor's base-class initializer: the work is done on a fully
  // constructed temporary builder seeded with an unknown location, so Loc()
  // never runs on a partially constructed object.
  static mlir::Location InitialLoc(mlir::MLIRContext* mlir_context,
                                   bool annotate_loc,
                                   SourceLocation location) {
    const mlir::Location unknown_loc = mlir::UnknownLoc::get(mlir_context);
    return EmitterLocOpBuilder(unknown_loc, mlir_context, annotate_loc)
        .Loc(location);
  }

  // Keep the current location of the builder and use it for annotating the
  // newly created operations.
  const mlir::Location current_loc_;
  const bool annotate_loc_;
};

} // namespace xla::gpu

#endif // XLA_SERVICE_GPU_FUSIONS_EMITTER_LOC_OP_BUILDER_H_
Loading

0 comments on commit 27dae48

Please sign in to comment.