From f149b619d9ca7a99bdd2c7254e91915ad4ec5abe Mon Sep 17 00:00:00 2001 From: Paul Fultz II Date: Thu, 14 Sep 2023 21:11:03 -0500 Subject: [PATCH 1/5] Throw an error when mlir's output shape is non-standard (#2150) --- src/targets/gpu/fuse_mlir.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/targets/gpu/fuse_mlir.cpp b/src/targets/gpu/fuse_mlir.cpp index bdd7cbd2f65..e9481b7d6e7 100644 --- a/src/targets/gpu/fuse_mlir.cpp +++ b/src/targets/gpu/fuse_mlir.cpp @@ -103,7 +103,10 @@ struct mlir_op } if(ins->name() == "@return") { - return ins_shapes[ins->inputs().at(0)].with_type(type); + auto s = ins_shapes[ins->inputs().at(0)].with_type(type); + if(not s.standard()) + MIGRAPHX_THROW("MLIR doesnt support non-standard output"); + return s; } std::vector input_shapes; input_shapes.resize(ins->inputs().size()); From 74ba96498c8ed667a27a11d4dafcdb65b1736ea3 Mon Sep 17 00:00:00 2001 From: Paul Fultz II Date: Fri, 15 Sep 2023 08:10:45 -0500 Subject: [PATCH 2/5] Add fp16 flag to test runner to check models quantized to fp16 (#2182) --- tools/test_runner.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/tools/test_runner.py b/tools/test_runner.py index 8fb16fc17e8..8bcd9fbc7b5 100644 --- a/tools/test_runner.py +++ b/tools/test_runner.py @@ -39,6 +39,15 @@ def parse_args(): type=str, default='gpu', help='Specify where the tests execute (ref, gpu)') + parser.add_argument('--fp16', action='store_true', help='Quantize to fp16') + parser.add_argument('--atol', + type=float, + default=1e-3, + help='The absolute tolerance parameter') + parser.add_argument('--rtol', + type=float, + default=1e-3, + help='The relative tolerance parameter') args = parser.parse_args() return args @@ -257,6 +266,8 @@ def main(): # read and compile model model = migraphx.parse_onnx(model_path_name, map_input_dims=param_shapes) + if args.fp16: + migraphx.quantize_fp16(model) model.compile(migraphx.get_target(target)) # get test cases @@ -279,7 
+290,10 @@ def main(): output_data = run_one_case(model, input_data) # check output correctness - ret = check_correctness(gold_outputs, output_data) + ret = check_correctness(gold_outputs, + output_data, + atol=args.atol, + rtol=args.rtol) if ret: correct_num += 1 From 15acaee9c5ecd1f7f511f29cb7b533116f30e7f8 Mon Sep 17 00:00:00 2001 From: Umang Yadav <29876643+umangyadav@users.noreply.github.com> Date: Fri, 15 Sep 2023 13:40:22 -0400 Subject: [PATCH 3/5] Preserve layout of fused kernel for `layernorm+pointwise` (#2185) --- src/targets/gpu/prefuse_ops.cpp | 39 ++++++++++++++++++--------------- test/verify/test_layernorm.cpp | 24 ++++++++++++++++++-- 2 files changed, 43 insertions(+), 20 deletions(-) diff --git a/src/targets/gpu/prefuse_ops.cpp b/src/targets/gpu/prefuse_ops.cpp index 5f3d37811db..0c93c6c67db 100644 --- a/src/targets/gpu/prefuse_ops.cpp +++ b/src/targets/gpu/prefuse_ops.cpp @@ -21,6 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. 
*/ +#include #include #include #include @@ -45,40 +46,42 @@ struct layernorm_base } shape compute_shape(std::vector inputs, std::vector mods) const { - std::size_t nargs = 1; + std::size_t nargs = N; if(not mods.empty()) { auto* pm = mods.front(); - nargs = pm->get_parameter_names().size(); + nargs += pm->get_parameter_names().size() - 1; } - check_shapes{inputs, static_cast(*this)}.has(nargs + N); - auto s = inputs.at(0); + check_shapes{inputs, static_cast(*this)}.has(nargs); + auto s = inputs.front(); auto t = s.type(); if(not mods.empty()) t = mods.front()->get_output_shapes().front().type(); - if(s.scalar()) - { - return s; - } - else if(s.broadcasted()) - { - return {t, s.lens()}; - } - else - { - return s.with_lens(t, s.lens()); - } + + // Scalar output if all inputs are scalar + if(inputs.front().elements() == 1 and + all_of(inputs, [](const auto& ss) { return ss.scalar(); })) + return inputs.front(); + auto l_s = shape::from_permutation( + t, s.lens(), find_permutation(std::vector(inputs.begin(), inputs.begin() + N))); + // just prelayernorm or preadd_layernorm + if(nargs <= N) + return l_s; + // else, layernorm + pointwise fusion, preserve layout of fused op + std::vector lp_s(inputs.begin() + N, inputs.end()); + lp_s.insert(lp_s.begin(), l_s); + return shape::from_permutation(t, s.lens(), find_permutation(lp_s)); } }; -struct layernorm : layernorm_base +struct layernorm : layernorm_base { std::string name() const { return "gpu::prelayernorm"; } }; MIGRAPHX_REGISTER_OP(layernorm); -struct add_layernorm : layernorm_base +struct add_layernorm : layernorm_base { std::string name() const { return "gpu::preadd_layernorm"; } }; diff --git a/test/verify/test_layernorm.cpp b/test/verify/test_layernorm.cpp index 776e4fdfb49..8bc54bc2e9b 100644 --- a/test/verify/test_layernorm.cpp +++ b/test/verify/test_layernorm.cpp @@ -49,7 +49,8 @@ migraphx::instruction_ref add_layernorm(migraphx::module& m, auto pow = m.add_instruction(migraphx::make_op("pow"), sub, 
exponent_mbcast); auto var = m.add_instruction(migraphx::make_op("reduce_mean", {{"axes", {2}}}), pow); auto epsilon_mbcast = m.add_instruction( - migraphx::make_op("multibroadcast", {{"out_lens", {1, dims.at(1), 1}}}), epsilon); + migraphx::make_op("multibroadcast", {{"out_lens", {dims.at(0), dims.at(1), 1}}}), epsilon); + auto add_epsilon = m.add_instruction(migraphx::make_op("add"), var, epsilon_mbcast); auto sqrt = m.add_instruction(migraphx::make_op("sqrt"), add_epsilon); auto sqrt_mbcast = @@ -57,7 +58,8 @@ migraphx::instruction_ref add_layernorm(migraphx::module& m, auto div = m.add_instruction(migraphx::make_op("div"), sub, sqrt_mbcast); auto scale_mbcast = m.add_instruction(migraphx::make_op("multibroadcast", {{"out_lens", dims}}), scale); - auto mul = m.add_instruction(migraphx::make_op("mul"), scale_mbcast, div); + auto mul = m.add_instruction(migraphx::make_op("mul"), div, scale_mbcast); + auto bias_mbcast = m.add_instruction(migraphx::make_op("multibroadcast", {{"out_lens", dims}}), bias); return m.add_instruction(migraphx::make_op("add"), mul, bias_mbcast); @@ -161,3 +163,21 @@ struct test_layernorm_triadd_large : verify_program return p; } }; + +struct test_add_layernorm_add_gemm_nonstd : verify_program +{ + migraphx::program create_program() const + { + migraphx::program p; + auto* mm = p.get_main_module(); + auto s = + migraphx::shape::from_permutation(migraphx::shape::float_type, {8, 1, 16}, {1, 2, 0}); + auto x = mm->add_parameter("x", s); + auto y = mm->add_parameter("y", s); + auto z = mm->add_parameter("z", migraphx::shape{migraphx::shape::float_type, {8, 16, 64}}); + auto add = mm->add_instruction(migraphx::make_op("add"), x, y); + auto layernorm_ins = add_layernorm(*mm, add, s.lens()); + mm->add_instruction(migraphx::make_op("dot"), layernorm_ins, z); + return p; + } +}; From 205306ac08ea32f37280f74eb58efbfa0c5fe0da Mon Sep 17 00:00:00 2001 From: Paul Fultz II Date: Fri, 15 Sep 2023 23:05:05 -0500 Subject: [PATCH 4/5] Enable mlir quick 
tuning (#2183) --- src/targets/gpu/include/migraphx/gpu/mlir.hpp | 3 ++- src/targets/gpu/jit/mlir.cpp | 4 +--- src/targets/gpu/mlir.cpp | 15 +++++++++------ 3 files changed, 12 insertions(+), 10 deletions(-) diff --git a/src/targets/gpu/include/migraphx/gpu/mlir.hpp b/src/targets/gpu/include/migraphx/gpu/mlir.hpp index 34a85d00448..780869bcd7b 100644 --- a/src/targets/gpu/include/migraphx/gpu/mlir.hpp +++ b/src/targets/gpu/include/migraphx/gpu/mlir.hpp @@ -49,7 +49,8 @@ MIGRAPHX_GPU_EXPORT instruction_ref insert_mlir(module& m, MIGRAPHX_GPU_EXPORT tuning_config get_tuning_config_mlir(const context& migraphx_ctx, module m, - const std::vector& inputs); + const std::vector& inputs, + bool exhaustive); } // namespace gpu } // namespace MIGRAPHX_INLINE_NS diff --git a/src/targets/gpu/jit/mlir.cpp b/src/targets/gpu/jit/mlir.cpp index f8bca1049bf..9cfd6acb51f 100644 --- a/src/targets/gpu/jit/mlir.cpp +++ b/src/targets/gpu/jit/mlir.cpp @@ -57,11 +57,9 @@ struct mlir_compiler : compiler const operation&, bool exhaustive) const { - if(not exhaustive) - return nullopt; auto shapes = to_shapes(ins->inputs()); auto* smod = ins->module_inputs().front(); - return get_tuning_config_mlir(ctx, *smod, shapes); + return get_tuning_config_mlir(ctx, *smod, shapes, exhaustive); } }; diff --git a/src/targets/gpu/mlir.cpp b/src/targets/gpu/mlir.cpp index c92fc4ba7b9..94b3761d0b6 100644 --- a/src/targets/gpu/mlir.cpp +++ b/src/targets/gpu/mlir.cpp @@ -682,11 +682,12 @@ struct mlir_program MIGRAPHX_THROW("Failed setting tuning key: " + *str); } - tuning_config get_tuning_config() MIGRAPHX_TIDY_CONST + tuning_config get_tuning_config(bool exhaustive) MIGRAPHX_TIDY_CONST { tuning_config tc; run_high_level_pipeline(); - auto tuning_mode = RocmlirTuningParamSetKindFull; + auto tuning_mode = + exhaustive ? 
RocmlirTuningParamSetKindFull : RocmlirTuningParamSetKindQuick; if(enabled(MIGRAPHX_MLIR_TUNE_EXHAUSTIVE{})) tuning_mode = RocmlirTuningParamSetKindExhaustive; mlir_tuning_space params{mlirRockTuningSpaceCreate(mmodule.get(), tuning_mode)}; @@ -914,15 +915,17 @@ instruction_ref insert_mlir(module& m, return m.insert_instruction(ins, co, refs); } -tuning_config -get_tuning_config_mlir(const context& migraphx_ctx, module m, const std::vector& inputs) +tuning_config get_tuning_config_mlir(const context& migraphx_ctx, + module m, + const std::vector& inputs, + bool exhaustive) { adjust_param_shapes(m, inputs); mlir_program mp; mp.set_gpu_properties(migraphx_ctx); mp.parse(m); - return mp.get_tuning_config(); + return mp.get_tuning_config(exhaustive); } #else @@ -951,7 +954,7 @@ insert_mlir(module& m, instruction_ref, code_object_op co, const std::vector&) +tuning_config get_tuning_config_mlir(const context&, module, const std::vector&, bool) { return {}; } From 00640366b80285a9879a12764140090608ba5b6f Mon Sep 17 00:00:00 2001 From: ravil-mobile Date: Sat, 16 Sep 2023 13:13:25 +0200 Subject: [PATCH 5/5] Added support for standalone dot operations with mlir (#2169) --- src/targets/gpu/fuse_mlir.cpp | 60 +++++++++++++++++++++-------------- 1 file changed, 37 insertions(+), 23 deletions(-) diff --git a/src/targets/gpu/fuse_mlir.cpp b/src/targets/gpu/fuse_mlir.cpp index e9481b7d6e7..f2f2ccc8015 100644 --- a/src/targets/gpu/fuse_mlir.cpp +++ b/src/targets/gpu/fuse_mlir.cpp @@ -302,10 +302,8 @@ struct find_mlir_fused_ops } }; -struct find_mlir_standalone_convolution_op +struct find_mlir_standalone_op { - auto matcher() const { return match::name("convolution"); } - void apply(module_pass_manager& mpm, const match::matcher_result& r) const { auto conv_based_op = r.result; @@ -327,6 +325,16 @@ struct find_mlir_standalone_convolution_op } }; +struct find_mlir_standalone_convolution_op : find_mlir_standalone_op +{ + auto matcher() const { return match::name("convolution"); } +}; 
+ +struct find_mlir_standalone_dot_op : find_mlir_standalone_op +{ + auto matcher() const { return match::name("dot"); } +}; + /** * @brief Declares a new MIGraphX environment variable which forces to generate * only specific MLIR operations. @@ -334,7 +342,7 @@ struct find_mlir_standalone_convolution_op * The variable, if defined, forces MIGraphX to use only specific operations * with MLIR regardless of the underlying GPU architecture. The variable accepts * a list of operations separated by comma. The variable recognizes the following - * operations: "fused", "convolution". If the variable is not defined MIGraphX + * operations: "fused", "convolution", "dot". If the variable is not defined MIGraphX * will decide by itself which operations to delegate to MLIR. The variable is * intended to be primarily used by rocMLIR developers. */ @@ -349,31 +357,33 @@ bool is_requested(std::string_view option) return contains(options, option); } -bool is_fusion_enabled() +bool is_enabled(std::string_view op_name, context* ctx) { if(is_self_decide()) { - return true; - } - return is_requested("fused"); -} - -bool is_standalone_convs_enabled(context* ctx) -{ - if(is_self_decide()) - { - if(ctx == nullptr) + if(op_name == "fused") { - return false; + return true; + } + else if(op_name == "convolution") + { + if(ctx == nullptr) + { + return false; + } + else + { + const auto& device = ctx->get_current_device(); + const std::string navi_family{"gfx110"}; + return starts_with(device.get_gfx_name(), navi_family); + } } else { - const auto& device = ctx->get_current_device(); - const std::string navi_family{"gfx110"}; - return starts_with(device.get_gfx_name(), navi_family); + return false; } } - return is_requested("convolution"); + return is_requested(op_name); } } // namespace @@ -382,21 +392,25 @@ bool is_standalone_convs_enabled(context* ctx) void fuse_mlir::apply(module_pass_manager& mpm) const { #ifdef MIGRAPHX_MLIR - if(is_fusion_enabled()) + if(is_enabled("fused", this->ctx)) 
{ match::find_matches(mpm, find_mlir_fused_ops{}); } - if(is_standalone_convs_enabled(this->ctx)) + if(is_enabled("convolution", this->ctx)) { match::find_matches(mpm, find_mlir_standalone_convolution_op{}); } + + if(is_enabled("dot", this->ctx)) + { + match::find_matches(mpm, find_mlir_standalone_dot_op{}); + } #else (void)mpm; #endif } } // namespace gpu - } // namespace MIGRAPHX_INLINE_NS } // namespace migraphx