Fix dyn pooling (#1768)

Adds support for dynamic input shapes in the pooling operator when combined with auto-padding. With this combination, the padding (and therefore the output shape) cannot be computed until runtime.
ROCm · Aug 29, 2023 · 7b8a28f · 7b8a28f
1 parent 4bce712
commit 7b8a28f
Show file tree

Hide file tree

Showing 14 changed files with 363 additions and 105 deletions.
diff --git a/src/include/migraphx/op/common.hpp b/src/include/migraphx/op/common.hpp
@@ -1,7 +1,7 @@
 /*
  * The MIT License (MIT)
  *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to deal
@@ -33,8 +33,12 @@ namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
 namespace op {
 
+// Specifies where to add the "extra" cell of padding if the
+// calculated padding is an odd number.
 // Padding mode is default_ for fixed shape padding.
-// same_lower and same_upper used for dynamic padding.
+// same_lower and same_upper specify dynamic padding.
+// The odd cell goes at the beginning of the dimension
+// (same_lower) or end (same_upper).
 enum padding_mode_t
 {
     default_, // NOLINT

diff --git a/src/include/migraphx/op/convolution.hpp b/src/include/migraphx/op/convolution.hpp
@@ -1,7 +1,7 @@
 /*
  * The MIT License (MIT)
  *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to deal
@@ -206,6 +206,7 @@ struct convolution
         std::vector<std::size_t> new_padding;
         if(padding_mode != op::padding_mode_t::default_)
         {
+            // Auto-calculate the padding sizes with calc_dyn_auto_pad
             auto input_lens   = args[0].get_shape().lens();
             auto weights_lens = args[1].get_shape().lens();
             new_padding =
@@ -217,6 +218,7 @@ struct convolution
         }
         else
         {
+            // Use the padding that was given
             new_padding = padding;
             if(output_shape.dynamic())
             {

diff --git a/src/include/migraphx/op/pooling.hpp b/src/include/migraphx/op/pooling.hpp
@@ -29,6 +29,7 @@
 #include <migraphx/config.hpp>
 #include <migraphx/value.hpp>
 #include <migraphx/argument.hpp>
+#include <migraphx/pad_calc.hpp>
 #include <migraphx/par_for.hpp>
 #include <migraphx/shape_for_each.hpp>
 #include <migraphx/dyn_output.hpp>
@@ -40,10 +41,20 @@ namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
 namespace op {
 
+// The Pooling operator mostly follows the specifications for the Onnx pooling op.
+// It assumes an NCHW layout, extended to support any number of spatial dimensions
+// from 1 on up; dimensions are <batch index, channels, spatial dimensions...>
+//
 struct pooling
 {
+    //  Class members mode, ceil_mode, padding_mode have similar names but refer to separate
+    //  concepts.
     pooling_mode mode = {pooling_mode::average};
 
+    // If the input has a rank other than 4, then padding, stride, and lengths must all be
+    // specified, since the defaults are 2-dimensional.  Exception: padding is not required if
+    // padding_mode != default_
+
     // Padding along each spatial input dimension
     // Can be ndim or 2*ndim values where ndim is size of lengths
     // ndim values means pad the same before and after each dimension
@@ -63,13 +74,14 @@ struct pooling
 
     // ceiling mode is a flag affecting output size
     // or equivalently, placements of the pooling kernel.
-    // When true, round the size upwards, possibly
-    // including partial placements where the kernel extends beyond the edge
-    // of input and even padding.  When false, round down so that all
+    // When true, round the size upwards.  When false, round down so that all
     // kernel placements fit but some input values may be dropped.
     bool ceil_mode = false;
     int lp_order   = 2;
 
+    // Mode for auto padding.  default_ indicates no auto padding.
+    padding_mode_t padding_mode = padding_mode_t::default_;
+
     // Global pooling with dynamic shape input
     bool dyn_global = false;
 
@@ -84,6 +96,7 @@ struct pooling
     {
         return pack(f(self.mode, "mode"),
                     f(self.padding, "padding"),
+                    f(self.padding_mode, "padding_mode"),
                     f(self.stride, "stride"),
                     f(self.lengths, "lengths"),
                     f(self.ceil_mode, "ceil_mode"),
@@ -97,7 +110,8 @@ struct pooling
     {
         if(dyn_global)
             return;
-        if((padding.size() != stride.size() and (padding.size()) != stride.size() * 2) or
+        if((padding_mode != default_ and padding.size() != stride.size() and
+            (padding.size()) != stride.size() * 2) or
            stride.size() != lengths.size())
         {
             MIGRAPHX_THROW("POOLING: inconsistent attribute sizes");
@@ -137,8 +151,19 @@ struct pooling
             std::size_t padding_factor = 2 * padding[i];
             if(padding.size() == 2 * kdims)
                 padding_factor = padding[i] + padding[i + kdims];
-            assert(input_lens[i + 2] + padding_factor >= lengths[i]);
-            std::size_t dim_size = input_lens[i + 2] + padding_factor - lengths[i];
+            std::size_t dim_size;
+            if(input_lens[i + 2] + padding_factor < lengths[i])
+            {
+                if(padding_mode == default_)
+                    MIGRAPHX_THROW("POOLING: not enough padding for the given kernel size");
+                // lengths can be legitimately larger only if we're doing auto padding
+                // with a dynamic shape, in which case given padding is ignored.  Set a dummy value.
+                dim_size = 2;
+            }
+            else
+            {
+                dim_size = input_lens[i + 2] + padding_factor - lengths[i];
+            }
             std::size_t len =
                 (ceil_mode)
                     ? dim_size / stride[i] +
@@ -151,17 +176,13 @@ struct pooling
 
     shape normalize_compute_shape(std::vector<shape> inputs) const
     {
-        check_shapes{inputs, *this, true}.has(1);
+        check_shapes{inputs, *this, true}.has(1).min_ndims(3);
         check_attribute_size();
 
         const shape& input = inputs.at(0);
-        auto padding_size  = padding.size();
+        auto stride_size   = stride.size();
         size_t kdims       = input.ndim() - 2;
-        if(input.ndim() < 3)
-        {
-            MIGRAPHX_THROW("POOLING: input must have 3 or more dimensions and be nonempty");
-        }
-        if(input.ndim() * 2 != padding_size + 4 and input.ndim() != padding_size + 2)
+        if(input.ndim() != stride_size + 2)
         {
             MIGRAPHX_THROW("POOLING: input and attribute size mismatch!");
         }
@@ -179,6 +200,28 @@ struct pooling
                 }
                 return {input.type(), output_dyn_dims};
             }
+            else if(padding_mode != default_)
+            {
+                const size_t num_spatial_dims = inputs[0].ndim() - 2;
+                const shape& x_shape          = inputs[0];
+                // same as convolution::dynamic_compute_shape()
+
+                for(std::size_t i = 0; i < num_spatial_dims; ++i)
+                {
+                    auto ceil_div = [](std::size_t x, std::size_t y) { return (x + y - 1) / y; };
+                    auto s        = stride[i];
+
+                    auto x = x_shape.dyn_dims()[i + 2];
+                    std::set<std::size_t> optimals{};
+                    std::transform(x.optimals.begin(),
+                                   x.optimals.end(),
+                                   std::inserter(optimals, optimals.begin()),
+                                   [&](auto o) { return ceil_div(o, s); });
+                    output_dyn_dims.push_back(
+                        shape::dynamic_dimension{ceil_div(x.min, s), ceil_div(x.max, s), optimals});
+                }
+                return {input.type(), output_dyn_dims};
+            }
             else
             {
                 // does not compute optimals
@@ -267,6 +310,7 @@ struct pooling
                       Out& output,
                       const In& input,
                       const std::vector<std::size_t>& kernel_dims,
+                      const std::vector<std::size_t>& padding_vals,
                       Op op) const
     {
         auto in_s    = input.get_shape();
@@ -283,9 +327,9 @@ struct pooling
             // For each spatial dimension, find starting and ending index of pooling kernel
             for(std::size_t dim = 2; dim < n_dim; ++dim)
             {
-                auto d_2 = dim - 2;
-                int start =
-                    static_cast<int>(idx_o[dim] * stride[d_2]) - static_cast<int>(padding[d_2]);
+                auto d_2  = dim - 2;
+                int start = static_cast<int>(idx_o[dim] * stride[d_2]) -
+                            static_cast<int>(padding_vals[d_2]);
                 int end;
                 // NOLINT
                 if(count_include_pad and ceil_mode and (mode != pooling_mode::max))
@@ -297,7 +341,7 @@ struct pooling
 
                     // Check if this kernel extends beyond the padding at end of dimension
                     end = std::min(start + kernel_dims[d_2],
-                                   in_lens[dim] + static_cast<int>(padding[d_2]));
+                                   in_lens[dim] + static_cast<int>(padding_vals[d_2]));
                 }
                 else
                 {
@@ -316,6 +360,7 @@ struct pooling
             }
 
             shape win_shape{output_shape.type(), win_size};
+
             auto pool_size    = win_shape.elements();
             double output_val = op.template init<Type>();
 
@@ -354,30 +399,65 @@ struct pooling
 
     argument compute(const dyn_output& dyn_out, std::vector<argument> args) const
     {
-        argument result{dyn_out.computed_shape};
+        argument result;
         auto input_lens = args[0].get_shape().lens();
         std::vector<std::size_t> kernel_dims;
+        shape output_shape;
+        // If we have to auto-calculate padding, it will be passed to calc_pooling() as an argument
+        // instead of the member variable padding.
+        std::vector<std::size_t> temp_padding(padding);
         if(dyn_global)
         {
+            // for dynamic GlobalPooling, there's no padding
             kernel_dims.insert(kernel_dims.end(), input_lens.begin() + 2, input_lens.end());
+            output_shape = dyn_out.computed_shape;
+            result       = dyn_out.computed_shape;
         }
-        else
+        else if((padding_mode != op::padding_mode_t::default_))
         {
+            // if padding_mode is set, input was a dynamic size.  Calculate padded size now.
+
+            // kernel_lens is the same as kernel_dims, but prepended with the 2 non-
+            // spatial dimensions.  For size computations, it's used like the weights
+            // tensor for convolutions.
+            std::vector<std::size_t> kernel_lens;
+            kernel_lens.insert(kernel_lens.end(), input_lens.begin(), input_lens.begin() + 2);
+            kernel_lens.insert(kernel_lens.end(), lengths.begin(), lengths.end());
             kernel_dims = this->lengths;
+
+            auto type = args[0].get_shape().type();
+            // dilation not currently supported for pooling, so default to all 1's
+            temp_padding = calc_dyn_auto_pad(
+                input_lens, kernel_lens, stride, {1, 1}, bool(padding_mode == op::same_upper));
+
+            output_shape = compute_padded_pool_shape(
+                args[0].get_shape(), shape(type, kernel_dims), temp_padding, stride, {1, 1});
+
+            result = argument(output_shape);
+        }
+        else // fixed/static input
+        {
+            kernel_dims  = this->lengths;
+            output_shape = dyn_out.computed_shape;
+            result       = dyn_out.computed_shape;
         }
+
+        // Perform the computation and populate result
         visit_all(result, args[0])([&](auto output, auto input) {
             using type = typename decltype(output)::value_type;
             switch(mode)
             {
             case migraphx::op::pooling_mode::average:
-                calc_pooling<type>(dyn_out.computed_shape, output, input, kernel_dims, avg_pool{});
+                calc_pooling<type>(
+                    output_shape, output, input, kernel_dims, temp_padding, avg_pool{});
                 break;
             case migraphx::op::pooling_mode::max:
-                calc_pooling<type>(dyn_out.computed_shape, output, input, kernel_dims, max_pool{});
+                calc_pooling<type>(
+                    output_shape, output, input, kernel_dims, temp_padding, max_pool{});
                 break;
             case migraphx::op::pooling_mode::lpnorm:
                 calc_pooling<type>(
-                    dyn_out.computed_shape, output, input, kernel_dims, lpnorm_pool{lp_order});
+                    output_shape, output, input, kernel_dims, temp_padding, lpnorm_pool{lp_order});
                 break;
             }
         });

diff --git a/src/include/migraphx/pad_calc.hpp b/src/include/migraphx/pad_calc.hpp
@@ -1,7 +1,7 @@
 /*
  * The MIT License (MIT)
  *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to deal
@@ -62,6 +62,14 @@ shape compute_padded_shape(const shape& input,
                            const std::vector<std::size_t>& stride,
                            const std::vector<std::size_t>& dilation);
 
+// Used for dynamic auto padding of pooling operators where padding needs to be computed at
+// evaluation time.
+shape compute_padded_pool_shape(const shape& input,
+                                const shape& kernel,
+                                const std::vector<std::size_t>& padding,
+                                const std::vector<std::size_t>& stride,
+                                const std::vector<std::size_t>& dilation);
+
 } // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx
 

diff --git a/src/normalize_attributes.cpp b/src/normalize_attributes.cpp
@@ -26,7 +26,7 @@
 #include <migraphx/normalize_attributes.hpp>
 #include <migraphx/stringutils.hpp>
 #include <migraphx/op/normalize_attribute.hpp>
-
+#include <migraphx/op/common.hpp>
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
 
@@ -192,20 +192,27 @@ bool normalize_attributes(operation& op, const shape& input_shape)
     auto val   = op.to_value();
     if(attrs.contains("normalize_padding"))
     {
-        auto padding       = val.at(attrs.at("normalize_padding").to<std::string>());
-        auto padding_size  = padding.size();
-        auto padding_start = 2;
-
-        if(padding_size == 2 * (input_shape.ndim() - padding_start))
-            tuned = true;
-        else if(padding_size != (input_shape.ndim() - padding_start))
-            MIGRAPHX_THROW("inconsistent padding size");
-        else
+        bool use_auto_padding =
+            (val.contains("padding_mode") and
+             (val.at("padding_mode").to<int>() != migraphx::op::padding_mode_t::default_));
+        if(not use_auto_padding)
         {
-            auto result    = tune_pad_attribute(padding);
-            val["padding"] = result;
-            op.from_value(val);
-            tuned = true;
+            auto padding       = val.at(attrs.at("normalize_padding").to<std::string>());
+            auto padding_size  = padding.size();
+            auto padding_start = 2;
+            if(padding_size == 2 * (input_shape.ndim() - padding_start))
+                tuned = true;
+            else if(padding_size != (input_shape.ndim() - padding_start))
+            {
+                MIGRAPHX_THROW("normalize_attributes: inconsistent padding vector size ");
+            }
+            else
+            {
+                auto result    = tune_pad_attribute(padding);
+                val["padding"] = result;
+                op.from_value(val);
+                tuned = true;
+            }
         }
     }
     if(not attrs.contains("normalize_axes"))