Add padding='same' mode to conv{1,2,3}d (#45667)

Summary: Pull Request resolved: #45667. First part of #3867 (pooling operators still to do). This adds a `padding='same'` mode to the interface of `conv{n}d` and `nn.Conv{n}d`. This should match the behaviour of `tensorflow`. I couldn't find it explicitly documented, but through experimentation I found `tensorflow` returns the shape `ceil(len/stride)` and always adds any extra asymmetric padding onto the right side of the input. Since the `native_functions.yaml` schema doesn't seem to support strings or enums, I've moved the function interface into python and it now dispatches between the numerically padded `conv{n}d` and the `_conv{n}d_same` variant. Underscores because I couldn't see any way to avoid exporting a function into the `torch` namespace. A note on asymmetric padding: the total padding required can be odd if both the kernel-length is even and the dilation is odd. mkldnn has native support for asymmetric padding, so there is no overhead there, but for other backends I resort to padding the input tensor by 1 on the right hand side to make the remaining padding symmetrical. In these cases, I use `TORCH_WARN_ONCE` to notify the user of the performance implications. Test Plan: Imported from OSS. Reviewed By: ejguan. Differential Revision: D27170744. Pulled By: jbschlosser. fbshipit-source-id: b3d8a0380e0787ae781f2e5d8ee365a7bfd49f22
pytorch · Mar 18, 2021 · 04e0cbf · 04e0cbf
1 parent a8a1090
commit 04e0cbf
Show file tree

Hide file tree

Showing 18 changed files with 892 additions and 65 deletions.
diff --git a/aten/src/ATen/native/Convolution.cpp b/aten/src/ATen/native/Convolution.cpp
@@ -1,6 +1,7 @@
 #include <ATen/ATen.h>
 #include <ATen/Parallel.h>
 #include <ATen/native/ConvUtils.h>
+#include <ATen/native/Pool.h>
 #include <ATen/native/cpu/DepthwiseConvKernel.h>
 #include <ATen/native/utils/ParamUtils.h>
 #include <ATen/native/xnnpack/Engine.h>
@@ -572,6 +573,110 @@ at::Tensor conv3d(
                          false, {{0, 0, 0}}, groups);
 }
 
+
+// Convolution for padding='same'.
+//
+// Validates that `input` and `weight` have the same number of dimensions, that
+// `stride` / `dilation` have either one entry or one entry per spatial
+// dimension, and that every stride is 1 (strided 'same' convolutions are
+// rejected). The per-dimension left/right padding is obtained from
+// pooling_same_mode_padding_lr.
+//
+// If every dimension needs equal left and right padding, the convolution is
+// called directly with that padding. Otherwise the input is first zero-padded
+// by the left/right difference so that the remaining padding is symmetric, and
+// the convolution is run on the padded copy.
+static Tensor convolution_same(
+    const Tensor &input, const Tensor &weight, const Tensor &bias,
+    IntArrayRef stride, IntArrayRef dilation, int64_t groups) {
+
+  const int64_t k = weight.dim();
+  const int64_t dim = k - 2;  // number of spatial dimensions
+  TORCH_CHECK(dim > 0, "weight should have at least three dimensions");
+  auto weight_sizes = weight.sizes();
+  auto input_sizes = input.sizes();
+  TORCH_CHECK(k == input.dim(),
+              "Expected ", k, "-dimensional input for ",
+              k, "-dimensional weight ", weight_sizes, ", but got ",
+              input.dim(), "-dimensional input of size ",
+              input_sizes, " instead");
+
+  // Compare as signed values: ArrayRef::size() is unsigned while `dim` is not.
+  const auto stride_len = static_cast<int64_t>(stride.size());
+  const auto dilation_len = static_cast<int64_t>(dilation.size());
+  TORCH_CHECK(stride_len == dim || stride_len == 1,
+              "stride cannot broadcast to ", dim, " dimensions");
+  TORCH_CHECK(dilation_len == dim || dilation_len == 1,
+              "dilation cannot broadcast to ", dim, " dimensions");
+  for (const auto s : stride) {
+    TORCH_CHECK(s == 1, "padding='same' is not supported for strided convolutions");
+  }
+
+  // Calculate the correct padding
+  DimVector padding_l, padding_r;
+  bool symmetric_padding = true;
+  for (int64_t i = 0; i < dim; ++i) {
+    // A single-entry stride/dilation is broadcast to every spatial dimension
+    auto s = stride_len == 1 ? stride[0] : stride[i];
+    auto d = dilation_len == 1 ? dilation[0] : dilation[i];
+    auto pad = pooling_same_mode_padding_lr(
+        input_sizes[i + 2], weight_sizes[i + 2], s, d);
+    padding_l.push_back(pad.first);
+    padding_r.push_back(pad.second);
+    if (pad.first != pad.second) {
+      symmetric_padding = false;
+    }
+  }
+
+  if (symmetric_padding) {
+    // All backends handle symmetric padding natively
+    DimVector output_padding(static_cast<size_t>(dim));
+    return native::convolution(input, weight, bias, stride, padding_l, dilation,
+                               false, output_padding, groups);
+  }
+
+  TORCH_WARN_ONCE("Using padding='same' with even kernel lengths and odd dilation may"
+                  " require a zero-padded copy of the input be created");
+  // Zero-initialised (left, right) pad pairs, ordered from last dim to first
+  SmallVector<int64_t, kDimVectorStaticSize * 2> pad_nd(static_cast<size_t>(2 * dim));
+  for (int64_t i = 0; i < dim; ++i) {
+    // Apply padding by the difference, leaving only a symmetric padding
+    const auto delta_pad = padding_r[i] - padding_l[i];
+    const auto pad_idx = 2 * (dim - 1 - i);  // F.pad goes from last dim to first
+    if (delta_pad > 0) {
+      // More padding on the right: pre-pad the right edge, keep padding_l
+      pad_nd[pad_idx + 1] = delta_pad;
+    } else {
+      // Equal, or more padding on the left: pre-pad the left edge by the
+      // (non-negative) surplus and keep the smaller right-hand amount as the
+      // symmetric padding. The amount is negated so it can never be a
+      // negative pad, which would crop the input instead of padding it.
+      pad_nd[pad_idx] = -delta_pad;
+      padding_l[i] = padding_r[i];
+    }
+  }
+  auto padded_input = at::constant_pad_nd(input, pad_nd, 0);
+  DimVector output_padding(static_cast<size_t>(dim));
+  return at::convolution(padded_input, weight, bias, stride, padding_l,
+                         dilation, false, output_padding, groups);
+}
+
+// Convolution whose padding is selected by name.
+//   "valid" -> plain convolution with zero padding and zero output padding
+//   "same"  -> convolution_same
+// Any other string fails with "Invalid padding string".
+Tensor _convolution_mode(
+    const Tensor& input, const Tensor& weight, const Tensor& bias,
+    IntArrayRef stride, std::string padding, IntArrayRef dilation,
+    int64_t groups) {
+  if (padding == "valid") {
+    // A single zero serves as both the padding and the output padding
+    const int64_t no_padding[] = {0};
+    return at::native::convolution(
+        input, weight, bias, stride, no_padding, dilation,
+        /*transposed=*/false, /*output_padding=*/no_padding, groups);
+  }
+  if (padding == "same") {
+    return at::native::convolution_same(
+        input, weight, bias, stride, dilation, groups);
+  }
+  TORCH_CHECK(false, "Invalid padding string: '", padding, "'");
+}
+
+// String-padding overload of conv1d. Forwards every argument unchanged to
+// at::_convolution_mode; `padding` is taken by value and moved into the call.
+at::Tensor conv1d(
+    const Tensor& input, const Tensor& weight, const c10::optional<Tensor>& bias,
+    IntArrayRef stride, std::string padding, IntArrayRef dilation,
+    int64_t groups) {
+  return at::_convolution_mode(
+      input, weight, bias, stride, std::move(padding), dilation, groups);
+}
+
+// String-padding overload of conv2d. Forwards every argument unchanged to
+// at::_convolution_mode; `padding` is taken by value and moved into the call.
+at::Tensor conv2d(
+    const Tensor& input, const Tensor& weight, const c10::optional<Tensor>& bias,
+    IntArrayRef stride, std::string padding, IntArrayRef dilation,
+    int64_t groups) {
+  return at::_convolution_mode(
+      input, weight, bias, stride, std::move(padding), dilation, groups);
+}
+
+// String-padding overload of conv3d. Forwards every argument unchanged to
+// at::_convolution_mode; `padding` is taken by value and moved into the call.
+at::Tensor conv3d(
+    const Tensor& input, const Tensor& weight, const c10::optional<Tensor>& bias,
+    IntArrayRef stride, std::string padding, IntArrayRef dilation,
+    int64_t groups) {
+  return at::_convolution_mode(
+      input, weight, bias, stride, std::move(padding), dilation, groups);
+}
+
 at::Tensor conv_transpose1d(
     const Tensor& input, const Tensor& weight, const Tensor& bias,
     IntArrayRef stride, IntArrayRef padding, IntArrayRef output_padding, int64_t groups, IntArrayRef dilation) {

diff --git a/aten/src/ATen/native/Pool.h b/aten/src/ATen/native/Pool.h
@@ -46,6 +46,24 @@ static inline T pooling_output_shape(
         inputSize, kernelSize, pad, pad, stride, dilation, ceil_mode);
 }
 
+// Returns the {left, right} padding for one dimension in 'same' mode.
+//
+// The total padding is dilation * (kernelSize - 1). When that total is odd,
+// stride is greater than 2 and inputSize % stride is greater than 1, one unit
+// of padding is dropped so the split can be symmetric. Any remaining odd unit
+// goes to the right-hand side.
+inline std::pair<int64_t, int64_t> pooling_same_mode_padding_lr(
+    int64_t inputSize, int64_t kernelSize, int64_t stride, int64_t dilation) {
+  // NOTE: with strides, the output shape is ceil(inputSize/stride)
+  int64_t padding_sum = dilation * (kernelSize - 1);
+
+  // Prefer symmetric padding if possible: the floor in the output size
+  // calculation leaves some slack that can absorb one unit of padding.
+  // `stride > 2` is tested before the modulo so stride is never zero there.
+  const bool is_odd = (padding_sum % 2 == 1);
+  if (stride > 2 && is_odd && (inputSize % stride) > 1) {
+    padding_sum -= 1;
+  }
+
+  const int64_t lhs = padding_sum / 2;
+  const int64_t rhs = padding_sum - lhs;
+  return std::make_pair(lhs, rhs);
+}
+
 
 // AveragePool2d/DilatedMaxPool2d (forward)
 static inline void

diff --git a/aten/src/ATen/native/native_functions.yaml b/aten/src/ATen/native/native_functions.yaml
@@ -1025,6 +1025,9 @@
 - func: _convolution.deprecated(Tensor input, Tensor weight, Tensor? bias, int[] stride, int[] padding, int[] dilation, bool transposed, int[] output_padding, int groups, bool benchmark, bool deterministic, bool cudnn_enabled) -> Tensor
   use_c10_dispatcher: hacky_wrapper_for_legacy_signatures
 
+# Convolution taking `padding` as a string. The C++ implementation accepts
+# "same" and "valid" and raises an error for any other value.
+- func: _convolution_mode(Tensor input, Tensor weight, Tensor? bias, int[] stride, str padding, int[] dilation, int groups) -> Tensor
+  use_c10_dispatcher: hacky_wrapper_for_legacy_signatures
+
 - func: _convolution_nogroup(Tensor input, Tensor weight, Tensor? bias, int[] stride, int[] padding, int[] dilation, bool transposed, int[] output_padding) -> Tensor
   use_c10_dispatcher: hacky_wrapper_for_legacy_signatures
 
@@ -1040,6 +1043,15 @@
 - func: conv3d(Tensor input, Tensor weight, Tensor? bias=None, int[3] stride=1, int[3] padding=0, int[3] dilation=1, int groups=1) -> Tensor
   use_c10_dispatcher: hacky_wrapper_for_legacy_signatures
 
+# String-padding overloads of conv{1,2,3}d. Their C++ implementations forward
+# to _convolution_mode.
+# NOTE(review): cpp_no_default_args presumably drops the C++ default values for
+# the listed arguments so short calls stay unambiguous with the int[] padding
+# overloads - confirm against the codegen.
+- func: conv1d.padding(Tensor input, Tensor weight, Tensor? bias=None, int[1] stride=1, str padding="valid", int[1] dilation=1, int groups=1) -> Tensor
+  cpp_no_default_args: ['bias', 'stride', 'padding']
+
+- func: conv2d.padding(Tensor input, Tensor weight, Tensor? bias=None, int[2] stride=1, str padding="valid", int[2] dilation=1, int groups=1) -> Tensor
+  cpp_no_default_args: ['bias', 'stride', 'padding']
+
+- func: conv3d.padding(Tensor input, Tensor weight, Tensor? bias=None, int[3] stride=1, str padding="valid", int[3] dilation=1, int groups=1) -> Tensor
+  cpp_no_default_args: ['bias', 'stride', 'padding']
+
 - func: conv_tbc(Tensor self, Tensor weight, Tensor bias, int pad=0) -> Tensor
   dispatch:
     DefaultBackend: conv_tbc

diff --git a/aten/src/ATen/templates/Functions.cpp b/aten/src/ATen/templates/Functions.cpp
@@ -25,6 +25,27 @@ std::tuple<Tensor,Tensor> std_mean(const Tensor& self, int dim) {
   return at::std_mean(self, IntArrayRef{dim});
 }
 
+// conv1d overload taking `padding` as a braced list. The list is viewed as an
+// IntArrayRef and handed to the IntArrayRef-padding conv1d.
+at::Tensor conv1d(
+    const Tensor& input,
+    const Tensor& weight,
+    const Tensor& bias,
+    IntArrayRef stride,
+    std::initializer_list<int64_t> padding_,
+    IntArrayRef dilation,
+    int64_t groups) {
+  const IntArrayRef padding_ref(padding_);
+  return at::conv1d(input, weight, bias, stride, padding_ref, dilation, groups);
+}
+
+// conv2d overload taking `padding` as a braced list. The list is viewed as an
+// IntArrayRef and handed to the IntArrayRef-padding conv2d.
+at::Tensor conv2d(
+    const Tensor& input,
+    const Tensor& weight,
+    const Tensor& bias,
+    IntArrayRef stride,
+    std::initializer_list<int64_t> padding_,
+    IntArrayRef dilation,
+    int64_t groups) {
+  const IntArrayRef padding_ref(padding_);
+  return at::conv2d(input, weight, bias, stride, padding_ref, dilation, groups);
+}
+
+// conv3d overload taking `padding` as a braced list. The list is viewed as an
+// IntArrayRef and handed to the IntArrayRef-padding conv3d.
+at::Tensor conv3d(
+    const Tensor& input,
+    const Tensor& weight,
+    const Tensor& bias,
+    IntArrayRef stride,
+    std::initializer_list<int64_t> padding_,
+    IntArrayRef dilation,
+    int64_t groups) {
+  const IntArrayRef padding_ref(padding_);
+  return at::conv3d(input, weight, bias, stride, padding_ref, dilation, groups);
+}
+
 ${function_definitions}
 
 }
diff --git a/aten/src/ATen/templates/Functions.h b/aten/src/ATen/templates/Functions.h
@@ -51,6 +51,19 @@ TORCH_API std::tuple<Tensor,Tensor> var_mean(const Tensor& self, int dim);
 TORCH_API Tensor std(const Tensor& self, int dim);
 TORCH_API std::tuple<Tensor,Tensor> std_mean(const Tensor& self, int dim);
 
+
+// Special C++-only overloads of conv{1,2,3}d (see gh-45667).
+// A braced list such as {1, 2} passed as `padding` is ambiguous between the
+// std::string and IntArrayRef overloads, so these overloads take `padding` as
+// a std::initializer_list<int64_t> explicitly.
+// `dilation` and `groups` both default to 1.
+TORCH_API at::Tensor conv1d(
+    const Tensor& input, const Tensor& weight, const Tensor& bias, IntArrayRef stride,
+    std::initializer_list<int64_t> padding, IntArrayRef dilation = 1, int64_t groups = 1);
+TORCH_API at::Tensor conv2d(
+    const Tensor& input, const Tensor& weight, const Tensor& bias, IntArrayRef stride,
+    std::initializer_list<int64_t> padding, IntArrayRef dilation = 1, int64_t groups = 1);
+TORCH_API at::Tensor conv3d(
+    const Tensor& input, const Tensor& weight, const Tensor& bias, IntArrayRef stride,
+    std::initializer_list<int64_t> padding, IntArrayRef dilation = 1, int64_t groups = 1);
+
 namespace {
   inline std::vector<int64_t> zero_sizes(const TensorOptions& options) {
     if (options.has_memory_format()) {

diff --git a/c10/util/overloaded.h b/c10/util/overloaded.h
@@ -0,0 +1,30 @@
+#pragma once
+
+// std::move is used below; include its header so this file is self-contained.
+#include <utility>
+
+namespace c10 {
+namespace detail {
+
+// Primary template; only the specialisations below are meant to be used.
+template<class...Ts>
+struct overloaded_t {};
+
+// Base case: wraps a single callable by inheriting from it and exposing its
+// operator().
+template<class T0>
+struct overloaded_t<T0>:T0 {
+  using T0::operator();
+  overloaded_t(T0 t0):T0(std::move(t0)) {}
+};
+// Recursive case: inherits from the first callable and from the overload set
+// of the remaining ones, pulling every operator() into a single overload set.
+template<class T0, class...Ts>
+struct overloaded_t<T0, Ts...>:T0, overloaded_t<Ts...> {
+  using T0::operator();
+  using overloaded_t<Ts...>::operator();
+  overloaded_t(T0 t0, Ts... ts):
+    T0(std::move(t0)),
+    overloaded_t<Ts...>(std::move(ts)...)
+  {}
+};
+
+}  // namespace detail
+
+// Construct an overloaded callable combining multiple callables, e.g. lambdas.
+// Calling the result selects among the supplied callables by ordinary
+// overload resolution on the argument types.
+template<class...Ts>
+detail::overloaded_t<Ts...> overloaded(Ts...ts){ return {std::move(ts)...}; }
+
+}  // namespace c10
diff --git a/test/cpp/api/modules.cpp b/test/cpp/api/modules.cpp
@@ -55,6 +55,15 @@ TEST_F(ModulesTest, Conv1d) {
   ASSERT_EQ(model->weight.grad().numel(), 3 * 2 * 3);
 }
 
+// Conv1d with padding=kSame: construction with stride 1 is expected to work,
+// while construction with stride 2 must throw the "strided convolutions" error.
+TEST_F(ModulesTest, Conv1dSameStrided) {
+  auto options = Conv1dOptions(3, 2, 3);
+  options.stride(1).padding(torch::kSame);
+  Conv1d model_valid(options);
+  ASSERT_THROWS_WITH(
+    [&]{ Conv1d model_invalid(options.stride(2)); }(),
+    "padding='same' is not supported for strided convolutions");
+}
+
 TEST_F(ModulesTest, Conv2dEven) {
   Conv2d model(Conv2dOptions(3, 2, 3).stride(1).bias(false));
   model->weight.set_data(torch::arange(54, torch::dtype(torch::kFloat)).reshape({2, 3, 3, 3}));
@@ -95,6 +104,18 @@ TEST_F(ModulesTest, Conv2dUneven) {
   ASSERT_EQ(model->weight.grad().numel(), 3 * 2 * 3 * 2);
 }
 
+// Conv2d with padding=kSame: construction with stride 1 is expected to work,
+// while a non-unit stride must throw the "strided convolutions" error, both
+// for a scalar stride (2) and when only one dimension is strided ({1, 2}).
+TEST_F(ModulesTest, Conv2dSameStrided) {
+  auto options = Conv2dOptions(3, 2, {3, 4});
+  options.stride(1).padding(torch::kSame);
+  Conv2d model_valid(options);
+  ASSERT_THROWS_WITH(
+    [&]{ Conv2d model_invalid(options.stride(2)); }(),
+    "padding='same' is not supported for strided convolutions");
+  ASSERT_THROWS_WITH(
+    [&]{ Conv2d model_invalid(options.stride({1, 2})); }(),
+    "padding='same' is not supported for strided convolutions");
+}
+
 TEST_F(ModulesTest, Conv3d) {
   Conv3d model(Conv3dOptions(3, 2, 3).stride(1).bias(false));
   model->weight.set_data(torch::arange(162, torch::dtype(torch::kFloat)).reshape({2, 3, 3, 3, 3}));
@@ -131,6 +152,18 @@ TEST_F(ModulesTest, Conv3d) {
   ASSERT_TRUE(model->weight.grad().numel() == 3 * 2 * 3 * 3 * 3);
 }
 
+// Conv3d with padding=kSame: construction with stride 1 is expected to work,
+// while a non-unit stride must throw the "strided convolutions" error, both
+// for a scalar stride (2) and when only one dimension is strided ({1, 2, 1}).
+TEST_F(ModulesTest, Conv3dSameStrided) {
+  auto options = Conv3dOptions(3, 2, {3, 4, 5});
+  options.stride(1).padding(torch::kSame);
+  Conv3d model_valid(options);
+  ASSERT_THROWS_WITH(
+    [&]{ Conv3d model_invalid(options.stride(2)); }(),
+    "padding='same' is not supported for strided convolutions");
+  ASSERT_THROWS_WITH(
+    [&]{ Conv3d model_invalid(options.stride({1, 2, 1})); }(),
+    "padding='same' is not supported for strided convolutions");
+}
+
 TEST_F(ModulesTest, ConvTranspose1d) {
   ConvTranspose1d model(ConvTranspose1dOptions(3, 2, 3).stride(1).bias(false));
   model->weight.set_data(torch::arange(18.).view({2, 3, 3}));