Merge branch 'main' into stablehlo-transpose

iml130 · Jul 18, 2023 · d9c9b01 · d9c9b01
2 parents 82a09d8 + 7297909
commit d9c9b01
Show file tree

Hide file tree

Showing 6 changed files with 253 additions and 2 deletions.
diff --git a/build_tools/llvm_version.txt b/build_tools/llvm_version.txt
@@ -1 +1 @@
-d954d9758a4f6021c2ff6edb1365e511398ae58d
+a2426eb603afaf6c04ddd14921b474d2caffa12b
diff --git a/docs/tosa-op-coverage.md b/docs/tosa-op-coverage.md
@@ -36,12 +36,14 @@ The table below shows the supported TOSA ops.
 | select                 | :heavy_check_mark: | |
 | **Other ops**
 | argmax                 | :heavy_check_mark: | |
+| avg_pool2d             | :white_check_mark: | Quantization and acc_type not supported |
 | concat                 | :heavy_check_mark: | |
 | conv2d                 | :white_check_mark: | Quantization and dilation not supported |
 | depthwise_conv2d       | :white_check_mark: | Quantization and dilation not supported |
 | fully_connected        | :white_check_mark: | Quantization not supported |
 | gather                 | :heavy_check_mark: | |
 | matmul                 | :white_check_mark: | Quantization not supported |
+| max_pool2d             | :white_check_mark: | Quantization not supported |
 | reduce_all             | :heavy_check_mark: | |
 | reduce_any             | :heavy_check_mark: | |
 | reduce_max             | :heavy_check_mark: | |

diff --git a/lib/Conversion/TosaToEmitC/TosaToEmitC.cpp b/lib/Conversion/TosaToEmitC/TosaToEmitC.cpp
@@ -141,6 +141,46 @@ class GenericConvOpConversion : public OpConversionPattern<SrcOp> {
   StringRef funcName;
 };
 
+/// Lowers a `tosa` pooling operation (any op exposing `getPad()`,
+/// `getStride()` and `getKernel()`) to an `emitc.call` of the function whose
+/// name is supplied when the pattern is constructed.
+template <typename SrcOp, typename Adaptor = typename SrcOp::Adaptor>
+class GenericPoolOpConversion : public OpConversionPattern<SrcOp> {
+  using OpConversionPattern<SrcOp>::OpConversionPattern;
+
+public:
+  GenericPoolOpConversion(MLIRContext *ctx, StringRef funcName)
+      : OpConversionPattern<SrcOp>(ctx), funcName(funcName) {}
+
+private:
+  LogicalResult
+  matchAndRewrite(SrcOp poolOp, Adaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
+    StringAttr callee = rewriter.getStringAttr(funcName);
+
+    // Call arguments, in the order the callee expects them: the tensor
+    // operand (index attribute 0 refers to the first operand of the
+    // `emitc.call`), followed by pad, stride and kernel as i64 attributes.
+    // TODO: average pool has an acc_type attribute.
+    auto inputIndex = rewriter.getIndexAttr(0);
+    auto padAttr = getI64ElementsAttr(poolOp.getPad(), poolOp.getContext());
+    auto strideAttr =
+        getI64ElementsAttr(poolOp.getStride(), poolOp.getContext());
+    auto kernelAttr =
+        getI64ElementsAttr(poolOp.getKernel(), poolOp.getContext());
+    ArrayAttr args =
+        rewriter.getArrayAttr({inputIndex, padAttr, strideAttr, kernelAttr});
+
+    // The result tensor type is the single template argument of the callee.
+    auto resultTypeAttr = TypeAttr::get(poolOp.getResult().getType());
+    ArrayAttr templateArgs = rewriter.getArrayAttr({resultTypeAttr});
+
+    // Swap the pooling op for the equivalent call.
+    rewriter.replaceOpWithNewOp<emitc::CallOp>(poolOp, poolOp.getType(), callee,
+                                               args, templateArgs,
+                                               adaptor.getOperands());
+
+    return success();
+  }
+
+  /// Name of the function the generated `emitc.call` refers to.
+  StringRef funcName;
+};
+
 /// Convert `tosa.fully_connected` into an `emitc.call` operation.
 class FullyConnectedOpConversion
     : public OpConversionPattern<tosa::FullyConnectedOp> {
@@ -830,6 +870,10 @@ void populateTosaToEmitcPatterns(MLIRContext *ctx,
                                                         "emitc::tosa::conv2d");
   patterns.add<GenericConvOpConversion<tosa::DepthwiseConv2DOp>>(
       ctx, "emitc::tosa::depthwise_conv2d");
+  patterns.add<GenericPoolOpConversion<tosa::AvgPool2dOp>>(
+      ctx, "emitc::tosa::avg_pool2d");
+  patterns.add<GenericPoolOpConversion<tosa::MaxPool2dOp>>(
+      ctx, "emitc::tosa::max_pool2d");
   patterns.add<FullyConnectedOpConversion>(ctx, "emitc::tosa::fully_connected");
   patterns.add<GenericOpConversion<tosa::GatherOp>>(
       ctx, "emitc::tosa::gather",
@@ -907,7 +951,9 @@ struct ConvertTosaToEmitCPass
     target.addIllegalOp<tosa::SelectOp>();
 
     // Other ops.
-    target.addIllegalOp<tosa::ConcatOp,
+    target.addIllegalOp<tosa::AvgPool2dOp,
+                        tosa::MaxPool2dOp,
+                        tosa::ConcatOp,
                         tosa::Conv2DOp,
                         tosa::DepthwiseConv2DOp,
                         tosa::FullyConnectedOp,

diff --git a/reference-implementation/include/emitc/tosa.h b/reference-implementation/include/emitc/tosa.h
@@ -456,6 +456,121 @@ Dest depthwise_conv2d(Src input, Weights weights, Tensor1D<int64_t, 4> padding,
   return output;
 }
 
+// MaxPool2d
+//
+// Takes the maximum of `input` (indexed as N, H, W, C) over every
+// `kernel[0]` x `kernel[1]` window, independently per batch and channel.
+// Windows are placed on the input extended by `padding` = {top, bottom, left,
+// right} and advance by `stride` = {vertical, horizontal}. Padded positions
+// are skipped, so they never win the comparison.
+template <typename Dest, typename Src>
+Dest max_pool2d(Src input, std::array<int64_t, 4> padding,
+                std::array<int64_t, 2> stride, std::array<int64_t, 2> kernel) {
+  static_assert(is_tensor_of_dim<4, Src>::value,
+                "Expected 4 dimensional input");
+  static_assert(is_tensor_of_dim<4, Dest>::value,
+                "Expected 4 dimensional output");
+  using ET_Dest = typename get_element_type<Dest>::type;
+  assert(stride[0] > 0);
+  assert(stride[1] > 0);
+  const int N = input.dim(0);
+  const int H_IN = input.dim(1);
+  const int W_IN = input.dim(2);
+  const int C = input.dim(3);
+  Dest output;
+  const int K_H = kernel[0];
+  const int K_W = kernel[1];
+  const int S_H = stride[0];
+  const int S_W = stride[1];
+  const int pt = padding[0];
+  const int pb = padding[1];
+  const int pl = padding[2];
+  const int pr = padding[3];
+  const int H_PAD = pt + H_IN + pb;
+  const int W_PAD = pl + W_IN + pr;
+  // Seed each window with the most negative representable value. For
+  // floating-point types `std::numeric_limits<T>::min()` is the smallest
+  // *positive* normal number, which would be returned for any window that
+  // contains only negative values; `lowest()` is correct for both integer
+  // and floating-point element types.
+  const ET_Dest lowest = std::numeric_limits<ET_Dest>::lowest();
+  // Pooling: (h_pad, w_pad) is the window origin in padded coordinates.
+  for (int n = 0; n < N; n++) {
+    for (int h_pad = 0; h_pad < H_PAD - K_H + 1; h_pad += S_H) {
+      const int h_out = h_pad / S_H;
+      for (int w_pad = 0; w_pad < W_PAD - K_W + 1; w_pad += S_W) {
+        const int w_out = w_pad / S_W;
+        for (int c = 0; c < C; c++) {
+          ET_Dest best = lowest;
+          for (int kh = 0; kh < K_H; kh++) {
+            // Translate back to unpadded input coordinates.
+            const int h_in = h_pad - pt + kh;
+            if (h_in < 0 || h_in >= H_IN)
+              continue;
+            for (int kw = 0; kw < K_W; kw++) {
+              const int w_in = w_pad - pl + kw;
+              if (w_in < 0 || w_in >= W_IN)
+                continue;
+              best = std::max(best, input(n, h_in, w_in, c));
+            }
+          }
+          output(n, h_out, w_out, c) = best;
+        }
+      }
+    }
+  }
+  return output;
+}
+
+// AvgPool2d
+//
+// Averages `input` (indexed as N, H, W, C) over every `kernel[0]` x
+// `kernel[1]` window, independently per batch and channel. Windows are placed
+// on the input extended by `padding` = {top, bottom, left, right} and advance
+// by `stride` = {vertical, horizontal}. Padded positions are neither summed
+// nor counted, so the divisor is the number of real input elements covered.
+template <typename Dest, typename Src>
+Dest avg_pool2d(Src input, std::array<int64_t, 4> padding,
+                std::array<int64_t, 2> stride, std::array<int64_t, 2> kernel) {
+  static_assert(is_tensor_of_dim<4, Src>::value,
+                "Expected 4 dimensional input");
+  static_assert(is_tensor_of_dim<4, Dest>::value,
+                "Expected 4 dimensional output");
+
+  using ET_Dest = typename get_element_type<Dest>::type;
+  static_assert(std::is_same<ET_Dest, float>::value,
+                "Only float data type supported");
+
+  assert(stride[0] > 0);
+  assert(stride[1] > 0);
+
+  // Input extents.
+  const int batches = input.dim(0);
+  const int in_h = input.dim(1);
+  const int in_w = input.dim(2);
+  const int channels = input.dim(3);
+
+  // Window geometry.
+  const int kernel_h = kernel[0];
+  const int kernel_w = kernel[1];
+  const int stride_h = stride[0];
+  const int stride_w = stride[1];
+  const int pad_top = padding[0];
+  const int pad_bottom = padding[1];
+  const int pad_left = padding[2];
+  const int pad_right = padding[3];
+
+  // Extents of the padded input and the exclusive bounds for window origins.
+  const int padded_h = pad_top + in_h + pad_bottom;
+  const int padded_w = pad_left + in_w + pad_right;
+  const int origin_h_end = padded_h - kernel_h + 1;
+  const int origin_w_end = padded_w - kernel_w + 1;
+
+  Dest output;
+
+  for (int n = 0; n < batches; ++n) {
+    for (int row0 = 0; row0 < origin_h_end; row0 += stride_h) {
+      for (int col0 = 0; col0 < origin_w_end; col0 += stride_w) {
+        for (int ch = 0; ch < channels; ++ch) {
+          const int out_row = row0 / stride_h;
+          const int out_col = col0 / stride_w;
+
+          ET_Dest sum = ET_Dest(0);
+          size_t valid = 0;
+
+          for (int kr = 0; kr < kernel_h; ++kr) {
+            // Rows that fall into the padding contribute nothing.
+            const int in_row = row0 - pad_top + kr;
+            if (in_row < 0 || in_row >= in_h)
+              continue;
+
+            for (int kc = 0; kc < kernel_w; ++kc) {
+              const int in_col = col0 - pad_left + kc;
+              if (in_col >= 0 && in_col < in_w) {
+                ++valid;
+                sum += input(n, in_row, in_col, ch);
+              }
+            }
+          }
+          output(n, out_row, out_col, ch) = sum / static_cast<ET_Dest>(valid);
+        }
+      }
+    }
+  }
+  return output;
+}
+
 // FullyConnectedOp
 template <typename Dest, typename Src, typename Weights, typename Bias>
 Dest fully_connected(Src input, Weights weights, Bias bias) {

diff --git a/reference-implementation/unittests/tosa.cpp b/reference-implementation/unittests/tosa.cpp
@@ -713,6 +713,82 @@ TEST(tosa, depthwise_conv2d) {
   }
 }
 
+TEST(tosa, max_pool2d) {
+  // Case 1: 2x2 window, unit stride, no padding.
+  {
+    using SourceType = Tensor4D<float, 1, 3, 3, 2>; // N IH IW C
+    using ResultType = Tensor4D<float, 1, 2, 2, 2>; // N OH OW C
+
+    SourceType source{1.f,  2.f,  3.f,  4.f,  5.f,  6.f,  7.f,  8.f,  9.f,
+                      10.f, 11.f, 12.f, 13.f, 14.f, 15.f, 16.f, 17.f, 18.f};
+    std::array<int64_t, 4> pad{0, 0, 0, 0};
+    std::array<int64_t, 2> step{1, 1};
+    std::array<int64_t, 2> window{2, 2};
+
+    ResultType expected{9.f, 10.f, 11.f, 12.f, 15.f, 16.f, 17.f, 18.f};
+    ResultType result = tosa::max_pool2d<ResultType>(source, pad, step, window);
+
+    EXPECT_THAT(result, Pointwise(FloatNear(EPSILON), expected));
+  }
+  // Case 2: two batches, asymmetric padding, non-unit strides and a
+  // non-square window.
+  {
+    using SourceType = Tensor4D<float, 2, 3, 4, 2>; // N IH IW C
+    using ResultType = Tensor4D<float, 2, 2, 2, 2>; // N OH OW C
+
+    SourceType source{
+        1.f,  2.f,  3.f,  4.f,  5.f,  6.f,  7.f,  8.f,  9.f,  10.f, 11.f, 12.f,
+        13.f, 14.f, 15.f, 16.f, 17.f, 18.f, 19.f, 20.f, 21.f, 22.f, 23.f, 24.f,
+        25.f, 26.f, 27.f, 28.f, 29.f, 30.f, 31.f, 32.f, 33.f, 34.f, 35.f, 36.f,
+        37.f, 38.f, 39.f, 40.f, 41.f, 42.f, 43.f, 44.f, 45.f, 46.f, 47.f, 48.f};
+    std::array<int64_t, 4> pad{2, 1, 0, 2};  // {pt, pb, pl, pr}
+    std::array<int64_t, 2> step{3, 2};       // {sy, sx}
+    std::array<int64_t, 2> window{3, 4};     // {ky, kx}
+
+    ResultType expected{7.f,  8.f,  7.f,  8.f,  23.f, 24.f, 23.f, 24.f,
+                        31.f, 32.f, 31.f, 32.f, 47.f, 48.f, 47.f, 48.f};
+    ResultType result = tosa::max_pool2d<ResultType>(source, pad, step, window);
+
+    EXPECT_THAT(result, Pointwise(FloatNear(EPSILON), expected));
+  }
+}
+
+TEST(tosa, avg_pool2d) {
+  // Case 1: 2x2 window, unit stride, no padding.
+  {
+    using SourceType = Tensor4D<float, 1, 3, 3, 2>; // N IH IW C
+    using ResultType = Tensor4D<float, 1, 2, 2, 2>; // N OH OW C
+
+    SourceType source{1.f,  2.f,  3.f,  4.f,  5.f,  6.f,  7.f,  8.f,  9.f,
+                      10.f, 11.f, 12.f, 13.f, 14.f, 15.f, 16.f, 17.f, 18.f};
+    std::array<int64_t, 4> pad{0, 0, 0, 0};
+    std::array<int64_t, 2> step{1, 1};
+    std::array<int64_t, 2> window{2, 2};
+
+    ResultType expected{5.f, 6.f, 7.f, 8.f, 11.f, 12.f, 13.f, 14.f};
+    ResultType result = tosa::avg_pool2d<ResultType>(source, pad, step, window);
+
+    EXPECT_THAT(result, Pointwise(FloatNear(EPSILON), expected));
+  }
+  // Case 2: two batches, asymmetric padding, non-unit strides and a
+  // non-square window; padded positions are excluded from the average.
+  {
+    using SourceType = Tensor4D<float, 2, 3, 4, 2>; // N IH IW C
+    using ResultType = Tensor4D<float, 2, 2, 2, 2>; // N OH OW C
+
+    SourceType source{
+        1.f,  2.f,  3.f,  4.f,  5.f,  6.f,  7.f,  8.f,  9.f,  10.f, 11.f, 12.f,
+        13.f, 14.f, 15.f, 16.f, 17.f, 18.f, 19.f, 20.f, 21.f, 22.f, 23.f, 24.f,
+        25.f, 26.f, 27.f, 28.f, 29.f, 30.f, 31.f, 32.f, 33.f, 34.f, 35.f, 36.f,
+        37.f, 38.f, 39.f, 40.f, 41.f, 42.f, 43.f, 44.f, 45.f, 46.f, 47.f, 48.f};
+    std::array<int64_t, 4> pad{2, 1, 0, 2};  // {pt, pb, pl, pr}
+    std::array<int64_t, 2> step{3, 2};       // {sy, sx}
+    std::array<int64_t, 2> window{3, 4};     // {ky, kx}
+
+    ResultType expected{4.f,  5.f,  6.f,  7.f,  16.f, 17.f, 18.f, 19.f,
+                        28.f, 29.f, 30.f, 31.f, 40.f, 41.f, 42.f, 43.f};
+    ResultType result = tosa::avg_pool2d<ResultType>(source, pad, step, window);
+
+    EXPECT_THAT(result, Pointwise(FloatNear(EPSILON), expected));
+  }
+}
+
 TEST(tosa, fully_connected) {
   using InputType = Tensor2D<float, 2, 5>;  // N CIN
   using WeightType = Tensor2D<float, 2, 5>; // COUT CIN

diff --git a/test/Conversion/tosa-to-emitc.mlir b/test/Conversion/tosa-to-emitc.mlir
@@ -280,6 +280,18 @@ func.func @test_depthwise_conv2d(%arg0: tensor<1x4x5x2xf32>, %arg1: tensor<2x2x2
     return %0 : tensor<1x3x4x4xf32>
 }
 
+// Verifies that `tosa.max_pool2d` is rewritten to an `emitc.call` of
+// `emitc::tosa::max_pool2d` whose `args` hold the operand index followed by
+// pad, stride and kernel (in that order) and whose template argument is the
+// result tensor type.
+func.func @test_max_pool2d(%arg0: tensor<1x32x32x8xf32>) -> tensor<1x32x32x8xf32> {
+  // CHECK: emitc.call "emitc::tosa::max_pool2d"(%arg0) {args = [0 : index, dense<0> : tensor<4xi64>, dense<1> : tensor<2xi64>, dense<1> : tensor<2xi64>], template_args = [tensor<1x32x32x8xf32>]} : (tensor<1x32x32x8xf32>) -> tensor<1x32x32x8xf32>
+  %0 = "tosa.max_pool2d"(%arg0) {kernel = array<i64: 1, 1>, pad = array<i64: 0, 0, 0, 0>, stride = array<i64: 1, 1>} : (tensor<1x32x32x8xf32>) -> tensor<1x32x32x8xf32>
+  return %0 : tensor<1x32x32x8xf32>
+}
+
+// Verifies that `tosa.avg_pool2d` is rewritten to an `emitc.call` of
+// `emitc::tosa::avg_pool2d` whose `args` hold the operand index followed by
+// pad, stride and kernel (in that order). The op's `acc_type` attribute does
+// not appear in the expected call.
+func.func @test_avg_pool2d(%arg0: tensor<1x32x32x8xf32>) -> tensor<1x32x32x8xf32> {
+  // CHECK: emitc.call "emitc::tosa::avg_pool2d"(%arg0) {args = [0 : index, dense<[0, 1, 0, 1]> : tensor<4xi64>, dense<1> : tensor<2xi64>, dense<2> : tensor<2xi64>], template_args = [tensor<1x32x32x8xf32>]} : (tensor<1x32x32x8xf32>) -> tensor<1x32x32x8xf32>
+  %0 = "tosa.avg_pool2d"(%arg0) {acc_type = f32, kernel = array<i64: 2, 2>, pad = array<i64: 0, 1, 0, 1>, stride = array<i64: 1, 1>} : (tensor<1x32x32x8xf32>) -> tensor<1x32x32x8xf32>
+  return %0 : tensor<1x32x32x8xf32>
+}
+
 func.func @test_fully_connected(%arg0: tensor<14x19xf32>, %arg1: tensor<19x28xf32>, %arg2: tensor<28xf32>) -> tensor<14x28xf32> {
   // CHECK: emitc.call "emitc::tosa::fully_connected"(%arg0, %arg1, %arg2) {template_args = [tensor<14x28xf32>]} : (tensor<14x19xf32>, tensor<19x28xf32>, tensor<28xf32>) -> tensor<14x28xf32>
   %0 = "tosa.fully_connected"(%arg0, %arg1, %arg2) : (tensor<14x19xf32>, tensor<19x28xf32>, tensor<28xf32>) -> tensor<14x28xf32>