add contour_expand and pixel_group in parrots (#1323)

b6eb3822 · pc · GitHub · 4bab2924 · b6eb3822 · b6eb3822
Unverified Commit b6eb3822 authored Sep 23, 2021 by pc Committed by GitHub Sep 23, 2021
9 changed files
--- a/mmcv/ops/contour_expand.py
+++ b/mmcv/ops/contour_expand.py
@@ -21,7 +21,7 @@ def contour_expand(kernel_mask, internal_kernel_label, min_kernel_area,
        kernel_num (int): The instance kernel number.

    Returns:
-        label (np.array or Tensor): The instance index map with size hxw.
+        label (list): The instance index map with size hxw.
    """
    assert isinstance(kernel_mask, (torch.Tensor, np.ndarray))
    assert isinstance(internal_kernel_label, (torch.Tensor, np.ndarray))
@@ -33,6 +33,17 @@ def contour_expand(kernel_mask, internal_kernel_label, min_kernel_area,
    if isinstance(internal_kernel_label, np.ndarray):
        internal_kernel_label = torch.from_numpy(internal_kernel_label)

-    label = ext_module.contour_expand(kernel_mask, internal_kernel_label,
-                                      min_kernel_area, kernel_num)
+    if torch.__version__ == 'parrots':
+        if kernel_mask.shape[0] == 0 or internal_kernel_label.shape[0] == 0:
+            label = []
+        else:
+            label = ext_module.contour_expand(
+                kernel_mask,
+                internal_kernel_label,
+                min_kernel_area=min_kernel_area,
+                kernel_num=kernel_num)
+            label = label.tolist()
+    else:
+        label = ext_module.contour_expand(kernel_mask, internal_kernel_label,
+                                          min_kernel_area, kernel_num)
    return label
--- a/mmcv/ops/csrc/parrots/contour_expand.cpp
+++ b/mmcv/ops/csrc/parrots/contour_expand.cpp
+// Copyright (c) OpenMMLab. All rights reserved
+// It is modified from https://github.com/whai362/PSENet
+#include <iostream>
+#include <queue>
+
+#include "pytorch_cpp_helper.hpp"
+
+using namespace std;
+
+// A 2-D integer coordinate; a default-constructed point sits at the origin.
+class Point2d {
+ public:
+  int x = 0;
+  int y = 0;
+
+  Point2d() = default;
+  Point2d(int _x, int _y) : x(_x), y(_y) {}
+};
+
+// Expands labelled seed regions breadth-first through successively earlier
+// kernel planes and writes the resulting per-pixel labels into `text_line`.
+//
+// data:       kernel masks, read as data_shape[0] contiguous planes of
+//             data_shape[1] x data_shape[2] values; non-zero means "inside".
+// data_shape: {kernel_num, height, width} of `data`.
+// label_map:  height x width seed labels; 0 is background.
+// label_num:  largest label value expected; sizes the per-label area table.
+// min_area:   seed regions with fewer pixels than this are dropped.
+// text_line:  output; one row of `width` labels is appended per image row.
+void kernel_dilate(const uint8_t *data, IntArrayRef data_shape,
+                   const int *label_map, int &label_num, int &min_area,
+                   vector<vector<int>> &text_line) {
+  std::vector<int> area(label_num + 1);
+  int kernel_num = data_shape[0];
+  int height = data_shape[1];
+  int width = data_shape[2];
+
+  // Pixel count of every seed region.
+  for (int x = 0; x < height; ++x) {
+    for (int y = 0; y < width; ++y) {
+      int label = label_map[x * width + y];
+      // Skip background and anything that would index outside `area`.
+      if (label <= 0 || label > label_num) continue;
+      area[label] += 1;
+    }
+  }
+
+  // Seed the frontier with every pixel of the sufficiently large regions.
+  std::queue<Point2d> frontier, next_frontier;
+  for (int x = 0; x < height; ++x) {
+    std::vector<int> row(width);
+    for (int y = 0; y < width; ++y) {
+      int label = label_map[x * width + y];
+      if (label <= 0 || label > label_num) continue;
+      if (area[label] < min_area) continue;
+
+      frontier.push(Point2d(x, y));
+      row[y] = label;
+    }
+    text_line.emplace_back(row);
+  }
+
+  int dx[] = {-1, 1, 0, 0};
+  int dy[] = {0, 0, -1, 1};
+  // Offset of each kernel plane inside `data`. The previous std::for_each
+  // call discarded the lambda's return value, which left every offset at 0
+  // and made all rounds read plane 0.
+  std::vector<int> kernel_step(kernel_num);
+  for (int k = 0; k < kernel_num; ++k) {
+    kernel_step[k] = k * height * width;
+  }
+
+  // The last plane holds the seeds; walk the remaining planes backwards.
+  for (int kernel_id = kernel_num - 2; kernel_id >= 0; --kernel_id) {
+    while (!frontier.empty()) {
+      Point2d point = frontier.front();
+      frontier.pop();
+      int x = point.x;
+      int y = point.y;
+      int label = text_line[x][y];
+
+      bool is_edge = true;
+      for (int d = 0; d < 4; ++d) {
+        int tmp_x = x + dx[d];
+        int tmp_y = y + dy[d];
+
+        if (tmp_x < 0 || tmp_x >= height) continue;
+        if (tmp_y < 0 || tmp_y >= width) continue;
+        int kernel_value = data[kernel_step[kernel_id] + tmp_x * width + tmp_y];
+        if (kernel_value == 0) continue;
+        if (text_line[tmp_x][tmp_y] > 0) continue;
+
+        frontier.push(Point2d(tmp_x, tmp_y));
+        text_line[tmp_x][tmp_y] = label;
+        is_edge = false;
+      }
+
+      // A pixel that could not grow in this plane is retried in the next one.
+      if (is_edge) {
+        next_frontier.push(point);
+      }
+    }
+    std::swap(frontier, next_frontier);
+  }
+}
+
+// Entry point of the contour-expansion op.
+//
+// kernel_mask:           3-D uint8 tensor whose last two dimensions match
+//                        `internal_kernel_label`.
+// internal_kernel_label: 2-D int32 tensor of seed labels.
+// min_kernel_area:       forwarded to kernel_dilate() as the minimum area.
+// kernel_num:            forwarded to kernel_dilate() as the label count.
+//
+// Both tensors are made contiguous before their raw data is read. Returns the
+// label rows produced by kernel_dilate().
+std::vector<std::vector<int>> contour_expand(Tensor kernel_mask,
+                                             Tensor internal_kernel_label,
+                                             int min_kernel_area,
+                                             int kernel_num) {
+  kernel_mask = kernel_mask.contiguous();
+  internal_kernel_label = internal_kernel_label.contiguous();
+  assert(kernel_mask.dim() == 3);
+  assert(internal_kernel_label.dim() == 2);
+  assert(kernel_mask.size(1) == internal_kernel_label.size(0));
+  assert(kernel_mask.size(2) == internal_kernel_label.size(1));
+  CHECK_CPU_INPUT(kernel_mask);
+  CHECK_CPU_INPUT(internal_kernel_label);
+  auto ptr_data = kernel_mask.data_ptr<uint8_t>();
+  IntArrayRef data_shape = kernel_mask.sizes();
+
+  auto data_label_map = internal_kernel_label.data_ptr<int32_t>();
+  vector<vector<int>> text_line;
+
+  kernel_dilate(ptr_data, data_shape, data_label_map, kernel_num,
+                min_kernel_area, text_line);
+
+  return text_line;
+}
--- a/mmcv/ops/csrc/parrots/contour_expand_parrots.cpp
+++ b/mmcv/ops/csrc/parrots/contour_expand_parrots.cpp
+// Copyright (c) OpenMMLab. All rights reserved
+#include <parrots/compute/aten.hpp>
+#include <parrots/extension.hpp>
+#include <parrots/foundation/ssattrs.hpp>
+
+#include "contour_expand_pytorch.h"
+
+using namespace parrots;
+using namespace std;
+
+// Parrots operator wrapper around contour_expand().
+//
+// Reads the "min_kernel_area" and "kernel_num" attributes, converts ins[0]
+// and ins[1] to ATen tensors, runs contour_expand() and stores the returned
+// label rows as a 2-D int tensor in outs[0].
+template <typename T>
+void contour_expand_parrots(T& ctx, const SSElement& attr,
+                            const OperatorBase::in_list_t& ins,
+                            OperatorBase::out_list_t& outs) {
+  int min_kernel_area, kernel_num;
+  SSAttrs(attr)
+      .get<int>("min_kernel_area", min_kernel_area)
+      .get<int>("kernel_num", kernel_num)
+      .done();
+  at::Tensor kernel_mask;
+  at::Tensor internal_kernel_label;
+  kernel_mask = buildATensor(ctx, ins[0]);
+  internal_kernel_label = buildATensor(ctx, ins[1]);
+  auto out = contour_expand(kernel_mask, internal_kernel_label, min_kernel_area,
+                            kernel_num);
+  // n rows; m ends up as the length of the longest row.
+  int n = out.size(), m = 0;
+  for (int i = 0; i < n; ++i)
+    if (m < out[i].size()) m = out[i].size();
+  auto options = torch::TensorOptions().dtype(at::kInt);
+  auto tensor = torch::zeros({n, m}, options);
+  // Write each row of `out` into the matching row of the n x m tensor.
+  for (int i = 0; i < n; i++)
+    tensor.slice(0, i, i + 1) =
+        torch::from_blob(out[i].data(), {out[i].size()}, options);
+  updateDArray(ctx, tensor, outs[0]);
+}
+
+// Registers the op under the name "contour_expand": two attributes, two
+// inputs, one output, implemented for HostContext only.
+PARROTS_EXTENSION_REGISTER(contour_expand)
+    .attr("min_kernel_area")
+    .attr("kernel_num")
+    .input(2)
+    .output(1)
+    .apply(contour_expand_parrots<HostContext>)
+    .done();
--- a/mmcv/ops/csrc/parrots/contour_expand_pytorch.h
+++ b/mmcv/ops/csrc/parrots/contour_expand_pytorch.h
+// Copyright (c) OpenMMLab. All rights reserved
+#ifndef CONTOUR_EXPAND_PYTORCH_H
+#define CONTOUR_EXPAND_PYTORCH_H
+#include <torch/extension.h>
+using namespace at;
+
+// Expands the seed labels in `internal_kernel_label` through the masks in
+// `kernel_mask` and returns the label map as one vector of ints per row.
+// Implemented in contour_expand.cpp.
+std::vector<std::vector<int>> contour_expand(Tensor kernel_mask,
+                                             Tensor internal_kernel_label,
+                                             int min_kernel_area,
+                                             int kernel_num);
+
+#endif  // CONTOUR_EXPAND_PYTORCH_H
--- a/mmcv/ops/csrc/parrots/pixel_group.cpp
+++ b/mmcv/ops/csrc/parrots/pixel_group.cpp
+// Copyright (c) OpenMMLab. All rights reserved
+// It is modified from https://github.com/WenmuZhou/PAN.pytorch
+
+#include "pytorch_cpp_helper.hpp"
+
+// Summarises a height x width label map, one row per label in [0, label_num).
+//
+// Each returned row is laid out as
+//   {mean score, pixel count, x0, y0, x1, y1, ...}
+// where (xi, yi) are the column/row coordinates of the pixels carrying that
+// label. Pixels with a label <= 0 are ignored, so row 0 stays {0, 0}. Labels
+// >= label_num are skipped as well instead of writing past the end of the
+// result.
+//
+// label: height * width labels, row-major.
+// score: height * width scores, row-major.
+std::vector<std::vector<float>> estimate_confidence(int32_t* label,
+                                                    float* score, int label_num,
+                                                    int height, int width) {
+  // Every row starts as {score sum, pixel count} = {0, 0}.
+  const int row_count = label_num > 0 ? label_num : 0;
+  std::vector<std::vector<float>> point_vector(row_count,
+                                               std::vector<float>(2, 0.f));
+  for (int y = 0; y < height; y++) {
+    auto label_tmp = label + y * width;
+    auto score_tmp = score + y * width;
+    for (int x = 0; x < width; x++) {
+      auto l = label_tmp[x];
+      if (l > 0 && l < row_count) {
+        float confidence = score_tmp[x];
+        point_vector[l].push_back(x);
+        point_vector[l].push_back(y);
+        point_vector[l][0] += confidence;
+        point_vector[l][1] += 1;
+      }
+    }
+  }
+  // Turn the accumulated score sum into a mean for every non-empty label.
+  for (auto& point : point_vector) {
+    if (point[1] > 0) {
+      point[0] /= point[1];
+    }
+  }
+  return point_vector;
+}
+// Grows every kernel region over the foreground mask, absorbing 4-connected
+// neighbours whose embedding lies within `dis_threshold` of the kernel's mean
+// embedding, then summarises the grown label map with estimate_confidence().
+//
+// score:             height x width float scores.
+// mask:              height x width bool foreground mask.
+// embedding:         height x width x embedding_dim float embeddings.
+// kernel_label:      height x width int32 kernel labels; 0 is background and
+//                    labels are expected in [0, kernel_region_num).
+// kernel_contour:    height x width uint8 flags marking contour pixels.
+// kernel_region_num: number of rows in the per-label tables.
+// dis_threshold:     embedding distance below which a pixel is absorbed.
+//
+// All tensors are read through raw data pointers, so they must be contiguous.
+std::vector<std::vector<float>> pixel_group_cpu(
+    Tensor score, Tensor mask, Tensor embedding, Tensor kernel_label,
+    Tensor kernel_contour, int kernel_region_num, float dis_threshold) {
+  assert(score.dim() == 2);
+  assert(mask.dim() == 2);
+  assert(embedding.dim() == 3);
+  int height = score.size(0);
+  int width = score.size(1);
+  // Spelled out pairwise: `a == b == c` would compare a bool with a size.
+  assert(height == mask.size(0) && height == embedding.size(0) &&
+         height == kernel_label.size(0));
+  assert(width == mask.size(1) && width == embedding.size(1) &&
+         width == kernel_label.size(1));
+
+  auto threshold_square = dis_threshold * dis_threshold;
+  auto ptr_score = score.data_ptr<float>();
+  auto ptr_mask = mask.data_ptr<bool>();
+  auto ptr_kernel_contour = kernel_contour.data_ptr<uint8_t>();
+  auto ptr_embedding = embedding.data_ptr<float>();
+  auto ptr_kernel_label = kernel_label.data_ptr<int32_t>();
+  std::queue<std::tuple<int, int, int32_t>> contour_pixels;
+  auto embedding_dim = embedding.size(2);
+  // Per label: embedding sum in [0, embedding_dim), pixel count in the last
+  // slot.
+  std::vector<std::vector<float>> kernel_vector(
+      kernel_region_num, std::vector<float>(embedding_dim + 1, 0));
+
+  // The grown labels are written to a copy so kernel_label stays untouched.
+  Tensor text_label;
+  text_label = kernel_label.clone();
+  auto ptr_text_label = text_label.data_ptr<int32_t>();
+
+  for (int i = 0; i < height; i++) {
+    auto ptr_embedding_tmp = ptr_embedding + i * width * embedding_dim;
+    auto ptr_kernel_label_tmp = ptr_kernel_label + i * width;
+    auto ptr_kernel_contour_tmp = ptr_kernel_contour + i * width;
+
+    for (int j = 0, k = 0; j < width && k < width * embedding_dim;
+         j++, k += embedding_dim) {
+      int32_t label = ptr_kernel_label_tmp[j];
+      // Labels outside the table are ignored rather than indexed.
+      if (label > 0 && label < kernel_region_num) {
+        for (int d = 0; d < embedding_dim; d++)
+          kernel_vector[label][d] += ptr_embedding_tmp[k + d];
+        kernel_vector[label][embedding_dim] += 1;
+        // kernel pixel number
+        if (ptr_kernel_contour_tmp[j]) {
+          contour_pixels.push(std::make_tuple(i, j, label));
+        }
+      }
+    }
+  }
+  // Convert the embedding sums into means.
+  for (int i = 0; i < kernel_region_num; i++) {
+    const float pixel_count = kernel_vector[i][embedding_dim];
+    // Labels without pixels (including background) keep a zero vector
+    // instead of becoming NaN through 0 / 0.
+    if (pixel_count == 0) continue;
+    for (int j = 0; j < embedding_dim; j++) {
+      kernel_vector[i][j] /= pixel_count;
+    }
+  }
+  int dx[4] = {-1, 1, 0, 0};
+  int dy[4] = {0, 0, -1, 1};
+  while (!contour_pixels.empty()) {
+    auto query_pixel = contour_pixels.front();
+    contour_pixels.pop();
+    int y = std::get<0>(query_pixel);
+    int x = std::get<1>(query_pixel);
+    int32_t l = std::get<2>(query_pixel);
+    // kernel_vector is not modified while growing, so a reference is enough.
+    const auto& kernel_cv = kernel_vector[l];
+    for (int idx = 0; idx < 4; idx++) {
+      int tmpy = y + dy[idx];
+      int tmpx = x + dx[idx];
+      if (tmpy < 0 || tmpy >= height || tmpx < 0 || tmpx >= width) continue;
+      // Only form the row pointer once the coordinates are known to be valid.
+      auto ptr_text_label_tmp = ptr_text_label + tmpy * width;
+      if (!ptr_mask[tmpy * width + tmpx] || ptr_text_label_tmp[tmpx] > 0)
+        continue;
+
+      float dis = 0;
+      auto ptr_embedding_tmp = ptr_embedding + tmpy * width * embedding_dim;
+      for (int i = 0; i < embedding_dim; i++) {
+        dis +=
+            pow(kernel_cv[i] - ptr_embedding_tmp[tmpx * embedding_dim + i], 2);
+        // ignore further computing if dis is big enough
+        if (dis >= threshold_square) break;
+      }
+      if (dis >= threshold_square) continue;
+      contour_pixels.push(std::make_tuple(tmpy, tmpx, l));
+      ptr_text_label_tmp[tmpx] = l;
+    }
+  }
+
+  return estimate_confidence(ptr_text_label, ptr_score, kernel_region_num,
+                             height, width);
+}
+
+// Public entry point: makes every input tensor contiguous, validates it with
+// CHECK_CPU_INPUT and forwards everything to pixel_group_cpu().
+std::vector<std::vector<float>> pixel_group(
+    Tensor score, Tensor mask, Tensor embedding, Tensor kernel_label,
+    Tensor kernel_contour, int kernel_region_num, float distance_threshold) {
+  score = score.contiguous();
+  CHECK_CPU_INPUT(score);
+
+  mask = mask.contiguous();
+  CHECK_CPU_INPUT(mask);
+
+  embedding = embedding.contiguous();
+  CHECK_CPU_INPUT(embedding);
+
+  kernel_label = kernel_label.contiguous();
+  CHECK_CPU_INPUT(kernel_label);
+
+  kernel_contour = kernel_contour.contiguous();
+  CHECK_CPU_INPUT(kernel_contour);
+
+  auto grouped =
+      pixel_group_cpu(score, mask, embedding, kernel_label, kernel_contour,
+                      kernel_region_num, distance_threshold);
+  return grouped;
+}
--- a/mmcv/ops/csrc/parrots/pixel_group_parrots.cpp
+++ b/mmcv/ops/csrc/parrots/pixel_group_parrots.cpp
+// Copyright (c) OpenMMLab. All rights reserved
+#include <parrots/compute/aten.hpp>
+#include <parrots/extension.hpp>
+#include <parrots/foundation/ssattrs.hpp>
+
+#include "pixel_group_pytorch.h"
+
+using namespace parrots;
+using namespace std;
+
+// Parrots operator wrapper around pixel_group().
+//
+// Reads the "kernel_region_num" and "distance_threshold" attributes, converts
+// the five inputs to ATen tensors, runs pixel_group() and packs the ragged
+// result into a single 1 x N float tensor stored in outs[0]:
+//   [len(row 0), ..., len(row n-1), row 0 values..., row n-1 values...]
+template <typename T>
+void pixel_group_parrots(T& ctx, const SSElement& attr,
+                         const OperatorBase::in_list_t& ins,
+                         OperatorBase::out_list_t& outs) {
+  int kernel_region_num;
+  float distance_threshold;
+  SSAttrs(attr)
+      .get<int>("kernel_region_num", kernel_region_num)
+      .get<float>("distance_threshold", distance_threshold)
+      .done();
+  at::Tensor score;
+  at::Tensor mask;
+  at::Tensor embedding;
+  at::Tensor kernel_label;
+  at::Tensor kernel_contour;
+  score = buildATensor(ctx, ins[0]);
+  mask = buildATensor(ctx, ins[1]);
+  embedding = buildATensor(ctx, ins[2]);
+  kernel_label = buildATensor(ctx, ins[3]);
+  kernel_contour = buildATensor(ctx, ins[4]);
+  auto out = pixel_group(score, mask, embedding, kernel_label, kernel_contour,
+                         kernel_region_num, distance_threshold);
+  int n = out.size();
+  std::vector<float> out_tensor;
+  // Header: the length of every row, stored as floats.
+  for (int i = 0; i < n; ++i) out_tensor.push_back(float(out[i].size()));
+  // Payload: the rows themselves, concatenated in order.
+  for (int i = 0; i < n; ++i)
+    out_tensor.insert(out_tensor.end(), out[i].begin(), out[i].end());
+  auto options = torch::TensorOptions().dtype(at::kFloat);
+  auto tensor = torch::zeros({1, out_tensor.size()}, options);
+  tensor.slice(0, 0, 1) =
+      torch::from_blob(out_tensor.data(), {out_tensor.size()}, options);
+  updateDArray(ctx, tensor, outs[0]);
+}
+
+// Registers the op under the name "pixel_group": two attributes, five inputs,
+// one output, implemented for HostContext and, when MMCV_WITH_CUDA is
+// defined, for CudaContext as well.
+// NOTE(review): pixel_group() applies CHECK_CPU_INPUT to every tensor —
+// confirm the CudaContext registration is intended.
+PARROTS_EXTENSION_REGISTER(pixel_group)
+    .attr("kernel_region_num")
+    .attr("distance_threshold")
+    .input(5)
+    .output(1)
+    .apply(pixel_group_parrots<HostContext>)
+#ifdef MMCV_WITH_CUDA
+    .apply(pixel_group_parrots<CudaContext>)
+#endif
+    .done();
--- a/mmcv/ops/csrc/parrots/pixel_group_pytorch.h
+++ b/mmcv/ops/csrc/parrots/pixel_group_pytorch.h
+// Copyright (c) OpenMMLab. All rights reserved
+#ifndef PIXEL_GROUP_PYTORCH_H
+#define PIXEL_GROUP_PYTORCH_H
+#include <torch/extension.h>
+using namespace at;
+
+// Groups foreground pixels around labelled kernels by embedding distance and
+// returns one vector of floats per label. Implemented in pixel_group.cpp.
+std::vector<std::vector<float>> pixel_group(
+    Tensor score, Tensor mask, Tensor embedding, Tensor kernel_label,
+    Tensor kernel_contour, int kernel_region_num, float distance_threshold);
+
+#endif  // PIXEL_GROUP_PYTORCH_H
--- a/mmcv/ops/pixel_group.py
+++ b/mmcv/ops/pixel_group.py
@@ -48,8 +48,28 @@ def pixel_group(score, mask, embedding, kernel_label, kernel_contour,
    if isinstance(kernel_contour, np.ndarray):
        kernel_contour = torch.from_numpy(kernel_contour)

-    pixel_assignment = ext_module.pixel_group(score, mask, embedding,
-                                              kernel_label, kernel_contour,
-                                              kernel_region_num,
-                                              distance_threshold)
+    if torch.__version__ == 'parrots':
+        label = ext_module.pixel_group(
+            score,
+            mask,
+            embedding,
+            kernel_label,
+            kernel_contour,
+            kernel_region_num=kernel_region_num,
+            distance_threshold=distance_threshold)
+        label = label.tolist()
+        label = label[0]
+        list_index = kernel_region_num
+        pixel_assignment = []
+        for x in range(kernel_region_num):
+            pixel_assignment.append(
+                np.array(
+                    label[list_index:list_index + int(label[x])],
+                    dtype=np.float))
+            list_index = list_index + int(label[x])
+    else:
+        pixel_assignment = ext_module.pixel_group(score, mask, embedding,
+                                                  kernel_label, kernel_contour,
+                                                  kernel_region_num,
+                                                  distance_threshold)
    return pixel_assignment
--- a/mmcv/utils/ext_loader.py
+++ b/mmcv/utils/ext_loader.py
@@ -34,6 +34,8 @@ else:
        'fused_bias_leakyrelu',
        'upfirdn2d',
        'ms_deform_attn_forward',
+        'pixel_group',
+        'contour_expand',
    ]

    def get_fake_func(name, e):