Commit 91da9643 authored by limm's avatar limm
Browse files

support v2.1.0

parent 6f674c7e
#include "pytorch_npu_helper.hpp"
using namespace NPU_NAME_SPACE;
using namespace std;
/**
 * RoIAlign forward pass for NPU tensors.
 *
 * Runs the "ROIAlign" operator with `input` and `rois` as inputs and `output`
 * as its single output. `argmax_y`, `argmax_x` and `pool_mode` are part of the
 * shared signature but are not read by this function.
 *
 * @param aligned_height  forwarded as the "pooled_height" attribute.
 * @param aligned_width   forwarded as the "pooled_width" attribute.
 * @param spatial_scale   forwarded as the "spatial_scale" attribute.
 * @param sampling_ratio  forwarded as the "sample_num" attribute.
 * @param aligned         selects "roi_end_mode": 2 when true, 0 (plus a
 *                        warning log) when false.
 */
void roi_align_forward_npu(Tensor input, Tensor rois, Tensor output,
                           Tensor argmax_y, Tensor argmax_x, int aligned_height,
                           int aligned_width, float spatial_scale,
                           int sampling_ratio, int pool_mode, bool aligned) {
  // Integer attributes are handed to the operator as 64-bit values.
  int64_t pooled_h = aligned_height;
  int64_t pooled_w = aligned_width;
  int64_t sample_num = sampling_ratio;

  int64_t end_mode = 0;
  if (aligned) {
    end_mode = 2;
  } else {
    LOG(WARNING) << "The [aligned] attr in roi_align op is false";
  }

  OpCommand roi_align_cmd;
  roi_align_cmd.Name("ROIAlign")
      .Input(input)
      .Input(rois)
      .Output(output)
      .Attr("spatial_scale", spatial_scale)
      .Attr("pooled_height", pooled_h)
      .Attr("pooled_width", pooled_w)
      .Attr("sample_num", sample_num)
      .Attr("roi_end_mode", end_mode)
      .Run();
}
/**
 * RoIAlign backward pass for NPU tensors.
 *
 * Runs the "ROIAlignGrad" operator with `grad_output` and `rois` as inputs and
 * writes into `grad_input`. The sizes of `grad_input` are passed to the
 * operator as the "xdiff_shape" attribute. `argmax_y`, `argmax_x` and
 * `pool_mode` are part of the shared signature but are not read here.
 *
 * @param aligned  selects "roi_end_mode": 2 when true, 0 (plus a warning log)
 *                 when false.
 */
void roi_align_backward_npu(Tensor grad_output, Tensor rois, Tensor argmax_y,
                            Tensor argmax_x, Tensor grad_input,
                            int aligned_height, int aligned_width,
                            float spatial_scale, int sampling_ratio,
                            int pool_mode, bool aligned) {
  // Integer attributes are handed to the operator as 64-bit values.
  int64_t pooled_h = aligned_height;
  int64_t pooled_w = aligned_width;
  int64_t sample_num = sampling_ratio;

  int64_t end_mode = 0;
  if (aligned) {
    end_mode = 2;
  } else {
    LOG(WARNING) << "The [aligned] attr in roi_align_grad op is false";
  }

  // Shape of the gradient w.r.t. the input, taken from the output tensor.
  c10::SmallVector<int64_t, SIZE> grad_input_shape =
      array_to_small_vector(grad_input.sizes());

  OpCommand roi_align_grad_cmd;
  roi_align_grad_cmd.Name("ROIAlignGrad")
      .Input(grad_output)
      .Input(rois)
      .Output(grad_input)
      .Attr("xdiff_shape", grad_input_shape)
      .Attr("pooled_width", pooled_w)
      .Attr("pooled_height", pooled_h)
      .Attr("spatial_scale", spatial_scale)
      .Attr("sample_num", sample_num)
      .Attr("roi_end_mode", end_mode)
      .Run();
}
// Declarations of the generic RoIAlign entry points. Their definitions are not
// in this translation unit; they are only named here so they can be passed to
// REGISTER_NPU_IMPL below.
void roi_align_forward_impl(Tensor input, Tensor rois, Tensor output,
Tensor argmax_y, Tensor argmax_x,
int aligned_height, int aligned_width,
float spatial_scale, int sampling_ratio,
int pool_mode, bool aligned);
void roi_align_backward_impl(Tensor grad_output, Tensor rois, Tensor argmax_y,
Tensor argmax_x, Tensor grad_input,
int aligned_height, int aligned_width,
float spatial_scale, int sampling_ratio,
int pool_mode, bool aligned);
// Pair each generic entry point with its NPU function defined above.
// NOTE(review): presumably this makes the *_impl dispatcher route NPU tensors
// to the *_npu functions -- confirm against the REGISTER_NPU_IMPL definition.
REGISTER_NPU_IMPL(roi_align_forward_impl, roi_align_forward_npu);
REGISTER_NPU_IMPL(roi_align_backward_impl, roi_align_backward_npu);
......@@ -9,21 +9,39 @@ void roi_pool_forward_npu(Tensor input, Tensor rois, Tensor output,
int64_t pooled_height_64 = pooled_height;
int64_t pooled_width_64 = pooled_width;
int64_t pooled_channel = 1;
at::Tensor roi_actual_num = at_npu::native::OpPreparation::ApplyTensor(
{}, rois.options().dtype(at::kInt), rois);
OpCommand cmd;
cmd.Name("RoiPoolingWithArgMax")
.Input(input)
.Input(rois)
.Input(roi_actual_num)
.Output(output)
.Output(argmax)
.Attr("pooled_h", pooled_height_64)
.Attr("pooled_w", pooled_width_64)
.Attr("spatial_scale_h", spatial_scale)
.Attr("spatial_scale_w", spatial_scale)
.Attr("pool_channel", pooled_channel)
.Run();
at::Tensor roi_actual_num =
at::empty_like(rois, rois.options().dtype(at::kInt));
if (input.sizes()[1] % 16 == 0) {
OpCommand cmd;
cmd.Name("RoiPoolingWithArgMax")
.Input(input)
.Input(rois)
.Input(roi_actual_num)
.Output(output)
.Output(argmax)
.Attr("pooled_h", pooled_height_64)
.Attr("pooled_w", pooled_width_64)
.Attr("spatial_scale_h", spatial_scale)
.Attr("spatial_scale_w", spatial_scale)
.Attr("pool_channel", pooled_channel)
.Run();
} else {
OpCommand cmd;
cmd.Name("RoiPoolingWithArgMax")
.Input(input)
.Input(rois)
.Input(roi_actual_num)
.Output(output)
.Output(argmax)
.Attr("pooled_h", pooled_height_64)
.Attr("pooled_w", pooled_width_64)
.Attr("spatial_scale_h", spatial_scale)
.Attr("spatial_scale_w", spatial_scale)
.Attr("pool_channel", pooled_channel)
.Attr("_exclude_engines", (string) "AiCore")
.Run();
}
}
void roi_pool_backward_npu(Tensor grad_output, Tensor rois, Tensor argmax,
......@@ -32,8 +50,8 @@ void roi_pool_backward_npu(Tensor grad_output, Tensor rois, Tensor argmax,
int64_t pooled_height_64 = pooled_height;
int64_t pooled_width_64 = pooled_width;
int64_t pooled_channel = 1;
at::Tensor roi_actual_num = at_npu::native::OpPreparation::ApplyTensor(
{}, rois.options().dtype(at::kInt), rois);
at::Tensor roi_actual_num =
at::empty_like(rois, rois.options().dtype(at::kInt));
at::Tensor x = at::ones_like(grad_input);
OpCommand cmd;
cmd.Name("RoiPoolingGradWithArgMax")
......
......@@ -18,19 +18,16 @@ int hard_voxelize_forward_npu(const at::Tensor &points, at::Tensor &voxels,
const std::vector<float> coors_range,
const int max_points, const int max_voxels,
const int NDim = 3) {
at::Tensor voxel_num_tmp = OpPreparation::ApplyTensor(points, {1});
at::Tensor voxel_num = at_npu::native::NPUNativeFunctions::npu_dtype_cast(
voxel_num_tmp, at::kInt);
at::Tensor voxel_num_tmp = at::empty({1}, points.options());
at::Tensor voxel_num = voxel_num_tmp.to(at::kInt);
at::Tensor voxel_size_cpu = at::from_blob(
const_cast<float *>(voxel_size.data()), {3}, dtype(at::kFloat));
at::Tensor voxel_size_npu =
CalcuOpUtil::CopyTensorHostToDevice(voxel_size_cpu);
at::Tensor voxel_size_npu = voxel_size_cpu.to(points.device());
at::Tensor coors_range_cpu = at::from_blob(
const_cast<float *>(coors_range.data()), {6}, dtype(at::kFloat));
at::Tensor coors_range_npu =
CalcuOpUtil::CopyTensorHostToDevice(coors_range_cpu);
at::Tensor coors_range_npu = coors_range_cpu.to(points.device());
int64_t max_points_ = (int64_t)max_points;
int64_t max_voxels_ = (int64_t)max_voxels;
......
// Copyright (c) OpenMMLab. All rights reserved
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"
#ifdef MMCV_WITH_DIOPI
#include <diopi/diopirt.h>
#include <diopi/functions.h>
#include <diopi/functions_mmcv.h>
#include "csrc_dipu/base/basedef.h"
#include "csrc_dipu/diopirt/diopirt_impl.h"
using dipu::diopi_helper::toDiopiScalar;
using dipu::diopi_helper::toDiopiTensorHandle;
#endif
void roi_align_forward_impl(Tensor input, Tensor rois, Tensor output,
Tensor argmax_y, Tensor argmax_x,
......@@ -22,20 +33,114 @@ void roi_align_backward_impl(Tensor grad_output, Tensor rois, Tensor argmax_y,
spatial_scale, sampling_ratio, pool_mode, aligned);
}
#ifdef MMCV_WITH_DIOPI
/**
 * RoIAlign forward pass routed through DIOPI.
 *
 * Control flow:
 *  1. If DIOPI reports `input` as a host tensor, call
 *     roi_align_forward_impl directly and return.
 *  2. Otherwise, if `input` lives on the DIPU device type and the
 *     diopiRoiAlignMmcv symbol is non-null, call it and return when it
 *     reports diopiSuccess.
 *  3. Otherwise fall back to the CPU: copy every tensor to the CPU, run
 *     roi_align_forward_impl there, and copy all results (`output`,
 *     `argmax_y`, `argmax_x`) back into the caller's tensors.
 */
void roi_align_forward_diopi(Tensor input, Tensor rois, Tensor output,
                             Tensor argmax_y, Tensor argmax_x,
                             int aligned_height, int aligned_width,
                             float spatial_scale, int sampling_ratio,
                             int pool_mode, bool aligned) {
  auto input_p = toDiopiTensorHandle(input);
  diopiDevice_t device;
  diopiGetTensorDevice(input_p, &device);
  if (device == diopi_host) {
    roi_align_forward_impl(input, rois, output, argmax_y, argmax_x,
                           aligned_height, aligned_width, spatial_scale,
                           sampling_ratio, pool_mode, aligned);
    return;
  }
  diopiContext ctx(dipu::getCurrentDIPUStream().rawstream());
  diopiContextHandle_t ch = &ctx;
  auto rois_p = toDiopiTensorHandle(rois);
  auto out_p = toDiopiTensorHandle(output);
  auto argmax_y_p = toDiopiTensorHandle(argmax_y);
  auto argmax_x_p = toDiopiTensorHandle(argmax_x);
  bool is_mock_cuda = input.device().type() == dipu::DIPU_DEVICE_TYPE;
  // The null test on the function symbol guards against the kernel not being
  // provided by the linked DIOPI implementation.
  if (is_mock_cuda && reinterpret_cast<void *>(diopiRoiAlignMmcv) != nullptr) {
    auto ret = diopiRoiAlignMmcv(
        ch, out_p, argmax_y_p, argmax_x_p, input_p, rois_p, aligned_height,
        aligned_width, sampling_ratio, pool_mode, spatial_scale, aligned);
    if (ret == diopiSuccess) return;
  }
  LOG(WARNING) << "Fallback to cpu: mmcv ext op roi_align_forward";
  auto input_cpu = input.cpu();
  auto rois_cpu = rois.cpu();
  auto out_cpu = output.cpu();
  auto argmax_y_cpu = argmax_y.cpu();
  auto argmax_x_cpu = argmax_x.cpu();
  roi_align_forward_impl(input_cpu, rois_cpu, out_cpu, argmax_y_cpu,
                         argmax_x_cpu, aligned_height, aligned_width,
                         spatial_scale, sampling_ratio, pool_mode, aligned);
  output.copy_(out_cpu);
  // The argmax tensors are outputs too; without copying them back, the
  // values computed by the CPU implementation would be lost and the
  // caller's tensors would keep their previous contents.
  argmax_y.copy_(argmax_y_cpu);
  argmax_x.copy_(argmax_x_cpu);
}
/**
 * RoIAlign backward pass routed through DIOPI.
 *
 * Host tensors go straight to roi_align_backward_impl. Device tensors use
 * diopiRoiAlignBackwardMmcv when `grad_output` is on the DIPU device type and
 * the symbol is non-null; if that call is unavailable or does not return
 * diopiSuccess, the computation is redone on CPU copies and the resulting
 * gradient is copied back into `grad_input`.
 */
void roi_align_backward_diopi(Tensor grad_output, Tensor rois, Tensor argmax_y,
                              Tensor argmax_x, Tensor grad_input,
                              int aligned_height, int aligned_width,
                              float spatial_scale, int sampling_ratio,
                              int pool_mode, bool aligned) {
  auto grad_out_handle = toDiopiTensorHandle(grad_output);
  diopiDevice_t tensor_device;
  diopiGetTensorDevice(grad_out_handle, &tensor_device);

  // Host path: nothing to offload.
  if (tensor_device == diopi_host) {
    roi_align_backward_impl(grad_output, rois, argmax_y, argmax_x, grad_input,
                            aligned_height, aligned_width, spatial_scale,
                            sampling_ratio, pool_mode, aligned);
    return;
  }

  auto rois_handle = toDiopiTensorHandle(rois);
  auto argmax_y_handle = toDiopiTensorHandle(argmax_y);
  auto argmax_x_handle = toDiopiTensorHandle(argmax_x);
  auto grad_in_handle = toDiopiTensorHandle(grad_input);
  diopiContext context(dipu::getCurrentDIPUStream().rawstream());
  diopiContextHandle_t context_handle = &context;

  // Device path: only attempted on the DIPU device type and when the kernel
  // symbol is actually present.
  const bool on_dipu = grad_output.device().type() == dipu::DIPU_DEVICE_TYPE;
  const bool kernel_present =
      reinterpret_cast<void *>(diopiRoiAlignBackwardMmcv) != nullptr;
  if (on_dipu && kernel_present) {
    auto status = diopiRoiAlignBackwardMmcv(
        context_handle, grad_in_handle, grad_out_handle, rois_handle,
        argmax_y_handle, argmax_x_handle, aligned_height, aligned_width,
        sampling_ratio, pool_mode, spatial_scale, aligned);
    if (status == diopiSuccess) {
      return;
    }
  }

  // CPU fallback: compute on host copies, then write the gradient back.
  LOG(WARNING) << "Fallback to cpu: mmcv ext op roi_align_backward";
  auto host_grad_output = grad_output.cpu();
  auto host_rois = rois.cpu();
  auto host_argmax_y = argmax_y.cpu();
  auto host_argmax_x = argmax_x.cpu();
  auto host_grad_input = grad_input.cpu();
  roi_align_backward_impl(host_grad_output, host_rois, host_argmax_y,
                          host_argmax_x, host_grad_input, aligned_height,
                          aligned_width, spatial_scale, sampling_ratio,
                          pool_mode, aligned);
  grad_input.copy_(host_grad_input);
}
#endif
/**
 * Public RoIAlign forward entry point.
 *
 * Forwards all arguments unchanged to roi_align_forward_diopi when the build
 * defines MMCV_WITH_DIOPI, and to roi_align_forward_impl otherwise.
 */
void roi_align_forward(Tensor input, Tensor rois, Tensor output,
                       Tensor argmax_y, Tensor argmax_x, int aligned_height,
                       int aligned_width, float spatial_scale,
                       int sampling_ratio, int pool_mode, bool aligned) {
#ifndef MMCV_WITH_DIOPI
  // Regular build: use the registry-dispatched implementation.
  roi_align_forward_impl(input, rois, output, argmax_y, argmax_x,
                         aligned_height, aligned_width, spatial_scale,
                         sampling_ratio, pool_mode, aligned);
#else
  // DIOPI build: let the DIOPI wrapper decide how to run the op.
  roi_align_forward_diopi(input, rois, output, argmax_y, argmax_x,
                          aligned_height, aligned_width, spatial_scale,
                          sampling_ratio, pool_mode, aligned);
#endif
}
/**
 * Public RoIAlign backward entry point.
 *
 * Forwards all arguments unchanged to roi_align_backward_diopi when the build
 * defines MMCV_WITH_DIOPI, and to roi_align_backward_impl otherwise.
 */
void roi_align_backward(Tensor grad_output, Tensor rois, Tensor argmax_y,
                        Tensor argmax_x, Tensor grad_input, int aligned_height,
                        int aligned_width, float spatial_scale,
                        int sampling_ratio, int pool_mode, bool aligned) {
#ifndef MMCV_WITH_DIOPI
  // Regular build: use the registry-dispatched implementation.
  roi_align_backward_impl(grad_output, rois, argmax_y, argmax_x, grad_input,
                          aligned_height, aligned_width, spatial_scale,
                          sampling_ratio, pool_mode, aligned);
#else
  // DIOPI build: let the DIOPI wrapper decide how to run the op.
  roi_align_backward_diopi(grad_output, rois, argmax_y, argmax_x, grad_input,
                           aligned_height, aligned_width, spatial_scale,
                           sampling_ratio, pool_mode, aligned);
#endif
}
......@@ -35,6 +35,26 @@ std::vector<torch::Tensor> get_indice_pairs_forward_cuda(
padding, dilation, outPadding, _subM, _transpose);
};
// Declaration only: the MLU kernel launcher for get_indice_pairs. Its
// definition is not visible in this part of the file.
// NOTE(review): presumably defined in the MLU backend sources -- confirm.
template <unsigned NDim>
std::vector<torch::Tensor> GetIndicePairsForwardMLUKernelLauncher(
torch::Tensor indices, int64_t batchSize,
std::vector<int64_t> outSpatialShape, std::vector<int64_t> spatialShape,
std::vector<int64_t> kernelSize, std::vector<int64_t> stride,
std::vector<int64_t> padding, std::vector<int64_t> dilation,
std::vector<int64_t> outPadding, int64_t _subM, int64_t _transpose);
/**
 * MLU variant of get_indice_pairs_forward.
 *
 * Passes every argument through, in the same order, to
 * GetIndicePairsForwardMLUKernelLauncher<NDim> and returns its result.
 */
template <unsigned NDim>
std::vector<torch::Tensor> get_indice_pairs_forward_mlu(
    torch::Tensor indices, int64_t batchSize,
    std::vector<int64_t> outSpatialShape, std::vector<int64_t> spatialShape,
    std::vector<int64_t> kernelSize, std::vector<int64_t> stride,
    std::vector<int64_t> padding, std::vector<int64_t> dilation,
    std::vector<int64_t> outPadding, int64_t _subM, int64_t _transpose) {
  std::vector<torch::Tensor> launcher_result =
      GetIndicePairsForwardMLUKernelLauncher<NDim>(
          indices, batchSize, outSpatialShape, spatialShape, kernelSize,
          stride, padding, dilation, outPadding, _subM, _transpose);
  return launcher_result;
}
template <unsigned NDim>
std::vector<torch::Tensor> GetIndicePairsBackwardCUDAKernelLauncher(
torch::Tensor indices, torch::Tensor gridOut, int64_t batchSize,
......@@ -71,6 +91,12 @@ std::vector<torch::Tensor> get_indice_pairs_forward(
padding, dilation, outPadding, _subM, _transpose);
#else
AT_ERROR("get_indice_pairs is not compiled with GPU support");
#endif
#ifdef MMCV_WITH_MLU
} else if (indices.device().type() == at::kMLU) {
return get_indice_pairs_forward_mlu<NDim>(
indices, batchSize, outSpatialShape, spatialShape, kernelSize, stride,
padding, dilation, outPadding, _subM, _transpose);
#endif
} else {
AT_ERROR("get_indice_pairs is not implemented on CPU");
......
// Copyright (c) OpenMMLab. All rights reserved.
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"
#ifdef MMCV_WITH_DIOPI
#include <diopi/diopirt.h>
#include <diopi/functions.h>
#include <diopi/functions_mmcv.h>
#include "csrc_dipu/diopirt/diopirt_impl.h"
using dipu::diopi_helper::toDiopiScalar;
using dipu::diopi_helper::toDiopiTensorHandle;
#endif
int hard_voxelize_forward_impl(const at::Tensor &points, at::Tensor &voxels,
at::Tensor &coors,
......@@ -33,6 +43,132 @@ void dynamic_voxelize_forward_impl(const at::Tensor &points, at::Tensor &coors,
coors_range, NDim);
}
#ifdef MMCV_WITH_DIOPI
/**
 * Hard voxelization forward pass routed through DIOPI.
 *
 * Control flow:
 *  1. If DIOPI reports `points` as a host tensor, run
 *     hard_voxelize_forward_impl directly and store its return value in
 *     `voxel_num`.
 *  2. Otherwise, if the diopiHardVoxelizeMmcv symbol is non-null, call it and
 *     return when it reports diopiSuccess.
 *  3. Otherwise fall back to the CPU: copy all tensors to the CPU, run
 *     hard_voxelize_forward_impl there and copy `voxels`, `coors`,
 *     `num_points_per_voxel` and `voxel_num` back.
 *
 * Paths 1 and 3 only support `deterministic == true`; a nondeterministic
 * request on those paths fails a TORCH_CHECK instead of terminating the
 * process.
 *
 * `voxel_num` is accessed as int64_t and `voxel_size` / `coors_range` as float
 * on the host paths.
 */
void hard_voxelize_forward_diopi(const at::Tensor &points,
                                 const at::Tensor &voxel_size,
                                 const at::Tensor &coors_range,
                                 at::Tensor &voxels, at::Tensor &coors,
                                 at::Tensor &num_points_per_voxel,
                                 at::Tensor &voxel_num, const int max_points,
                                 const int max_voxels, const int NDim = 3,
                                 const bool deterministic = true) {
  auto points_p = toDiopiTensorHandle(points);
  diopiDevice_t device;
  diopiGetTensorDevice(points_p, &device);
  if (device == diopi_host) {
    TORCH_CHECK(
        deterministic,
        "nondeterministic hard_voxelize_forward is not supported on host!");
    int64_t *voxel_num_data = voxel_num.data_ptr<int64_t>();
    std::vector<float> voxel_size_v(
        voxel_size.data_ptr<float>(),
        voxel_size.data_ptr<float>() + voxel_size.numel());
    std::vector<float> coors_range_v(
        coors_range.data_ptr<float>(),
        coors_range.data_ptr<float>() + coors_range.numel());
    *voxel_num_data = hard_voxelize_forward_impl(
        points, voxels, coors, num_points_per_voxel, voxel_size_v,
        coors_range_v, max_points, max_voxels, NDim);
    return;
  }
  diopiContext ctx(dipu::getCurrentDIPUStream().rawstream());
  diopiContextHandle_t ch = &ctx;
  auto voxel_size_p = toDiopiTensorHandle(voxel_size);
  auto coors_range_p = toDiopiTensorHandle(coors_range);
  auto voxels_p = toDiopiTensorHandle(voxels);
  auto coors_p = toDiopiTensorHandle(coors);
  auto num_points_per_voxel_p = toDiopiTensorHandle(num_points_per_voxel);
  auto voxel_num_p = toDiopiTensorHandle(voxel_num);
  // The null test on the function symbol guards against the kernel not being
  // provided by the linked DIOPI implementation.
  if (reinterpret_cast<void *>(diopiHardVoxelizeMmcv) != nullptr) {
    auto ret = diopiHardVoxelizeMmcv(
        ch, voxels_p, coors_p, num_points_per_voxel_p, voxel_num_p, points_p,
        voxel_size_p, coors_range_p, max_points, max_voxels, NDim,
        deterministic);
    if (ret == diopiSuccess) return;
  }
  LOG(WARNING) << "Fallback to cpu: mmcv ext op hard_voxelize_forward";
  // Reject the unsupported mode before paying for the device-to-host copies,
  // and report it the same way as the host path above (a catchable error
  // rather than abort()).
  TORCH_CHECK(
      deterministic,
      "nondeterministic hard_voxelize_forward is not supported on host!");
  auto points_cpu = points.cpu();
  auto voxel_size_cpu = voxel_size.cpu();
  auto coors_range_cpu = coors_range.cpu();
  auto voxels_cpu = voxels.cpu();
  auto coors_cpu = coors.cpu();
  auto num_points_per_voxel_cpu = num_points_per_voxel.cpu();
  auto voxel_num_cpu = voxel_num.cpu();
  int64_t *voxel_num_data_cpu = voxel_num_cpu.data_ptr<int64_t>();
  std::vector<float> voxel_size_v_cpu(
      voxel_size_cpu.data_ptr<float>(),
      voxel_size_cpu.data_ptr<float>() + voxel_size_cpu.numel());
  std::vector<float> coors_range_v_cpu(
      coors_range_cpu.data_ptr<float>(),
      coors_range_cpu.data_ptr<float>() + coors_range_cpu.numel());
  *voxel_num_data_cpu = hard_voxelize_forward_impl(
      points_cpu, voxels_cpu, coors_cpu, num_points_per_voxel_cpu,
      voxel_size_v_cpu, coors_range_v_cpu, max_points, max_voxels, NDim);
  // Write every output back into the caller's (device) tensors.
  voxels.copy_(voxels_cpu);
  coors.copy_(coors_cpu);
  num_points_per_voxel.copy_(num_points_per_voxel_cpu);
  voxel_num.copy_(voxel_num_cpu);
}
/**
 * Dynamic voxelization forward pass routed through DIOPI.
 *
 * Host tensors are handled by dynamic_voxelize_forward_impl directly. Device
 * tensors use diopiDynamicVoxelizeMmcv when the symbol is non-null; if that
 * call is unavailable or does not return diopiSuccess, the work is redone on
 * CPU copies and the result is copied back into `coors`.
 *
 * `voxel_size` and `coors_range` are accessed as float on the host paths.
 */
void dynamic_voxelize_forward_diopi(const at::Tensor &points,
                                    const at::Tensor &voxel_size,
                                    const at::Tensor &coors_range,
                                    at::Tensor &coors, const int NDim = 3) {
  // Copies the float elements of a host tensor into a std::vector.
  const auto as_float_vector = [](const at::Tensor &t) {
    return std::vector<float>(t.data_ptr<float>(),
                              t.data_ptr<float>() + t.numel());
  };

  auto points_handle = toDiopiTensorHandle(points);
  diopiDevice_t tensor_device;
  diopiGetTensorDevice(points_handle, &tensor_device);

  // Host path: nothing to offload.
  if (tensor_device == diopi_host) {
    std::vector<float> host_voxel_size = as_float_vector(voxel_size);
    std::vector<float> host_coors_range = as_float_vector(coors_range);
    dynamic_voxelize_forward_impl(points, coors, host_voxel_size,
                                  host_coors_range, NDim);
    return;
  }

  diopiContext context(dipu::getCurrentDIPUStream().rawstream());
  diopiContextHandle_t context_handle = &context;
  auto voxel_size_handle = toDiopiTensorHandle(voxel_size);
  auto coors_range_handle = toDiopiTensorHandle(coors_range);
  auto coors_handle = toDiopiTensorHandle(coors);

  // Device path: only attempted when the kernel symbol is actually present.
  if (reinterpret_cast<void *>(diopiDynamicVoxelizeMmcv) != nullptr) {
    auto status = diopiDynamicVoxelizeMmcv(context_handle, coors_handle,
                                           points_handle, voxel_size_handle,
                                           coors_range_handle, NDim);
    if (status == diopiSuccess) {
      return;
    }
  }

  // CPU fallback: compute on host copies, then write the result back.
  LOG(WARNING) << "Fallback to cpu: mmcv ext op dynamic_voxelize_forward";
  auto host_points = points.cpu();
  auto host_voxel_size_t = voxel_size.cpu();
  auto host_coors_range_t = coors_range.cpu();
  auto host_coors = coors.cpu();
  std::vector<float> fallback_voxel_size = as_float_vector(host_voxel_size_t);
  std::vector<float> fallback_coors_range =
      as_float_vector(host_coors_range_t);
  dynamic_voxelize_forward_impl(host_points, host_coors, fallback_voxel_size,
                                fallback_coors_range, NDim);
  coors.copy_(host_coors);
  return;
}
#endif
void hard_voxelize_forward(const at::Tensor &points,
const at::Tensor &voxel_size,
const at::Tensor &coors_range, at::Tensor &voxels,
......@@ -40,6 +176,11 @@ void hard_voxelize_forward(const at::Tensor &points,
at::Tensor &voxel_num, const int max_points,
const int max_voxels, const int NDim = 3,
const bool deterministic = true) {
#ifdef MMCV_WITH_DIOPI
hard_voxelize_forward_diopi(points, voxel_size, coors_range, voxels, coors,
num_points_per_voxel, voxel_num, max_points,
max_voxels, NDim, deterministic);
#else
int64_t *voxel_num_data = voxel_num.data_ptr<int64_t>();
std::vector<float> voxel_size_v(
voxel_size.data_ptr<float>(),
......@@ -57,12 +198,16 @@ void hard_voxelize_forward(const at::Tensor &points,
points, voxels, coors, num_points_per_voxel, voxel_size_v,
coors_range_v, max_points, max_voxels, NDim);
}
#endif
}
void dynamic_voxelize_forward(const at::Tensor &points,
const at::Tensor &voxel_size,
const at::Tensor &coors_range, at::Tensor &coors,
const int NDim = 3) {
#ifdef MMCV_WITH_DIOPI
dynamic_voxelize_forward_diopi(points, voxel_size, coors_range, coors, NDim);
#else
std::vector<float> voxel_size_v(
voxel_size.data_ptr<float>(),
voxel_size.data_ptr<float>() + voxel_size.numel());
......@@ -71,4 +216,5 @@ void dynamic_voxelize_forward(const at::Tensor &points,
coors_range.data_ptr<float>() + coors_range.numel());
dynamic_voxelize_forward_impl(points, coors, voxel_size_v, coors_range_v,
NDim);
#endif
}
......@@ -12,6 +12,7 @@ from torch.autograd import Function
from torch.autograd.function import once_differentiable
from torch.nn.modules.utils import _pair, _single
from mmcv.utils import IS_MLU_AVAILABLE
from ..utils import ext_loader
from .modulated_deform_conv import ModulatedDeformConv2dFunction
......@@ -57,7 +58,8 @@ class DeformConv2dFunction(Function):
input_tensor, grad_output, offset_out, weight, offset_all,
kernel_size=[weight.shape[3], weight.shape[2]],
stride=[1, 1, ctx.stride[0], ctx.stride[1]],
padding=[1, 1, ctx.padding[0], ctx.padding[1]],
padding=[ctx.padding[0], ctx.padding[0], ctx.padding[1],
ctx.padding[1]],
dilation=[1, 1, ctx.dilation[0], ctx.dilation[1]],
groups=ctx.groups, deformable_groups=ctx.deform_groups,
modulated=True)
......@@ -437,3 +439,63 @@ class DeformConv2dPack(DeformConv2d):
super()._load_from_state_dict(state_dict, prefix, local_metadata,
strict, missing_keys, unexpected_keys,
error_msgs)
if IS_MLU_AVAILABLE:
    import torchvision
    from mmengine.utils import digit_version
    from torchvision.ops import deform_conv2d as tv_deform_conv2d

    @MODELS.register_module('DCN', force=True)
    class DeformConv2dPack_MLU(DeformConv2d):
        """DCN layer for MLU devices, backed by torchvision.

        The operator's MLU backend lives in torchvision, so this class is
        registered under the ``'DCN'`` name (overriding any earlier
        registration) and calls ``torchvision.ops.deform_conv2d``. The
        offsets are predicted by an internal ``conv_offset`` convolution.

        Args:
            in_channels (int): Same as nn.Conv2d.
            out_channels (int): Same as nn.Conv2d.
            kernel_size (int or tuple[int]): Same as nn.Conv2d.
            stride (int): Same as nn.Conv2d, while tuple is not supported.
            padding (int): Same as nn.Conv2d, while tuple is not supported.
            dilation (int): Same as nn.Conv2d, while tuple is not supported.
            groups (int): Same as nn.Conv2d.
            bias (bool or str): If specified as `auto`, it will be decided by
                the norm_cfg. Bias will be set as True if norm_cfg is None,
                otherwise False.
            im2col_step (int): Number of samples processed by
                im2col_cuda_kernel per call. It will work when ``batch_size``
                > ``im2col_step``, but ``batch_size`` must be divisible by
                ``im2col_step``. Default: 32. `New in version 1.7.2.
                Currently not supported on MLU devices.`
        """

        def __init__(self, *args, **kwargs):
            installed = digit_version(torchvision.__version__)
            required = digit_version('0.10.0a0')
            assert installed >= required, \
                'the version of torchvision should be >= 0.10.0'
            super().__init__(*args, **kwargs)

            # Two offset values (one per spatial axis) for every kernel
            # position of every deformable group.
            kernel_h = self.kernel_size[0]
            kernel_w = self.kernel_size[1]
            offset_channels = self.deform_groups * 2 * kernel_h * kernel_w
            self.conv_offset = nn.Conv2d(
                self.in_channels,
                offset_channels,
                kernel_size=self.kernel_size,
                stride=_pair(self.stride),
                padding=_pair(self.padding),
                dilation=_pair(self.dilation),
                bias=True)
            self.init_offset()

        def init_offset(self):
            """Zero the weight and bias of the offset-predicting conv."""
            offset_conv = self.conv_offset
            offset_conv.weight.data.zero_()
            offset_conv.bias.data.zero_()

        def forward(self, x: Tensor) -> Tensor:  # type: ignore
            """Predict offsets from ``x`` and apply the deformable conv."""
            batch_size = x.size(0)
            cur_im2col_step = min(self.im2col_step, batch_size)
            assert batch_size % cur_im2col_step == 0, \
                'batch size must be divisible by im2col_step'

            offset = self.conv_offset(x)
            # Bring input and weight to the dtype of the predicted offsets.
            x = x.type_as(offset)
            weight = self.weight.type_as(x)
            return tv_deform_conv2d(
                x,
                offset,
                weight,
                bias=None,
                stride=self.stride,
                padding=self.padding,
                dilation=self.dilation)
......@@ -11,6 +11,7 @@ from torch.autograd import Function
from torch.autograd.function import once_differentiable
from torch.nn.modules.utils import _pair, _single
from mmcv.utils import IS_MLU_AVAILABLE
from ..utils import ext_loader
ext_module = ext_loader.load_ext(
......@@ -57,14 +58,17 @@ class ModulatedDeformConv2dFunction(Function):
kernel_w, kernel_h, ctx.deform_groups)
select_offset = offset.index_select(1, sort_index_fp)
offset_all = torch.cat([select_offset, mask], dim=1)
output, offset_out = torch.npu_deformable_conv2d(
import torch_npu
output, offset_out = torch_npu.npu_deformable_conv2d(
input_tensor,
weight,
offset_all,
conv2d_bias,
kernel_size=[kernel_w, kernel_h],
stride=[1, 1, ctx.stride[0], ctx.stride[1]],
padding=[1, 1, ctx.padding[0], ctx.padding[1]],
padding=[
ctx.padding[0], ctx.padding[0], ctx.padding[1], ctx.padding[1]
],
dilation=[1, 1, ctx.dilation[0], ctx.dilation[1]],
groups=ctx.groups,
deformable_groups=ctx.deform_groups,
......@@ -84,7 +88,8 @@ class ModulatedDeformConv2dFunction(Function):
input_tensor, grad_output, offset_out, weight, offset_all,
kernel_size=[weight.shape[3], weight.shape[2]],
stride=[1, 1, ctx.stride[0], ctx.stride[1]],
padding=[1, 1, ctx.padding[0], ctx.padding[1]],
padding=[ctx.padding[0], ctx.padding[0], ctx.padding[1],
ctx.padding[1]],
dilation=[1, 1, ctx.dilation[0], ctx.dilation[1]],
groups=ctx.groups, deformable_groups=ctx.deform_groups,
modulated=True)
......@@ -355,3 +360,68 @@ class ModulatedDeformConv2dPack(ModulatedDeformConv2d):
super()._load_from_state_dict(state_dict, prefix, local_metadata,
strict, missing_keys, unexpected_keys,
error_msgs)
if IS_MLU_AVAILABLE:
    import torchvision
    from mmengine.utils import digit_version
    from torchvision.ops import deform_conv2d as tv_deform_conv2d

    @MODELS.register_module('DCNv2', force=True)
    class ModulatedDeformConv2dPack_MLU(ModulatedDeformConv2d):
        """DCNv2 layer for MLU devices, backed by torchvision.

        The operator's MLU backend lives in torchvision, so this class is
        registered under the ``'DCNv2'`` name (overriding any earlier
        registration) and calls ``torchvision.ops.deform_conv2d`` with a
        modulation mask. Offsets and mask are predicted by an internal
        ``conv_offset`` convolution.

        Args:
            in_channels (int): Same as nn.Conv2d.
            out_channels (int): Same as nn.Conv2d.
            kernel_size (int or tuple[int]): Same as nn.Conv2d.
            stride (int): Same as nn.Conv2d, while tuple is not supported.
            padding (int): Same as nn.Conv2d, while tuple is not supported.
            dilation (int): Same as nn.Conv2d, while tuple is not supported.
            groups (int): Same as nn.Conv2d.
            bias (bool or str): If specified as `auto`, it will be decided by
                the norm_cfg. Bias will be set as True if norm_cfg is None,
                otherwise False.
        """

        def __init__(self, *args, **kwargs):
            installed = digit_version(torchvision.__version__)
            required = digit_version('0.10.0a0')
            assert installed >= required, \
                'the version of torchvision should be >= 0.10.0'
            super().__init__(*args, **kwargs)

            # Three values per kernel position and deformable group; forward()
            # splits them into two offset chunks and one mask chunk.
            kernel_h = self.kernel_size[0]
            kernel_w = self.kernel_size[1]
            predictor_channels = self.deform_groups * 3 * kernel_h * kernel_w
            self.conv_offset = nn.Conv2d(
                self.in_channels,
                predictor_channels,
                kernel_size=self.kernel_size,
                stride=self.stride,
                padding=self.padding,
                dilation=self.dilation,
                bias=True)
            self.init_weights()

        def init_weights(self):
            """Run the parent init, then zero ``conv_offset`` if present."""
            super().init_weights()
            # The parent constructor may call this before conv_offset exists.
            if not hasattr(self, 'conv_offset'):
                return
            self.conv_offset.weight.data.zero_()
            self.conv_offset.bias.data.zero_()

        def forward(self, x):
            """Predict offsets and mask from ``x``, then apply the conv."""
            predicted = self.conv_offset(x)
            first, second, raw_mask = torch.chunk(predicted, 3, dim=1)
            offset = torch.cat((first, second), dim=1)

            # Bring input, weight and mask to the dtype of the offsets.
            x = x.type_as(offset)
            weight = self.weight.type_as(x)
            mask = torch.sigmoid(raw_mask).type_as(x)
            return tv_deform_conv2d(
                x,
                offset,
                weight,
                bias=self.bias,
                stride=self.stride,
                padding=self.padding,
                dilation=self.dilation,
                mask=mask)
......@@ -293,8 +293,9 @@ def batched_nms(boxes: Tensor,
max_coordinate + torch.tensor(1).to(boxes))
boxes_for_nms = boxes + offsets[:, None]
nms_type = nms_cfg_.pop('type', 'nms')
nms_op = eval(nms_type)
nms_op = nms_cfg_.pop('type', 'nms')
if isinstance(nms_op, str):
nms_op = eval(nms_op)
split_thr = nms_cfg_.pop('split_thr', 10000)
# Won't split to multiple nms nodes when exporting to onnx
......@@ -410,11 +411,12 @@ def nms_rotated(dets: Tensor,
input_labels = scores.new_empty(0, dtype=torch.int)
else:
input_labels = labels
if dets.device.type == 'npu':
if dets.device.type in ('npu', 'mlu'):
order = scores.new_empty(0, dtype=torch.long)
coefficient = 57.29578 # 180 / PI
for i in range(dets.size()[0]):
dets_cw[i][4] *= coefficient # radians to angle
if dets.device.type == 'npu':
coefficient = 57.29578 # 180 / PI
for i in range(dets.size()[0]):
dets_cw[i][4] *= coefficient # radians to angle
keep_inds = ext_module.nms_rotated(dets_cw, scores, order, dets_cw,
input_labels, iou_threshold,
multi_label)
......
......@@ -31,8 +31,11 @@ def points_in_polygons(points: Tensor, polygons: Tensor) -> Tensor:
assert polygons.shape[1] == 8, \
'polygons dimension should be 8, ' \
f'but got unexpected shape {polygons.shape[1]}'
output = torch.full([points.shape[0], polygons.shape[0]],
0.).cuda().float()
output = torch.zeros(
points.shape[0],
polygons.shape[0],
dtype=torch.float32,
device=points.device)
ext_module.points_in_polygons_forward(points.contiguous(),
polygons.contiguous(), output)
return output
......@@ -105,6 +105,9 @@ class LoadImageFromFile(BaseTransform):
return None
else:
raise e
# in some cases, images are not read successfully, the img would be
# `None`, refer to https://github.com/open-mmlab/mmpretrain/issues/1427
assert img is not None, f'failed to load image: {filename}'
if self.to_float32:
img = img.astype(np.float32)
......
......@@ -1400,7 +1400,7 @@ class RandomResize(BaseTransform):
target\\_scale[0] \\sim Uniform([ratio\\_range[0], ratio\\_range[1]])
* scale[0]
.. math::
target\\_scale[0] \\sim Uniform([ratio\\_range[0], ratio\\_range[1]])
target\\_scale[1] \\sim Uniform([ratio\\_range[0], ratio\\_range[1]])
* scale[1]
Following the resize order of weight and height in cv2, ``ratio_range[0]``
......
# Copyright (c) OpenMMLab. All rights reserved.
import copy
import functools
import inspect
import weakref
......@@ -80,7 +81,10 @@ class cache_randomness:
def __get__(self, obj, cls):
self.instance_ref = weakref.ref(obj)
return self
# Return a copy to avoid multiple transform instances sharing
# one `cache_randomness` instance, which may cause data races
# in multithreading cases.
return copy.copy(self)
def avoid_cache_randomness(cls):
......
# Copyright (c) OpenMMLab. All rights reserved.
__version__ = '2.0.0'
__dcu_version__ = '2.0.0'
__version__ = '2.1.0'
def parse_version_info(version_str: str, length: int = 4) -> tuple:
"""Parse a version string into a tuple.
......@@ -32,4 +32,4 @@ def parse_version_info(version_str: str, length: int = 4) -> tuple:
version_info = tuple(int(x) for x in __version__.split('.')[:3])
__all__ = ['__version__', '__dcu_version__', 'version_info', 'parse_version_info']
__all__ = ['__version__', 'version_info', 'parse_version_info']
......@@ -7,3 +7,4 @@ sphinx==4.0.2
sphinx-copybutton
sphinx_markdown_tables>=0.0.16
torch
urllib3<2.0.0
addict
mmengine>=0.2.0
mmengine>=0.3.0
numpy
packaging
Pillow
......
......@@ -4,9 +4,6 @@ import platform
import re
from pkg_resources import DistributionNotFound, get_distribution, parse_version
from setuptools import find_packages, setup
import subprocess
from typing import Optional, Union
from pathlib import Path
EXT_TYPE = ''
try:
......@@ -38,63 +35,12 @@ def choose_requirement(primary, secondary):
return str(primary)
def get_sha(pytorch_root: Union[str, Path]) -> str:
    """Return the HEAD commit hash of the git checkout at ``pytorch_root``.

    Runs ``git rev-parse HEAD`` with ``pytorch_root`` as the working
    directory. Any failure (git missing, not a repository, bad path, ...)
    yields the string ``'Unknown'``.
    """
    git_command = ['git', 'rev-parse', 'HEAD']
    try:
        raw_output = subprocess.check_output(git_command, cwd=pytorch_root)
        commit_hash = raw_output.decode('ascii').strip()
    except Exception:
        commit_hash = 'Unknown'
    return commit_hash
def get_abi():
    """Return an ``'abi<N>'`` tag for gcc's ``_GLIBCXX_USE_CXX11_ABI`` value.

    Preprocesses ``#include <string>`` with gcc, greps the macro definition
    and uses the last space-separated token of the output as ``<N>``.

    Returns:
        str: ``'abi' + <N>`` on success, ``'abiUnknown'`` when the command
        cannot be run or produces no output (e.g. gcc is not installed).
    """
    command = "echo '#include <string>' | gcc -x c++ -E -dM - | fgrep _GLIBCXX_USE_CXX11_ABI"
    try:
        result = subprocess.run(
            command, shell=True, capture_output=True, text=True)
        output = result.stdout.strip()
    except Exception:
        return 'abiUnknown'
    # A failing pipeline does not raise; it just prints nothing to stdout.
    # Without this guard the function would return the bare prefix 'abi'.
    if not output:
        return 'abiUnknown'
    return 'abi' + output.split(' ')[-1]
def get_version_add(sha: Optional[str] = None) -> str:
version=''
mmcv_root = os.path.dirname(os.path.abspath(__file__))
add_version_path = os.path.join(os.path.join(mmcv_root, "mmcv"), "version.py")
if sha != 'Unknown':
if sha is None:
sha = get_sha(mmcv_root)
version = 'git' + sha[:7]
# abi
version += "." + get_abi()
# dtk version
if os.getenv("ROCM_PATH"):
rocm_path = os.getenv('ROCM_PATH', "")
rocm_version_path = os.path.join(rocm_path, '.info', "rocm_version")
with open(rocm_version_path, 'r',encoding='utf-8') as file:
lines = file.readlines()
rocm_version=lines[0][:-2].replace(".", "")
version += ".dtk" + rocm_version
# torch version
version += ".torch" + torch.__version__[:4]
lines=[]
with open(add_version_path, 'r',encoding='utf-8') as file:
lines = file.readlines()
lines[2] = "__dcu_version__ = '2.0.0+{}'\n".format(version)
with open(add_version_path, encoding="utf-8",mode="w") as file:
file.writelines(lines)
file.close()
def get_version():
get_version_add()
version_file = 'mmcv/version.py'
with open(version_file, encoding='utf-8') as f:
exec(compile(f.read(), version_file, 'exec'))
return locals()['__dcu_version__']
return locals()['__version__']
def parse_requirements(fname='requirements/runtime.txt', with_version=True):
......@@ -207,6 +153,9 @@ def get_extensions():
glob.glob('./mmcv/ops/csrc/parrots/*.cpp')
include_dirs.append(os.path.abspath('./mmcv/ops/csrc/common'))
include_dirs.append(os.path.abspath('./mmcv/ops/csrc/common/cuda'))
op_files.remove('./mmcv/ops/csrc/pytorch/cuda/iou3d_cuda.cu')
op_files.remove('./mmcv/ops/csrc/pytorch/cpu/bbox_overlaps_cpu.cpp')
op_files.remove('./mmcv/ops/csrc/pytorch/cuda/bias_act_cuda.cu')
cuda_args = os.getenv('MMCV_CUDA_ARGS')
extra_compile_args = {
'nvcc': [cuda_args, '-std=c++14'] if cuda_args else ['-std=c++14'],
......@@ -252,16 +201,22 @@ def get_extensions():
extra_compile_args = {'cxx': []}
if platform.system() != 'Windows':
extra_compile_args['cxx'] = ['-std=c++14']
if parse_version(torch.__version__) <= parse_version('1.12.1'):
extra_compile_args['cxx'] = ['-std=c++14']
else:
extra_compile_args['cxx'] = ['-std=c++17']
else:
# TODO: In Windows, C++17 is chosen to compile extensions in
# PyTorch2.0 , but a compile error will be reported.
# As a temporary solution, force the use of C++14.
if parse_version(torch.__version__) >= parse_version('2.0.0'):
if parse_version(torch.__version__) <= parse_version('1.12.1'):
extra_compile_args['cxx'] = ['/std:c++14']
else:
extra_compile_args['cxx'] = ['/std:c++17']
include_dirs = []
library_dirs = []
libraries = []
extra_objects = []
extra_link_args = []
is_rocm_pytorch = False
try:
from torch.utils.cpp_extension import ROCM_HOME
......@@ -270,16 +225,40 @@ def get_extensions():
except ImportError:
pass
if is_rocm_pytorch or torch.cuda.is_available() or os.getenv(
if os.getenv('MMCV_WITH_DIOPI', '0') == '1':
import mmengine # NOQA: F401
from mmengine.utils.version_utils import digit_version
assert digit_version(mmengine.__version__) >= digit_version(
'0.7.4'), f'mmengine >= 0.7.4 is required \
but {mmengine.__version__} is installed'
print(f'Compiling {ext_name} with CPU and DIPU')
define_macros += [('MMCV_WITH_DIOPI', None)]
define_macros += [('DIOPI_ATTR_WEAK', None)]
op_files = glob.glob('./mmcv/ops/csrc/pytorch/*.cpp') + \
glob.glob('./mmcv/ops/csrc/pytorch/cpu/*.cpp')
extension = CppExtension
include_dirs.append(os.path.abspath('./mmcv/ops/csrc/common'))
dipu_root = os.getenv('DIPU_ROOT')
diopi_path = os.getenv('DIOPI_PATH')
dipu_path = os.getenv('DIPU_PATH')
vendor_include_dirs = os.getenv('VENDOR_INCLUDE_DIRS')
nccl_include_dirs = os.getenv('NCCL_INCLUDE_DIRS')
include_dirs.append(dipu_root)
include_dirs.append(diopi_path + '/include')
include_dirs.append(dipu_path + '/dist/include')
include_dirs.append(vendor_include_dirs)
if nccl_include_dirs:
include_dirs.append(nccl_include_dirs)
library_dirs += [dipu_root]
libraries += ['torch_dipu']
elif is_rocm_pytorch or torch.cuda.is_available() or os.getenv(
'FORCE_CUDA', '0') == '1':
if is_rocm_pytorch:
define_macros += [('MMCV_WITH_HIP', None)]
define_macros += [('MMCV_WITH_CUDA', None)]
cuda_args = os.getenv('MMCV_CUDA_ARGS')
extra_compile_args['nvcc'] = [cuda_args] if cuda_args else []
if is_rocm_pytorch and platform.system() != 'Windows':
extra_compile_args['nvcc'] += \
['--gpu-max-threads-per-block=1024']
op_files = glob.glob('./mmcv/ops/csrc/pytorch/*.cpp') + \
glob.glob('./mmcv/ops/csrc/pytorch/cpu/*.cpp') + \
glob.glob('./mmcv/ops/csrc/pytorch/cuda/*.cu') + \
......@@ -292,16 +271,101 @@ def get_extensions():
torch.is_mlu_available()) or \
os.getenv('FORCE_MLU', '0') == '1':
from torch_mlu.utils.cpp_extension import MLUExtension
def get_mluops_version(file_path):
    """Read the mlu-ops version from the macros defined in a header file.

    Looks for ``#define MLUOP_MAJOR <x>``, ``#define MLUOP_MINOR <y>`` and
    ``#define MLUOP_PATCHLEVEL <z>``. If a macro is defined more than once,
    the last definition wins.

    Args:
        file_path: Path of the header to scan.

    Returns:
        The version formatted as ``'v<x>.<y>.<z>'``.

    Raises:
        ValueError: If any of the three macros is not defined in the file.
    """
    found = {
        'MLUOP_MAJOR': None,
        'MLUOP_MINOR': None,
        'MLUOP_PATCHLEVEL': None,
    }
    with open(file_path) as f:
        for line in f:
            # Split on any whitespace so tabs or repeated spaces are fine,
            # and require an exact `#define NAME value` shape so lines that
            # merely mention a macro (e.g. inside an expression) are ignored.
            tokens = line.split()
            if len(tokens) >= 3 and tokens[0] == '#define' \
                    and tokens[1] in found:
                found[tokens[1]] = tokens[2]

    missing = [name for name, value in found.items() if value is None]
    if missing:
        raise ValueError(
            f'{file_path} does not define {", ".join(missing)}')

    major = found['MLUOP_MAJOR']
    minor = found['MLUOP_MINOR']
    patchlevel = found['MLUOP_PATCHLEVEL']
    return f'v{major}.{minor}.{patchlevel}'
mmcv_mluops_version = get_mluops_version(
'./mmcv/ops/csrc/pytorch/mlu/mlu_common_helper.h')
mlu_ops_path = os.getenv('MMCV_MLU_OPS_PATH')
if mlu_ops_path:
exists_mluops_version = get_mluops_version(
mlu_ops_path + '/bangc-ops/mlu_op.h')
if exists_mluops_version != mmcv_mluops_version:
print('the version of mlu-ops provided is %s,'
' while %s is needed.' %
(exists_mluops_version, mmcv_mluops_version))
exit()
try:
if os.path.exists('mlu-ops'):
if os.path.islink('mlu-ops'):
os.remove('mlu-ops')
os.symlink(mlu_ops_path, 'mlu-ops')
elif os.path.abspath('mlu-ops') != mlu_ops_path:
os.symlink(mlu_ops_path, 'mlu-ops')
else:
os.symlink(mlu_ops_path, 'mlu-ops')
except Exception:
raise FileExistsError(
'mlu-ops already exists, please move it out,'
'or rename or remove it.')
else:
if not os.path.exists('mlu-ops'):
import requests
mluops_url = 'https://github.com/Cambricon/mlu-ops/' + \
'archive/refs/tags/' + mmcv_mluops_version + '.zip'
req = requests.get(mluops_url)
with open('./mlu-ops.zip', 'wb') as f:
try:
f.write(req.content)
except Exception:
raise ImportError('failed to download mlu-ops')
from zipfile import BadZipFile, ZipFile
with ZipFile('./mlu-ops.zip', 'r') as archive:
try:
archive.extractall()
dir_name = archive.namelist()[0].split('/')[0]
os.rename(dir_name, 'mlu-ops')
except BadZipFile:
print('invalid mlu-ops.zip file')
else:
exists_mluops_version = get_mluops_version(
'./mlu-ops/bangc-ops/mlu_op.h')
if exists_mluops_version != mmcv_mluops_version:
print('the version of provided mlu-ops is %s,'
' while %s is needed.' %
(exists_mluops_version, mmcv_mluops_version))
exit()
define_macros += [('MMCV_WITH_MLU', None)]
mlu_args = os.getenv('MMCV_MLU_ARGS')
extra_compile_args['cncc'] = [mlu_args] if mlu_args else []
mlu_args = os.getenv('MMCV_MLU_ARGS', '-DNDEBUG ')
mluops_includes = []
mluops_includes.append('-I' +
os.path.abspath('./mlu-ops/bangc-ops'))
mluops_includes.append(
'-I' + os.path.abspath('./mlu-ops/bangc-ops/kernels'))
extra_compile_args['cncc'] = [mlu_args] + \
mluops_includes if mlu_args else mluops_includes
extra_compile_args['cxx'] += ['-fno-gnu-unique']
op_files = glob.glob('./mmcv/ops/csrc/pytorch/*.cpp') + \
glob.glob('./mmcv/ops/csrc/pytorch/cpu/*.cpp') + \
glob.glob('./mmcv/ops/csrc/pytorch/mlu/*.cpp') + \
glob.glob('./mmcv/ops/csrc/common/mlu/*.mlu')
glob.glob('./mmcv/ops/csrc/common/mlu/*.mlu') + \
glob.glob(
'./mlu-ops/bangc-ops/core/**/*.cpp', recursive=True) + \
glob.glob(
'./mlu-ops/bangc-ops/kernels/**/*.cpp', recursive=True) + \
glob.glob(
'./mlu-ops/bangc-ops/kernels/**/*.mlu', recursive=True)
extra_link_args = [
'-Wl,--whole-archive',
'./mlu-ops/bangc-ops/kernels/kernel_wrapper/lib/libextops.a',
'-Wl,--no-whole-archive'
]
extension = MLUExtension
include_dirs.append(os.path.abspath('./mmcv/ops/csrc/common'))
include_dirs.append(os.path.abspath('./mmcv/ops/csrc/common/mlu'))
include_dirs.append(os.path.abspath('./mlu-ops/bangc-ops'))
elif (hasattr(torch.backends, 'mps')
and torch.backends.mps.is_available()) or os.getenv(
'FORCE_MPS', '0') == '1':
......@@ -320,9 +384,11 @@ def get_extensions():
extra_compile_args['cxx'] += ['-ObjC++']
# src
op_files = glob.glob('./mmcv/ops/csrc/pytorch/*.cpp') + \
glob.glob('./mmcv/ops/csrc/pytorch/cpu/*.cpp') + \
glob.glob('./mmcv/ops/csrc/common/mps/*.mm') + \
glob.glob('./mmcv/ops/csrc/pytorch/mps/*.mm')
glob.glob('./mmcv/ops/csrc/pytorch/cpu/*.cpp')
# TODO: support mps ops on torch>=2.1.0
if parse_version(torch.__version__) < parse_version('2.1.0'):
op_files += glob.glob('./mmcv/ops/csrc/common/mps/*.mm') + \
glob.glob('./mmcv/ops/csrc/pytorch/mps/*.mm')
extension = CppExtension
include_dirs.append(os.path.abspath('./mmcv/ops/csrc/common'))
include_dirs.append(os.path.abspath('./mmcv/ops/csrc/common/mps'))
......@@ -332,6 +398,10 @@ def get_extensions():
from torch_npu.utils.cpp_extension import NpuExtension
define_macros += [('MMCV_WITH_NPU', None)]
extension = NpuExtension
if parse_version(torch.__version__) <= parse_version('2.0.0'):
define_macros += [('MMCV_WITH_XLA', None)]
if parse_version(torch.__version__) > parse_version('2.0.0'):
define_macros += [('MMCV_WITH_KPRIVATE', None)]
except Exception:
raise ImportError('can not find any torch_npu')
# src
......@@ -356,14 +426,21 @@ def get_extensions():
# to compile those cpp files, so there is no need to add the
# argument
if 'nvcc' in extra_compile_args and platform.system() != 'Windows':
extra_compile_args['nvcc'] += ['-std=c++14']
if parse_version(torch.__version__) <= parse_version('1.12.1'):
extra_compile_args['nvcc'] += ['-std=c++14']
else:
extra_compile_args['nvcc'] += ['-std=c++17']
ext_ops = extension(
name=ext_name,
sources=op_files,
include_dirs=include_dirs,
define_macros=define_macros,
extra_compile_args=extra_compile_args)
extra_objects=extra_objects,
extra_compile_args=extra_compile_args,
library_dirs=library_dirs,
libraries=libraries,
extra_link_args=extra_link_args)
extensions.append(ext_ops)
return extensions
......
# Copyright (c) OpenMMLab. All rights reserved.
import inspect
from importlib import import_module
import numpy as np
......@@ -7,10 +8,14 @@ import torch
import torch.nn as nn
from mmengine.registry import MODELS
from mmengine.utils.dl_utils.parrots_wrapper import _BatchNorm
from torch.nn import ReflectionPad2d, Upsample
from mmcv.cnn.bricks import (build_activation_layer, build_conv_layer,
from mmcv.cnn.bricks import (ContextBlock, ConvModule, ConvTranspose2d,
GeneralizedAttention, NonLocal2d,
build_activation_layer, build_conv_layer,
build_norm_layer, build_padding_layer,
build_plugin_layer, build_upsample_layer, is_norm)
from mmcv.cnn.bricks.activation import Clamp
from mmcv.cnn.bricks.norm import infer_abbr as infer_norm_abbr
from mmcv.cnn.bricks.plugin import infer_abbr as infer_plugin_abbr
from mmcv.cnn.bricks.upsample import PixelShufflePack
......@@ -65,18 +70,19 @@ def test_build_conv_layer():
kwargs.pop('groups')
for type_name, module in MODELS.module_dict.items():
cfg = dict(type=type_name)
# SparseInverseConv2d and SparseInverseConv3d do not have the argument
# 'dilation'
if type_name == 'SparseInverseConv2d' or type_name == \
'SparseInverseConv3d':
kwargs.pop('dilation')
if 'conv' in type_name.lower():
layer = build_conv_layer(cfg, **kwargs)
assert isinstance(layer, module)
assert layer.in_channels == kwargs['in_channels']
assert layer.out_channels == kwargs['out_channels']
kwargs['dilation'] = 2 # recover the key
for type_name_ in (type_name, module):
cfg = dict(type=type_name_)
# SparseInverseConv2d and SparseInverseConv3d do not have the
# argument 'dilation'
if type_name == 'SparseInverseConv2d' or type_name == \
'SparseInverseConv3d':
kwargs.pop('dilation')
if 'conv' in type_name.lower():
layer = build_conv_layer(cfg, **kwargs)
assert isinstance(layer, module)
assert layer.in_channels == kwargs['in_channels']
assert layer.out_channels == kwargs['out_channels']
kwargs['dilation'] = 2 # recover the key
def test_infer_norm_abbr():
......@@ -162,17 +168,18 @@ def test_build_norm_layer():
if type_name == 'MMSyncBN': # skip MMSyncBN
continue
for postfix in ['_test', 1]:
cfg = dict(type=type_name)
if type_name == 'GN':
cfg['num_groups'] = 3
name, layer = build_norm_layer(cfg, 3, postfix=postfix)
assert name == abbr_mapping[type_name] + str(postfix)
assert isinstance(layer, module)
if type_name == 'GN':
assert layer.num_channels == 3
assert layer.num_groups == cfg['num_groups']
elif type_name != 'LN':
assert layer.num_features == 3
for type_name_ in (type_name, module):
cfg = dict(type=type_name_)
if type_name == 'GN':
cfg['num_groups'] = 3
name, layer = build_norm_layer(cfg, 3, postfix=postfix)
assert name == abbr_mapping[type_name] + str(postfix)
assert isinstance(layer, module)
if type_name == 'GN':
assert layer.num_channels == 3
assert layer.num_groups == cfg['num_groups']
elif type_name != 'LN':
assert layer.num_features == 3
def test_build_activation_layer():
......@@ -184,7 +191,7 @@ def test_build_activation_layer():
for module_name in ['activation', 'hsigmoid', 'hswish', 'swish']:
act_module = import_module(f'mmcv.cnn.bricks.{module_name}')
for key, value in act_module.__dict__.items():
if isinstance(value, type) and issubclass(value, nn.Module):
if inspect.isclass(value) and issubclass(value, nn.Module):
act_names.append(key)
with pytest.raises(TypeError):
......@@ -210,10 +217,12 @@ def test_build_activation_layer():
assert isinstance(layer, module)
# sanity check for Clamp
act = build_activation_layer(dict(type='Clamp'))
x = torch.randn(10) * 1000
y = act(x)
assert np.logical_and((y >= -1).numpy(), (y <= 1).numpy()).all()
for type_name in ('Clamp', Clamp):
act = build_activation_layer(dict(type='Clamp'))
x = torch.randn(10) * 1000
y = act(x)
assert np.logical_and((y >= -1).numpy(), (y <= 1).numpy()).all()
act = build_activation_layer(dict(type='Clip', min=0))
y = act(x)
assert np.logical_and((y >= 0).numpy(), (y <= 1).numpy()).all()
......@@ -227,7 +236,7 @@ def test_build_padding_layer():
for module_name in ['padding']:
pad_module = import_module(f'mmcv.cnn.bricks.{module_name}')
for key, value in pad_module.__dict__.items():
if isinstance(value, type) and issubclass(value, nn.Module):
if inspect.isclass(value) and issubclass(value, nn.Module):
pad_names.append(key)
with pytest.raises(TypeError):
......@@ -250,12 +259,12 @@ def test_build_padding_layer():
cfg['type'] = type_name
layer = build_padding_layer(cfg, 2)
assert isinstance(layer, module)
input_x = torch.randn(1, 2, 5, 5)
cfg = dict(type='reflect')
padding_layer = build_padding_layer(cfg, 2)
res = padding_layer(input_x)
assert res.shape == (1, 2, 9, 9)
for type_name in (ReflectionPad2d, 'reflect'):
input_x = torch.randn(1, 2, 5, 5)
cfg = dict(type=type_name)
padding_layer = build_padding_layer(cfg, 2)
res = padding_layer(input_x)
assert res.shape == (1, 2, 9, 9)
def test_upsample_layer():
......@@ -280,38 +289,48 @@ def test_upsample_layer():
assert isinstance(layer, nn.Upsample)
assert layer.mode == type_name
cfg = dict()
cfg['type'] = Upsample
layer_from_cls = build_upsample_layer(cfg)
assert isinstance(layer_from_cls, nn.Upsample)
assert layer_from_cls.mode == 'nearest'
cfg = dict(
type='deconv', in_channels=3, out_channels=3, kernel_size=3, stride=2)
layer = build_upsample_layer(cfg)
assert isinstance(layer, nn.ConvTranspose2d)
cfg = dict(type='deconv')
kwargs = dict(in_channels=3, out_channels=3, kernel_size=3, stride=2)
layer = build_upsample_layer(cfg, **kwargs)
assert isinstance(layer, nn.ConvTranspose2d)
assert layer.in_channels == kwargs['in_channels']
assert layer.out_channels == kwargs['out_channels']
assert layer.kernel_size == (kwargs['kernel_size'], kwargs['kernel_size'])
assert layer.stride == (kwargs['stride'], kwargs['stride'])
layer = build_upsample_layer(cfg, 3, 3, 3, 2)
assert isinstance(layer, nn.ConvTranspose2d)
assert layer.in_channels == kwargs['in_channels']
assert layer.out_channels == kwargs['out_channels']
assert layer.kernel_size == (kwargs['kernel_size'], kwargs['kernel_size'])
assert layer.stride == (kwargs['stride'], kwargs['stride'])
cfg = dict(
type='pixel_shuffle',
in_channels=3,
out_channels=3,
scale_factor=2,
upsample_kernel=3)
layer = build_upsample_layer(cfg)
for type_name in ('deconv', ConvTranspose2d):
cfg = dict(type=ConvTranspose2d)
kwargs = dict(in_channels=3, out_channels=3, kernel_size=3, stride=2)
layer = build_upsample_layer(cfg, **kwargs)
assert isinstance(layer, nn.ConvTranspose2d)
assert layer.in_channels == kwargs['in_channels']
assert layer.out_channels == kwargs['out_channels']
assert layer.kernel_size == (kwargs['kernel_size'],
kwargs['kernel_size'])
assert layer.stride == (kwargs['stride'], kwargs['stride'])
layer = build_upsample_layer(cfg, 3, 3, 3, 2)
assert isinstance(layer, nn.ConvTranspose2d)
assert layer.in_channels == kwargs['in_channels']
assert layer.out_channels == kwargs['out_channels']
assert layer.kernel_size == (kwargs['kernel_size'],
kwargs['kernel_size'])
assert layer.stride == (kwargs['stride'], kwargs['stride'])
for type_name in ('pixel_shuffle', PixelShufflePack):
cfg = dict(
type=type_name,
in_channels=3,
out_channels=3,
scale_factor=2,
upsample_kernel=3)
layer = build_upsample_layer(cfg)
assert isinstance(layer, PixelShufflePack)
assert layer.scale_factor == 2
assert layer.upsample_kernel == 3
assert isinstance(layer, PixelShufflePack)
assert layer.scale_factor == 2
assert layer.upsample_kernel == 3
def test_pixel_shuffle_pack():
......@@ -396,35 +415,42 @@ def test_build_plugin_layer():
build_plugin_layer(cfg, postfix=[1, 2])
# test ContextBlock
for postfix in ['', '_test', 1]:
cfg = dict(type='ContextBlock')
name, layer = build_plugin_layer(
cfg, postfix=postfix, in_channels=16, ratio=1. / 4)
assert name == 'context_block' + str(postfix)
assert isinstance(layer, MODELS.module_dict['ContextBlock'])
for type_name in ('ContextBlock', ContextBlock):
for postfix in ['', '_test', 1]:
cfg = dict(type=type_name)
name, layer = build_plugin_layer(
cfg, postfix=postfix, in_channels=16, ratio=1. / 4)
assert name == 'context_block' + str(postfix)
assert isinstance(layer, MODELS.module_dict['ContextBlock'])
# test GeneralizedAttention
for postfix in ['', '_test', 1]:
cfg = dict(type='GeneralizedAttention')
name, layer = build_plugin_layer(cfg, postfix=postfix, in_channels=16)
assert name == 'gen_attention_block' + str(postfix)
assert isinstance(layer, MODELS.module_dict['GeneralizedAttention'])
for type_name in ('GeneralizedAttention', GeneralizedAttention):
for postfix in ['', '_test', 1]:
cfg = dict(type=type_name)
name, layer = build_plugin_layer(
cfg, postfix=postfix, in_channels=16)
assert name == 'gen_attention_block' + str(postfix)
assert isinstance(layer,
MODELS.module_dict['GeneralizedAttention'])
# test NonLocal2d
for postfix in ['', '_test', 1]:
cfg = dict(type='NonLocal2d')
name, layer = build_plugin_layer(cfg, postfix=postfix, in_channels=16)
assert name == 'nonlocal_block' + str(postfix)
assert isinstance(layer, MODELS.module_dict['NonLocal2d'])
for type_name in ('NonLocal2d', NonLocal2d):
for postfix in ['', '_test', 1]:
cfg = dict(type='NonLocal2d')
name, layer = build_plugin_layer(
cfg, postfix=postfix, in_channels=16)
assert name == 'nonlocal_block' + str(postfix)
assert isinstance(layer, MODELS.module_dict['NonLocal2d'])
# test ConvModule
for postfix in ['', '_test', 1]:
cfg = dict(type='ConvModule')
name, layer = build_plugin_layer(
cfg,
postfix=postfix,
in_channels=16,
out_channels=4,
kernel_size=3)
assert name == 'conv_block' + str(postfix)
assert isinstance(layer, MODELS.module_dict['ConvModule'])
for type_name in ('ConvModule', ConvModule):
cfg = dict(type=type_name)
name, layer = build_plugin_layer(
cfg,
postfix=postfix,
in_channels=16,
out_channels=4,
kernel_size=3)
assert name == 'conv_block' + str(postfix)
assert isinstance(layer, MODELS.module_dict['ConvModule'])
......@@ -75,6 +75,31 @@ def test_conv_module():
output = conv(x)
assert output.shape == (1, 8, 255, 255)
# conv + norm with efficient mode
efficient_conv = ConvModule(
3, 8, 2, norm_cfg=dict(type='BN'), efficient_conv_bn_eval=True).eval()
plain_conv = ConvModule(
3, 8, 2, norm_cfg=dict(type='BN'),
efficient_conv_bn_eval=False).eval()
for efficient_param, plain_param in zip(
efficient_conv.state_dict().values(),
plain_conv.state_dict().values()):
plain_param.copy_(efficient_param)
efficient_mode_output = efficient_conv(x)
plain_mode_output = plain_conv(x)
assert torch.allclose(efficient_mode_output, plain_mode_output, atol=1e-5)
# `conv` attribute can be dynamically modified in efficient mode
efficient_conv = ConvModule(
3, 8, 2, norm_cfg=dict(type='BN'), efficient_conv_bn_eval=True).eval()
new_conv = nn.Conv2d(3, 8, 2).eval()
efficient_conv.conv = new_conv
efficient_mode_output = efficient_conv(x)
plain_mode_output = efficient_conv.activate(
efficient_conv.norm(new_conv(x)))
assert torch.allclose(efficient_mode_output, plain_mode_output, atol=1e-5)
# conv + act
conv = ConvModule(3, 8, 2)
assert conv.with_activation
......
......@@ -4,6 +4,8 @@ from unittest.mock import patch
import pytest
import torch
import torch.nn as nn
from mmengine.utils import digit_version
from mmengine.utils.dl_utils import TORCH_VERSION
from mmcv.cnn.bricks import (Conv2d, Conv3d, ConvTranspose2d, ConvTranspose3d,
Linear, MaxPool2d, MaxPool3d)
......@@ -374,3 +376,21 @@ def test_nn_op_forward_called():
wrapper = Linear(3, 3)
wrapper(x_normal)
nn_module_forward.assert_called_with(x_normal)
@pytest.mark.skipif(
    digit_version(TORCH_VERSION) < digit_version('1.10'),
    reason='MaxPool2d and MaxPool3d will fail fx for torch<=1.9')
def test_fx_compatibility():
    """Every wrapped layer must survive ``torch.fx.symbolic_trace``."""
    from torch import fx

    # Each entry pairs the positional constructor arguments with the wrapper
    # classes that are instantiated with them.
    trace_cases = (
        ((1, ), (MaxPool2d, MaxPool3d)),
        ((1, 1), (Linear, )),
        ((1, 1, 1), (Conv2d, ConvTranspose2d, Conv3d, ConvTranspose3d)),
    )
    for ctor_args, layer_classes in trace_cases:
        for layer_cls in layer_classes:
            # The traced module itself is not inspected; the test only
            # requires that tracing completes without raising.
            fx.symbolic_trace(layer_cls(*ctor_args))
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment