Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
MMCV
Commits
9b1209fa
"docs/en/git@developer.sourcefind.cn:OpenDAS/mmcv.git" did not exist on "91da96431f836d8865b416f7d4a6e19c92ec0a15"
Unverified
Commit
9b1209fa
authored
Jun 13, 2023
by
CokeDong
Committed by
GitHub
Jun 13, 2023
Browse files
[Feature] Support mmcv ext with DIOPI impl (#2790)
parent
558742c9
Changes
10
Show whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
677 additions
and
28 deletions
+677
-28
mmcv/ops/csrc/pytorch/bbox_overlaps.cpp
mmcv/ops/csrc/pytorch/bbox_overlaps.cpp
+43
-0
mmcv/ops/csrc/pytorch/focal_loss.cpp
mmcv/ops/csrc/pytorch/focal_loss.cpp
+87
-0
mmcv/ops/csrc/pytorch/modulated_deform_conv.cpp
mmcv/ops/csrc/pytorch/modulated_deform_conv.cpp
+174
-2
mmcv/ops/csrc/pytorch/nms.cpp
mmcv/ops/csrc/pytorch/nms.cpp
+43
-0
mmcv/ops/csrc/pytorch/roi_align.cpp
mmcv/ops/csrc/pytorch/roi_align.cpp
+101
-0
mmcv/ops/csrc/pytorch/voxelization.cpp
mmcv/ops/csrc/pytorch/voxelization.cpp
+146
-0
setup.py
setup.py
+32
-1
tests/test_ops/test_modulated_deform_conv.py
tests/test_ops/test_modulated_deform_conv.py
+23
-6
tests/test_ops/test_roi_align.py
tests/test_ops/test_roi_align.py
+14
-13
tests/test_ops/test_voxelization.py
tests/test_ops/test_voxelization.py
+14
-6
No files found.
mmcv/ops/csrc/pytorch/bbox_overlaps.cpp
View file @
9b1209fa
// Copyright (c) OpenMMLab. All rights reserved
// Copyright (c) OpenMMLab. All rights reserved
#include "pytorch_cpp_helper.hpp"
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"
#include "pytorch_device_registry.hpp"
#ifdef MMCV_WITH_DIOPI
#include <diopi/diopirt.h>
#include <diopi/functions.h>
#include <diopi/functions_mmcv.h>
#include "csrc_dipu/diopirt/diopirt_impl.h"
using
dipu
::
diopi_helper
::
toDiopiScalar
;
using
dipu
::
diopi_helper
::
toDiopiTensorHandle
;
#endif
void
bbox_overlaps_impl
(
const
Tensor
bboxes1
,
const
Tensor
bboxes2
,
Tensor
ious
,
void
bbox_overlaps_impl
(
const
Tensor
bboxes1
,
const
Tensor
bboxes2
,
Tensor
ious
,
const
int
mode
,
const
bool
aligned
,
const
int
offset
)
{
const
int
mode
,
const
bool
aligned
,
const
int
offset
)
{
...
@@ -8,7 +18,40 @@ void bbox_overlaps_impl(const Tensor bboxes1, const Tensor bboxes2, Tensor ious,
...
@@ -8,7 +18,40 @@ void bbox_overlaps_impl(const Tensor bboxes1, const Tensor bboxes2, Tensor ious,
aligned
,
offset
);
aligned
,
offset
);
}
}
#ifdef MMCV_WITH_DIOPI
void
bbox_overlaps_diopi
(
const
Tensor
bboxes1
,
const
Tensor
bboxes2
,
Tensor
ious
,
const
int
mode
,
const
bool
aligned
,
const
int
offset
)
{
auto
bboxes1_p
=
toDiopiTensorHandle
(
bboxes1
);
diopiDevice_t
device
;
diopiGetTensorDevice
(
bboxes1_p
,
&
device
);
if
(
device
==
diopi_host
)
{
bbox_overlaps_impl
(
bboxes1
,
bboxes2
,
ious
,
mode
,
aligned
,
offset
);
return
;
}
diopiContext
ctx
(
dipu
::
getCurrentDIPUStream
().
rawstream
());
diopiContextHandle_t
ch
=
&
ctx
;
auto
bboxes2_p
=
toDiopiTensorHandle
(
bboxes2
);
auto
ious_p
=
toDiopiTensorHandle
(
ious
);
if
(
reinterpret_cast
<
void
*>
(
diopiBboxOverlapsMmcv
)
!=
nullptr
)
{
auto
ret
=
diopiBboxOverlapsMmcv
(
ch
,
ious_p
,
bboxes1_p
,
bboxes2_p
,
mode
,
offset
,
aligned
);
if
(
ret
==
diopiSuccess
)
return
;
}
LOG
(
WARNING
)
<<
"Fallback to cpu: mmcv ext op bbox_overlaps"
;
auto
bboxes1_cpu
=
bboxes1
.
cpu
();
auto
bboxes2_cpu
=
bboxes2
.
cpu
();
auto
ious_cpu
=
ious
.
cpu
();
bbox_overlaps_impl
(
bboxes1_cpu
,
bboxes2_cpu
,
ious_cpu
,
mode
,
aligned
,
offset
);
ious
.
copy_
(
ious_cpu
);
}
#endif
void
bbox_overlaps
(
const
Tensor
bboxes1
,
const
Tensor
bboxes2
,
Tensor
ious
,
void
bbox_overlaps
(
const
Tensor
bboxes1
,
const
Tensor
bboxes2
,
Tensor
ious
,
const
int
mode
,
const
bool
aligned
,
const
int
offset
)
{
const
int
mode
,
const
bool
aligned
,
const
int
offset
)
{
#ifdef MMCV_WITH_DIOPI
bbox_overlaps_diopi
(
bboxes1
,
bboxes2
,
ious
,
mode
,
aligned
,
offset
);
#else
bbox_overlaps_impl
(
bboxes1
,
bboxes2
,
ious
,
mode
,
aligned
,
offset
);
bbox_overlaps_impl
(
bboxes1
,
bboxes2
,
ious
,
mode
,
aligned
,
offset
);
#endif
}
}
mmcv/ops/csrc/pytorch/focal_loss.cpp
View file @
9b1209fa
// Copyright (c) OpenMMLab. All rights reserved
// Copyright (c) OpenMMLab. All rights reserved
#include "pytorch_cpp_helper.hpp"
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"
#include "pytorch_device_registry.hpp"
#ifdef MMCV_WITH_DIOPI
#include <diopi/diopirt.h>
#include <diopi/functions.h>
#include <diopi/functions_mmcv.h>
#include "csrc_dipu/diopirt/diopirt_impl.h"
using
dipu
::
diopi_helper
::
toDiopiScalar
;
using
dipu
::
diopi_helper
::
toDiopiTensorHandle
;
#endif
void
sigmoid_focal_loss_forward_impl
(
Tensor
input
,
Tensor
target
,
Tensor
weight
,
void
sigmoid_focal_loss_forward_impl
(
Tensor
input
,
Tensor
target
,
Tensor
weight
,
Tensor
output
,
float
gamma
,
float
alpha
)
{
Tensor
output
,
float
gamma
,
float
alpha
)
{
...
@@ -29,15 +39,92 @@ void softmax_focal_loss_backward_impl(Tensor input, Tensor target,
...
@@ -29,15 +39,92 @@ void softmax_focal_loss_backward_impl(Tensor input, Tensor target,
buff
,
grad_input
,
gamma
,
alpha
);
buff
,
grad_input
,
gamma
,
alpha
);
}
}
#ifdef MMCV_WITH_DIOPI
// DIOPI-backed forward pass of sigmoid focal loss.
//
// Tensors whose device DIOPI reports as diopi_host are handled by
// sigmoid_focal_loss_forward_impl directly. Otherwise
// diopiSigmoidFocalLossMmcv is attempted on the current DIPU stream; if the
// symbol is null or the call does not return diopiSuccess, a warning is
// logged, the op runs on CPU copies, and `output` receives the CPU result.
void sigmoid_focal_loss_forward_diopi(Tensor input, Tensor target,
                                      Tensor weight, Tensor output,
                                      float gamma, float alpha) {
  auto in_handle = toDiopiTensorHandle(input);
  diopiDevice_t tensor_device;
  diopiGetTensorDevice(in_handle, &tensor_device);
  if (tensor_device == diopi_host) {
    sigmoid_focal_loss_forward_impl(input, target, weight, output, gamma,
                                    alpha);
    return;
  }

  diopiContext stream_ctx(dipu::getCurrentDIPUStream().rawstream());
  diopiContextHandle_t ctx_handle = &stream_ctx;
  auto target_handle = toDiopiTensorHandle(target);
  auto weight_handle = toDiopiTensorHandle(weight);
  auto out_handle = toDiopiTensorHandle(output);

  // Only call the DIOPI kernel when its address is non-null.
  if (reinterpret_cast<void *>(diopiSigmoidFocalLossMmcv) != nullptr &&
      diopiSigmoidFocalLossMmcv(ctx_handle, out_handle, in_handle,
                                target_handle, weight_handle, gamma,
                                alpha) == diopiSuccess) {
    return;
  }

  LOG(WARNING)
      << "Fallback to cpu: mmcv ext op sigmoid_focal_loss_forward_impl";
  auto host_input = input.cpu();
  auto host_target = target.cpu();
  auto host_weight = weight.cpu();
  auto host_output = output.cpu();
  sigmoid_focal_loss_forward_impl(host_input, host_target, host_weight,
                                  host_output, gamma, alpha);
  // Copy the CPU result back into the caller's output tensor.
  output.copy_(host_output);
}
// DIOPI-backed backward pass of sigmoid focal loss.
//
// Tensors whose device DIOPI reports as diopi_host are handled by
// sigmoid_focal_loss_backward_impl directly. Otherwise
// diopiSigmoidFocalLossBackwardMmcv is attempted on the current DIPU stream;
// if the symbol is null or the call does not return diopiSuccess, a warning
// is logged, the op runs on CPU copies, and `grad_input` receives the CPU
// result.
void sigmoid_focal_loss_backward_diopi(Tensor input, Tensor target,
                                       Tensor weight, Tensor grad_input,
                                       float gamma, float alpha) {
  auto input_p = toDiopiTensorHandle(input);
  diopiDevice_t device;
  diopiGetTensorDevice(input_p, &device);
  if (device == diopi_host) {
    sigmoid_focal_loss_backward_impl(input, target, weight, grad_input, gamma,
                                     alpha);
    return;
  }

  diopiContext ctx(dipu::getCurrentDIPUStream().rawstream());
  diopiContextHandle_t ch = &ctx;
  auto target_p = toDiopiTensorHandle(target);
  auto weight_p = toDiopiTensorHandle(weight);
  auto grad_input_p = toDiopiTensorHandle(grad_input);

  // Only call the DIOPI kernel when its address is non-null.
  if (reinterpret_cast<void *>(diopiSigmoidFocalLossBackwardMmcv) != nullptr) {
    auto ret = diopiSigmoidFocalLossBackwardMmcv(
        ch, grad_input_p, input_p, target_p, weight_p, gamma, alpha);
    if (ret == diopiSuccess) return;
  }

  // The message names the backward op; it previously reported the forward op,
  // which made fallback logs misleading.
  LOG(WARNING)
      << "Fallback to cpu: mmcv ext op sigmoid_focal_loss_backward_impl";
  auto input_cpu = input.cpu();
  auto target_cpu = target.cpu();
  auto weight_cpu = weight.cpu();
  auto grad_input_cpu = grad_input.cpu();
  sigmoid_focal_loss_backward_impl(input_cpu, target_cpu, weight_cpu,
                                   grad_input_cpu, gamma, alpha);
  // Copy the CPU gradient back into the caller's tensor.
  grad_input.copy_(grad_input_cpu);
}
#endif
void
sigmoid_focal_loss_forward
(
Tensor
input
,
Tensor
target
,
Tensor
weight
,
void
sigmoid_focal_loss_forward
(
Tensor
input
,
Tensor
target
,
Tensor
weight
,
Tensor
output
,
float
gamma
,
float
alpha
)
{
Tensor
output
,
float
gamma
,
float
alpha
)
{
#ifdef MMCV_WITH_DIOPI
sigmoid_focal_loss_forward_diopi
(
input
,
target
,
weight
,
output
,
gamma
,
alpha
);
#else
sigmoid_focal_loss_forward_impl
(
input
,
target
,
weight
,
output
,
gamma
,
alpha
);
sigmoid_focal_loss_forward_impl
(
input
,
target
,
weight
,
output
,
gamma
,
alpha
);
#endif
}
}
void
sigmoid_focal_loss_backward
(
Tensor
input
,
Tensor
target
,
Tensor
weight
,
void
sigmoid_focal_loss_backward
(
Tensor
input
,
Tensor
target
,
Tensor
weight
,
Tensor
grad_input
,
float
gamma
,
float
alpha
)
{
Tensor
grad_input
,
float
gamma
,
float
alpha
)
{
#ifdef MMCV_WITH_DIOPI
sigmoid_focal_loss_backward_diopi
(
input
,
target
,
weight
,
grad_input
,
gamma
,
alpha
);
#else
sigmoid_focal_loss_backward_impl
(
input
,
target
,
weight
,
grad_input
,
gamma
,
sigmoid_focal_loss_backward_impl
(
input
,
target
,
weight
,
grad_input
,
gamma
,
alpha
);
alpha
);
#endif
}
}
void
softmax_focal_loss_forward
(
Tensor
input
,
Tensor
target
,
Tensor
weight
,
void
softmax_focal_loss_forward
(
Tensor
input
,
Tensor
target
,
Tensor
weight
,
...
...
mmcv/ops/csrc/pytorch/modulated_deform_conv.cpp
View file @
9b1209fa
// Copyright (c) OpenMMLab. All rights reserved
// Copyright (c) OpenMMLab. All rights reserved
#include "pytorch_cpp_helper.hpp"
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"
#include "pytorch_device_registry.hpp"
#ifdef MMCV_WITH_DIOPI
#include <diopi/diopirt.h>
#include <diopi/functions.h>
#include <diopi/functions_mmcv.h>
#include "csrc_dipu/diopirt/diopirt_impl.h"
using
dipu
::
diopi_helper
::
toDiopiScalar
;
using
dipu
::
diopi_helper
::
toDiopiTensorHandle
;
#endif
void
modulated_deformable_im2col_impl
(
void
modulated_deformable_im2col_impl
(
const
Tensor
data_im
,
const
Tensor
data_offset
,
const
Tensor
data_mask
,
const
Tensor
data_im
,
const
Tensor
data_offset
,
const
Tensor
data_mask
,
...
@@ -45,7 +55,7 @@ void modulated_deformable_col2im_coord_impl(
...
@@ -45,7 +55,7 @@ void modulated_deformable_col2im_coord_impl(
dilation_w
,
deformable_group
,
grad_offset
,
grad_mask
);
dilation_w
,
deformable_group
,
grad_offset
,
grad_mask
);
}
}
void
modulated_deform_conv_forward
(
void
modulated_deform_conv_forward
_fallthrough
(
Tensor
input
,
Tensor
weight
,
Tensor
bias
,
Tensor
ones
,
Tensor
offset
,
Tensor
input
,
Tensor
weight
,
Tensor
bias
,
Tensor
ones
,
Tensor
offset
,
Tensor
mask
,
Tensor
output
,
Tensor
columns
,
int
kernel_h
,
int
kernel_w
,
Tensor
mask
,
Tensor
output
,
Tensor
columns
,
int
kernel_h
,
int
kernel_w
,
const
int
stride_h
,
const
int
stride_w
,
const
int
pad_h
,
const
int
pad_w
,
const
int
stride_h
,
const
int
stride_w
,
const
int
pad_h
,
const
int
pad_w
,
...
@@ -123,7 +133,7 @@ void modulated_deform_conv_forward(
...
@@ -123,7 +133,7 @@ void modulated_deform_conv_forward(
}
}
}
}
void
modulated_deform_conv_backward
(
void
modulated_deform_conv_backward
_fallthrough
(
Tensor
input
,
Tensor
weight
,
Tensor
bias
,
Tensor
ones
,
Tensor
offset
,
Tensor
input
,
Tensor
weight
,
Tensor
bias
,
Tensor
ones
,
Tensor
offset
,
Tensor
mask
,
Tensor
columns
,
Tensor
grad_input
,
Tensor
grad_weight
,
Tensor
mask
,
Tensor
columns
,
Tensor
grad_input
,
Tensor
grad_weight
,
Tensor
grad_bias
,
Tensor
grad_offset
,
Tensor
grad_mask
,
Tensor
grad_output
,
Tensor
grad_bias
,
Tensor
grad_offset
,
Tensor
grad_mask
,
Tensor
grad_output
,
...
@@ -235,3 +245,165 @@ void modulated_deform_conv_backward(
...
@@ -235,3 +245,165 @@ void modulated_deform_conv_backward(
grad_output
.
size
(
2
),
grad_output
.
size
(
3
),
grad_output
.
size
(
2
),
grad_output
.
size
(
3
),
grad_output
.
size
(
4
)});
grad_output
.
size
(
4
)});
}
}
#ifdef MMCV_WITH_DIOPI
// DIOPI-backed forward pass of modulated deformable convolution.
//
// Tensors whose device DIOPI reports as diopi_host are handled by
// modulated_deform_conv_forward_fallthrough directly. Otherwise
// diopiModulatedDeformConvMmcv is attempted on the current DIPU stream; if the
// symbol is null or the call does not return diopiSuccess, a warning is
// logged, the op runs on CPU copies of every tensor, and only `output` is
// copied back to the caller's tensor.
void modulated_deform_conv_forward_diopi(
    Tensor input, Tensor weight, Tensor bias, Tensor ones, Tensor offset,
    Tensor mask, Tensor output, Tensor columns, int kernel_h, int kernel_w,
    const int stride_h, const int stride_w, const int pad_h, const int pad_w,
    const int dilation_h, const int dilation_w, const int group,
    const int deformable_group, const bool with_bias) {
  auto in_handle = toDiopiTensorHandle(input);
  diopiDevice_t tensor_device;
  diopiGetTensorDevice(in_handle, &tensor_device);
  if (tensor_device == diopi_host) {
    modulated_deform_conv_forward_fallthrough(
        input, weight, bias, ones, offset, mask, output, columns, kernel_h,
        kernel_w, stride_h, stride_w, pad_h, pad_w, dilation_h, dilation_w,
        group, deformable_group, with_bias);
    return;
  }

  diopiContext stream_ctx(dipu::getCurrentDIPUStream().rawstream());
  diopiContextHandle_t ctx_handle = &stream_ctx;
  auto weight_handle = toDiopiTensorHandle(weight);
  auto bias_handle = toDiopiTensorHandle(bias);
  auto ones_handle = toDiopiTensorHandle(ones);
  auto offset_handle = toDiopiTensorHandle(offset);
  auto mask_handle = toDiopiTensorHandle(mask);
  auto out_handle = toDiopiTensorHandle(output);
  auto columns_handle = toDiopiTensorHandle(columns);

  // Only call the DIOPI kernel when its address is non-null.
  if (reinterpret_cast<void *>(diopiModulatedDeformConvMmcv) != nullptr &&
      diopiModulatedDeformConvMmcv(
          ctx_handle, out_handle, columns_handle, ones_handle, in_handle,
          weight_handle, bias_handle, offset_handle, mask_handle, kernel_h,
          kernel_w, stride_h, stride_w, pad_h, pad_w, dilation_h, dilation_w,
          group, deformable_group, with_bias) == diopiSuccess) {
    return;
  }

  LOG(WARNING) << "Fallback to cpu: mmcv ext op modulated_deform_conv_forward";
  auto host_input = input.cpu();
  auto host_weight = weight.cpu();
  auto host_bias = bias.cpu();
  auto host_ones = ones.cpu();
  auto host_offset = offset.cpu();
  auto host_mask = mask.cpu();
  auto host_output = output.cpu();
  auto host_columns = columns.cpu();
  modulated_deform_conv_forward_fallthrough(
      host_input, host_weight, host_bias, host_ones, host_offset, host_mask,
      host_output, host_columns, kernel_h, kernel_w, stride_h, stride_w, pad_h,
      pad_w, dilation_h, dilation_w, group, deformable_group, with_bias);
  // Only the convolution result is published back to the caller.
  output.copy_(host_output);
}
// DIOPI-backed backward pass of modulated deformable convolution.
//
// Tensors whose device DIOPI reports as diopi_host are handled by
// modulated_deform_conv_backward_fallthrough directly. Otherwise
// diopiModulatedDeformConvBackwardMmcv is attempted on the current DIPU
// stream; if the symbol is null or the call does not return diopiSuccess, a
// warning is logged, the op runs on CPU copies, and the five gradient tensors
// (grad_input, grad_weight, grad_bias, grad_offset, grad_mask) are copied back.
void modulated_deform_conv_backward_diopi(
    Tensor input, Tensor weight, Tensor bias, Tensor ones, Tensor offset,
    Tensor mask, Tensor columns, Tensor grad_input, Tensor grad_weight,
    Tensor grad_bias, Tensor grad_offset, Tensor grad_mask, Tensor grad_output,
    int kernel_h, int kernel_w, int stride_h, int stride_w, int pad_h,
    int pad_w, int dilation_h, int dilation_w, int group, int deformable_group,
    const bool with_bias) {
  auto input_p = toDiopiTensorHandle(input);
  diopiDevice_t device;
  diopiGetTensorDevice(input_p, &device);
  if (device == diopi_host) {
    modulated_deform_conv_backward_fallthrough(
        input, weight, bias, ones, offset, mask, columns, grad_input,
        grad_weight, grad_bias, grad_offset, grad_mask, grad_output, kernel_h,
        kernel_w, stride_h, stride_w, pad_h, pad_w, dilation_h, dilation_w,
        group, deformable_group, with_bias);
    return;
  }

  diopiContext ctx(dipu::getCurrentDIPUStream().rawstream());
  diopiContextHandle_t ch = &ctx;
  auto weight_p = toDiopiTensorHandle(weight);
  auto bias_p = toDiopiTensorHandle(bias);
  auto ones_p = toDiopiTensorHandle(ones);
  auto offset_p = toDiopiTensorHandle(offset);
  auto mask_p = toDiopiTensorHandle(mask);
  auto columns_p = toDiopiTensorHandle(columns);
  auto grad_input_p = toDiopiTensorHandle(grad_input);
  auto grad_weight_p = toDiopiTensorHandle(grad_weight);
  auto grad_bias_p = toDiopiTensorHandle(grad_bias);
  auto grad_offset_p = toDiopiTensorHandle(grad_offset);
  auto grad_mask_p = toDiopiTensorHandle(grad_mask);
  auto grad_output_p = toDiopiTensorHandle(grad_output);

  // Only call the DIOPI kernel when its address is non-null.
  if (reinterpret_cast<void *>(diopiModulatedDeformConvBackwardMmcv) !=
      nullptr) {
    auto ret = diopiModulatedDeformConvBackwardMmcv(
        ch, grad_input_p, grad_weight_p, grad_bias_p, grad_offset_p,
        grad_mask_p, input_p, weight_p, bias_p, ones_p, offset_p, mask_p,
        columns_p, grad_output_p, kernel_h, kernel_w, stride_h, stride_w,
        pad_h, pad_w, dilation_h, dilation_w, group, deformable_group,
        with_bias);
    if (ret == diopiSuccess) return;
  }

  // The message names the backward op; it previously reported the forward op,
  // which made fallback logs misleading.
  LOG(WARNING)
      << "Fallback to cpu: mmcv ext op modulated_deform_conv_backward";
  auto input_cpu = input.cpu();
  auto weight_cpu = weight.cpu();
  auto bias_cpu = bias.cpu();
  auto ones_cpu = ones.cpu();
  auto offset_cpu = offset.cpu();
  auto mask_cpu = mask.cpu();
  auto columns_cpu = columns.cpu();
  auto grad_input_cpu = grad_input.cpu();
  auto grad_weight_cpu = grad_weight.cpu();
  auto grad_bias_cpu = grad_bias.cpu();
  auto grad_offset_cpu = grad_offset.cpu();
  auto grad_mask_cpu = grad_mask.cpu();
  auto grad_output_cpu = grad_output.cpu();
  modulated_deform_conv_backward_fallthrough(
      input_cpu, weight_cpu, bias_cpu, ones_cpu, offset_cpu, mask_cpu,
      columns_cpu, grad_input_cpu, grad_weight_cpu, grad_bias_cpu,
      grad_offset_cpu, grad_mask_cpu, grad_output_cpu, kernel_h, kernel_w,
      stride_h, stride_w, pad_h, pad_w, dilation_h, dilation_w, group,
      deformable_group, with_bias);
  // Publish every gradient computed on the CPU back to the caller's tensors.
  grad_input.copy_(grad_input_cpu);
  grad_weight.copy_(grad_weight_cpu);
  grad_bias.copy_(grad_bias_cpu);
  grad_offset.copy_(grad_offset_cpu);
  grad_mask.copy_(grad_mask_cpu);
}
#endif
// Public entry point for the modulated deformable convolution forward pass.
//
// Build-time switch: with MMCV_WITH_DIOPI defined the DIOPI dispatcher is
// used, otherwise the call goes to modulated_deform_conv_forward_fallthrough.
// Arguments are forwarded unchanged in both configurations.
void modulated_deform_conv_forward(
    Tensor input, Tensor weight, Tensor bias, Tensor ones, Tensor offset,
    Tensor mask, Tensor output, Tensor columns, int kernel_h, int kernel_w,
    const int stride_h, const int stride_w, const int pad_h, const int pad_w,
    const int dilation_h, const int dilation_w, const int group,
    const int deformable_group, const bool with_bias) {
#ifdef MMCV_WITH_DIOPI
  modulated_deform_conv_forward_diopi(
      input, weight, bias, ones, offset, mask, output, columns,
      kernel_h, kernel_w,
      stride_h, stride_w,
      pad_h, pad_w,
      dilation_h, dilation_w,
      group, deformable_group, with_bias);
#else
  modulated_deform_conv_forward_fallthrough(
      input, weight, bias, ones, offset, mask, output, columns,
      kernel_h, kernel_w,
      stride_h, stride_w,
      pad_h, pad_w,
      dilation_h, dilation_w,
      group, deformable_group, with_bias);
#endif
}
// Public entry point for the modulated deformable convolution backward pass.
//
// Build-time switch: with MMCV_WITH_DIOPI defined the DIOPI dispatcher is
// used, otherwise the call goes to modulated_deform_conv_backward_fallthrough.
// Arguments are forwarded unchanged in both configurations.
void modulated_deform_conv_backward(
    Tensor input, Tensor weight, Tensor bias, Tensor ones, Tensor offset,
    Tensor mask, Tensor columns, Tensor grad_input, Tensor grad_weight,
    Tensor grad_bias, Tensor grad_offset, Tensor grad_mask, Tensor grad_output,
    int kernel_h, int kernel_w, int stride_h, int stride_w, int pad_h,
    int pad_w, int dilation_h, int dilation_w, int group, int deformable_group,
    const bool with_bias) {
#ifdef MMCV_WITH_DIOPI
  modulated_deform_conv_backward_diopi(
      input, weight, bias, ones, offset, mask, columns,
      grad_input, grad_weight, grad_bias, grad_offset, grad_mask, grad_output,
      kernel_h, kernel_w,
      stride_h, stride_w,
      pad_h, pad_w,
      dilation_h, dilation_w,
      group, deformable_group, with_bias);
#else
  modulated_deform_conv_backward_fallthrough(
      input, weight, bias, ones, offset, mask, columns,
      grad_input, grad_weight, grad_bias, grad_offset, grad_mask, grad_output,
      kernel_h, kernel_w,
      stride_h, stride_w,
      pad_h, pad_w,
      dilation_h, dilation_w,
      group, deformable_group, with_bias);
#endif
}
mmcv/ops/csrc/pytorch/nms.cpp
View file @
9b1209fa
// Copyright (c) OpenMMLab. All rights reserved
// Copyright (c) OpenMMLab. All rights reserved
#include "pytorch_cpp_helper.hpp"
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"
#include "pytorch_device_registry.hpp"
#ifdef MMCV_WITH_DIOPI
#include <diopi/diopirt.h>
#include <diopi/functions.h>
#include <diopi/functions_mmcv.h>
#include "csrc_dipu/diopirt/diopirt_impl.h"
using
dipu
::
diopi_helper
::
toDiopiScalar
;
using
dipu
::
diopi_helper
::
toDiopiTensorHandle
;
#endif
Tensor
nms_impl
(
Tensor
boxes
,
Tensor
scores
,
float
iou_threshold
,
int
offset
)
{
Tensor
nms_impl
(
Tensor
boxes
,
Tensor
scores
,
float
iou_threshold
,
int
offset
)
{
return
DISPATCH_DEVICE_IMPL
(
nms_impl
,
boxes
,
scores
,
iou_threshold
,
offset
);
return
DISPATCH_DEVICE_IMPL
(
nms_impl
,
boxes
,
scores
,
iou_threshold
,
offset
);
...
@@ -18,8 +28,41 @@ std::vector<std::vector<int> > nms_match_impl(Tensor dets,
...
@@ -18,8 +28,41 @@ std::vector<std::vector<int> > nms_match_impl(Tensor dets,
return
DISPATCH_DEVICE_IMPL
(
nms_match_impl
,
dets
,
iou_threshold
);
return
DISPATCH_DEVICE_IMPL
(
nms_match_impl
,
dets
,
iou_threshold
);
}
}
#ifdef MMCV_WITH_DIOPI
// DIOPI-backed entry point for NMS.
//
// Tensors whose device DIOPI reports as diopi_host are handled by nms_impl
// directly. Otherwise diopiNmsMmcv is attempted on the current DIPU stream,
// receiving a pointer to an output handle; on diopiSuccess that handle is
// reinterpreted as a Tensor* and the pointed-to tensor is returned. If the
// symbol is null or the call does not succeed, a warning is logged and
// nms_impl is run on CPU copies of `boxes` and `scores`, returning its result
// as-is.
Tensor nms_diopi(Tensor boxes, Tensor scores, float iou_threshold,
                 int offset) {
  auto boxes_handle = toDiopiTensorHandle(boxes);
  diopiDevice_t tensor_device;
  diopiGetTensorDevice(boxes_handle, &tensor_device);
  if (tensor_device == diopi_host) {
    return nms_impl(boxes, scores, iou_threshold, offset);
  }

  diopiContext stream_ctx(dipu::getCurrentDIPUStream().rawstream());
  diopiContextHandle_t ctx_handle = &stream_ctx;

  // Handle for a default-constructed tensor; its address is handed to DIOPI
  // (presumably so the kernel can supply the result tensor -- TODO confirm).
  Tensor result;
  auto result_handle = toDiopiTensorHandle(result);
  diopiTensorHandle_t *result_slot = &result_handle;
  auto scores_handle = toDiopiTensorHandle(scores);

  // Only call the DIOPI kernel when its address is non-null.
  if (reinterpret_cast<void *>(diopiNmsMmcv) != nullptr &&
      diopiNmsMmcv(ctx_handle, result_slot, boxes_handle, scores_handle,
                   iou_threshold, offset) == diopiSuccess) {
    return *reinterpret_cast<Tensor *>(*result_slot);
  }

  LOG(WARNING) << "Fallback to cpu: mmcv ext op nms";
  auto host_boxes = boxes.cpu();
  auto host_scores = scores.cpu();
  return nms_impl(host_boxes, host_scores, iou_threshold, offset);
}
#endif
// Public entry point for NMS.
//
// When built with MMCV_WITH_DIOPI the call is routed through nms_diopi;
// otherwise it goes straight to nms_impl. Arguments are forwarded unchanged
// and the callee's result is returned.
Tensor nms(Tensor boxes, Tensor scores, float iou_threshold, int offset) {
#ifdef MMCV_WITH_DIOPI
  return nms_diopi(boxes, scores, iou_threshold, offset);
#else
  return nms_impl(boxes, scores, iou_threshold, offset);
#endif
}
Tensor
softnms
(
Tensor
boxes
,
Tensor
scores
,
Tensor
dets
,
float
iou_threshold
,
Tensor
softnms
(
Tensor
boxes
,
Tensor
scores
,
Tensor
dets
,
float
iou_threshold
,
...
...
mmcv/ops/csrc/pytorch/roi_align.cpp
View file @
9b1209fa
// Copyright (c) OpenMMLab. All rights reserved
// Copyright (c) OpenMMLab. All rights reserved
#include "pytorch_cpp_helper.hpp"
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"
#include "pytorch_device_registry.hpp"
#ifdef MMCV_WITH_DIOPI
#include <diopi/diopirt.h>
#include <diopi/functions.h>
#include <diopi/functions_mmcv.h>
#include "csrc_dipu/diopirt/diopirt_impl.h"
using
dipu
::
diopi_helper
::
toDiopiScalar
;
using
dipu
::
diopi_helper
::
toDiopiTensorHandle
;
#endif
void
roi_align_forward_impl
(
Tensor
input
,
Tensor
rois
,
Tensor
output
,
void
roi_align_forward_impl
(
Tensor
input
,
Tensor
rois
,
Tensor
output
,
Tensor
argmax_y
,
Tensor
argmax_x
,
Tensor
argmax_y
,
Tensor
argmax_x
,
...
@@ -22,20 +32,111 @@ void roi_align_backward_impl(Tensor grad_output, Tensor rois, Tensor argmax_y,
...
@@ -22,20 +32,111 @@ void roi_align_backward_impl(Tensor grad_output, Tensor rois, Tensor argmax_y,
spatial_scale
,
sampling_ratio
,
pool_mode
,
aligned
);
spatial_scale
,
sampling_ratio
,
pool_mode
,
aligned
);
}
}
#ifdef MMCV_WITH_DIOPI
// DIOPI-backed forward pass of RoIAlign.
//
// Tensors whose device DIOPI reports as diopi_host are handled by
// roi_align_forward_impl directly. Otherwise diopiRoiAlignMmcv is attempted on
// the current DIPU stream; if the symbol is null or the call does not return
// diopiSuccess, a warning is logged and the op runs on CPU copies.
//
// On the CPU fallback, `output`, `argmax_y` and `argmax_x` are all copied
// back: the DIOPI call receives all three as leading output handles, so the
// fallback must leave the caller's tensors in the same state. Previously only
// `output` was copied back.
void roi_align_forward_diopi(Tensor input, Tensor rois, Tensor output,
                             Tensor argmax_y, Tensor argmax_x,
                             int aligned_height, int aligned_width,
                             float spatial_scale, int sampling_ratio,
                             int pool_mode, bool aligned) {
  auto input_p = toDiopiTensorHandle(input);
  diopiDevice_t device;
  diopiGetTensorDevice(input_p, &device);
  if (device == diopi_host) {
    roi_align_forward_impl(input, rois, output, argmax_y, argmax_x,
                           aligned_height, aligned_width, spatial_scale,
                           sampling_ratio, pool_mode, aligned);
    return;
  }

  diopiContext ctx(dipu::getCurrentDIPUStream().rawstream());
  diopiContextHandle_t ch = &ctx;
  auto rois_p = toDiopiTensorHandle(rois);
  auto out_p = toDiopiTensorHandle(output);
  auto argmax_y_p = toDiopiTensorHandle(argmax_y);
  auto argmax_x_p = toDiopiTensorHandle(argmax_x);

  // Only call the DIOPI kernel when its address is non-null. Note the DIOPI
  // argument order differs from the impl: sampling_ratio and pool_mode come
  // before spatial_scale.
  if (reinterpret_cast<void *>(diopiRoiAlignMmcv) != nullptr) {
    auto ret = diopiRoiAlignMmcv(
        ch, out_p, argmax_y_p, argmax_x_p, input_p, rois_p, aligned_height,
        aligned_width, sampling_ratio, pool_mode, spatial_scale, aligned);
    if (ret == diopiSuccess) return;
  }

  LOG(WARNING) << "Fallback to cpu: mmcv ext op roi_align_forward";
  auto input_cpu = input.cpu();
  auto rois_cpu = rois.cpu();
  auto out_cpu = output.cpu();
  auto argmax_y_cpu = argmax_y.cpu();
  auto argmax_x_cpu = argmax_x.cpu();
  roi_align_forward_impl(input_cpu, rois_cpu, out_cpu, argmax_y_cpu,
                         argmax_x_cpu, aligned_height, aligned_width,
                         spatial_scale, sampling_ratio, pool_mode, aligned);
  // Publish every tensor the impl may have written, not just the pooled
  // output.
  output.copy_(out_cpu);
  argmax_y.copy_(argmax_y_cpu);
  argmax_x.copy_(argmax_x_cpu);
}
// DIOPI-backed backward pass of RoIAlign.
//
// If DIOPI reports the device of `grad_output` as diopi_host,
// roi_align_backward_impl is called directly. Otherwise
// diopiRoiAlignBackwardMmcv is attempted on the current DIPU stream; if the
// symbol is null or the call does not return diopiSuccess, a warning is
// logged, the op runs on CPU copies, and `grad_input` receives the CPU result.
void roi_align_backward_diopi(Tensor grad_output, Tensor rois, Tensor argmax_y,
                              Tensor argmax_x, Tensor grad_input,
                              int aligned_height, int aligned_width,
                              float spatial_scale, int sampling_ratio,
                              int pool_mode, bool aligned) {
  auto grad_out_handle = toDiopiTensorHandle(grad_output);
  diopiDevice_t tensor_device;
  diopiGetTensorDevice(grad_out_handle, &tensor_device);
  if (tensor_device == diopi_host) {
    roi_align_backward_impl(grad_output, rois, argmax_y, argmax_x, grad_input,
                            aligned_height, aligned_width, spatial_scale,
                            sampling_ratio, pool_mode, aligned);
    return;
  }

  auto rois_handle = toDiopiTensorHandle(rois);
  auto argmax_y_handle = toDiopiTensorHandle(argmax_y);
  auto argmax_x_handle = toDiopiTensorHandle(argmax_x);
  auto grad_in_handle = toDiopiTensorHandle(grad_input);
  diopiContext stream_ctx(dipu::getCurrentDIPUStream().rawstream());
  diopiContextHandle_t ctx_handle = &stream_ctx;

  // Only call the DIOPI kernel when its address is non-null. The DIOPI call
  // takes sampling_ratio and pool_mode before spatial_scale.
  if (reinterpret_cast<void *>(diopiRoiAlignBackwardMmcv) != nullptr &&
      diopiRoiAlignBackwardMmcv(ctx_handle, grad_in_handle, grad_out_handle,
                                rois_handle, argmax_y_handle, argmax_x_handle,
                                aligned_height, aligned_width, sampling_ratio,
                                pool_mode, spatial_scale,
                                aligned) == diopiSuccess) {
    return;
  }

  LOG(WARNING) << "Fallback to cpu: mmcv ext op roi_align_backward";
  auto host_grad_output = grad_output.cpu();
  auto host_rois = rois.cpu();
  auto host_argmax_y = argmax_y.cpu();
  auto host_argmax_x = argmax_x.cpu();
  auto host_grad_input = grad_input.cpu();
  roi_align_backward_impl(host_grad_output, host_rois, host_argmax_y,
                          host_argmax_x, host_grad_input, aligned_height,
                          aligned_width, spatial_scale, sampling_ratio,
                          pool_mode, aligned);
  // Copy the CPU gradient back into the caller's tensor.
  grad_input.copy_(host_grad_input);
}
#endif
void
roi_align_forward
(
Tensor
input
,
Tensor
rois
,
Tensor
output
,
void
roi_align_forward
(
Tensor
input
,
Tensor
rois
,
Tensor
output
,
Tensor
argmax_y
,
Tensor
argmax_x
,
int
aligned_height
,
Tensor
argmax_y
,
Tensor
argmax_x
,
int
aligned_height
,
int
aligned_width
,
float
spatial_scale
,
int
aligned_width
,
float
spatial_scale
,
int
sampling_ratio
,
int
pool_mode
,
bool
aligned
)
{
int
sampling_ratio
,
int
pool_mode
,
bool
aligned
)
{
#ifdef MMCV_WITH_DIOPI
roi_align_forward_diopi
(
input
,
rois
,
output
,
argmax_y
,
argmax_x
,
aligned_height
,
aligned_width
,
spatial_scale
,
sampling_ratio
,
pool_mode
,
aligned
);
#else
roi_align_forward_impl
(
input
,
rois
,
output
,
argmax_y
,
argmax_x
,
roi_align_forward_impl
(
input
,
rois
,
output
,
argmax_y
,
argmax_x
,
aligned_height
,
aligned_width
,
spatial_scale
,
aligned_height
,
aligned_width
,
spatial_scale
,
sampling_ratio
,
pool_mode
,
aligned
);
sampling_ratio
,
pool_mode
,
aligned
);
#endif
}
}
void
roi_align_backward
(
Tensor
grad_output
,
Tensor
rois
,
Tensor
argmax_y
,
void
roi_align_backward
(
Tensor
grad_output
,
Tensor
rois
,
Tensor
argmax_y
,
Tensor
argmax_x
,
Tensor
grad_input
,
int
aligned_height
,
Tensor
argmax_x
,
Tensor
grad_input
,
int
aligned_height
,
int
aligned_width
,
float
spatial_scale
,
int
aligned_width
,
float
spatial_scale
,
int
sampling_ratio
,
int
pool_mode
,
bool
aligned
)
{
int
sampling_ratio
,
int
pool_mode
,
bool
aligned
)
{
#ifdef MMCV_WITH_DIOPI
roi_align_backward_diopi
(
grad_output
,
rois
,
argmax_y
,
argmax_x
,
grad_input
,
aligned_height
,
aligned_width
,
spatial_scale
,
sampling_ratio
,
pool_mode
,
aligned
);
#else
roi_align_backward_impl
(
grad_output
,
rois
,
argmax_y
,
argmax_x
,
grad_input
,
roi_align_backward_impl
(
grad_output
,
rois
,
argmax_y
,
argmax_x
,
grad_input
,
aligned_height
,
aligned_width
,
spatial_scale
,
aligned_height
,
aligned_width
,
spatial_scale
,
sampling_ratio
,
pool_mode
,
aligned
);
sampling_ratio
,
pool_mode
,
aligned
);
#endif
}
}
mmcv/ops/csrc/pytorch/voxelization.cpp
View file @
9b1209fa
// Copyright (c) OpenMMLab. All rights reserved.
// Copyright (c) OpenMMLab. All rights reserved.
#include "pytorch_cpp_helper.hpp"
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"
#include "pytorch_device_registry.hpp"
#ifdef MMCV_WITH_DIOPI
#include <diopi/diopirt.h>
#include <diopi/functions.h>
#include <diopi/functions_mmcv.h>
#include "csrc_dipu/diopirt/diopirt_impl.h"
using
dipu
::
diopi_helper
::
toDiopiScalar
;
using
dipu
::
diopi_helper
::
toDiopiTensorHandle
;
#endif
int
hard_voxelize_forward_impl
(
const
at
::
Tensor
&
points
,
at
::
Tensor
&
voxels
,
int
hard_voxelize_forward_impl
(
const
at
::
Tensor
&
points
,
at
::
Tensor
&
voxels
,
at
::
Tensor
&
coors
,
at
::
Tensor
&
coors
,
...
@@ -33,6 +43,132 @@ void dynamic_voxelize_forward_impl(const at::Tensor &points, at::Tensor &coors,
...
@@ -33,6 +43,132 @@ void dynamic_voxelize_forward_impl(const at::Tensor &points, at::Tensor &coors,
coors_range
,
NDim
);
coors_range
,
NDim
);
}
}
#ifdef MMCV_WITH_DIOPI
// DIOPI-backed entry point for hard voxelization.
//
// Dispatch order:
//   1. If DIOPI reports the device of `points` as diopi_host, run
//      hard_voxelize_forward_impl on the given tensors and store its return
//      value through voxel_num's int64 data pointer.
//   2. Otherwise try diopiHardVoxelizeMmcv on the current DIPU stream.
//   3. If that symbol is null or the call does not return diopiSuccess, log a
//      warning, run hard_voxelize_forward_impl on CPU copies and copy voxels,
//      coors, num_points_per_voxel and voxel_num back.
//
// Both CPU paths only support deterministic == true. A non-deterministic
// request is rejected with TORCH_CHECK on both paths; the fallback path
// previously printed the message with puts() and called abort(), which killed
// the whole process instead of raising an error the caller can handle.
void hard_voxelize_forward_diopi(const at::Tensor &points,
                                 const at::Tensor &voxel_size,
                                 const at::Tensor &coors_range,
                                 at::Tensor &voxels, at::Tensor &coors,
                                 at::Tensor &num_points_per_voxel,
                                 at::Tensor &voxel_num, const int max_points,
                                 const int max_voxels, const int NDim = 3,
                                 const bool deterministic = true) {
  auto points_p = toDiopiTensorHandle(points);
  diopiDevice_t device;
  diopiGetTensorDevice(points_p, &device);
  if (device == diopi_host) {
    TORCH_CHECK(
        deterministic,
        "nondeterministic hard_voxelize_forward is not supported on host!");
    int64_t *voxel_num_data = voxel_num.data_ptr<int64_t>();
    // The impl takes voxel_size / coors_range as std::vector<float>, so the
    // tensor contents are copied out element by element.
    std::vector<float> voxel_size_v(
        voxel_size.data_ptr<float>(),
        voxel_size.data_ptr<float>() + voxel_size.numel());
    std::vector<float> coors_range_v(
        coors_range.data_ptr<float>(),
        coors_range.data_ptr<float>() + coors_range.numel());
    *voxel_num_data = hard_voxelize_forward_impl(
        points, voxels, coors, num_points_per_voxel, voxel_size_v,
        coors_range_v, max_points, max_voxels, NDim);
    return;
  }

  diopiContext ctx(dipu::getCurrentDIPUStream().rawstream());
  diopiContextHandle_t ch = &ctx;
  auto voxel_size_p = toDiopiTensorHandle(voxel_size);
  auto coors_range_p = toDiopiTensorHandle(coors_range);
  auto voxels_p = toDiopiTensorHandle(voxels);
  auto coors_p = toDiopiTensorHandle(coors);
  auto num_points_per_voxel_p = toDiopiTensorHandle(num_points_per_voxel);
  auto voxel_num_p = toDiopiTensorHandle(voxel_num);

  // Only call the DIOPI kernel when its address is non-null.
  if (reinterpret_cast<void *>(diopiHardVoxelizeMmcv) != nullptr) {
    auto ret = diopiHardVoxelizeMmcv(
        ch, voxels_p, coors_p, num_points_per_voxel_p, voxel_num_p, points_p,
        voxel_size_p, coors_range_p, max_points, max_voxels, NDim,
        deterministic);
    if (ret == diopiSuccess) return;
  }

  LOG(WARNING) << "Fallback to cpu: mmcv ext op hard_voxelize_forward";
  // Reject the unsupported mode before paying for device-to-host copies.
  TORCH_CHECK(
      deterministic,
      "nondeterministic hard_voxelize_forward is not supported on host!");
  auto points_cpu = points.cpu();
  auto voxel_size_cpu = voxel_size.cpu();
  auto coors_range_cpu = coors_range.cpu();
  auto voxels_cpu = voxels.cpu();
  auto coors_cpu = coors.cpu();
  auto num_points_per_voxel_cpu = num_points_per_voxel.cpu();
  auto voxel_num_cpu = voxel_num.cpu();

  int64_t *voxel_num_data_cpu = voxel_num_cpu.data_ptr<int64_t>();
  std::vector<float> voxel_size_v_cpu(
      voxel_size_cpu.data_ptr<float>(),
      voxel_size_cpu.data_ptr<float>() + voxel_size_cpu.numel());
  std::vector<float> coors_range_v_cpu(
      coors_range_cpu.data_ptr<float>(),
      coors_range_cpu.data_ptr<float>() + coors_range_cpu.numel());
  *voxel_num_data_cpu = hard_voxelize_forward_impl(
      points_cpu, voxels_cpu, coors_cpu, num_points_per_voxel_cpu,
      voxel_size_v_cpu, coors_range_v_cpu, max_points, max_voxels, NDim);

  // Publish every output computed on the CPU back to the caller's tensors.
  voxels.copy_(voxels_cpu);
  coors.copy_(coors_cpu);
  num_points_per_voxel.copy_(num_points_per_voxel_cpu);
  voxel_num.copy_(voxel_num_cpu);
}
// DIOPI-backed entry point for dynamic voxelization.
//
// If DIOPI reports the device of `points` as diopi_host,
// dynamic_voxelize_forward_impl is run on the given tensors. Otherwise
// diopiDynamicVoxelizeMmcv is attempted on the current DIPU stream; if the
// symbol is null or the call does not return diopiSuccess, a warning is
// logged, the impl runs on CPU copies, and `coors` receives the CPU result.
void dynamic_voxelize_forward_diopi(const at::Tensor &points,
                                    const at::Tensor &voxel_size,
                                    const at::Tensor &coors_range,
                                    at::Tensor &coors, const int NDim = 3) {
  auto points_handle = toDiopiTensorHandle(points);
  diopiDevice_t tensor_device;
  diopiGetTensorDevice(points_handle, &tensor_device);
  if (tensor_device == diopi_host) {
    // The impl takes voxel_size / coors_range as std::vector<float>.
    const float *size_begin = voxel_size.data_ptr<float>();
    std::vector<float> size_values(size_begin,
                                   size_begin + voxel_size.numel());
    const float *range_begin = coors_range.data_ptr<float>();
    std::vector<float> range_values(range_begin,
                                    range_begin + coors_range.numel());
    dynamic_voxelize_forward_impl(points, coors, size_values, range_values,
                                  NDim);
    return;
  }

  diopiContext stream_ctx(dipu::getCurrentDIPUStream().rawstream());
  diopiContextHandle_t ctx_handle = &stream_ctx;
  auto size_handle = toDiopiTensorHandle(voxel_size);
  auto range_handle = toDiopiTensorHandle(coors_range);
  auto coors_handle = toDiopiTensorHandle(coors);

  // Only call the DIOPI kernel when its address is non-null.
  if (reinterpret_cast<void *>(diopiDynamicVoxelizeMmcv) != nullptr &&
      diopiDynamicVoxelizeMmcv(ctx_handle, coors_handle, points_handle,
                               size_handle, range_handle,
                               NDim) == diopiSuccess) {
    return;
  }

  LOG(WARNING) << "Fallback to cpu: mmcv ext op dynamic_voxelize_forward";
  auto host_points = points.cpu();
  auto host_voxel_size = voxel_size.cpu();
  auto host_coors_range = coors_range.cpu();
  auto host_coors = coors.cpu();

  const float *host_size_begin = host_voxel_size.data_ptr<float>();
  std::vector<float> host_size_values(
      host_size_begin, host_size_begin + host_voxel_size.numel());
  const float *host_range_begin = host_coors_range.data_ptr<float>();
  std::vector<float> host_range_values(
      host_range_begin, host_range_begin + host_coors_range.numel());

  dynamic_voxelize_forward_impl(host_points, host_coors, host_size_values,
                                host_range_values, NDim);
  // Copy the CPU result back into the caller's tensor.
  coors.copy_(host_coors);
}
#endif
void
hard_voxelize_forward
(
const
at
::
Tensor
&
points
,
void
hard_voxelize_forward
(
const
at
::
Tensor
&
points
,
const
at
::
Tensor
&
voxel_size
,
const
at
::
Tensor
&
voxel_size
,
const
at
::
Tensor
&
coors_range
,
at
::
Tensor
&
voxels
,
const
at
::
Tensor
&
coors_range
,
at
::
Tensor
&
voxels
,
...
@@ -40,6 +176,11 @@ void hard_voxelize_forward(const at::Tensor &points,
...
@@ -40,6 +176,11 @@ void hard_voxelize_forward(const at::Tensor &points,
at
::
Tensor
&
voxel_num
,
const
int
max_points
,
at
::
Tensor
&
voxel_num
,
const
int
max_points
,
const
int
max_voxels
,
const
int
NDim
=
3
,
const
int
max_voxels
,
const
int
NDim
=
3
,
const
bool
deterministic
=
true
)
{
const
bool
deterministic
=
true
)
{
#ifdef MMCV_WITH_DIOPI
hard_voxelize_forward_diopi
(
points
,
voxel_size
,
coors_range
,
voxels
,
coors
,
num_points_per_voxel
,
voxel_num
,
max_points
,
max_voxels
,
NDim
,
deterministic
);
#else
int64_t
*
voxel_num_data
=
voxel_num
.
data_ptr
<
int64_t
>
();
int64_t
*
voxel_num_data
=
voxel_num
.
data_ptr
<
int64_t
>
();
std
::
vector
<
float
>
voxel_size_v
(
std
::
vector
<
float
>
voxel_size_v
(
voxel_size
.
data_ptr
<
float
>
(),
voxel_size
.
data_ptr
<
float
>
(),
...
@@ -57,12 +198,16 @@ void hard_voxelize_forward(const at::Tensor &points,
...
@@ -57,12 +198,16 @@ void hard_voxelize_forward(const at::Tensor &points,
points
,
voxels
,
coors
,
num_points_per_voxel
,
voxel_size_v
,
points
,
voxels
,
coors
,
num_points_per_voxel
,
voxel_size_v
,
coors_range_v
,
max_points
,
max_voxels
,
NDim
);
coors_range_v
,
max_points
,
max_voxels
,
NDim
);
}
}
#endif
}
}
void
dynamic_voxelize_forward
(
const
at
::
Tensor
&
points
,
void
dynamic_voxelize_forward
(
const
at
::
Tensor
&
points
,
const
at
::
Tensor
&
voxel_size
,
const
at
::
Tensor
&
voxel_size
,
const
at
::
Tensor
&
coors_range
,
at
::
Tensor
&
coors
,
const
at
::
Tensor
&
coors_range
,
at
::
Tensor
&
coors
,
const
int
NDim
=
3
)
{
const
int
NDim
=
3
)
{
#ifdef MMCV_WITH_DIOPI
dynamic_voxelize_forward_diopi
(
points
,
voxel_size
,
coors_range
,
coors
,
NDim
);
#else
std
::
vector
<
float
>
voxel_size_v
(
std
::
vector
<
float
>
voxel_size_v
(
voxel_size
.
data_ptr
<
float
>
(),
voxel_size
.
data_ptr
<
float
>
(),
voxel_size
.
data_ptr
<
float
>
()
+
voxel_size
.
numel
());
voxel_size
.
data_ptr
<
float
>
()
+
voxel_size
.
numel
());
...
@@ -71,4 +216,5 @@ void dynamic_voxelize_forward(const at::Tensor &points,
...
@@ -71,4 +216,5 @@ void dynamic_voxelize_forward(const at::Tensor &points,
coors_range
.
data_ptr
<
float
>
()
+
coors_range
.
numel
());
coors_range
.
data_ptr
<
float
>
()
+
coors_range
.
numel
());
dynamic_voxelize_forward_impl
(
points
,
coors
,
voxel_size_v
,
coors_range_v
,
dynamic_voxelize_forward_impl
(
points
,
coors
,
voxel_size_v
,
coors_range_v
,
NDim
);
NDim
);
#endif
}
}
setup.py
View file @
9b1209fa
...
@@ -210,6 +210,8 @@ def get_extensions():
...
@@ -210,6 +210,8 @@ def get_extensions():
extra_compile_args
[
'cxx'
]
=
[
'/std:c++14'
]
extra_compile_args
[
'cxx'
]
=
[
'/std:c++14'
]
include_dirs
=
[]
include_dirs
=
[]
library_dirs
=
[]
libraries
=
[]
extra_objects
=
[]
extra_objects
=
[]
extra_link_args
=
[]
extra_link_args
=
[]
...
@@ -221,7 +223,34 @@ def get_extensions():
...
@@ -221,7 +223,34 @@ def get_extensions():
except
ImportError
:
except
ImportError
:
pass
pass
if
is_rocm_pytorch
or
torch
.
cuda
.
is_available
()
or
os
.
getenv
(
if
os
.
getenv
(
'MMCV_WITH_DIOPI'
,
'0'
)
==
'1'
:
import
mmengine
# NOQA: F401
from
mmengine.utils.version_utils
import
digit_version
assert
digit_version
(
mmengine
.
__version__
)
>=
digit_version
(
'0.7.4'
),
f
'mmengine >= 0.7.4 is required
\
but
{
mmengine
.
__version__
}
is installed'
print
(
f
'Compiling
{
ext_name
}
with CPU and DIPU'
)
define_macros
+=
[(
'MMCV_WITH_DIOPI'
,
None
)]
define_macros
+=
[(
'DIOPI_ATTR_WEAK'
,
None
)]
op_files
=
glob
.
glob
(
'./mmcv/ops/csrc/pytorch/*.cpp'
)
+
\
glob
.
glob
(
'./mmcv/ops/csrc/pytorch/cpu/*.cpp'
)
extension
=
CppExtension
include_dirs
.
append
(
os
.
path
.
abspath
(
'./mmcv/ops/csrc/common'
))
dipu_root
=
os
.
getenv
(
'DIPU_ROOT'
)
diopi_path
=
os
.
getenv
(
'DIOPI_PATH'
)
dipu_path
=
os
.
getenv
(
'DIPU_PATH'
)
vendor_include_dirs
=
os
.
getenv
(
'VENDOR_INCLUDE_DIRS'
)
nccl_include_dirs
=
os
.
getenv
(
'NCCL_INCLUDE_DIRS'
)
include_dirs
.
append
(
dipu_root
)
include_dirs
.
append
(
diopi_path
+
'/include'
)
include_dirs
.
append
(
dipu_path
+
'/dist/include'
)
include_dirs
.
append
(
vendor_include_dirs
)
if
nccl_include_dirs
:
include_dirs
.
append
(
nccl_include_dirs
)
library_dirs
+=
[
dipu_root
]
libraries
+=
[
'torch_dipu'
]
elif
is_rocm_pytorch
or
torch
.
cuda
.
is_available
()
or
os
.
getenv
(
'FORCE_CUDA'
,
'0'
)
==
'1'
:
'FORCE_CUDA'
,
'0'
)
==
'1'
:
if
is_rocm_pytorch
:
if
is_rocm_pytorch
:
define_macros
+=
[(
'MMCV_WITH_HIP'
,
None
)]
define_macros
+=
[(
'MMCV_WITH_HIP'
,
None
)]
...
@@ -398,6 +427,8 @@ def get_extensions():
...
@@ -398,6 +427,8 @@ def get_extensions():
define_macros
=
define_macros
,
define_macros
=
define_macros
,
extra_objects
=
extra_objects
,
extra_objects
=
extra_objects
,
extra_compile_args
=
extra_compile_args
,
extra_compile_args
=
extra_compile_args
,
library_dirs
=
library_dirs
,
libraries
=
libraries
,
extra_link_args
=
extra_link_args
)
extra_link_args
=
extra_link_args
)
extensions
.
append
(
ext_ops
)
extensions
.
append
(
ext_ops
)
return
extensions
return
extensions
...
...
tests/test_ops/test_modulated_deform_conv.py
View file @
9b1209fa
...
@@ -7,6 +7,8 @@ import torch
...
@@ -7,6 +7,8 @@ import torch
from
mmengine.utils
import
digit_version
from
mmengine.utils
import
digit_version
from
mmengine.utils.dl_utils
import
TORCH_VERSION
from
mmengine.utils.dl_utils
import
TORCH_VERSION
from
mmcv.utils
import
IS_CUDA_AVAILABLE
try
:
try
:
# If PyTorch version >= 1.6.0 and fp16 is enabled, torch.cuda.amp.autocast
# If PyTorch version >= 1.6.0 and fp16 is enabled, torch.cuda.amp.autocast
# would be imported and used; we should test if our modules support it.
# would be imported and used; we should test if our modules support it.
...
@@ -111,13 +113,28 @@ class TestMdconv:
...
@@ -111,13 +113,28 @@ class TestMdconv:
assert
numpy
.
allclose
(
dcn
.
conv_offset
.
bias
.
grad
.
cpu
().
detach
().
numpy
(),
assert
numpy
.
allclose
(
dcn
.
conv_offset
.
bias
.
grad
.
cpu
().
detach
().
numpy
(),
dcn_offset_b_grad
,
1e-2
)
dcn_offset_b_grad
,
1e-2
)
def
test_mdconv
(
self
):
@
pytest
.
mark
.
parametrize
(
'device'
,
[
self
.
_test_mdconv
(
torch
.
double
,
device
=
'cpu'
)
'cpu'
,
self
.
_test_mdconv
(
torch
.
float
,
device
=
'cpu'
)
pytest
.
param
(
self
.
_test_mdconv
(
torch
.
double
)
'cuda'
,
self
.
_test_mdconv
(
torch
.
float
)
marks
=
pytest
.
mark
.
skipif
(
not
IS_CUDA_AVAILABLE
,
reason
=
'requires CUDA support'
)),
])
def
test_mdconv_float
(
self
,
device
):
self
.
_test_mdconv
(
dtype
=
torch
.
float
,
device
=
device
)
@
pytest
.
mark
.
parametrize
(
'device'
,
[
'cpu'
,
pytest
.
param
(
'cuda'
,
marks
=
pytest
.
mark
.
skipif
(
not
IS_CUDA_AVAILABLE
,
reason
=
'requires CUDA support'
)),
])
def
test_mdconv_double
(
self
,
device
):
self
.
_test_mdconv
(
dtype
=
torch
.
double
,
device
=
device
)
def
test_mdconv_half
(
self
):
self
.
_test_mdconv
(
torch
.
half
)
self
.
_test_mdconv
(
torch
.
half
)
# test amp when torch version >= '1.6.0', the type of
# test amp when torch version >= '1.6.0', the type of
# input data for mdconv might be torch.float or torch.half
# input data for mdconv might be torch.float or torch.half
if
(
TORCH_VERSION
!=
'parrots'
if
(
TORCH_VERSION
!=
'parrots'
...
...
tests/test_ops/test_roi_align.py
View file @
9b1209fa
...
@@ -93,15 +93,7 @@ def _test_roialign_allclose(device, dtype):
...
@@ -93,15 +93,7 @@ def _test_roialign_allclose(device, dtype):
x
.
grad
.
data
.
type
(
torch
.
float
).
cpu
().
numpy
(),
np_grad
,
atol
=
1e-3
)
x
.
grad
.
data
.
type
(
torch
.
float
).
cpu
().
numpy
(),
np_grad
,
atol
=
1e-3
)
@
pytest
.
mark
.
parametrize
(
'dtype'
,
[
@
pytest
.
mark
.
parametrize
(
'dtype'
,
[
torch
.
float
,
torch
.
half
])
torch
.
float
,
pytest
.
param
(
torch
.
double
,
marks
=
pytest
.
mark
.
skipif
(
IS_MLU_AVAILABLE
or
IS_NPU_AVAILABLE
,
reason
=
'MLU and NPU do not support for 64-bit floating point'
)),
torch
.
half
])
@
pytest
.
mark
.
parametrize
(
'device'
,
[
@
pytest
.
mark
.
parametrize
(
'device'
,
[
'cpu'
,
'cpu'
,
pytest
.
param
(
pytest
.
param
(
...
@@ -117,8 +109,17 @@ def _test_roialign_allclose(device, dtype):
...
@@ -117,8 +109,17 @@ def _test_roialign_allclose(device, dtype):
marks
=
pytest
.
mark
.
skipif
(
marks
=
pytest
.
mark
.
skipif
(
not
IS_NPU_AVAILABLE
,
reason
=
'requires NPU support'
))
not
IS_NPU_AVAILABLE
,
reason
=
'requires NPU support'
))
])
])
def
test_roialign
(
device
,
dtype
):
def
test_roialign_float
(
device
,
dtype
):
# check double only
if
dtype
is
torch
.
double
:
_test_roialign_gradcheck
(
device
=
device
,
dtype
=
dtype
)
_test_roialign_allclose
(
device
=
device
,
dtype
=
dtype
)
_test_roialign_allclose
(
device
=
device
,
dtype
=
dtype
)
@
pytest
.
mark
.
parametrize
(
'device'
,
[
'cpu'
,
pytest
.
param
(
'cuda'
,
marks
=
pytest
.
mark
.
skipif
(
not
IS_CUDA_AVAILABLE
,
reason
=
'requires CUDA support'
)),
])
def
test_roialign_float64
(
device
):
_test_roialign_allclose
(
device
=
device
,
dtype
=
torch
.
double
)
_test_roialign_gradcheck
(
device
=
device
,
dtype
=
torch
.
double
)
tests/test_ops/test_voxelization.py
View file @
9b1209fa
...
@@ -139,12 +139,20 @@ def test_voxelization_nondeterministic():
...
@@ -139,12 +139,20 @@ def test_voxelization_nondeterministic():
assert
len
(
coors_set
)
==
len
(
coors
)
==
len
(
coors_all_set
)
assert
len
(
coors_set
)
==
len
(
coors
)
==
len
(
coors_all_set
)
@
pytest
.
mark
.
parametrize
(
'device_type'
,
[
@
pytest
.
mark
.
parametrize
(
'device_type'
,
[
pytest
.
param
(
# this is only used for dipu device testing case.
# dipu will mock to cuda automatically on mlu physical device.
'cuda:0'
,
marks
=
pytest
.
mark
.
skipif
(
not
IS_CUDA_AVAILABLE
,
reason
=
'requires CUDA support'
)),
pytest
.
param
(
pytest
.
param
(
'mlu'
,
'mlu'
,
marks
=
pytest
.
mark
.
skipif
(
marks
=
pytest
.
mark
.
skipif
(
not
IS_MLU_AVAILABLE
,
reason
=
'requires MLU support'
))
not
IS_MLU_AVAILABLE
,
reason
=
'requires MLU support'
))
])
])
def
test_voxelization_mlu
(
device_type
):
def
test_voxelization_mlu
(
device_type
):
voxel_size
=
[
0.5
,
0.5
,
0.5
]
voxel_size
=
[
0.5
,
0.5
,
0.5
]
point_cloud_range
=
[
0
,
-
40
,
-
3
,
70.4
,
40
,
1
]
point_cloud_range
=
[
0
,
-
40
,
-
3
,
70.4
,
40
,
1
]
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment