OpenDAS / MMCV / Commits / 91da9643

Commit 91da9643, authored Aug 13, 2024 by limm

    support v2.1.0

Parent: 6f674c7e
Changes: 139 files in total. Showing 20 changed files with 229 additions and 2784 deletions (+229 / -2784).
mmcv/cnn/bricks/conv_module.py                                +129    -3
mmcv/cnn/bricks/generalized_attention.py                        +1    -1
mmcv/cnn/bricks/norm.py                                        +12    -9
mmcv/cnn/bricks/padding.py                                      +3    -1
mmcv/cnn/bricks/plugin.py                                      +12    -9
mmcv/cnn/bricks/upsample.py                                    +11    -7
mmcv/cnn/bricks/wrappers.py                                     +7    -7
mmcv/image/geometric.py                                        +18   -16
mmcv/ops/__init__.py                                            +7    -0
mmcv/ops/bbox.py                                                +4    -0
mmcv/ops/box_iou_rotated.py                                     +9    -1
mmcv/ops/conv2d_gradfix.py                                      +8   -16
mmcv/ops/corner_pool.py                                         +4    -2
mmcv/ops/csrc/common/cuda/carafe_cuda_kernel.cuh                +4    -1
mmcv/ops/csrc/common/mlu/bbox_overlaps_mlu_kernel.mlu           +0  -322
mmcv/ops/csrc/common/mlu/carafe_mlu_kernel.mlu                  +0  -552
mmcv/ops/csrc/common/mlu/carafe_utils.hpp                       +0   -95
mmcv/ops/csrc/common/mlu/common_mlu_helper.hpp                  +0  -142
mmcv/ops/csrc/common/mlu/deform_roi_pool_mlu_kernel.mlu         +0  -712
mmcv/ops/csrc/common/mlu/focal_loss_sigmoid_mlu_kernel.mlu      +0  -888
mmcv/cnn/bricks/conv_module.py

# Copyright (c) OpenMMLab. All rights reserved.
import warnings
+from functools import partial
from typing import Dict, Optional, Tuple, Union

import torch
...
@@ -14,6 +15,56 @@ from .norm import build_norm_layer
from .padding import build_padding_layer


+def efficient_conv_bn_eval_forward(bn: _BatchNorm,
+                                   conv: nn.modules.conv._ConvNd,
+                                   x: torch.Tensor):
+    """
+    Implementation based on https://arxiv.org/abs/2305.11624
+    "Tune-Mode ConvBN Blocks For Efficient Transfer Learning"
+    It leverages the associative law between convolution and affine transform,
+    i.e., normalize (weight conv feature) = (normalize weight) conv feature.
+    It works for Eval mode of ConvBN blocks during validation, and can be used
+    for training as well. It reduces memory and computation cost.
+
+    Args:
+        bn (_BatchNorm): a BatchNorm module.
+        conv (nn._ConvNd): a conv module
+        x (torch.Tensor): Input feature map.
+    """
+    # These lines of code are designed to deal with various cases
+    # like bn without affine transform, and conv without bias
+    weight_on_the_fly = conv.weight
+    if conv.bias is not None:
+        bias_on_the_fly = conv.bias
+    else:
+        bias_on_the_fly = torch.zeros_like(bn.running_var)
+
+    if bn.weight is not None:
+        bn_weight = bn.weight
+    else:
+        bn_weight = torch.ones_like(bn.running_var)
+
+    if bn.bias is not None:
+        bn_bias = bn.bias
+    else:
+        bn_bias = torch.zeros_like(bn.running_var)
+
+    # shape of [C_out, 1, 1, 1] in Conv2d
+    weight_coeff = torch.rsqrt(bn.running_var + bn.eps).reshape(
+        [-1] + [1] * (len(conv.weight.shape) - 1))
+    # shape of [C_out, 1, 1, 1] in Conv2d
+    coefff_on_the_fly = bn_weight.view_as(weight_coeff) * weight_coeff
+
+    # shape of [C_out, C_in, k, k] in Conv2d
+    weight_on_the_fly = weight_on_the_fly * coefff_on_the_fly
+    # shape of [C_out] in Conv2d
+    bias_on_the_fly = bn_bias + coefff_on_the_fly.flatten() * \
+        (bias_on_the_fly - bn.running_mean)
+
+    return conv._conv_forward(x, weight_on_the_fly, bias_on_the_fly)
+
+
@MODELS.register_module()
class ConvModule(nn.Module):
    """A conv block that bundles conv/norm/activation layers.
...
@@ -65,6 +116,9 @@ class ConvModule(nn.Module):
            sequence of "conv", "norm" and "act". Common examples are
            ("conv", "norm", "act") and ("act", "conv", "norm").
            Default: ('conv', 'norm', 'act').
+        efficient_conv_bn_eval (bool): Whether use efficient conv when the
+            consecutive bn is in eval mode (either training or testing), as
+            proposed in https://arxiv.org/abs/2305.11624 . Default: `False`.
    """

    _abbr_ = 'conv_block'
...
@@ -84,7 +138,8 @@ class ConvModule(nn.Module):
                 inplace: bool = True,
                 with_spectral_norm: bool = False,
                 padding_mode: str = 'zeros',
-                 order: tuple = ('conv', 'norm', 'act')):
+                 order: tuple = ('conv', 'norm', 'act'),
+                 efficient_conv_bn_eval: bool = False):
        super().__init__()
        assert conv_cfg is None or isinstance(conv_cfg, dict)
        assert norm_cfg is None or isinstance(norm_cfg, dict)
...
@@ -155,6 +210,8 @@ class ConvModule(nn.Module):
        else:
            self.norm_name = None  # type: ignore

+        self.turn_on_efficient_conv_bn_eval(efficient_conv_bn_eval)
+
        # build activation layer
        if self.with_activation:
            act_cfg_ = act_cfg.copy()  # type: ignore
...
@@ -200,13 +257,82 @@ class ConvModule(nn.Module):
                x: torch.Tensor,
                activate: bool = True,
                norm: bool = True) -> torch.Tensor:
-        for layer in self.order:
+        layer_index = 0
+        while layer_index < len(self.order):
+            layer = self.order[layer_index]
            if layer == 'conv':
                if self.with_explicit_padding:
                    x = self.padding_layer(x)
-                x = self.conv(x)
+                # if the next operation is norm and we have a norm layer in
+                # eval mode and we have enabled `efficient_conv_bn_eval` for
+                # the conv operator, then activate the optimized forward and
+                # skip the next norm operator since it has been fused
+                if layer_index + 1 < len(self.order) and \
+                        self.order[layer_index + 1] == 'norm' and norm and \
+                        self.with_norm and not self.norm.training and \
+                        self.efficient_conv_bn_eval_forward is not None:
+                    self.conv.forward = partial(
+                        self.efficient_conv_bn_eval_forward, self.norm,
+                        self.conv)
+                    layer_index += 1
+                    x = self.conv(x)
+                    del self.conv.forward
+                else:
+                    x = self.conv(x)
            elif layer == 'norm' and norm and self.with_norm:
                x = self.norm(x)
            elif layer == 'act' and activate and self.with_activation:
                x = self.activate(x)
+            layer_index += 1
        return x

+    def turn_on_efficient_conv_bn_eval(self, efficient_conv_bn_eval=True):
+        # efficient_conv_bn_eval works for conv + bn
+        # with `track_running_stats` option
+        if efficient_conv_bn_eval and self.norm \
+                and isinstance(self.norm, _BatchNorm) \
+                and self.norm.track_running_stats:
+            self.efficient_conv_bn_eval_forward = efficient_conv_bn_eval_forward  # noqa: E501
+        else:
+            self.efficient_conv_bn_eval_forward = None  # type: ignore
+
+    @staticmethod
+    def create_from_conv_bn(conv: torch.nn.modules.conv._ConvNd,
+                            bn: torch.nn.modules.batchnorm._BatchNorm,
+                            efficient_conv_bn_eval=True) -> 'ConvModule':
+        """Create a ConvModule from a conv and a bn module."""
+        self = ConvModule.__new__(ConvModule)
+        super(ConvModule, self).__init__()
+
+        self.conv_cfg = None
+        self.norm_cfg = None
+        self.act_cfg = None
+        self.inplace = False
+        self.with_spectral_norm = False
+        self.with_explicit_padding = False
+        self.order = ('conv', 'norm', 'act')
+
+        self.with_norm = True
+        self.with_activation = False
+        self.with_bias = conv.bias is not None
+
+        # build convolution layer
+        self.conv = conv
+        # export the attributes of self.conv to a higher level for convenience
+        self.in_channels = self.conv.in_channels
+        self.out_channels = self.conv.out_channels
+        self.kernel_size = self.conv.kernel_size
+        self.stride = self.conv.stride
+        self.padding = self.conv.padding
+        self.dilation = self.conv.dilation
+        self.transposed = self.conv.transposed
+        self.output_padding = self.conv.output_padding
+        self.groups = self.conv.groups
+
+        # build normalization layers
+        self.norm_name, norm = 'bn', bn
+        self.add_module(self.norm_name, norm)
+
+        self.turn_on_efficient_conv_bn_eval(efficient_conv_bn_eval)
+
+        return self
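The new option can be exercised directly from ConvModule. The following is a minimal sketch (assuming an environment where this MMCV revision is installed) that compares the fused eval-mode path against the plain conv-then-norm path:

# Minimal check of the efficient conv+bn eval path (assumes this MMCV revision).
import torch
from mmcv.cnn import ConvModule

x = torch.randn(2, 3, 8, 8)
m = ConvModule(
    3, 16, 3, padding=1, norm_cfg=dict(type='BN'), efficient_conv_bn_eval=True)
m.eval()  # the fused forward is only taken when the BN layer is in eval mode

with torch.no_grad():
    y_fused = m(x)                           # conv and bn fused on the fly
    m.turn_on_efficient_conv_bn_eval(False)  # fall back to separate conv -> bn
    y_plain = m(x)

print(torch.allclose(y_fused, y_plain, atol=1e-5))  # expected: True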
mmcv/cnn/bricks/generalized_attention.py

...
@@ -371,7 +371,7 @@ class GeneralizedAttention(nn.Module):
                    contiguous().\
                    view(1, 1, h * w, h_kv * w_kv)

-                energy = energy.masked_fill_(cur_local_constraint_map,
+                energy = energy.masked_fill_(cur_local_constraint_map.bool(),
                                             float('-inf'))

            attention = F.softmax(energy, 3)
...
mmcv/cnn/bricks/norm.py

...
@@ -98,14 +98,17 @@ def build_norm_layer(cfg: Dict,
    layer_type = cfg_.pop('type')

-    # Switch registry to the target scope. If `norm_layer` cannot be found
-    # in the registry, fallback to search `norm_layer` in the
-    # mmengine.MODELS.
-    with MODELS.switch_scope_and_registry(None) as registry:
-        norm_layer = registry.get(layer_type)
-    if norm_layer is None:
-        raise KeyError(f'Cannot find {norm_layer} in registry under scope '
-                       f'name {registry.scope}')
+    if inspect.isclass(layer_type):
+        norm_layer = layer_type
+    else:
+        # Switch registry to the target scope. If `norm_layer` cannot be found
+        # in the registry, fallback to search `norm_layer` in the
+        # mmengine.MODELS.
+        with MODELS.switch_scope_and_registry(None) as registry:
+            norm_layer = registry.get(layer_type)
+        if norm_layer is None:
+            raise KeyError(f'Cannot find {norm_layer} in registry under '
+                           f'scope name {registry.scope}')
    abbr = infer_abbr(norm_layer)

    assert isinstance(postfix, (int, str))
...
@@ -113,7 +116,7 @@ def build_norm_layer(cfg: Dict,
    requires_grad = cfg_.pop('requires_grad', True)
    cfg_.setdefault('eps', 1e-5)

-    if layer_type != 'GN':
+    if norm_layer is not nn.GroupNorm:
        layer = norm_layer(num_features, **cfg_)
        if layer_type == 'SyncBN' and hasattr(layer, '_specify_ddp_gpu_num'):
            layer._specify_ddp_gpu_num(1)
...
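The practical effect of the `inspect.isclass` branch is that `type` in the config may now be a norm class rather than a registry string. A small sketch, assuming this MMCV revision is installed:

import torch.nn as nn
from mmcv.cnn import build_norm_layer

# registry string, as before
name1, bn1 = build_norm_layer(dict(type='BN'), 64)
# a class passed directly, enabled by this commit
name2, bn2 = build_norm_layer(dict(type=nn.BatchNorm2d), 64)
print(name1, name2)  # both infer the 'bn' abbreviation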
mmcv/cnn/bricks/padding.py

# Copyright (c) OpenMMLab. All rights reserved.
+import inspect
from typing import Dict

import torch.nn as nn
...
@@ -27,7 +28,8 @@ def build_padding_layer(cfg: Dict, *args, **kwargs) -> nn.Module:
    cfg_ = cfg.copy()
    padding_type = cfg_.pop('type')

+    if inspect.isclass(padding_type):
+        return padding_type(*args, **kwargs, **cfg_)
    # Switch registry to the target scope. If `padding_layer` cannot be found
    # in the registry, fallback to search `padding_layer` in the
    # mmengine.MODELS.
...
mmcv/cnn/bricks/plugin.py

...
@@ -79,15 +79,18 @@ def build_plugin_layer(cfg: Dict,
    cfg_ = cfg.copy()

    layer_type = cfg_.pop('type')

-    # Switch registry to the target scope. If `plugin_layer` cannot be found
-    # in the registry, fallback to search `plugin_layer` in the
-    # mmengine.MODELS.
-    with MODELS.switch_scope_and_registry(None) as registry:
-        plugin_layer = registry.get(layer_type)
-    if plugin_layer is None:
-        raise KeyError(f'Cannot find {plugin_layer} in registry under scope '
-                       f'name {registry.scope}')
+    if inspect.isclass(layer_type):
+        plugin_layer = layer_type
+    else:
+        # Switch registry to the target scope. If `plugin_layer` cannot be
+        # found in the registry, fallback to search `plugin_layer` in the
+        # mmengine.MODELS.
+        with MODELS.switch_scope_and_registry(None) as registry:
+            plugin_layer = registry.get(layer_type)
+        if plugin_layer is None:
+            raise KeyError(f'Cannot find {plugin_layer} in registry under scope '
+                           f'name {registry.scope}')
    abbr = infer_abbr(plugin_layer)

    assert isinstance(postfix, (int, str))
...
mmcv/cnn/bricks/upsample.py

# Copyright (c) OpenMMLab. All rights reserved.
+import inspect
from typing import Dict

import torch
...
@@ -76,15 +77,18 @@ def build_upsample_layer(cfg: Dict, *args, **kwargs) -> nn.Module:
    layer_type = cfg_.pop('type')

+    if inspect.isclass(layer_type):
+        upsample = layer_type
    # Switch registry to the target scope. If `upsample` cannot be found
    # in the registry, fallback to search `upsample` in the
    # mmengine.MODELS.
-    with MODELS.switch_scope_and_registry(None) as registry:
-        upsample = registry.get(layer_type)
-    if upsample is None:
-        raise KeyError(f'Cannot find {upsample} in registry under scope '
-                       f'name {registry.scope}')
-    if upsample is nn.Upsample:
-        cfg_['mode'] = layer_type
+    else:
+        with MODELS.switch_scope_and_registry(None) as registry:
+            upsample = registry.get(layer_type)
+        if upsample is None:
+            raise KeyError(f'Cannot find {upsample} in registry under scope '
+                           f'name {registry.scope}')
+        if upsample is nn.Upsample:
+            cfg_['mode'] = layer_type
    layer = upsample(*args, **kwargs, **cfg_)
    return layer
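padding.py and plugin.py above receive the same treatment, and build_upsample_layer follows suit: `type` may be a class as well as a registry string. A brief sketch of both spellings, assuming this MMCV revision is installed:

import torch.nn as nn
from mmcv.cnn import build_upsample_layer

up1 = build_upsample_layer(dict(type='nearest', scale_factor=2))    # registry string
up2 = build_upsample_layer(dict(type=nn.Upsample, scale_factor=2))  # class passed directly
print(type(up1), type(up2))  # both nn.Upsample instances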
mmcv/cnn/bricks/wrappers.py

...
@@ -41,7 +41,7 @@ class NewEmptyTensorOp(torch.autograd.Function):
class Conv2d(nn.Conv2d):

    def forward(self, x: torch.Tensor) -> torch.Tensor:
-        if x.numel() == 0 and obsolete_torch_version(TORCH_VERSION, (1, 4)):
+        if obsolete_torch_version(TORCH_VERSION, (1, 4)) and x.numel() == 0:
            out_shape = [x.shape[0], self.out_channels]
            for i, k, p, s, d in zip(x.shape[-2:], self.kernel_size,
                                     self.padding, self.stride, self.dilation):
...
@@ -62,7 +62,7 @@ class Conv2d(nn.Conv2d):
class Conv3d(nn.Conv3d):

    def forward(self, x: torch.Tensor) -> torch.Tensor:
-        if x.numel() == 0 and obsolete_torch_version(TORCH_VERSION, (1, 4)):
+        if obsolete_torch_version(TORCH_VERSION, (1, 4)) and x.numel() == 0:
            out_shape = [x.shape[0], self.out_channels]
            for i, k, p, s, d in zip(x.shape[-3:], self.kernel_size,
                                     self.padding, self.stride, self.dilation):
...
@@ -84,7 +84,7 @@ class Conv3d(nn.Conv3d):
class ConvTranspose2d(nn.ConvTranspose2d):

    def forward(self, x: torch.Tensor) -> torch.Tensor:
-        if x.numel() == 0 and obsolete_torch_version(TORCH_VERSION, (1, 4)):
+        if obsolete_torch_version(TORCH_VERSION, (1, 4)) and x.numel() == 0:
            out_shape = [x.shape[0], self.out_channels]
            for i, k, p, s, d, op in zip(x.shape[-2:], self.kernel_size,
                                         self.padding, self.stride,
...
@@ -106,7 +106,7 @@ class ConvTranspose2d(nn.ConvTranspose2d):
class ConvTranspose3d(nn.ConvTranspose3d):

    def forward(self, x: torch.Tensor) -> torch.Tensor:
-        if x.numel() == 0 and obsolete_torch_version(TORCH_VERSION, (1, 4)):
+        if obsolete_torch_version(TORCH_VERSION, (1, 4)) and x.numel() == 0:
            out_shape = [x.shape[0], self.out_channels]
            for i, k, p, s, d, op in zip(x.shape[-3:], self.kernel_size,
                                         self.padding, self.stride,
...
@@ -127,7 +127,7 @@ class MaxPool2d(nn.MaxPool2d):

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # PyTorch 1.9 does not support empty tensor inference yet
-        if x.numel() == 0 and obsolete_torch_version(TORCH_VERSION, (1, 9)):
+        if obsolete_torch_version(TORCH_VERSION, (1, 9)) and x.numel() == 0:
            out_shape = list(x.shape[:2])
            for i, k, p, s, d in zip(x.shape[-2:], _pair(self.kernel_size),
                                     _pair(self.padding), _pair(self.stride),
...
@@ -145,7 +145,7 @@ class MaxPool3d(nn.MaxPool3d):

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # PyTorch 1.9 does not support empty tensor inference yet
-        if x.numel() == 0 and obsolete_torch_version(TORCH_VERSION, (1, 9)):
+        if obsolete_torch_version(TORCH_VERSION, (1, 9)) and x.numel() == 0:
            out_shape = list(x.shape[:2])
            for i, k, p, s, d in zip(x.shape[-3:], _triple(self.kernel_size),
                                     _triple(self.padding),
...
@@ -164,7 +164,7 @@ class Linear(torch.nn.Linear):

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # empty tensor forward of Linear layer is supported in Pytorch 1.6
-        if x.numel() == 0 and obsolete_torch_version(TORCH_VERSION, (1, 5)):
+        if obsolete_torch_version(TORCH_VERSION, (1, 5)) and x.numel() == 0:
            out_shape = [x.shape[0], self.out_features]
            empty = NewEmptyTensorOp.apply(x, out_shape)
            if self.training:
...
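The reordering only changes which side of the condition is evaluated first; the empty-tensor behaviour of the wrappers is unchanged. A quick sketch, assuming this MMCV revision is installed:

import torch
from mmcv.cnn.bricks.wrappers import Conv2d

conv = Conv2d(3, 8, kernel_size=3, padding=1)
empty = torch.zeros(0, 3, 32, 32)  # e.g. a batch produced by zero RoIs
print(conv(empty).shape)           # torch.Size([0, 8, 32, 32])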
mmcv/image/geometric.py

...
@@ -16,13 +16,13 @@ except ImportError:
def _scale_size(
    size: Tuple[int, int],
-    scale: Union[float, int, tuple],
+    scale: Union[float, int, Tuple[float, float], Tuple[int, int]],
) -> Tuple[int, int]:
    """Rescale a size by a ratio.

    Args:
        size (tuple[int]): (w, h).
-        scale (float | tuple(float)): Scaling factor.
+        scale (float | int | tuple(float) | tuple(int)): Scaling factor.

    Returns:
        tuple[int]: scaled size.
...
@@ -128,7 +128,8 @@ def imresize_to_multiple(
    img: np.ndarray,
    divisor: Union[int, Tuple[int, int]],
    size: Union[int, Tuple[int, int], None] = None,
-    scale_factor: Union[float, Tuple[float, float], None] = None,
+    scale_factor: Union[float, int, Tuple[float, float], Tuple[int, int],
+                        None] = None,
    keep_ratio: bool = False,
    return_scale: bool = False,
    interpolation: str = 'bilinear',
...
@@ -145,9 +146,10 @@ def imresize_to_multiple(
            divisor. If divisor is a tuple, divisor should be
            (w_divisor, h_divisor).
        size (None | int | tuple[int]): Target size (w, h). Default: None.
-        scale_factor (None | float | tuple[float]): Multiplier for spatial
-            size. Should match input size if it is a tuple and the 2D style is
-            (w_scale_factor, h_scale_factor). Default: None.
+        scale_factor (None | float | int | tuple[float] | tuple[int]):
+            Multiplier for spatial size. Should match input size if it is a
+            tuple and the 2D style is (w_scale_factor, h_scale_factor).
+            Default: None.
        keep_ratio (bool): Whether to keep the aspect ratio when resizing the
            image. Default: False.
        return_scale (bool): Whether to return `w_scale` and `h_scale`.
...
@@ -215,16 +217,16 @@ def imresize_like(
def rescale_size(old_size: tuple,
-                 scale: Union[float, int, tuple],
+                 scale: Union[float, int, Tuple[int, int]],
                 return_scale: bool = False) -> tuple:
    """Calculate the new size to be rescaled to.

    Args:
        old_size (tuple[int]): The old size (w, h) of image.
-        scale (float | tuple[int]): The scaling factor or maximum size.
-            If it is a float number, then the image will be rescaled by this
-            factor, else if it is a tuple of 2 integers, then the image will
-            be rescaled as large as possible within the scale.
+        scale (float | int | tuple[int]): The scaling factor or maximum size.
+            If it is a float number or an integer, then the image will be
+            rescaled by this factor, else if it is a tuple of 2 integers, then
+            the image will be rescaled as large as possible within the scale.
        return_scale (bool): Whether to return the scaling factor besides the
            rescaled image size.
...
@@ -255,7 +257,7 @@ def rescale_size(old_size: tuple,
def imrescale(
    img: np.ndarray,
-    scale: Union[float, Tuple[int, int]],
+    scale: Union[float, int, Tuple[int, int]],
    return_scale: bool = False,
    interpolation: str = 'bilinear',
    backend: Optional[str] = None
...
@@ -264,10 +266,10 @@ def imrescale(
    Args:
        img (ndarray): The input image.
-        scale (float | tuple[int]): The scaling factor or maximum size.
-            If it is a float number, then the image will be rescaled by this
-            factor, else if it is a tuple of 2 integers, then the image will
-            be rescaled as large as possible within the scale.
+        scale (float | int | tuple[int]): The scaling factor or maximum size.
+            If it is a float number or an integer, then the image will be
+            rescaled by this factor, else if it is a tuple of 2 integers, then
+            the image will be rescaled as large as possible within the scale.
        return_scale (bool): Whether to return the scaling factor besides the
            rescaled image.
        interpolation (str): Same as :func:`resize`.
...
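The widened annotations make explicit that an integer scale is accepted wherever a float is. For example, assuming this MMCV revision is installed:

import numpy as np
from mmcv.image import imrescale, rescale_size

print(rescale_size((400, 300), 2))             # (800, 600)
img = np.zeros((300, 400, 3), dtype=np.uint8)
print(imrescale(img, 2).shape)                 # (600, 800, 3)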
mmcv/ops/__init__.py

# Copyright (c) OpenMMLab. All rights reserved.
+from mmcv.utils import IS_MLU_AVAILABLE
from .active_rotated_filter import active_rotated_filter
from .assign_score_withk import assign_score_withk
from .ball_query import ball_query
...
@@ -109,3 +110,9 @@ __all__ = [
    'PrRoIPool', 'prroi_pool', 'bias_act', 'filtered_lrelu', 'conv2d',
    'conv_transpose2d', 'filter2d', 'upsample2d', 'BezierAlign', 'bezier_align'
]
+
+if IS_MLU_AVAILABLE:
+    from .deform_conv import DeformConv2dPack_MLU  # noqa:F401
+    from .modulated_deform_conv import \
+        ModulatedDeformConv2dPack_MLU  # noqa:F401
+    __all__.extend(['ModulatedDeformConv2dPack_MLU', 'DeformConv2dPack_MLU'])
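Because the MLU pack layers are appended to __all__ only when an MLU runtime is present, downstream code can simply probe the flag. A sketch, assuming this MMCV revision is installed:

import mmcv.ops as ops
from mmcv.utils import IS_MLU_AVAILABLE

print(IS_MLU_AVAILABLE, 'DeformConv2dPack_MLU' in ops.__all__)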
mmcv/ops/bbox.py

...
@@ -116,6 +116,10 @@ def bbox_overlaps(bboxes1: torch.Tensor,
    if rows * cols == 0:
        return ious

+    if bboxes1.device.type == 'cpu' and torch.__version__ == 'parrots':
+        return _bbox_overlaps_cpu(
+            bboxes1, bboxes2, mode=mode, aligned=aligned, offset=offset)
+
    ext_module.bbox_overlaps(
        bboxes1, bboxes2, ious, mode=mode_flag, aligned=aligned, offset=offset)
...
mmcv/ops/box_iou_rotated.py

...
@@ -133,12 +133,20 @@ def box_iou_rotated(bboxes1: torch.Tensor,
    if aligned:
        ious = bboxes1.new_zeros(rows)
    else:
-        ious = bboxes1.new_zeros(rows * cols)
+        if bboxes1.device.type == 'mlu':
+            ious = bboxes1.new_zeros([rows, cols])
+        else:
+            ious = bboxes1.new_zeros(rows * cols)

    if not clockwise:
        flip_mat = bboxes1.new_ones(bboxes1.shape[-1])
        flip_mat[-1] = -1
        bboxes1 = bboxes1 * flip_mat
        bboxes2 = bboxes2 * flip_mat
+
+    if bboxes1.device.type == 'npu':
+        scale_mat = bboxes1.new_ones(bboxes1.shape[-1])
+        scale_mat[-1] = 1.0 / 0.01745329252
+        bboxes1 = bboxes1 * scale_mat
+        bboxes2 = bboxes2 * scale_mat
+
    bboxes1 = bboxes1.contiguous()
    bboxes2 = bboxes2.contiguous()
    ext_module.box_iou_rotated(
...
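The constant in the NPU branch is 1 / (pi / 180), so the angle channel is presumably rescaled from radians to degrees for the underlying NPU kernel. A short check of that reading:

import math
print(abs(0.01745329252 - math.pi / 180) < 1e-11)  # True: the constant is pi/180
print(1.0 / 0.01745329252)                          # ~57.2958, degrees per radian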
mmcv/ops/conv2d_gradfix.py

...
@@ -16,6 +16,7 @@ from typing import Dict, Optional, Tuple, Union

import torch
from mmengine.utils import digit_version
+from mmengine.utils.dl_utils.parrots_wrapper import is_rocm_pytorch

enabled = True
weight_gradients_disabled = False
...
@@ -283,28 +284,19 @@ def _conv2d_gradfix(
                output_padding=output_padding,
                output_mask=[0, 1, 0])[1]
        else:
-            is_rocm_pytorch = False
-            try:
-                from torch.utils.cpp_extension import ROCM_HOME
-                is_rocm_pytorch = True if ((torch.version.hip is not None) and
-                                           (ROCM_HOME is not None)) else False
-            except ImportError:
-                pass
-
            name = ''
            flags = []
-            if is_rocm_pytorch:
-                name = ('aten::miopen_convolution_transpose_backward_weight'
-                        if transpose else
-                        'aten::miopen_convolution_backward_weight')
+            if is_rocm_pytorch():
+                name = 'aten::miopen_convolution_transpose_backward_weight'
+                if not transpose:
+                    name = 'aten::miopen_convolution_backward_weight'
                flags = [
                    torch.backends.cudnn.benchmark,
                    torch.backends.cudnn.deterministic
                ]
            else:
                # General case => cuDNN.
                name = ('aten::cudnn_convolution_transpose_backward_weight'
                        if transpose else
                        'aten::cudnn_convolution_backward_weight')
                flags = [
                    torch.backends.cudnn.benchmark,
                    torch.backends.cudnn.deterministic,
...
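The local try/except probe is replaced by mmengine's helper, which can be called anywhere. A one-line sketch, assuming mmengine is installed:

from mmengine.utils.dl_utils.parrots_wrapper import is_rocm_pytorch
print(is_rocm_pytorch())  # True only on a ROCm build of PyTorch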
mmcv/ops/corner_pool.py

# Copyright (c) OpenMMLab. All rights reserved.
import torch
-from torch import Tensor, nn
+from mmengine.utils import digit_version
+from torch import Tensor, nn

_mode_dict = {'top': 0, 'bottom': 1, 'left': 2, 'right': 3}
...
@@ -70,7 +71,8 @@ class CornerPool(nn.Module):
        self.mode = mode

    def forward(self, x: Tensor) -> Tensor:
-        if torch.__version__ != 'parrots' and digit_version(
-                torch.__version__) >= digit_version('1.5.0'):
+        if (torch.__version__ != 'parrots' and
+                digit_version(torch.__version__) >= digit_version('1.5.0')):
            dim, flip = self.cummax_dim_flip[self.mode]
            if flip:
                x = x.flip(dim)
...
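The parenthesised condition is the usual digit_version gate; it can be reproduced standalone (assuming mmengine and torch are installed):

import torch
from mmengine.utils import digit_version

use_cummax = (torch.__version__ != 'parrots'
              and digit_version(torch.__version__) >= digit_version('1.5.0'))
print(use_cummax)  # True on any recent PyTorch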
mmcv/ops/csrc/common/cuda/carafe_cuda_kernel.cuh

...
@@ -2,6 +2,8 @@
#ifndef CARAFE_CUDA_KERNEL_CUH
#define CARAFE_CUDA_KERNEL_CUH

+#include <ATen/cuda/DeviceUtils.cuh>
+
#ifdef MMCV_USE_PARROTS
#include "parrots_cuda_helper.hpp"
#else
...
@@ -56,7 +58,8 @@ template <>
__device__ __forceinline__ phalf warpReduceSum(phalf val) {
  for (int offset = WARP_SIZE / 2; offset > 0; offset /= 2)
#ifdef MMCV_WITH_HIP
-    __PHALF(val) += __shfl_down(val, offset);
+    // Using PyTorch's macro for half support
+    __PHALF(val) += WARP_SHFL_DOWN(val, offset);
#else
    __PHALF(val) +=
        __shfl_down_sync(FULL_MASK, __PHALF(val).operator __half(), offset);
...
mmcv/ops/csrc/common/mlu/bbox_overlaps_mlu_kernel.mlu (deleted, 100644 → 0)
/*************************************************************************
* Copyright (C) 2021 Cambricon.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*************************************************************************/
#include <float.h>
#include "common_mlu_helper.hpp"
#define COORD_NUM 4
__nram__ char nmem_buf[MAX_NRAM_SIZE];
template <typename T>
__mlu_func__ void computeDiv(void *nram_dst, void *nram_src0, void *nram_src1,
void *nram_addition, const int32_t deal_num) {
__bang_active_reciphp((T *)nram_dst, (T *)nram_src1, deal_num);
__bang_mul((T *)nram_dst, (T *)nram_src0, (T *)nram_dst, deal_num);
}
template <>
__mlu_func__ void computeDiv<half>(void *nram_dst, void *nram_src0,
void *nram_src1, void *nram_addition,
const int32_t deal_num) {
__bang_half2float((float *)nram_addition, (half *)nram_src1, deal_num);
__bang_active_reciphp((float *)nram_addition, (float *)nram_addition,
deal_num);
__bang_float2half_rd((half *)nram_src1, (float *)nram_addition, deal_num);
__bang_mul((half *)nram_dst, (half *)nram_src0, (half *)nram_src1, deal_num);
}
template <typename T>
__mlu_func__ void bboxOverlapsWorkflow(
T *vec_b1_x1, T *vec_b1_y1, T *vec_b1_x2, T *vec_b1_y2, T *vec_b2_x1,
T *vec_b2_y1, T *vec_b2_x2, T *vec_b2_y2, T *vec_left, T *vec_right,
T *vec_top, T *vec_bottom, const T *bbox1, const T *bbox2, void *ious,
const int32_t offset, const int32_t mode, const int32_t batches_stride,
const int32_t num_bbox1, const int32_t num_bbox2, const bool aligned) {
int32_t task_batch_stride = (num_bbox1 + taskDim - 1) / taskDim;
int32_t batch_start = taskId * task_batch_stride;
int32_t batch_per_task = batch_start + task_batch_stride < num_bbox1
? task_batch_stride
: num_bbox1 - batch_start;
batch_per_task = batch_per_task > 0 ? batch_per_task : (0);
if (aligned) {
int32_t num_loop_cpy = batch_per_task / batches_stride;
int32_t num_rem_cpy_batches = batch_per_task % batches_stride;
num_loop_cpy = num_rem_cpy_batches > 0 ? num_loop_cpy + 1 : num_loop_cpy;
for (int32_t i = 0; i < num_loop_cpy; i++) {
int32_t index = batch_start + i * batches_stride;
int32_t handle_batches = index + batches_stride > num_bbox1
? num_rem_cpy_batches
: batches_stride;
int32_t b1 = index;
int32_t b2 = index;
int32_t base1 = b1 * COORD_NUM;
__memcpy(vec_b1_x1, &bbox1[base1], sizeof(T), GDRAM2NRAM, sizeof(T),
COORD_NUM * sizeof(T), handle_batches - 1);
__memcpy(vec_b1_y1, &bbox1[base1 + 1], sizeof(T), GDRAM2NRAM, sizeof(T),
COORD_NUM * sizeof(T), handle_batches - 1);
__memcpy(vec_b1_x2, &bbox1[base1 + 2], sizeof(T), GDRAM2NRAM, sizeof(T),
COORD_NUM * sizeof(T), handle_batches - 1);
__memcpy(vec_b1_y2, &bbox1[base1 + 3], sizeof(T), GDRAM2NRAM, sizeof(T),
COORD_NUM * sizeof(T), handle_batches - 1);
int32_t base2 = b2 * COORD_NUM;
__memcpy(vec_b2_x1, &bbox2[base2], sizeof(T), GDRAM2NRAM, sizeof(T),
COORD_NUM * sizeof(T), handle_batches - 1);
__memcpy(vec_b2_y1, &bbox2[base2 + 1], sizeof(T), GDRAM2NRAM, sizeof(T),
COORD_NUM * sizeof(T), handle_batches - 1);
__memcpy(vec_b2_x2, &bbox2[base2 + 2], sizeof(T), GDRAM2NRAM, sizeof(T),
COORD_NUM * sizeof(T), handle_batches - 1);
__memcpy(vec_b2_y2, &bbox2[base2 + 3], sizeof(T), GDRAM2NRAM, sizeof(T),
COORD_NUM * sizeof(T), handle_batches - 1);
// get the width and height
__bang_maxequal(vec_left, vec_b1_x1, vec_b2_x1, batches_stride);
__bang_minequal(vec_right, vec_b1_x2, vec_b2_x2, batches_stride);
__bang_maxequal(vec_top, vec_b1_y1, vec_b2_y1, batches_stride);
__bang_minequal(vec_bottom, vec_b1_y2, vec_b2_y2, batches_stride);
// right - left + offset ---> left
__bang_sub(vec_left, vec_right, vec_left, batches_stride);
__bang_add_scalar(vec_left, vec_left, (T)offset, batches_stride);
// bottom - top + offset ---> right
__bang_sub(vec_right, vec_bottom, vec_top, batches_stride);
__bang_add_scalar(vec_right, vec_right, (T)offset, batches_stride);
// zero vector ---> bottom
__bang_write_value(vec_bottom, batches_stride, 0.f);
// width --> vec_left
__bang_maxequal(vec_left, vec_bottom, vec_left, batches_stride);
T *width = vec_left;
// height --> vec_right
__bang_maxequal(vec_right, vec_bottom, vec_right, batches_stride);
T *height = vec_right;
// get the b1_area
// (b1_x2 - b1_x1 + offset) ---> vec_top
__bang_sub(vec_top, vec_b1_x2, vec_b1_x1, batches_stride);
__bang_add_scalar(vec_top, vec_top, (T)offset, batches_stride);
// (b1_y2 - b1_y1 + offset) ---> vec_bottom
__bang_sub(vec_bottom, vec_b1_y2, vec_b1_y1, batches_stride);
__bang_add_scalar(vec_bottom, vec_bottom, (T)offset, batches_stride);
// b1_area = (b1_x2 - b1_x1 + offset) * (b1_y2 - b1_y1 + offset)
// ---> vec_top;
__bang_mul(vec_top, vec_top, vec_bottom, batches_stride);
T *b1_area = vec_top;
// get the b2_area
// (b2_x2 - b2_x1 + offset) ---> b2_x1
__bang_sub(vec_b2_x1, vec_b2_x2, vec_b2_x1, batches_stride);
__bang_add_scalar(vec_b2_x1, vec_b2_x1, (T)offset, batches_stride);
// (b2_y2 - b2_y1 + offset) ---> b2_y1
__bang_sub(vec_b2_y1, vec_b2_y2, vec_b2_y1, batches_stride);
__bang_add_scalar(vec_b2_y1, vec_b2_y1, (T)offset, batches_stride);
// b2_area = (b2_x2 - b2_x1 + offset) * (b2_y2 - b2_y1 + offset)
// ---> b2_x1;
__bang_mul(vec_b2_x1, vec_b2_x1, vec_b2_y1, batches_stride);
T *b2_area = vec_b2_x1;
// inter_s = width * height
__bang_mul(height, width, height, batches_stride);
T *inter_s = height;
// offset vector ---> vec_b2_y1
__bang_write_value(vec_b2_y1, batches_stride, T(offset));
T *vec_offset = vec_b2_y1;
if (mode == 0) {
__bang_add(b1_area, b1_area, b2_area, batches_stride);
__bang_sub(b1_area, b1_area, inter_s, batches_stride);
__bang_maxequal(b1_area, vec_offset, b1_area, batches_stride);
} else {
__bang_maxequal(b1_area, vec_offset, b1_area, batches_stride);
}
T *base_s = b1_area;
// ious = inter_s / base_s
computeDiv<T>(width, inter_s, base_s, vec_b2_x2, batches_stride);
__memcpy((T *)ious + index, width, handle_batches * sizeof(T),
NRAM2GDRAM);
}
} else {
int32_t num_loop_cpy = num_bbox2 / batches_stride;
int32_t num_rem_cpy_batches = num_bbox2 % batches_stride;
num_loop_cpy = num_rem_cpy_batches > 0 ? num_loop_cpy + 1 : num_loop_cpy;
for (int32_t i = 0; i < batch_per_task; i++) {
int32_t index1 = batch_start + i;
int32_t b1 = index1;
int32_t base1 = b1 * COORD_NUM;
// set bbox1 and bbox2 to nram
__bang_write_value(vec_b1_x1, batches_stride, bbox1[base1]);
__bang_write_value(vec_b1_y1, batches_stride, bbox1[base1 + 1]);
__bang_write_value(vec_b1_x2, batches_stride, bbox1[base1 + 2]);
__bang_write_value(vec_b1_y2, batches_stride, bbox1[base1 + 3]);
for (int32_t j = 0; j < num_loop_cpy; j++) {
int32_t index2 = j * batches_stride;
int32_t handle_batches = index2 + batches_stride > num_bbox2
? num_rem_cpy_batches
: batches_stride;
int32_t b2 = index2;
int32_t base2 = b2 * COORD_NUM;
// copy bbox2 to nram
__memcpy(vec_b2_x1, &bbox2[base2], sizeof(T), GDRAM2NRAM, sizeof(T),
COORD_NUM * sizeof(T), handle_batches - 1);
__memcpy(vec_b2_y1, &bbox2[base2 + 1], sizeof(T), GDRAM2NRAM, sizeof(T),
COORD_NUM * sizeof(T), handle_batches - 1);
__memcpy(vec_b2_x2, &bbox2[base2 + 2], sizeof(T), GDRAM2NRAM, sizeof(T),
COORD_NUM * sizeof(T), handle_batches - 1);
__memcpy(vec_b2_y2, &bbox2[base2 + 3], sizeof(T), GDRAM2NRAM, sizeof(T),
COORD_NUM * sizeof(T), handle_batches - 1);
// get the width and height
__bang_maxequal(vec_left, vec_b1_x1, vec_b2_x1, batches_stride);
__bang_minequal(vec_right, vec_b1_x2, vec_b2_x2, batches_stride);
__bang_maxequal(vec_top, vec_b1_y1, vec_b2_y1, batches_stride);
__bang_minequal(vec_bottom, vec_b1_y2, vec_b2_y2, batches_stride);
// right - left + offset ---> left
__bang_sub(vec_left, vec_right, vec_left, batches_stride);
__bang_add_scalar(vec_left, vec_left, (T)offset, batches_stride);
// bottom - top + offset ---> right
__bang_sub(vec_right, vec_bottom, vec_top, batches_stride);
__bang_add_scalar(vec_right, vec_right, (T)offset, batches_stride);
// zero vector ---> bottom
__bang_write_value(vec_bottom, batches_stride, (T)0);
// width --> vec_left
__bang_maxequal(vec_left, vec_bottom, vec_left, batches_stride);
T *width = vec_left;
// height --> vec_right
__bang_maxequal(vec_right, vec_bottom, vec_right, batches_stride);
T *height = vec_right;
// get the b1_area
// (b1_x2 - b1_x1 + offset) ---> vec_top
__bang_sub(vec_top, vec_b1_x2, vec_b1_x1, batches_stride);
__bang_add_scalar(vec_top, vec_top, (T)offset, batches_stride);
// (b1_y2 - b1_y1 + offset) ---> vec_bottom
__bang_sub(vec_bottom, vec_b1_y2, vec_b1_y1, batches_stride);
__bang_add_scalar(vec_bottom, vec_bottom, (T)offset, batches_stride);
// b1_area = (b1_x2 - b1_x1 + offset) * (b1_y2 - b1_y1 + offset)
// ---> vec_top;
__bang_mul(vec_top, vec_top, vec_bottom, batches_stride);
T *b1_area = vec_top;
// get the b2_area
// (b2_x2 - b2_x1 + offset) ---> b2_x1
__bang_sub(vec_b2_x1, vec_b2_x2, vec_b2_x1, batches_stride);
__bang_add_scalar(vec_b2_x1, vec_b2_x1, (T)offset, batches_stride);
// (b2_y2 - b2_y1 + offset) ---> b2_y1
__bang_sub(vec_b2_y1, vec_b2_y2, vec_b2_y1, batches_stride);
__bang_add_scalar(vec_b2_y1, vec_b2_y1, (T)offset, batches_stride);
// b2_area = (b2_x2 - b2_x1 + offset) * (b2_y2 - b2_y1 + offset)
// ---> b2_x1;
__bang_mul(vec_b2_x1, vec_b2_x1, vec_b2_y1, batches_stride);
T *b2_area = vec_b2_x1;
// inter_s = width * height
__bang_mul(height, width, height, batches_stride);
T *inter_s = height;
// offset vector ---> vec_b2_y1
__bang_write_value(vec_b2_y1, batches_stride, T(offset));
T *vec_offset = vec_b2_y1;
if (mode == 0) {
__bang_add(b1_area, b1_area, b2_area, batches_stride);
__bang_sub(b1_area, b1_area, inter_s, batches_stride);
__bang_maxequal(b1_area, vec_offset, b1_area, batches_stride);
} else {
__bang_maxequal(b1_area, vec_offset, b1_area, batches_stride);
}
T *base_s = b1_area;
// ious = inter_s / base_s
computeDiv<T>(width, inter_s, base_s, vec_b2_x2, batches_stride);
int32_t gdram_offset = index1 * num_bbox2 + index2;
__memcpy((T *)ious + gdram_offset, width, handle_batches * sizeof(T),
NRAM2GDRAM);
}
}
}
}
template <typename T>
__mlu_global__ void MLUUnion1KernelBBoxOverlaps(
const void *bbox1, const void *bbox2, void *ious, const int32_t num_bbox1,
const int32_t num_bbox2, const int32_t mode, const bool aligned,
const int32_t offset) {
/*
* NRAM partition
* |-------------------------------------------------------------|
* | vec_b1_x1 | vec_b1_y1 | vec_b1_x2 | vec_b1_y2 |
* |-------------------------------------------------------------|
* | vec_b2_x1 | vec_b2_y1 | vec_b2_x2 | vec_b2_y2 |
* |-------------------------------------------------------------|
* | vec_left | vec_right | vec_top | vec_bottom |
* |-------------------------------------------------------------|
*
*/
const int32_t align_bytes = PAD_DOWN(MAX_NRAM_SIZE, NFU_ALIGN_SIZE);
const int32_t split_nram_num = 12;
const int32_t nram_stride =
align_bytes / NFU_ALIGN_SIZE / split_nram_num * NFU_ALIGN_SIZE;
void *vec_b1_x1 = nmem_buf;
void *vec_b1_y1 = nmem_buf + nram_stride;
void *vec_b1_x2 = nmem_buf + 2 * nram_stride;
void *vec_b1_y2 = nmem_buf + 3 * nram_stride;
void *vec_b2_x1 = nmem_buf + 4 * nram_stride;
void *vec_b2_y1 = nmem_buf + 5 * nram_stride;
void *vec_b2_x2 = nmem_buf + 6 * nram_stride;
void *vec_b2_y2 = nmem_buf + 7 * nram_stride;
void *vec_left = nmem_buf + 8 * nram_stride;
void *vec_right = nmem_buf + 9 * nram_stride;
void *vec_top = nmem_buf + 10 * nram_stride;
void *vec_bottom = nmem_buf + 11 * nram_stride;
const int32_t vec_length = nram_stride / sizeof(T);
bboxOverlapsWorkflow((T *)vec_b1_x1, (T *)vec_b1_y1, (T *)vec_b1_x2,
(T *)vec_b1_y2, (T *)vec_b2_x1, (T *)vec_b2_y1,
(T *)vec_b2_x2, (T *)vec_b2_y2, (T *)vec_left,
(T *)vec_right, (T *)vec_top, (T *)vec_bottom,
(T *)bbox1, (T *)bbox2, (T *)ious, offset, mode,
vec_length, num_bbox1, num_bbox2, aligned);
}
void KernelBBoxOverlaps(cnrtDim3_t k_dim, cnrtFunctionType_t k_type,
cnrtQueue_t queue, const cnrtDataType_t d_type,
const void *bbox1, const void *bbox2, void *ious,
const int32_t num_bbox1, const int32_t num_bbox2,
const int32_t mode, const bool aligned,
const int32_t offset) {
if (d_type == CNRT_FLOAT16) {
MLUUnion1KernelBBoxOverlaps<half><<<k_dim, k_type, queue>>>(
bbox1, bbox2, ious, num_bbox1, num_bbox2, mode, aligned, offset);
} else {
MLUUnion1KernelBBoxOverlaps<float><<<k_dim, k_type, queue>>>(
bbox1, bbox2, ious, num_bbox1, num_bbox2, mode, aligned, offset);
}
}
mmcv/ops/csrc/common/mlu/carafe_mlu_kernel.mlu (deleted, 100644 → 0; diff collapsed, content not shown)
mmcv/ops/csrc/common/mlu/carafe_utils.hpp (deleted, 100644 → 0)
/*************************************************************************
* Copyright (C) 2022 Cambricon.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*************************************************************************/
#ifndef CARAFE_UTILS_HPP_
#define CARAFE_UTILS_HPP_
#define NRAM_ALIGN_SIZE 64
struct CarafeForwardParam {
  int N;   // batch size
  int Hi;  // input height
  int Wi;  // input width
  int Ci;  // input channels
  int Ho;  // output height
  int Wo;  // output width
  int Cg;  // channels per group

  int kernel_size;       // kernel_size
  int group_size;        // group_size
  int scale_factor;      // scale_factor
  int kernel_size_half;  // kernel half size (K-1)/2
  int kernel_size_sq;    // square of kernel size

  int dtype_size;  // size of tensor data type

  // Host arrays' geometry
  int input_stride_g;
  int input_stride_w;
  int input_stride_h;
  int input_stride_n;
  int input_size;
  int mask_stride_kh;
  int mask_stride_g;
  int mask_stride_w;
  int mask_stride_h;
  int mask_stride_n;
  int mask_size;
  int output_stride_g;
  int output_stride_w;
  int output_stride_h;
  int output_stride_n;
  int output_size;

  // NRAM arrays' geometry
  int input_nram_stride_g;
  int input_nram_stride_w;
  int input_nram_stride_h;
  int input_nram_size;
  int mask_nram_stride_kh;
  int mask_nram_stride_g;
  int mask_nram_stride_w;
  int mask_nram_stride_h;
  int mask_nram_size;
  int output_nram_stride_g;
  int output_nram_stride_w;
  int output_nram_stride_h;
  int output_nram_size;

  // for address/compute alignment
  int align_size_NRAM;  // for addressing on NRAM
  int align_size_NFU;   // for NFU operation length
  int block_Cg_NFU;     // for bang_mul_const

  int job_num;  // total job number
};

struct CarafeForwardBlockDim {
  int Ho;  // block size of output height
  int Wo;  // block size of output width
  int Kh;  // block size of kernel height
  int Kw;  // block size of kernel width
  int G;   // block size of groups
  int Cg;  // block size of channels within a group
  int Hi;  // block size of input height
  int Wi;  // block size of input width
};

struct CarafeForwardGridDim {
  int Ho;  // number of blocks of output height
  int Wo;
  int Kh;
  int Kw;
  int G;
  int Cg;
};

#endif  // CARAFE_UTILS_HPP_
mmcv/ops/csrc/common/mlu/common_mlu_helper.hpp

...
@@ -45,148 +45,6 @@ __mlu_func__ inline scalar_t max(scalar_t a, scalar_t b) {
  return a > b ? a : b;
}

-/*!
- * @brief loads data from global DRAM to NRAM with 2D pattern.
- *
- * @param[out] dst
- *   Pointer to NRAM that stores dst data.
- * @param[in] src
- *   Pointer to global DRAM that stores src data.
- * @param[in] size
- *   The byte size of segment in the lower dimension.
- * @param[in] dst_str
- *   The data stride in bytes between segments in the lower dimension of dst.
- * @param[in] src_str
- *   The data stride in bytes between segments in the lower dimension of src.
- * @param[in] seg_num
- *   The total count of data segments in the lower dimension.
- */
-template <typename T>
-__mlu_func__ void loadStr2D(T *dst, T *src, const int size, const int dst_str,
-                            const int src_str, const int seg_num) {
-  if (dst_str == src_str && size == src_str) {
-    __memcpy(dst, src, src_str * seg_num * sizeof(T), GDRAM2NRAM);
-  } else if ((size == src_str || src_str <= dst_str) &&
-             src_str * sizeof(T) <= 512) {
-    // gather data less than 512Bytes to improve IO efficiency
-    T *tmp = (T *)dst + (dst_str - src_str) * seg_num;
-    __memcpy(tmp, src, (src_str * (seg_num - 1) + size) * sizeof(T),
-             GDRAM2NRAM);
-    if (dst_str != src_str) {
-      __memcpy(dst, tmp, size * sizeof(T), NRAM2NRAM, dst_str * sizeof(T),
-               src_str * sizeof(T), seg_num - 1);
-    }
-  } else {
-    __memcpy(dst, src, size * sizeof(T), GDRAM2NRAM, dst_str * sizeof(T),
-             src_str * sizeof(T), seg_num - 1);
-  }
-}
-
-/*!
- * @brief loads data from global DRAM to NRAM with 3D pattern.
- *
- * @param[out] dst
- *   Pointer to NRAM that stores dst data.
- * @param[in] src
- *   Pointer to global DRAM that stores src data.
- * @param[in] size
- *   The byte size of segment in the lowest dimension.
- * @param[in] seg_num_in
- *   The total count of data segments in the lowest dimension.
- * @param[in] seg_num_out
- *   The total count of data segments in the middle dimension.
- * @param[in] dst_str_in
- *   The data stride in bytes between segments in the lowest dimension of dst.
- * @param[in] dst_str_out
- *   The data stride in bytes between segments in the middle dimension of dst.
- * @param[in] src_str_in
- *   The data stride in bytes between segments in the lowest dimension of src.
- * @param[in] src_str_out
- *   The data stride in bytes between segments in the middle dimension of src.
- */
-template <typename T>
-__mlu_func__ void loadStr3D(T *dst, T *src, const int size,
-                            const int seg_num_in, const int seg_num_out,
-                            const int dst_str_in, const int dst_str_out,
-                            const int src_str_in, const int src_str_out) {
-  T *tmp_dst = dst;
-  T *tmp_src = src;
-
-  for (int i = 0; i < seg_num_out; ++i) {
-    loadStr2D(tmp_dst, tmp_src, size, dst_str_in, src_str_in, seg_num_in);
-    tmp_src += src_str_out;
-    tmp_dst += dst_str_out;
-  }
-}
-
-/*!
- * @brief stores data from NRAM to global DRAM with 2D pattern.
- *
- * @param[out] dst
- *   Pointer to global DRAM that stores dst data.
- * @param[in] src
- *   Pointer to NRAM that stores src data.
- * @param[in] size
- *   The byte size of segment in the lower dimension.
- * @param[in] dst_str
- *   The data stride in bytes between segments in the lower dimension of dst.
- * @param[in] src_str
- *   The data stride in bytes between segments in the lower dimension of src.
- * @param[in] seg_num
- *   The total count of data segments in the lower dimension.
- */
-template <typename T>
-__mlu_func__ void storeStr2D(T *dst, T *src, const int size, const int seg_num,
-                             const int dst_str, const int src_str) {
-  if ((size == dst_str && dst_str <= src_str) && dst_str * sizeof(T) <= 512) {
-    // gather data less than 512Bytes to improve IO efficiency
-    if (dst_str != src_str) {
-      __memcpy(src, src, size * sizeof(T), NRAM2NRAM, dst_str * sizeof(T),
-               src_str * sizeof(T), seg_num - 1);
-    }
-    __memcpy(dst, src, size * seg_num * sizeof(T), NRAM2GDRAM);
-  } else {
-    __memcpy(dst, src, size * sizeof(T), NRAM2GDRAM, dst_str * sizeof(T),
-             src_str * sizeof(T), seg_num - 1);
-  }
-}
-
-/*!
- * @brief stores data from NRAM to global DRAM with 3D pattern.
- *
- * @param[out] dst
- *   Pointer to global DRAM that stores dst data.
- * @param[in] src
- *   Pointer to NRAM that stores src data.
- * @param[in] size
- *   The byte size of segment in the lowest dimension.
- * @param[in] seg_num_in
- *   The total count of data segments in the lowest dimension.
- * @param[in] seg_num_out
- *   The total count of data segments in the middle dimension.
- * @param[in] dst_str_in
- *   The data stride in bytes between segments in the lowest dimension of dst.
- * @param[in] dst_str_out
- *   The data stride in bytes between segments in the middle dimension of dst.
- * @param[in] src_str_in
- *   The data stride in bytes between segments in the lowest dimension of src.
- * @param[in] src_str_out
- *   The data stride in bytes between segments in the middle dimension of src.
- */
-template <typename T>
-__mlu_func__ void storeStr3D(T *dst, T *src, const int size,
-                             const int seg_num_in, const int seg_num_out,
-                             const int dst_str_in, const int dst_str_out,
-                             const int src_str_in, const int src_str_out) {
-  T *tmp_dst = dst;
-  T *tmp_src = src;
-
-  for (int i = 0; i < seg_num_out; ++i) {
-    storeStr2D(tmp_dst, tmp_src, size, seg_num_in, dst_str_in, src_str_in);
-    tmp_src += src_str_out;
-    tmp_dst += dst_str_out;
-  }
-}
-
/*!
 * @brief Converts int32 to float32 data type.
 *
...
mmcv/ops/csrc/common/mlu/deform_roi_pool_mlu_kernel.mlu (deleted, 100644 → 0; diff collapsed, content not shown)
mmcv/ops/csrc/common/mlu/focal_loss_sigmoid_mlu_kernel.mlu (deleted, 100644 → 0; diff collapsed, content not shown)