OpenDAS / MMCV · Commits

Commit 91da9643, authored Aug 13, 2024 by limm

    support v2.1.0

parent 6f674c7e

Changes: 139 changed files in total; this page (1 of 7) shows 20 changed files with 229 additions and 2784 deletions (+229 −2784).
mmcv/cnn/bricks/conv_module.py                                +129     -3
mmcv/cnn/bricks/generalized_attention.py                        +1     -1
mmcv/cnn/bricks/norm.py                                        +12     -9
mmcv/cnn/bricks/padding.py                                      +3     -1
mmcv/cnn/bricks/plugin.py                                      +12     -9
mmcv/cnn/bricks/upsample.py                                    +11     -7
mmcv/cnn/bricks/wrappers.py                                     +7     -7
mmcv/image/geometric.py                                        +18    -16
mmcv/ops/__init__.py                                            +7     -0
mmcv/ops/bbox.py                                                +4     -0
mmcv/ops/box_iou_rotated.py                                     +9     -1
mmcv/ops/conv2d_gradfix.py                                      +8    -16
mmcv/ops/corner_pool.py                                         +4     -2
mmcv/ops/csrc/common/cuda/carafe_cuda_kernel.cuh                +4     -1
mmcv/ops/csrc/common/mlu/bbox_overlaps_mlu_kernel.mlu           +0   -322
mmcv/ops/csrc/common/mlu/carafe_mlu_kernel.mlu                  +0   -552
mmcv/ops/csrc/common/mlu/carafe_utils.hpp                       +0    -95
mmcv/ops/csrc/common/mlu/common_mlu_helper.hpp                  +0   -142
mmcv/ops/csrc/common/mlu/deform_roi_pool_mlu_kernel.mlu         +0   -712
mmcv/ops/csrc/common/mlu/focal_loss_sigmoid_mlu_kernel.mlu      +0   -888
mmcv/cnn/bricks/conv_module.py · View file @91da9643

```diff
 # Copyright (c) OpenMMLab. All rights reserved.
 import warnings
+from functools import partial
 from typing import Dict, Optional, Tuple, Union
 
 import torch
...
@@ -14,6 +15,56 @@ from .norm import build_norm_layer
 from .padding import build_padding_layer
 
 
+def efficient_conv_bn_eval_forward(bn: _BatchNorm,
+                                   conv: nn.modules.conv._ConvNd,
+                                   x: torch.Tensor):
+    """
+    Implementation based on https://arxiv.org/abs/2305.11624
+    "Tune-Mode ConvBN Blocks For Efficient Transfer Learning"
+    It leverages the associative law between convolution and affine transform,
+    i.e., normalize (weight conv feature) = (normalize weight) conv feature.
+    It works for Eval mode of ConvBN blocks during validation, and can be used
+    for training as well. It reduces memory and computation cost.
+
+    Args:
+        bn (_BatchNorm): a BatchNorm module.
+        conv (nn._ConvNd): a conv module
+        x (torch.Tensor): Input feature map.
+    """
+    # These lines of code are designed to deal with various cases
+    # like bn without affine transform, and conv without bias
+    weight_on_the_fly = conv.weight
+    if conv.bias is not None:
+        bias_on_the_fly = conv.bias
+    else:
+        bias_on_the_fly = torch.zeros_like(bn.running_var)
+
+    if bn.weight is not None:
+        bn_weight = bn.weight
+    else:
+        bn_weight = torch.ones_like(bn.running_var)
+
+    if bn.bias is not None:
+        bn_bias = bn.bias
+    else:
+        bn_bias = torch.zeros_like(bn.running_var)
+
+    # shape of [C_out, 1, 1, 1] in Conv2d
+    weight_coeff = torch.rsqrt(bn.running_var + bn.eps).reshape(
+        [-1] + [1] * (len(conv.weight.shape) - 1))
+    # shape of [C_out, 1, 1, 1] in Conv2d
+    coefff_on_the_fly = bn_weight.view_as(weight_coeff) * weight_coeff
+
+    # shape of [C_out, C_in, k, k] in Conv2d
+    weight_on_the_fly = weight_on_the_fly * coefff_on_the_fly
+    # shape of [C_out] in Conv2d
+    bias_on_the_fly = bn_bias + coefff_on_the_fly.flatten() * \
+        (bias_on_the_fly - bn.running_mean)
+
+    return conv._conv_forward(x, weight_on_the_fly, bias_on_the_fly)
+
+
 @MODELS.register_module()
 class ConvModule(nn.Module):
     """A conv block that bundles conv/norm/activation layers.
...
@@ -65,6 +116,9 @@ class ConvModule(nn.Module):
             sequence of "conv", "norm" and "act". Common examples are
             ("conv", "norm", "act") and ("act", "conv", "norm").
             Default: ('conv', 'norm', 'act').
+        efficient_conv_bn_eval (bool): Whether use efficient conv when the
+            consecutive bn is in eval mode (either training or testing), as
+            proposed in https://arxiv.org/abs/2305.11624 . Default: `False`.
     """
 
     _abbr_ = 'conv_block'
...
@@ -84,7 +138,8 @@ class ConvModule(nn.Module):
                  inplace: bool = True,
                  with_spectral_norm: bool = False,
                  padding_mode: str = 'zeros',
-                 order: tuple = ('conv', 'norm', 'act')):
+                 order: tuple = ('conv', 'norm', 'act'),
+                 efficient_conv_bn_eval: bool = False):
         super().__init__()
         assert conv_cfg is None or isinstance(conv_cfg, dict)
         assert norm_cfg is None or isinstance(norm_cfg, dict)
...
@@ -155,6 +210,8 @@ class ConvModule(nn.Module):
         else:
             self.norm_name = None  # type: ignore
 
+        self.turn_on_efficient_conv_bn_eval(efficient_conv_bn_eval)
+
         # build activation layer
         if self.with_activation:
             act_cfg_ = act_cfg.copy()  # type: ignore
...
@@ -200,13 +257,82 @@ class ConvModule(nn.Module):
                 x: torch.Tensor,
                 activate: bool = True,
                 norm: bool = True) -> torch.Tensor:
-        for layer in self.order:
+        layer_index = 0
+        while layer_index < len(self.order):
+            layer = self.order[layer_index]
             if layer == 'conv':
                 if self.with_explicit_padding:
                     x = self.padding_layer(x)
-                x = self.conv(x)
+                # if the next operation is norm and we have a norm layer in
+                # eval mode and we have enabled `efficient_conv_bn_eval` for
+                # the conv operator, then activate the optimized forward and
+                # skip the next norm operator since it has been fused
+                if layer_index + 1 < len(self.order) and \
+                        self.order[layer_index + 1] == 'norm' and norm and \
+                        self.with_norm and not self.norm.training and \
+                        self.efficient_conv_bn_eval_forward is not None:
+                    self.conv.forward = partial(
+                        self.efficient_conv_bn_eval_forward, self.norm,
+                        self.conv)
+                    layer_index += 1
+                    x = self.conv(x)
+                    del self.conv.forward
+                else:
+                    x = self.conv(x)
             elif layer == 'norm' and norm and self.with_norm:
                 x = self.norm(x)
             elif layer == 'act' and activate and self.with_activation:
                 x = self.activate(x)
+            layer_index += 1
         return x
 
+    def turn_on_efficient_conv_bn_eval(self, efficient_conv_bn_eval=True):
+        # efficient_conv_bn_eval works for conv + bn
+        # with `track_running_stats` option
+        if efficient_conv_bn_eval and self.norm \
+                and isinstance(self.norm, _BatchNorm) \
+                and self.norm.track_running_stats:
+            self.efficient_conv_bn_eval_forward = efficient_conv_bn_eval_forward  # noqa: E501
+        else:
+            self.efficient_conv_bn_eval_forward = None  # type: ignore
+
+    @staticmethod
+    def create_from_conv_bn(conv: torch.nn.modules.conv._ConvNd,
+                            bn: torch.nn.modules.batchnorm._BatchNorm,
+                            efficient_conv_bn_eval=True) -> 'ConvModule':
+        """Create a ConvModule from a conv and a bn module."""
+        self = ConvModule.__new__(ConvModule)
+        super(ConvModule, self).__init__()
+
+        self.conv_cfg = None
+        self.norm_cfg = None
+        self.act_cfg = None
+        self.inplace = False
+        self.with_spectral_norm = False
+        self.with_explicit_padding = False
+        self.order = ('conv', 'norm', 'act')
+
+        self.with_norm = True
+        self.with_activation = False
+        self.with_bias = conv.bias is not None
+
+        # build convolution layer
+        self.conv = conv
+        # export the attributes of self.conv to a higher level for convenience
+        self.in_channels = self.conv.in_channels
+        self.out_channels = self.conv.out_channels
+        self.kernel_size = self.conv.kernel_size
+        self.stride = self.conv.stride
+        self.padding = self.conv.padding
+        self.dilation = self.conv.dilation
+        self.transposed = self.conv.transposed
+        self.output_padding = self.conv.output_padding
+        self.groups = self.conv.groups
+
+        # build normalization layers
+        self.norm_name, norm = 'bn', bn
+        self.add_module(self.norm_name, norm)
+
+        self.turn_on_efficient_conv_bn_eval(efficient_conv_bn_eval)
+
+        return self
```
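In eval mode, BatchNorm is a per-channel affine map, so it can be folded into the convolution on the fly: with s = gamma / sqrt(running_var + eps), the fused kernel is W' = s * W and the fused bias is b' = beta + s * (b - running_mean), which is exactly what `efficient_conv_bn_eval_forward` computes. A minimal sanity-check sketch, not part of the commit, assuming an MMCV build with this patch applied:

```python
import torch
from mmcv.cnn import ConvModule

m = ConvModule(3, 8, 3, padding=1, norm_cfg=dict(type='BN'),
               efficient_conv_bn_eval=True).eval()
x = torch.randn(2, 3, 16, 16)
with torch.no_grad():
    y_fused = m(x)                            # conv+bn fused on the fly
    m.turn_on_efficient_conv_bn_eval(False)   # fall back to conv -> bn
    y_plain = m(x)
# the two paths should agree up to floating-point error
assert torch.allclose(y_fused, y_plain, atol=1e-5)
```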
mmcv/cnn/bricks/generalized_attention.py · View file @91da9643

```diff
@@ -371,7 +371,7 @@ class GeneralizedAttention(nn.Module):
                     contiguous().\
                     view(1, 1, h*w, h_kv*w_kv)
 
-                energy = energy.masked_fill_(cur_local_constraint_map,
+                energy = energy.masked_fill_(cur_local_constraint_map.bool(),
                                              float('-inf'))
 
         attention = F.softmax(energy, 3)
...
```
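This one-line fix casts the constraint map to `bool` before `masked_fill_`: recent PyTorch releases require the mask argument to be a `torch.bool` tensor and reject integer or uint8 masks. A tiny illustration of the cast (hypothetical values, not from the source):

```python
import torch

energy = torch.zeros(1, 1, 4, 4)
mask = torch.tensor([0, 1, 0, 1])                # integer mask
energy.masked_fill_(mask.bool(), float('-inf'))  # cast to bool before filling
```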
mmcv/cnn/bricks/norm.py · View file @91da9643

```diff
@@ -98,14 +98,17 @@ def build_norm_layer(cfg: Dict,
     layer_type = cfg_.pop('type')
 
-    # Switch registry to the target scope. If `norm_layer` cannot be found
-    # in the registry, fallback to search `norm_layer` in the
-    # mmengine.MODELS.
-    with MODELS.switch_scope_and_registry(None) as registry:
-        norm_layer = registry.get(layer_type)
-    if norm_layer is None:
-        raise KeyError(f'Cannot find {norm_layer} in registry under scope '
-                       f'name {registry.scope}')
+    if inspect.isclass(layer_type):
+        norm_layer = layer_type
+    else:
+        # Switch registry to the target scope. If `norm_layer` cannot be found
+        # in the registry, fallback to search `norm_layer` in the
+        # mmengine.MODELS.
+        with MODELS.switch_scope_and_registry(None) as registry:
+            norm_layer = registry.get(layer_type)
+        if norm_layer is None:
+            raise KeyError(f'Cannot find {norm_layer} in registry under '
+                           f'scope name {registry.scope}')
     abbr = infer_abbr(norm_layer)
 
     assert isinstance(postfix, (int, str))
...
@@ -113,7 +116,7 @@ def build_norm_layer(cfg: Dict,
     requires_grad = cfg_.pop('requires_grad', True)
     cfg_.setdefault('eps', 1e-5)
 
-    if layer_type != 'GN':
+    if norm_layer is not nn.GroupNorm:
         layer = norm_layer(num_features, **cfg_)
         if layer_type == 'SyncBN' and hasattr(layer, '_specify_ddp_gpu_num'):
             layer._specify_ddp_gpu_num(1)
...
```
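With the `inspect.isclass` branch, `type` may now be a class object as well as a registry string; the same pattern is applied to `build_padding_layer`, `build_plugin_layer`, and `build_upsample_layer` below. A hedged sketch of both call styles, assuming this patch is installed:

```python
import torch.nn as nn
from mmcv.cnn import build_norm_layer

name1, bn1 = build_norm_layer(dict(type='BN'), 8)            # registry string
name2, bn2 = build_norm_layer(dict(type=nn.BatchNorm2d), 8)  # class object
assert isinstance(bn1, nn.BatchNorm2d) and isinstance(bn2, nn.BatchNorm2d)
```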
mmcv/cnn/bricks/padding.py · View file @91da9643

```diff
 # Copyright (c) OpenMMLab. All rights reserved.
+import inspect
 from typing import Dict
 
 import torch.nn as nn
...
@@ -27,7 +28,8 @@ def build_padding_layer(cfg: Dict, *args, **kwargs) -> nn.Module:
     cfg_ = cfg.copy()
     padding_type = cfg_.pop('type')
 
+    if inspect.isclass(padding_type):
+        return padding_type(*args, **kwargs, **cfg_)
     # Switch registry to the target scope. If `padding_layer` cannot be found
     # in the registry, fallback to search `padding_layer` in the
     # mmengine.MODELS.
...
```
mmcv/cnn/bricks/plugin.py · View file @91da9643

```diff
@@ -79,15 +79,18 @@ def build_plugin_layer(cfg: Dict,
     cfg_ = cfg.copy()
 
     layer_type = cfg_.pop('type')
 
-    # Switch registry to the target scope. If `plugin_layer` cannot be found
-    # in the registry, fallback to search `plugin_layer` in the
-    # mmengine.MODELS.
-    with MODELS.switch_scope_and_registry(None) as registry:
-        plugin_layer = registry.get(layer_type)
-    if plugin_layer is None:
-        raise KeyError(f'Cannot find {plugin_layer} in registry under scope '
-                       f'name {registry.scope}')
+    if inspect.isclass(layer_type):
+        plugin_layer = layer_type
+    else:
+        # Switch registry to the target scope. If `plugin_layer` cannot be
+        # found in the registry, fallback to search `plugin_layer` in the
+        # mmengine.MODELS.
+        with MODELS.switch_scope_and_registry(None) as registry:
+            plugin_layer = registry.get(layer_type)
+        if plugin_layer is None:
+            raise KeyError(
+                f'Cannot find {plugin_layer} in registry under scope '
+                f'name {registry.scope}')
     abbr = infer_abbr(plugin_layer)
 
     assert isinstance(postfix, (int, str))
...
```
mmcv/cnn/bricks/upsample.py · View file @91da9643

```diff
 # Copyright (c) OpenMMLab. All rights reserved.
+import inspect
 from typing import Dict
 
 import torch
...
@@ -76,15 +77,18 @@ def build_upsample_layer(cfg: Dict, *args, **kwargs) -> nn.Module:
     layer_type = cfg_.pop('type')
 
+    if inspect.isclass(layer_type):
+        upsample = layer_type
     # Switch registry to the target scope. If `upsample` cannot be found
     # in the registry, fallback to search `upsample` in the
     # mmengine.MODELS.
-    with MODELS.switch_scope_and_registry(None) as registry:
-        upsample = registry.get(layer_type)
-    if upsample is None:
-        raise KeyError(f'Cannot find {upsample} in registry under scope '
-                       f'name {registry.scope}')
+    else:
+        with MODELS.switch_scope_and_registry(None) as registry:
+            upsample = registry.get(layer_type)
+        if upsample is None:
+            raise KeyError(f'Cannot find {upsample} in registry under scope '
+                           f'name {registry.scope}')
     if upsample is nn.Upsample:
         cfg_['mode'] = layer_type
     layer = upsample(*args, **kwargs, **cfg_)
     return layer
```
mmcv/cnn/bricks/wrappers.py · View file @91da9643

```diff
@@ -41,7 +41,7 @@ class NewEmptyTensorOp(torch.autograd.Function):
 class Conv2d(nn.Conv2d):
 
     def forward(self, x: torch.Tensor) -> torch.Tensor:
-        if x.numel() == 0 and obsolete_torch_version(TORCH_VERSION, (1, 4)):
+        if obsolete_torch_version(TORCH_VERSION, (1, 4)) and x.numel() == 0:
             out_shape = [x.shape[0], self.out_channels]
             for i, k, p, s, d in zip(x.shape[-2:], self.kernel_size,
                                      self.padding, self.stride, self.dilation):
...
@@ -62,7 +62,7 @@ class Conv2d(nn.Conv2d):
 class Conv3d(nn.Conv3d):
 
     def forward(self, x: torch.Tensor) -> torch.Tensor:
-        if x.numel() == 0 and obsolete_torch_version(TORCH_VERSION, (1, 4)):
+        if obsolete_torch_version(TORCH_VERSION, (1, 4)) and x.numel() == 0:
             out_shape = [x.shape[0], self.out_channels]
             for i, k, p, s, d in zip(x.shape[-3:], self.kernel_size,
                                      self.padding, self.stride, self.dilation):
...
@@ -84,7 +84,7 @@ class Conv3d(nn.Conv3d):
 class ConvTranspose2d(nn.ConvTranspose2d):
 
     def forward(self, x: torch.Tensor) -> torch.Tensor:
-        if x.numel() == 0 and obsolete_torch_version(TORCH_VERSION, (1, 4)):
+        if obsolete_torch_version(TORCH_VERSION, (1, 4)) and x.numel() == 0:
             out_shape = [x.shape[0], self.out_channels]
             for i, k, p, s, d, op in zip(x.shape[-2:], self.kernel_size,
                                          self.padding, self.stride,
...
@@ -106,7 +106,7 @@ class ConvTranspose2d(nn.ConvTranspose2d):
 class ConvTranspose3d(nn.ConvTranspose3d):
 
     def forward(self, x: torch.Tensor) -> torch.Tensor:
-        if x.numel() == 0 and obsolete_torch_version(TORCH_VERSION, (1, 4)):
+        if obsolete_torch_version(TORCH_VERSION, (1, 4)) and x.numel() == 0:
             out_shape = [x.shape[0], self.out_channels]
             for i, k, p, s, d, op in zip(x.shape[-3:], self.kernel_size,
                                          self.padding, self.stride,
...
@@ -127,7 +127,7 @@ class MaxPool2d(nn.MaxPool2d):
 
     def forward(self, x: torch.Tensor) -> torch.Tensor:
         # PyTorch 1.9 does not support empty tensor inference yet
-        if x.numel() == 0 and obsolete_torch_version(TORCH_VERSION, (1, 9)):
+        if obsolete_torch_version(TORCH_VERSION, (1, 9)) and x.numel() == 0:
             out_shape = list(x.shape[:2])
             for i, k, p, s, d in zip(x.shape[-2:], _pair(self.kernel_size),
                                      _pair(self.padding), _pair(self.stride),
...
@@ -145,7 +145,7 @@ class MaxPool3d(nn.MaxPool3d):
 
     def forward(self, x: torch.Tensor) -> torch.Tensor:
         # PyTorch 1.9 does not support empty tensor inference yet
-        if x.numel() == 0 and obsolete_torch_version(TORCH_VERSION, (1, 9)):
+        if obsolete_torch_version(TORCH_VERSION, (1, 9)) and x.numel() == 0:
             out_shape = list(x.shape[:2])
             for i, k, p, s, d in zip(x.shape[-3:], _triple(self.kernel_size),
                                      _triple(self.padding),
...
@@ -164,7 +164,7 @@ class Linear(torch.nn.Linear):
 
     def forward(self, x: torch.Tensor) -> torch.Tensor:
         # empty tensor forward of Linear layer is supported in Pytorch 1.6
-        if x.numel() == 0 and obsolete_torch_version(TORCH_VERSION, (1, 5)):
+        if obsolete_torch_version(TORCH_VERSION, (1, 5)) and x.numel() == 0:
             out_shape = [x.shape[0], self.out_features]
             empty = NewEmptyTensorOp.apply(x, out_shape)
             if self.training:
...
```
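Each wrapper change only swaps the operands of the `and`: since Python's `and` short-circuits, the version check now runs first and `x.numel()` is only consulted on the old PyTorch versions that actually need the empty-tensor workaround, so behavior is unchanged. The workaround itself computes the output shape of an empty input analytically. A sketch of the Conv2d case, using the standard convolution-arithmetic formula (the loop body is elided in the diff above, so the names here are illustrative):

```python
import torch

def conv2d_empty_out_shape(x, out_channels, kernel_size, padding, stride,
                           dilation):
    # Standard convolution arithmetic, applied per spatial dimension.
    out_shape = [x.shape[0], out_channels]
    for i, k, p, s, d in zip(x.shape[-2:], kernel_size, padding, stride,
                             dilation):
        out_shape.append((i + 2 * p - (d * (k - 1) + 1)) // s + 1)
    return out_shape

x = torch.empty(0, 3, 32, 32)
print(conv2d_empty_out_shape(x, 8, (3, 3), (1, 1), (1, 1), (1, 1)))
# [0, 8, 32, 32]
```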
mmcv/image/geometric.py · View file @91da9643

```diff
@@ -16,13 +16,13 @@ except ImportError:
 
 def _scale_size(
     size: Tuple[int, int],
-    scale: Union[float, int, tuple],
+    scale: Union[float, int, Tuple[float, float], Tuple[int, int]],
 ) -> Tuple[int, int]:
     """Rescale a size by a ratio.
 
     Args:
         size (tuple[int]): (w, h).
-        scale (float | tuple(float)): Scaling factor.
+        scale (float | int | tuple(float) | tuple(int)): Scaling factor.
 
     Returns:
         tuple[int]: scaled size.
...
@@ -128,7 +128,8 @@ def imresize_to_multiple(
     img: np.ndarray,
     divisor: Union[int, Tuple[int, int]],
     size: Union[int, Tuple[int, int], None] = None,
-    scale_factor: Union[float, Tuple[float, float], None] = None,
+    scale_factor: Union[float, int, Tuple[float, float], Tuple[int, int],
+                        None] = None,
     keep_ratio: bool = False,
     return_scale: bool = False,
     interpolation: str = 'bilinear',
...
@@ -145,9 +146,10 @@ def imresize_to_multiple(
             divisor. If divisor is a tuple, divisor should be
             (w_divisor, h_divisor).
         size (None | int | tuple[int]): Target size (w, h). Default: None.
-        scale_factor (None | float | tuple[float]): Multiplier for spatial
-            size. Should match input size if it is a tuple and the 2D style is
-            (w_scale_factor, h_scale_factor). Default: None.
+        scale_factor (None | float | int | tuple[float] | tuple[int]):
+            Multiplier for spatial size. Should match input size if it is a
+            tuple and the 2D style is (w_scale_factor, h_scale_factor).
+            Default: None.
         keep_ratio (bool): Whether to keep the aspect ratio when resizing the
             image. Default: False.
         return_scale (bool): Whether to return `w_scale` and `h_scale`.
...
@@ -215,16 +217,16 @@ def imresize_like(
 def rescale_size(old_size: tuple,
-                 scale: Union[float, int, tuple],
+                 scale: Union[float, int, Tuple[int, int]],
                  return_scale: bool = False) -> tuple:
     """Calculate the new size to be rescaled to.
 
     Args:
         old_size (tuple[int]): The old size (w, h) of image.
-        scale (float | tuple[int]): The scaling factor or maximum size.
-            If it is a float number, then the image will be rescaled by this
-            factor, else if it is a tuple of 2 integers, then the image will
-            be rescaled as large as possible within the scale.
+        scale (float | int | tuple[int]): The scaling factor or maximum size.
+            If it is a float number or an integer, then the image will be
+            rescaled by this factor, else if it is a tuple of 2 integers, then
+            the image will be rescaled as large as possible within the scale.
         return_scale (bool): Whether to return the scaling factor besides the
             rescaled image size.
...
@@ -255,7 +257,7 @@ def rescale_size(old_size: tuple,
 def imrescale(
     img: np.ndarray,
-    scale: Union[float, Tuple[int, int]],
+    scale: Union[float, int, Tuple[int, int]],
     return_scale: bool = False,
     interpolation: str = 'bilinear',
     backend: Optional[str] = None,
...
@@ -264,10 +266,10 @@ def imrescale(
     Args:
         img (ndarray): The input image.
-        scale (float | tuple[int]): The scaling factor or maximum size.
-            If it is a float number, then the image will be rescaled by this
-            factor, else if it is a tuple of 2 integers, then the image will
-            be rescaled as large as possible within the scale.
+        scale (float | int | tuple[int]): The scaling factor or maximum size.
+            If it is a float number or an integer, then the image will be
+            rescaled by this factor, else if it is a tuple of 2 integers, then
+            the image will be rescaled as large as possible within the scale.
         return_scale (bool): Whether to return the scaling factor besides the
             rescaled image.
         interpolation (str): Same as :func:`resize`.
...
```
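These are annotation and docstring widenings; integer scale factors already worked numerically and are now typed as valid inputs. A short usage sketch of `rescale_size` under that reading (expected values follow the rounding rule `int(dim * scale + 0.5)`):

```python
from mmcv.image import rescale_size

print(rescale_size((1024, 768), 0.5))  # (512, 384): float factor
print(rescale_size((1024, 768), 2))    # (2048, 1536): int factor, now typed
print(rescale_size((1024, 768), (512, 512), return_scale=True))
# ((512, 384), 0.5): fits inside the (512, 512) bound, keeping aspect ratio
```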
mmcv/ops/__init__.py · View file @91da9643

```diff
 # Copyright (c) OpenMMLab. All rights reserved.
+from mmcv.utils import IS_MLU_AVAILABLE
 from .active_rotated_filter import active_rotated_filter
 from .assign_score_withk import assign_score_withk
 from .ball_query import ball_query
...
@@ -109,3 +110,9 @@ __all__ = [
     'PrRoIPool', 'prroi_pool', 'bias_act', 'filtered_lrelu', 'conv2d',
     'conv_transpose2d', 'filter2d', 'upsample2d', 'BezierAlign', 'bezier_align'
 ]
+
+if IS_MLU_AVAILABLE:
+    from .deform_conv import DeformConv2dPack_MLU  # noqa:F401
+    from .modulated_deform_conv import \
+        ModulatedDeformConv2dPack_MLU  # noqa:F401
+    __all__.extend(['ModulatedDeformConv2dPack_MLU', 'DeformConv2dPack_MLU'])
```
mmcv/ops/bbox.py · View file @91da9643

```diff
@@ -116,6 +116,10 @@ def bbox_overlaps(bboxes1: torch.Tensor,
     if rows * cols == 0:
         return ious
 
+    if bboxes1.device.type == 'cpu' and torch.__version__ == 'parrots':
+        return _bbox_overlaps_cpu(
+            bboxes1, bboxes2, mode=mode, aligned=aligned, offset=offset)
+
     ext_module.bbox_overlaps(
         bboxes1, bboxes2, ious, mode=mode_flag, aligned=aligned, offset=offset)
...
```
mmcv/ops/box_iou_rotated.py · View file @91da9643

```diff
@@ -133,12 +133,20 @@ def box_iou_rotated(bboxes1: torch.Tensor,
     if aligned:
         ious = bboxes1.new_zeros(rows)
     else:
-        ious = bboxes1.new_zeros(rows * cols)
+        if bboxes1.device.type == 'mlu':
+            ious = bboxes1.new_zeros([rows, cols])
+        else:
+            ious = bboxes1.new_zeros(rows * cols)
     if not clockwise:
         flip_mat = bboxes1.new_ones(bboxes1.shape[-1])
         flip_mat[-1] = -1
         bboxes1 = bboxes1 * flip_mat
         bboxes2 = bboxes2 * flip_mat
+    if bboxes1.device.type == 'npu':
+        scale_mat = bboxes1.new_ones(bboxes1.shape[-1])
+        scale_mat[-1] = 1.0 / 0.01745329252
+        bboxes1 = bboxes1 * scale_mat
+        bboxes2 = bboxes2 * scale_mat
     bboxes1 = bboxes1.contiguous()
     bboxes2 = bboxes2.contiguous()
     ext_module.box_iou_rotated(
...
```
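The magic constant here is pi/180: multiplying the last box component (the angle, stored in radians) by 1.0 / 0.01745329252 converts it to degrees, presumably the unit the NPU kernel expects. A quick check of that reading:

```python
import math

deg_per_rad = 1.0 / 0.01745329252  # the constant used above
assert abs(deg_per_rad - 180.0 / math.pi) < 1e-6
print(deg_per_rad)                 # ~57.2957795 degrees per radian
```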
mmcv/ops/conv2d_gradfix.py · View file @91da9643

```diff
@@ -16,6 +16,7 @@ from typing import Dict, Optional, Tuple, Union
 import torch
 from mmengine.utils import digit_version
+from mmengine.utils.dl_utils.parrots_wrapper import is_rocm_pytorch
 
 enabled = True
 weight_gradients_disabled = False
...
@@ -283,28 +284,19 @@ def _conv2d_gradfix(
                     output_padding=output_padding,
                     output_mask=[0, 1, 0])[1]
         else:
-            is_rocm_pytorch = False
-            try:
-                from torch.utils.cpp_extension import ROCM_HOME
-                is_rocm_pytorch = True if ((torch.version.hip is not None) and
-                                           (ROCM_HOME is not None)) else False
-            except ImportError:
-                pass
-
-            name = ''
-            flags = []
-            if is_rocm_pytorch:
-                name = ('aten::miopen_convolution_transpose_backward_weight'
-                        if transpose else
-                        'aten::miopen_convolution_backward_weight')
+            if is_rocm_pytorch():
+                name = 'aten::miopen_convolution_transpose_backward_weight'
+                if not transpose:
+                    name = 'aten::miopen_convolution_backward_weight'
                 flags = [
                     torch.backends.cudnn.benchmark,
                     torch.backends.cudnn.deterministic
                 ]
             else:
                 # General case => cuDNN.
                 name = ('aten::cudnn_convolution_transpose_backward_weight'
                         if transpose else
                         'aten::cudnn_convolution_backward_weight')
                 flags = [
                     torch.backends.cudnn.benchmark,
                     torch.backends.cudnn.deterministic,
...
```
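The refactor replaces the inline ROCm probe with mmengine's `is_rocm_pytorch()` helper. As a rough sketch, the removed logic amounted to the following (a hypothetical standalone version reconstructed from the deleted lines, not mmengine's actual source):

```python
import torch

def _is_rocm_pytorch() -> bool:
    # True only when PyTorch was built for ROCm/HIP and ROCm is installed.
    try:
        from torch.utils.cpp_extension import ROCM_HOME
    except ImportError:
        return False
    return torch.version.hip is not None and ROCM_HOME is not None
```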
mmcv/ops/corner_pool.py · View file @91da9643

```diff
 # Copyright (c) OpenMMLab. All rights reserved.
 import torch
-from torch import Tensor, nn
 from mmengine.utils import digit_version
+from torch import Tensor, nn
 
 _mode_dict = {'top': 0, 'bottom': 1, 'left': 2, 'right': 3}
...
@@ -70,7 +71,8 @@ class CornerPool(nn.Module):
         self.mode = mode
 
     def forward(self, x: Tensor) -> Tensor:
-        if torch.__version__ != 'parrots' and digit_version(torch.__version__) >= digit_version('1.5.0'):
+        if (torch.__version__ != 'parrots'
+                and digit_version(torch.__version__) >= digit_version('1.5.0')):
             dim, flip = self.cummax_dim_flip[self.mode]
             if flip:
                 x = x.flip(dim)
...
```
mmcv/ops/csrc/common/cuda/carafe_cuda_kernel.cuh · View file @91da9643

```diff
@@ -2,6 +2,8 @@
 #ifndef CARAFE_CUDA_KERNEL_CUH
 #define CARAFE_CUDA_KERNEL_CUH
 
+#include <ATen/cuda/DeviceUtils.cuh>
+
 #ifdef MMCV_USE_PARROTS
 #include "parrots_cuda_helper.hpp"
 #else
...
@@ -56,7 +58,8 @@ template <>
 __device__ __forceinline__ phalf warpReduceSum(phalf val) {
   for (int offset = WARP_SIZE / 2; offset > 0; offset /= 2)
 #ifdef MMCV_WITH_HIP
-    __PHALF(val) += __shfl_down(val, offset);
+    // Using PyTorch's macro for half support
+    __PHALF(val) += WARP_SHFL_DOWN(val, offset);
 #else
     __PHALF(val) +=
         __shfl_down_sync(FULL_MASK, __PHALF(val).operator __half(), offset);
...
```
mmcv/ops/csrc/common/mlu/bbox_overlaps_mlu_kernel.mlu · deleted (100644 → 0) · View file @6f674c7e

The entire 322-line file is removed; its contents were:

```cpp
/*************************************************************************
* Copyright (C) 2021 Cambricon.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*************************************************************************/
#include <float.h>
#include "common_mlu_helper.hpp"
#define COORD_NUM 4
__nram__ char nmem_buf[MAX_NRAM_SIZE];
template <typename T>
__mlu_func__ void computeDiv(void *nram_dst, void *nram_src0, void *nram_src1,
void *nram_addition, const int32_t deal_num) {
__bang_active_reciphp((T *)nram_dst, (T *)nram_src1, deal_num);
__bang_mul((T *)nram_dst, (T *)nram_src0, (T *)nram_dst, deal_num);
}
template <>
__mlu_func__ void computeDiv<half>(void *nram_dst, void *nram_src0,
void *nram_src1, void *nram_addition,
const int32_t deal_num) {
__bang_half2float((float *)nram_addition, (half *)nram_src1, deal_num);
__bang_active_reciphp((float *)nram_addition, (float *)nram_addition,
deal_num);
__bang_float2half_rd((half *)nram_src1, (float *)nram_addition, deal_num);
__bang_mul((half *)nram_dst, (half *)nram_src0, (half *)nram_src1, deal_num);
}
template <typename T>
__mlu_func__ void bboxOverlapsWorkflow(
T *vec_b1_x1, T *vec_b1_y1, T *vec_b1_x2, T *vec_b1_y2, T *vec_b2_x1,
T *vec_b2_y1, T *vec_b2_x2, T *vec_b2_y2, T *vec_left, T *vec_right,
T *vec_top, T *vec_bottom, const T *bbox1, const T *bbox2, void *ious,
const int32_t offset, const int32_t mode, const int32_t batches_stride,
const int32_t num_bbox1, const int32_t num_bbox2, const bool aligned) {
int32_t task_batch_stride = (num_bbox1 + taskDim - 1) / taskDim;
int32_t batch_start = taskId * task_batch_stride;
int32_t batch_per_task = batch_start + task_batch_stride < num_bbox1
? task_batch_stride
: num_bbox1 - batch_start;
batch_per_task = batch_per_task > 0 ? batch_per_task : (0);
if (aligned) {
int32_t num_loop_cpy = batch_per_task / batches_stride;
int32_t num_rem_cpy_batches = batch_per_task % batches_stride;
num_loop_cpy = num_rem_cpy_batches > 0 ? num_loop_cpy + 1 : num_loop_cpy;
for (int32_t i = 0; i < num_loop_cpy; i++) {
int32_t index = batch_start + i * batches_stride;
int32_t handle_batches = index + batches_stride > num_bbox1
? num_rem_cpy_batches
: batches_stride;
int32_t b1 = index;
int32_t b2 = index;
int32_t base1 = b1 * COORD_NUM;
__memcpy(vec_b1_x1, &bbox1[base1], sizeof(T), GDRAM2NRAM, sizeof(T),
COORD_NUM * sizeof(T), handle_batches - 1);
__memcpy(vec_b1_y1, &bbox1[base1 + 1], sizeof(T), GDRAM2NRAM, sizeof(T),
COORD_NUM * sizeof(T), handle_batches - 1);
__memcpy(vec_b1_x2, &bbox1[base1 + 2], sizeof(T), GDRAM2NRAM, sizeof(T),
COORD_NUM * sizeof(T), handle_batches - 1);
__memcpy(vec_b1_y2, &bbox1[base1 + 3], sizeof(T), GDRAM2NRAM, sizeof(T),
COORD_NUM * sizeof(T), handle_batches - 1);
int32_t base2 = b2 * COORD_NUM;
__memcpy(vec_b2_x1, &bbox2[base2], sizeof(T), GDRAM2NRAM, sizeof(T),
COORD_NUM * sizeof(T), handle_batches - 1);
__memcpy(vec_b2_y1, &bbox2[base2 + 1], sizeof(T), GDRAM2NRAM, sizeof(T),
COORD_NUM * sizeof(T), handle_batches - 1);
__memcpy(vec_b2_x2, &bbox2[base2 + 2], sizeof(T), GDRAM2NRAM, sizeof(T),
COORD_NUM * sizeof(T), handle_batches - 1);
__memcpy(vec_b2_y2, &bbox2[base2 + 3], sizeof(T), GDRAM2NRAM, sizeof(T),
COORD_NUM * sizeof(T), handle_batches - 1);
// get the width and height
__bang_maxequal(vec_left, vec_b1_x1, vec_b2_x1, batches_stride);
__bang_minequal(vec_right, vec_b1_x2, vec_b2_x2, batches_stride);
__bang_maxequal(vec_top, vec_b1_y1, vec_b2_y1, batches_stride);
__bang_minequal(vec_bottom, vec_b1_y2, vec_b2_y2, batches_stride);
// right - left + offset ---> left
__bang_sub(vec_left, vec_right, vec_left, batches_stride);
__bang_add_scalar(vec_left, vec_left, (T)offset, batches_stride);
// bottom - top + offset ---> right
__bang_sub(vec_right, vec_bottom, vec_top, batches_stride);
__bang_add_scalar(vec_right, vec_right, (T)offset, batches_stride);
// zero vector ---> bottom
__bang_write_value(vec_bottom, batches_stride, 0.f);
// width --> vec_left
__bang_maxequal(vec_left, vec_bottom, vec_left, batches_stride);
T *width = vec_left;
// height --> vec_right
__bang_maxequal(vec_right, vec_bottom, vec_right, batches_stride);
T *height = vec_right;
// get the b1_area
// (b1_x2 - b1_x1 + offset) ---> vec_top
__bang_sub(vec_top, vec_b1_x2, vec_b1_x1, batches_stride);
__bang_add_scalar(vec_top, vec_top, (T)offset, batches_stride);
// (b1_y2 - b1_y1 + offset) ---> vec_bottom
__bang_sub(vec_bottom, vec_b1_y2, vec_b1_y1, batches_stride);
__bang_add_scalar(vec_bottom, vec_bottom, (T)offset, batches_stride);
// b1_area = (b1_x2 - b1_x1 + offset) * (b1_y2 - b1_y1 + offset)
// ---> vec_top;
__bang_mul(vec_top, vec_top, vec_bottom, batches_stride);
T *b1_area = vec_top;
// get the b2_area
// (b2_x2 - b2_x1 + offset) ---> b2_x1
__bang_sub(vec_b2_x1, vec_b2_x2, vec_b2_x1, batches_stride);
__bang_add_scalar(vec_b2_x1, vec_b2_x1, (T)offset, batches_stride);
// (b2_y2 - b2_y1 + offset) ---> b2_y1
__bang_sub(vec_b2_y1, vec_b2_y2, vec_b2_y1, batches_stride);
__bang_add_scalar(vec_b2_y1, vec_b2_y1, (T)offset, batches_stride);
// b2_area = (b2_x2 - b2_x1 + offset) * (b2_y2 - b2_y1 + offset)
// ---> b2_x1;
__bang_mul(vec_b2_x1, vec_b2_x1, vec_b2_y1, batches_stride);
T *b2_area = vec_b2_x1;
// inter_s = width * height
__bang_mul(height, width, height, batches_stride);
T *inter_s = height;
// offset vector ---> vec_b2_y1
__bang_write_value(vec_b2_y1, batches_stride, T(offset));
T *vec_offset = vec_b2_y1;
if (mode == 0) {
__bang_add(b1_area, b1_area, b2_area, batches_stride);
__bang_sub(b1_area, b1_area, inter_s, batches_stride);
__bang_maxequal(b1_area, vec_offset, b1_area, batches_stride);
} else {
__bang_maxequal(b1_area, vec_offset, b1_area, batches_stride);
}
T *base_s = b1_area;
// ious = inter_s / base_s
computeDiv<T>(width, inter_s, base_s, vec_b2_x2, batches_stride);
__memcpy((T *)ious + index, width, handle_batches * sizeof(T),
NRAM2GDRAM);
}
} else {
int32_t num_loop_cpy = num_bbox2 / batches_stride;
int32_t num_rem_cpy_batches = num_bbox2 % batches_stride;
num_loop_cpy = num_rem_cpy_batches > 0 ? num_loop_cpy + 1 : num_loop_cpy;
for (int32_t i = 0; i < batch_per_task; i++) {
int32_t index1 = batch_start + i;
int32_t b1 = index1;
int32_t base1 = b1 * COORD_NUM;
// set bbox1 and bbox2 to nram
__bang_write_value(vec_b1_x1, batches_stride, bbox1[base1]);
__bang_write_value(vec_b1_y1, batches_stride, bbox1[base1 + 1]);
__bang_write_value(vec_b1_x2, batches_stride, bbox1[base1 + 2]);
__bang_write_value(vec_b1_y2, batches_stride, bbox1[base1 + 3]);
for (int32_t j = 0; j < num_loop_cpy; j++) {
int32_t index2 = j * batches_stride;
int32_t handle_batches = index2 + batches_stride > num_bbox2
? num_rem_cpy_batches
: batches_stride;
int32_t b2 = index2;
int32_t base2 = b2 * COORD_NUM;
// copy bbox2 to nram
__memcpy(vec_b2_x1, &bbox2[base2], sizeof(T), GDRAM2NRAM, sizeof(T),
COORD_NUM * sizeof(T), handle_batches - 1);
__memcpy(vec_b2_y1, &bbox2[base2 + 1], sizeof(T), GDRAM2NRAM, sizeof(T),
COORD_NUM * sizeof(T), handle_batches - 1);
__memcpy(vec_b2_x2, &bbox2[base2 + 2], sizeof(T), GDRAM2NRAM, sizeof(T),
COORD_NUM * sizeof(T), handle_batches - 1);
__memcpy(vec_b2_y2, &bbox2[base2 + 3], sizeof(T), GDRAM2NRAM, sizeof(T),
COORD_NUM * sizeof(T), handle_batches - 1);
// get the width and height
__bang_maxequal(vec_left, vec_b1_x1, vec_b2_x1, batches_stride);
__bang_minequal(vec_right, vec_b1_x2, vec_b2_x2, batches_stride);
__bang_maxequal(vec_top, vec_b1_y1, vec_b2_y1, batches_stride);
__bang_minequal(vec_bottom, vec_b1_y2, vec_b2_y2, batches_stride);
// right - left + offset ---> left
__bang_sub(vec_left, vec_right, vec_left, batches_stride);
__bang_add_scalar(vec_left, vec_left, (T)offset, batches_stride);
// bottom - top + offset ---> right
__bang_sub(vec_right, vec_bottom, vec_top, batches_stride);
__bang_add_scalar(vec_right, vec_right, (T)offset, batches_stride);
// zero vector ---> bottom
__bang_write_value(vec_bottom, batches_stride, (T)0);
// width --> vec_left
__bang_maxequal(vec_left, vec_bottom, vec_left, batches_stride);
T *width = vec_left;
// height --> vec_right
__bang_maxequal(vec_right, vec_bottom, vec_right, batches_stride);
T *height = vec_right;
// get the b1_area
// (b1_x2 - b1_x1 + offset) ---> vec_top
__bang_sub(vec_top, vec_b1_x2, vec_b1_x1, batches_stride);
__bang_add_scalar(vec_top, vec_top, (T)offset, batches_stride);
// (b1_y2 - b1_y1 + offset) ---> vec_bottom
__bang_sub(vec_bottom, vec_b1_y2, vec_b1_y1, batches_stride);
__bang_add_scalar(vec_bottom, vec_bottom, (T)offset, batches_stride);
// b1_area = (b1_x2 - b1_x1 + offset) * (b1_y2 - b1_y1 + offset)
// ---> vec_top;
__bang_mul(vec_top, vec_top, vec_bottom, batches_stride);
T *b1_area = vec_top;
// get the b2_area
// (b2_x2 - b2_x1 + offset) ---> b2_x1
__bang_sub(vec_b2_x1, vec_b2_x2, vec_b2_x1, batches_stride);
__bang_add_scalar(vec_b2_x1, vec_b2_x1, (T)offset, batches_stride);
// (b2_y2 - b2_y1 + offset) ---> b2_y1
__bang_sub(vec_b2_y1, vec_b2_y2, vec_b2_y1, batches_stride);
__bang_add_scalar(vec_b2_y1, vec_b2_y1, (T)offset, batches_stride);
// b2_area = (b2_x2 - b2_x1 + offset) * (b2_y2 - b2_y1 + offset)
// ---> b2_x1;
__bang_mul(vec_b2_x1, vec_b2_x1, vec_b2_y1, batches_stride);
T *b2_area = vec_b2_x1;
// inter_s = width * height
__bang_mul(height, width, height, batches_stride);
T *inter_s = height;
// offset vector ---> vec_b2_y1
__bang_write_value(vec_b2_y1, batches_stride, T(offset));
T *vec_offset = vec_b2_y1;
if (mode == 0) {
__bang_add(b1_area, b1_area, b2_area, batches_stride);
__bang_sub(b1_area, b1_area, inter_s, batches_stride);
__bang_maxequal(b1_area, vec_offset, b1_area, batches_stride);
} else {
__bang_maxequal(b1_area, vec_offset, b1_area, batches_stride);
}
T *base_s = b1_area;
// ious = inter_s / base_s
computeDiv<T>(width, inter_s, base_s, vec_b2_x2, batches_stride);
int32_t gdram_offset = index1 * num_bbox2 + index2;
__memcpy((T *)ious + gdram_offset, width, handle_batches * sizeof(T),
NRAM2GDRAM);
}
}
}
}
template <typename T>
__mlu_global__ void MLUUnion1KernelBBoxOverlaps(
const void *bbox1, const void *bbox2, void *ious, const int32_t num_bbox1,
const int32_t num_bbox2, const int32_t mode, const bool aligned,
const int32_t offset) {
/*
* NRAM partition
* |-------------------------------------------------------------|
* | vec_b1_x1 | vec_b1_y1 | vec_b1_x2 | vec_b1_y2 |
* |-------------------------------------------------------------|
* | vec_b2_x1 | vec_b2_y1 | vec_b2_x2 | vec_b2_y2 |
* |-------------------------------------------------------------|
* | vec_left | vec_right | vec_top | vec_bottom |
* |-------------------------------------------------------------|
*
*/
const int32_t align_bytes = PAD_DOWN(MAX_NRAM_SIZE, NFU_ALIGN_SIZE);
const int32_t split_nram_num = 12;
const int32_t nram_stride =
align_bytes / NFU_ALIGN_SIZE / split_nram_num * NFU_ALIGN_SIZE;
void *vec_b1_x1 = nmem_buf;
void *vec_b1_y1 = nmem_buf + nram_stride;
void *vec_b1_x2 = nmem_buf + 2 * nram_stride;
void *vec_b1_y2 = nmem_buf + 3 * nram_stride;
void *vec_b2_x1 = nmem_buf + 4 * nram_stride;
void *vec_b2_y1 = nmem_buf + 5 * nram_stride;
void *vec_b2_x2 = nmem_buf + 6 * nram_stride;
void *vec_b2_y2 = nmem_buf + 7 * nram_stride;
void *vec_left = nmem_buf + 8 * nram_stride;
void *vec_right = nmem_buf + 9 * nram_stride;
void *vec_top = nmem_buf + 10 * nram_stride;
void *vec_bottom = nmem_buf + 11 * nram_stride;
const int32_t vec_length = nram_stride / sizeof(T);
bboxOverlapsWorkflow((T *)vec_b1_x1, (T *)vec_b1_y1, (T *)vec_b1_x2,
(T *)vec_b1_y2, (T *)vec_b2_x1, (T *)vec_b2_y1,
(T *)vec_b2_x2, (T *)vec_b2_y2, (T *)vec_left,
(T *)vec_right, (T *)vec_top, (T *)vec_bottom,
(T *)bbox1, (T *)bbox2, (T *)ious, offset, mode,
vec_length, num_bbox1, num_bbox2, aligned);
}
void KernelBBoxOverlaps(cnrtDim3_t k_dim, cnrtFunctionType_t k_type,
cnrtQueue_t queue, const cnrtDataType_t d_type,
const void *bbox1, const void *bbox2, void *ious,
const int32_t num_bbox1, const int32_t num_bbox2,
const int32_t mode, const bool aligned,
const int32_t offset) {
if (d_type == CNRT_FLOAT16) {
MLUUnion1KernelBBoxOverlaps<half><<<k_dim, k_type, queue>>>(
bbox1, bbox2, ious, num_bbox1, num_bbox2, mode, aligned, offset);
} else {
MLUUnion1KernelBBoxOverlaps<float><<<k_dim, k_type, queue>>>(
bbox1, bbox2, ious, num_bbox1, num_bbox2, mode, aligned, offset);
}
}
```
mmcv/ops/csrc/common/mlu/carafe_mlu_kernel.mlu · deleted (100644 → 0) · View file @6f674c7e (diff collapsed, not shown)
mmcv/ops/csrc/common/mlu/carafe_utils.hpp · deleted (100644 → 0) · View file @6f674c7e

The entire 95-line file is removed; its contents were:

```cpp
/*************************************************************************
* Copyright (C) 2022 Cambricon.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*************************************************************************/
#ifndef CARAFE_UTILS_HPP_
#define CARAFE_UTILS_HPP_
#define NRAM_ALIGN_SIZE 64
struct CarafeForwardParam {
  int N;   // batch size
  int Hi;  // input height
  int Wi;  // input width
  int Ci;  // input channels
  int Ho;  // output height
  int Wo;  // output width
  int Cg;  // channels per group

  int kernel_size;       // kernel_size
  int group_size;        // group_size
  int scale_factor;      // scale_factor
  int kernel_size_half;  // kernel half size (K-1)/2
  int kernel_size_sq;    // square of kernel size

  int dtype_size;  // size of tensor data type

  // Host arrays' geometry
  int input_stride_g;
  int input_stride_w;
  int input_stride_h;
  int input_stride_n;
  int input_size;
  int mask_stride_kh;
  int mask_stride_g;
  int mask_stride_w;
  int mask_stride_h;
  int mask_stride_n;
  int mask_size;
  int output_stride_g;
  int output_stride_w;
  int output_stride_h;
  int output_stride_n;
  int output_size;

  // NRAM arrays' geometry
  int input_nram_stride_g;
  int input_nram_stride_w;
  int input_nram_stride_h;
  int input_nram_size;
  int mask_nram_stride_kh;
  int mask_nram_stride_g;
  int mask_nram_stride_w;
  int mask_nram_stride_h;
  int mask_nram_size;
  int output_nram_stride_g;
  int output_nram_stride_w;
  int output_nram_stride_h;
  int output_nram_size;

  // for address/compute alignment
  int align_size_NRAM;  // for addressing on NRAM
  int align_size_NFU;   // for NFU operation length
  int block_Cg_NFU;     // for bang_mul_const

  int job_num;  // total job number
};

struct CarafeForwardBlockDim {
  int Ho;  // block size of output height
  int Wo;  // block size of output width
  int Kh;  // block size of kernel height
  int Kw;  // block size of kernel width
  int G;   // block size of groups
  int Cg;  // block size of channels within a group
  int Hi;  // block size of input height
  int Wi;  // block size of input width
};

struct CarafeForwardGridDim {
  int Ho;  // number of blocks of output height
  int Wo;
  int Kh;
  int Kw;
  int G;
  int Cg;
};

#endif  // CARAFE_UTILS_HPP_
```
mmcv/ops/csrc/common/mlu/common_mlu_helper.hpp · View file @91da9643
```diff
@@ -45,148 +45,6 @@ __mlu_func__ inline scalar_t max(scalar_t a, scalar_t b) {
   return a > b ? a : b;
 }
```

The hunk deletes the 2D/3D load/store helpers below (reproduced without the leading `-` markers for readability); the file's next doc comment, `/*! @brief Converts int32 to float32 data type. ... */`, is left unchanged as trailing context.

```cpp
/*!
 * @brief loads data from global DRAM to NRAM with 2D pattern.
 *
 * @param[out] dst
 *   Pointer to NRAM that stores dst data.
 * @param[in] src
 *   Pointer to global DRAM that stores src data.
 * @param[in] size
 *   The byte size of segment in the lower dimension.
 * @param[in] dst_str
 *   The data stride in bytes between segments in the lower dimension of dst.
 * @param[in] src_str
 *   The data stride in bytes between segments in the lower dimension of src.
 * @param[in] seg_num
 *   The total count of data segments in the lower dimension.
 */
template <typename T>
__mlu_func__ void loadStr2D(T *dst, T *src, const int size, const int dst_str,
                            const int src_str, const int seg_num) {
  if (dst_str == src_str && size == src_str) {
    __memcpy(dst, src, src_str * seg_num * sizeof(T), GDRAM2NRAM);
  } else if ((size == src_str || src_str <= dst_str) &&
             src_str * sizeof(T) <= 512) {
    // gather data less than 512Bytes to improve IO efficiency
    T *tmp = (T *)dst + (dst_str - src_str) * seg_num;
    __memcpy(tmp, src, (src_str * (seg_num - 1) + size) * sizeof(T),
             GDRAM2NRAM);
    if (dst_str != src_str) {
      __memcpy(dst, tmp, size * sizeof(T), NRAM2NRAM, dst_str * sizeof(T),
               src_str * sizeof(T), seg_num - 1);
    }
  } else {
    __memcpy(dst, src, size * sizeof(T), GDRAM2NRAM, dst_str * sizeof(T),
             src_str * sizeof(T), seg_num - 1);
  }
}

/*!
 * @brief loads data from global DRAM to NRAM with 3D pattern.
 *
 * @param[out] dst
 *   Pointer to NRAM that stores dst data.
 * @param[in] src
 *   Pointer to global DRAM that stores src data.
 * @param[in] size
 *   The byte size of segment in the lowest dimension.
 * @param[in] seg_num_in
 *   The total count of data segments in the lowest dimension.
 * @param[in] seg_num_out
 *   The total count of data segments in the middle dimension.
 * @param[in] dst_str_in
 *   The data stride in bytes between segments in the lowest dimension of dst.
 * @param[in] dst_str_out
 *   The data stride in bytes between segments in the middle dimension of dst.
 * @param[in] src_str_in
 *   The data stride in bytes between segments in the lowest dimension of src.
 * @param[in] src_str_out
 *   The data stride in bytes between segments in the middle dimension of src.
 */
template <typename T>
__mlu_func__ void loadStr3D(T *dst, T *src, const int size,
                            const int seg_num_in, const int seg_num_out,
                            const int dst_str_in, const int dst_str_out,
                            const int src_str_in, const int src_str_out) {
  T *tmp_dst = dst;
  T *tmp_src = src;
  for (int i = 0; i < seg_num_out; ++i) {
    loadStr2D(tmp_dst, tmp_src, size, dst_str_in, src_str_in, seg_num_in);
    tmp_src += src_str_out;
    tmp_dst += dst_str_out;
  }
}

/*!
 * @brief stores data from NRAM to global DRAM with 2D pattern.
 *
 * @param[out] dst
 *   Pointer to global DRAM that stores dst data.
 * @param[in] src
 *   Pointer to NRAM that stores src data.
 * @param[in] size
 *   The byte size of segment in the lower dimension.
 * @param[in] dst_str
 *   The data stride in bytes between segments in the lower dimension of dst.
 * @param[in] src_str
 *   The data stride in bytes between segments in the lower dimension of src.
 * @param[in] seg_num
 *   The total count of data segments in the lower dimension.
 */
template <typename T>
__mlu_func__ void storeStr2D(T *dst, T *src, const int size, const int seg_num,
                             const int dst_str, const int src_str) {
  if ((size == dst_str && dst_str <= src_str) && dst_str * sizeof(T) <= 512) {
    // gather data less than 512Bytes to improve IO efficiency
    if (dst_str != src_str) {
      __memcpy(src, src, size * sizeof(T), NRAM2NRAM, dst_str * sizeof(T),
               src_str * sizeof(T), seg_num - 1);
    }
    __memcpy(dst, src, size * seg_num * sizeof(T), NRAM2GDRAM);
  } else {
    __memcpy(dst, src, size * sizeof(T), NRAM2GDRAM, dst_str * sizeof(T),
             src_str * sizeof(T), seg_num - 1);
  }
}

/*!
 * @brief stores data from NRAM to global DRAM with 3D pattern.
 *
 * @param[out] dst
 *   Pointer to global DRAM that stores dst data.
 * @param[in] src
 *   Pointer to NRAM that stores src data.
 * @param[in] size
 *   The byte size of segment in the lowest dimension.
 * @param[in] seg_num_in
 *   The total count of data segments in the lowest dimension.
 * @param[in] seg_num_out
 *   The total count of data segments in the middle dimension.
 * @param[in] dst_str_in
 *   The data stride in bytes between segments in the lowest dimension of dst.
 * @param[in] dst_str_out
 *   The data stride in bytes between segments in the middle dimension of dst.
 * @param[in] src_str_in
 *   The data stride in bytes between segments in the lowest dimension of src.
 * @param[in] src_str_out
 *   The data stride in bytes between segments in the middle dimension of src.
 */
template <typename T>
__mlu_func__ void storeStr3D(T *dst, T *src, const int size,
                             const int seg_num_in, const int seg_num_out,
                             const int dst_str_in, const int dst_str_out,
                             const int src_str_in, const int src_str_out) {
  T *tmp_dst = dst;
  T *tmp_src = src;
  for (int i = 0; i < seg_num_out; ++i) {
    storeStr2D(tmp_dst, tmp_src, size, seg_num_in, dst_str_in, src_str_in);
    tmp_src += src_str_out;
    tmp_dst += dst_str_out;
  }
}
```
mmcv/ops/csrc/common/mlu/deform_roi_pool_mlu_kernel.mlu · deleted (100644 → 0) · View file @6f674c7e (diff collapsed, not shown)

mmcv/ops/csrc/common/mlu/focal_loss_sigmoid_mlu_kernel.mlu · deleted (100644 → 0) · View file @6f674c7e (diff collapsed, not shown)