Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
ModelZoo
InstructBLIP_pytorch
Commits
c04f261a
Commit
c04f261a
authored
Aug 22, 2024
by
dongchy920
Browse files
InstruceBLIP
parents
Pipeline
#1594
canceled with stages
Changes
421
Pipelines
1
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
2206 additions
and
0 deletions
+2206
-0
lavis/common/annotator/uniformer/mmcv/ops/roi_align_rotated.py
.../common/annotator/uniformer/mmcv/ops/roi_align_rotated.py
+177
-0
lavis/common/annotator/uniformer/mmcv/ops/roi_pool.py
lavis/common/annotator/uniformer/mmcv/ops/roi_pool.py
+86
-0
lavis/common/annotator/uniformer/mmcv/ops/roiaware_pool3d.py
lavis/common/annotator/uniformer/mmcv/ops/roiaware_pool3d.py
+114
-0
lavis/common/annotator/uniformer/mmcv/ops/roipoint_pool3d.py
lavis/common/annotator/uniformer/mmcv/ops/roipoint_pool3d.py
+77
-0
lavis/common/annotator/uniformer/mmcv/ops/saconv.py
lavis/common/annotator/uniformer/mmcv/ops/saconv.py
+145
-0
lavis/common/annotator/uniformer/mmcv/ops/scatter_points.py
lavis/common/annotator/uniformer/mmcv/ops/scatter_points.py
+135
-0
lavis/common/annotator/uniformer/mmcv/ops/sync_bn.py
lavis/common/annotator/uniformer/mmcv/ops/sync_bn.py
+279
-0
lavis/common/annotator/uniformer/mmcv/ops/three_interpolate.py
.../common/annotator/uniformer/mmcv/ops/three_interpolate.py
+68
-0
lavis/common/annotator/uniformer/mmcv/ops/three_nn.py
lavis/common/annotator/uniformer/mmcv/ops/three_nn.py
+51
-0
lavis/common/annotator/uniformer/mmcv/ops/tin_shift.py
lavis/common/annotator/uniformer/mmcv/ops/tin_shift.py
+68
-0
lavis/common/annotator/uniformer/mmcv/ops/upfirdn2d.py
lavis/common/annotator/uniformer/mmcv/ops/upfirdn2d.py
+330
-0
lavis/common/annotator/uniformer/mmcv/ops/voxelize.py
lavis/common/annotator/uniformer/mmcv/ops/voxelize.py
+132
-0
lavis/common/annotator/uniformer/mmcv/parallel/__init__.py
lavis/common/annotator/uniformer/mmcv/parallel/__init__.py
+13
-0
lavis/common/annotator/uniformer/mmcv/parallel/_functions.py
lavis/common/annotator/uniformer/mmcv/parallel/_functions.py
+79
-0
lavis/common/annotator/uniformer/mmcv/parallel/collate.py
lavis/common/annotator/uniformer/mmcv/parallel/collate.py
+84
-0
lavis/common/annotator/uniformer/mmcv/parallel/data_container.py
...ommon/annotator/uniformer/mmcv/parallel/data_container.py
+89
-0
lavis/common/annotator/uniformer/mmcv/parallel/data_parallel.py
...common/annotator/uniformer/mmcv/parallel/data_parallel.py
+89
-0
lavis/common/annotator/uniformer/mmcv/parallel/distributed.py
...s/common/annotator/uniformer/mmcv/parallel/distributed.py
+112
-0
lavis/common/annotator/uniformer/mmcv/parallel/distributed_deprecated.py
...notator/uniformer/mmcv/parallel/distributed_deprecated.py
+70
-0
lavis/common/annotator/uniformer/mmcv/parallel/registry.py
lavis/common/annotator/uniformer/mmcv/parallel/registry.py
+8
-0
No files found.
Too many changes to show.
To preserve performance only
421 of 421+
files are displayed.
Plain diff
Email patch
lavis/common/annotator/uniformer/mmcv/ops/roi_align_rotated.py
0 → 100644
View file @
c04f261a
# Copyright (c) OpenMMLab. All rights reserved.
import
torch.nn
as
nn
from
torch.autograd
import
Function
from
..utils
import
ext_loader
# Handle returned by ext_loader.load_ext for the '_ext' extension. The listed
# names are the two ops this file calls on it (roi_align_rotated_forward in
# RoIAlignRotatedFunction.forward, roi_align_rotated_backward in .backward).
# NOTE(review): presumably load_ext verifies these ops exist in the compiled
# extension — confirm against ..utils.ext_loader.
ext_module = ext_loader.load_ext(
    '_ext', ['roi_align_rotated_forward', 'roi_align_rotated_backward'])
class RoIAlignRotatedFunction(Function):
    """Autograd function wrapping the rotated RoI-align extension ops.

    ``forward`` fills an output of shape ``(num_rois, C, out_h, out_w)`` by
    calling ``ext_module.roi_align_rotated_forward``; ``backward`` produces
    the gradient w.r.t. ``features`` only (no gradient flows to ``rois`` or
    to the non-tensor arguments).
    """

    @staticmethod
    def _parse_out_size(out_size):
        """Normalize ``out_size`` to an ``(out_h, out_w)`` pair.

        Args:
            out_size (int | tuple[int, int]): A single int (square output)
                or a 2-tuple ``(h, w)`` of ints.

        Returns:
            tuple[int, int]: ``(out_h, out_w)``.

        Raises:
            AssertionError: If a tuple is given that is not two ints.
            TypeError: If ``out_size`` is neither an int nor a tuple.
        """
        if isinstance(out_size, int):
            return out_size, out_size
        if isinstance(out_size, tuple):
            assert len(out_size) == 2
            assert isinstance(out_size[0], int)
            assert isinstance(out_size[1], int)
            return out_size
        raise TypeError('"out_size" must be an integer or tuple of integers')

    @staticmethod
    def symbolic(g, features, rois, out_size, spatial_scale, sample_num,
                 aligned, clockwise):
        """Emit the ``mmcv::MMCVRoIAlignRotated`` node for ONNX export.

        Args:
            g: Graph object providing ``op``.
            features: Feature-map value.
            rois: RoI value.
            out_size (int | tuple[int, int]): Output size, ``h`` or ``(h, w)``.
            spatial_scale (float): Exported as ``spatial_scale_f``.
            sample_num (int): Exported as ``sampling_ratio_i``.
            aligned (bool): Exported as ``aligned_i``.
            clockwise (bool): Exported as ``clockwise_i``.

        Returns:
            Whatever ``g.op`` returns for the created node.
        """
        out_h, out_w = RoIAlignRotatedFunction._parse_out_size(out_size)
        return g.op(
            'mmcv::MMCVRoIAlignRotated',
            features,
            rois,
            output_height_i=out_h,
            # The width attribute must carry out_w; exporting out_h here
            # silently produced square outputs for non-square out_size.
            output_width_i=out_w,
            spatial_scale_f=spatial_scale,
            sampling_ratio_i=sample_num,
            aligned_i=aligned,
            clockwise_i=clockwise)

    @staticmethod
    def forward(ctx,
                features,
                rois,
                out_size,
                spatial_scale,
                sample_num=0,
                aligned=True,
                clockwise=False):
        """Pool ``features`` over rotated ``rois``.

        Args:
            ctx: Autograd context; receives the settings needed by backward.
            features (torch.Tensor): 4-D input (unpacked below as batch,
                channels, height, width).
            rois (torch.Tensor): RoIs; ``rois.size(0)`` is the RoI count.
            out_size (int | tuple[int, int]): Output size, ``h`` or ``(h, w)``.
            spatial_scale (float): Passed through to the extension op.
            sample_num (int): Passed through to the extension op.
            aligned (bool): Passed through to the extension op.
            clockwise (bool): Passed through to the extension op.

        Returns:
            torch.Tensor: Output of shape ``(num_rois, C, out_h, out_w)``.
        """
        out_h, out_w = RoIAlignRotatedFunction._parse_out_size(out_size)

        ctx.spatial_scale = spatial_scale
        ctx.sample_num = sample_num
        ctx.aligned = aligned
        ctx.clockwise = clockwise
        ctx.save_for_backward(rois)
        ctx.feature_size = features.size()

        # Unpacking also enforces that ``features`` is 4-D.
        batch_size, num_channels, data_height, data_width = features.size()
        num_rois = rois.size(0)

        output = features.new_zeros(num_rois, num_channels, out_h, out_w)
        ext_module.roi_align_rotated_forward(
            features,
            rois,
            output,
            pooled_height=out_h,
            pooled_width=out_w,
            spatial_scale=spatial_scale,
            sample_num=sample_num,
            aligned=aligned,
            clockwise=clockwise)
        return output

    @staticmethod
    def backward(ctx, grad_output):
        """Compute the gradient w.r.t. ``features``.

        Args:
            ctx: Autograd context populated by ``forward``.
            grad_output (torch.Tensor): Gradient of the pooled output, shape
                ``(num_rois, C, out_h, out_w)``.

        Returns:
            tuple: ``(grad_input, None, None, None, None, None, None)`` where
            ``grad_input`` is ``None`` when ``features`` needs no gradient.
        """
        feature_size = ctx.feature_size
        spatial_scale = ctx.spatial_scale
        aligned = ctx.aligned
        clockwise = ctx.clockwise
        sample_num = ctx.sample_num
        rois = ctx.saved_tensors[0]
        assert feature_size is not None
        batch_size, num_channels, data_height, data_width = feature_size

        out_w = grad_output.size(3)
        out_h = grad_output.size(2)

        grad_input = grad_rois = None

        if ctx.needs_input_grad[0]:
            # Allocate from grad_output (not rois) so the feature gradient
            # has the dtype/device of the incoming gradient, matching
            # RoIPoolFunction.backward.
            grad_input = grad_output.new_zeros(batch_size, num_channels,
                                               data_height, data_width)
            ext_module.roi_align_rotated_backward(
                grad_output.contiguous(),
                rois,
                grad_input,
                pooled_height=out_h,
                pooled_width=out_w,
                spatial_scale=spatial_scale,
                sample_num=sample_num,
                aligned=aligned,
                clockwise=clockwise)
        return grad_input, grad_rois, None, None, None, None, None
# Functional alias for RoIAlignRotatedFunction.apply; arguments follow
# RoIAlignRotatedFunction.forward without ``ctx``:
# (features, rois, out_size, spatial_scale, sample_num, aligned, clockwise).
roi_align_rotated = RoIAlignRotatedFunction.apply
class RoIAlignRotated(nn.Module):
    """Module wrapper around rotated RoI align.

    Takes a feature map of shape (N, C, H, W) and rois of shape (n, 6),
    where each roi is (batch_index, center_x, center_y, w, h, angle) and
    the angle is given in radians.

    Args:
        out_size (tuple): h, w
        spatial_scale (float): factor applied to the input boxes.
        sample_num (int): number of input samples taken per output sample;
            0 means dense sampling for current models.
        aligned (bool): False selects the legacy MMDetection behaviour,
            True aligns the results more precisely. Default: True.
        clockwise (bool): True if proposal angles are measured clockwise in
            image space, otherwise counterclockwise. Default: False.

    Note:
        The ``aligned=True`` implementation is adapted from
        https://github.com/facebookresearch/detectron2/

        Meaning of ``aligned=True``: for a continuous coordinate c, the two
        neighbouring pixel indices (in our pixel model) are floor(c - 0.5)
        and ceil(c - 0.5). E.g. c=1.3 has neighbours [0] and [1], sampled
        from the underlying signal at continuous coordinates 0.5 and 1.5.
        The original roi_align (``aligned=False``) omits the 0.5 shift and
        therefore interpolates from slightly misaligned pixels.

        With ``aligned=True`` the ROI is first scaled appropriately and then
        shifted by -0.5 before roi_align is called, giving the correct
        neighbours. The difference does not affect model performance when
        ROIAlign is used together with conv layers.
    """

    def __init__(self,
                 out_size,
                 spatial_scale,
                 sample_num=0,
                 aligned=True,
                 clockwise=False):
        super().__init__()

        # Numeric settings are coerced once here; the rest are stored as-is.
        self.out_size = out_size
        self.spatial_scale = float(spatial_scale)
        self.sample_num = int(sample_num)
        self.aligned = aligned
        self.clockwise = clockwise

    def forward(self, features, rois):
        """Apply rotated RoI align to ``features`` using the stored settings."""
        settings = (self.out_size, self.spatial_scale, self.sample_num,
                    self.aligned, self.clockwise)
        return RoIAlignRotatedFunction.apply(features, rois, *settings)
lavis/common/annotator/uniformer/mmcv/ops/roi_pool.py
0 → 100644
View file @
c04f261a
# Copyright (c) OpenMMLab. All rights reserved.
import
torch
import
torch.nn
as
nn
from
torch.autograd
import
Function
from
torch.autograd.function
import
once_differentiable
from
torch.nn.modules.utils
import
_pair
from
..utils
import
ext_loader
# Handle returned by ext_loader.load_ext for the '_ext' extension; the listed
# names are the ops RoIPoolFunction calls on it in forward and backward.
# NOTE(review): presumably load_ext checks that these ops are present in the
# compiled extension — confirm against ..utils.ext_loader.
ext_module = ext_loader.load_ext('_ext',
                                 ['roi_pool_forward', 'roi_pool_backward'])
class RoIPoolFunction(Function):
    """Autograd function wrapping the RoI max-pool extension ops."""

    @staticmethod
    def symbolic(g, input, rois, output_size, spatial_scale):
        """Emit a ``MaxRoiPool`` node for ONNX export."""
        node_attrs = {
            'pooled_shape_i': output_size,
            'spatial_scale_f': spatial_scale,
        }
        return g.op('MaxRoiPool', input, rois, **node_attrs)

    @staticmethod
    def forward(ctx, input, rois, output_size, spatial_scale=1.0):
        """Pool ``input`` over ``rois``.

        Args:
            ctx: Autograd context; stores the settings used by backward.
            input (torch.Tensor): Feature map; ``input.size(1)`` is the
                channel count of the output.
            rois (torch.Tensor): RoIs with 5 columns
                ``(idx, x1, y1, x2, y2)``.
            output_size (int | tuple): Pooled size, expanded with ``_pair``.
            spatial_scale (float): Passed through to the extension op.

        Returns:
            torch.Tensor: Output of shape ``(num_rois, C, pooled_h, pooled_w)``.
        """
        ctx.output_size = _pair(output_size)
        ctx.spatial_scale = spatial_scale
        ctx.input_shape = input.size()

        assert rois.size(1) == 5, 'RoI must be (idx, x1, y1, x2, y2)!'

        pooled_h = ctx.output_size[0]
        pooled_w = ctx.output_size[1]
        out_shape = (rois.size(0), input.size(1), pooled_h, pooled_w)

        # ``argmax`` is filled by the forward op and handed back to the
        # backward op via save_for_backward.
        pooled = input.new_zeros(out_shape)
        argmax = input.new_zeros(out_shape, dtype=torch.int)

        ext_module.roi_pool_forward(
            input,
            rois,
            pooled,
            argmax,
            pooled_height=pooled_h,
            pooled_width=pooled_w,
            spatial_scale=ctx.spatial_scale)

        ctx.save_for_backward(rois, argmax)
        return pooled

    @staticmethod
    @once_differentiable
    def backward(ctx, grad_output):
        """Compute the gradient w.r.t. ``input``; other inputs get ``None``."""
        rois, argmax = ctx.saved_tensors
        pooled_h = ctx.output_size[0]
        pooled_w = ctx.output_size[1]

        grad_input = grad_output.new_zeros(ctx.input_shape)
        ext_module.roi_pool_backward(
            grad_output,
            rois,
            argmax,
            grad_input,
            pooled_height=pooled_h,
            pooled_width=pooled_w,
            spatial_scale=ctx.spatial_scale)

        return grad_input, None, None, None
# Functional alias for RoIPoolFunction.apply; arguments follow
# RoIPoolFunction.forward without ``ctx``:
# (input, rois, output_size, spatial_scale).
roi_pool = RoIPoolFunction.apply
class RoIPool(nn.Module):
    """Module wrapper around ``roi_pool``.

    Args:
        output_size (int | tuple): Pooled output size; expanded with
            ``_pair`` at construction time.
        spatial_scale (float): Scale forwarded to ``roi_pool``; stored as a
            float. Default: 1.0.
    """

    def __init__(self, output_size, spatial_scale=1.0):
        super().__init__()

        self.output_size = _pair(output_size)
        self.spatial_scale = float(spatial_scale)

    def forward(self, input, rois):
        """Pool ``input`` over ``rois`` with the stored settings."""
        return roi_pool(input, rois, self.output_size, self.spatial_scale)

    def __repr__(self):
        cls_name = self.__class__.__name__
        return (f'{cls_name}(output_size={self.output_size}, '
                f'spatial_scale={self.spatial_scale})')
lavis/common/annotator/uniformer/mmcv/ops/roiaware_pool3d.py
0 → 100644
View file @
c04f261a
# Copyright (c) OpenMMLab. All rights reserved.
import
torch
from
torch
import
nn
as
nn
from
torch.autograd
import
Function
import
annotator.uniformer.mmcv
as
mmcv
from
..utils
import
ext_loader
# Handle returned by ext_loader.load_ext for the '_ext' extension; the listed
# names are the ops RoIAwarePool3dFunction calls in forward and backward.
# NOTE(review): presumably load_ext checks that these ops are present in the
# compiled extension — confirm against ..utils.ext_loader.
ext_module = ext_loader.load_ext(
    '_ext', ['roiaware_pool3d_forward', 'roiaware_pool3d_backward'])
class RoIAwarePool3d(nn.Module):
    """Encode the geometry-specific features of each 3D proposal.

    See `PartA2 <https://arxiv.org/pdf/1907.03670.pdf>`_ for details.

    Args:
        out_size (int or tuple): Size of the output features, either n or
            [n1, n2, n3].
        max_pts_per_voxel (int, optional): Maximum number of points per
            voxel. Default: 128.
        mode (str, optional): Pooling method, 'max' or 'avg'.
            Default: 'max'.
    """

    def __init__(self, out_size, max_pts_per_voxel=128, mode='max'):
        super().__init__()

        self.out_size = out_size
        self.max_pts_per_voxel = max_pts_per_voxel

        assert mode in ('max', 'avg')
        # The function layer takes the pooling mode as an int code:
        # 0 for max pooling, 1 for average pooling.
        if mode == 'max':
            self.mode = 0
        else:
            self.mode = 1

    def forward(self, rois, pts, pts_feature):
        """
        Args:
            rois (torch.Tensor): [N, 7], in LiDAR coordinate,
                (x, y, z) is the bottom center of rois.
            pts (torch.Tensor): [npoints, 3], coordinates of input points.
            pts_feature (torch.Tensor): [npoints, C], features of input
                points.

        Returns:
            pooled_features (torch.Tensor): [N, out_x, out_y, out_z, C]
        """
        settings = (self.out_size, self.max_pts_per_voxel, self.mode)
        return RoIAwarePool3dFunction.apply(rois, pts, pts_feature, *settings)
class RoIAwarePool3dFunction(Function):
    """Autograd function wrapping the RoI-aware 3D pooling extension ops."""

    @staticmethod
    def forward(ctx, rois, pts, pts_feature, out_size, max_pts_per_voxel,
                mode):
        """
        Args:
            rois (torch.Tensor): [N, 7], in LiDAR coordinate,
                (x, y, z) is the bottom center of rois.
            pts (torch.Tensor): [npoints, 3], coordinates of input points.
            pts_feature (torch.Tensor): [npoints, C], features of input
                points.
            out_size (int or tuple): Size of the output features, n or
                [n1, n2, n3].
            max_pts_per_voxel (int): Maximum number of points per voxel.
            mode (int): Pooling method, 0 (max pool) or 1 (average pool).

        Returns:
            pooled_features (torch.Tensor): [N, out_x, out_y, out_z, C],
                output pooled features.
        """
        if isinstance(out_size, int):
            out_x, out_y, out_z = out_size, out_size, out_size
        else:
            assert len(out_size) == 3
            assert mmcv.is_tuple_of(out_size, int)
            out_x, out_y, out_z = out_size

        num_rois = rois.shape[0]
        num_channels = pts_feature.shape[-1]
        num_pts = pts.shape[0]

        # All three buffers share the leading (roi, x, y, z) voxel grid and
        # differ only in their last dimension and dtype.
        voxel_grid = (num_rois, out_x, out_y, out_z)
        pooled_features = pts_feature.new_zeros(voxel_grid + (num_channels, ))
        argmax = pts_feature.new_zeros(
            voxel_grid + (num_channels, ), dtype=torch.int)
        pts_idx_of_voxels = pts_feature.new_zeros(
            voxel_grid + (max_pts_per_voxel, ), dtype=torch.int)

        ext_module.roiaware_pool3d_forward(rois, pts, pts_feature, argmax,
                                           pts_idx_of_voxels, pooled_features,
                                           mode)

        # Stored as a plain attribute (not via save_for_backward) because
        # the tuple mixes tensors with Python ints.
        ctx.roiaware_pool3d_for_backward = (pts_idx_of_voxels, argmax, mode,
                                            num_pts, num_channels)
        return pooled_features

    @staticmethod
    def backward(ctx, grad_out):
        """Return the gradient w.r.t. ``pts_feature``; other inputs get None."""
        (pts_idx_of_voxels, argmax, mode, num_pts,
         num_channels) = ctx.roiaware_pool3d_for_backward

        grad_in = grad_out.new_zeros((num_pts, num_channels))
        ext_module.roiaware_pool3d_backward(pts_idx_of_voxels, argmax,
                                            grad_out.contiguous(), grad_in,
                                            mode)

        return None, None, grad_in, None, None, None
lavis/common/annotator/uniformer/mmcv/ops/roipoint_pool3d.py
0 → 100644
View file @
c04f261a
from
torch
import
nn
as
nn
from
torch.autograd
import
Function
from
..utils
import
ext_loader
# Handle returned by ext_loader.load_ext for the '_ext' extension; only the
# forward op is requested because RoIPointPool3dFunction.backward is not
# implemented.
# NOTE(review): presumably load_ext checks that the op is present in the
# compiled extension — confirm against ..utils.ext_loader.
ext_module = ext_loader.load_ext('_ext', ['roipoint_pool3d_forward'])
class RoIPointPool3d(nn.Module):
    """Encode the geometry-specific features of each 3D proposal.

    See the `Paper of PartA2 <https://arxiv.org/pdf/1907.03670.pdf>`_ for
    details.

    Args:
        num_sampled_points (int, optional): Number of samples in each roi.
            Default: 512.
    """

    def __init__(self, num_sampled_points=512):
        super().__init__()
        self.num_sampled_points = num_sampled_points

    def forward(self, points, point_features, boxes3d):
        """
        Args:
            points (torch.Tensor): Input points whose shape is (B, N, C).
            point_features (torch.Tensor): Features of input points whose
                shape is (B, N, C).
            boxes3d (B, M, 7), Input bounding boxes whose shape is
                (B, M, 7).

        Returns:
            pooled_features (torch.Tensor): The output pooled features whose
                shape is (B, M, num_sampled_points, 3 + C).
            pooled_empty_flag (torch.Tensor): Empty flag whose shape is
                (B, M).
        """
        sample_count = self.num_sampled_points
        return RoIPointPool3dFunction.apply(points, point_features, boxes3d,
                                            sample_count)
class RoIPointPool3dFunction(Function):
    """Autograd function wrapping the RoI point pooling extension op.

    Only the forward pass is available; ``backward`` raises
    ``NotImplementedError``.
    """

    @staticmethod
    def forward(ctx, points, point_features, boxes3d, num_sampled_points=512):
        """
        Args:
            points (torch.Tensor): Input points whose shape is (B, N, 3)
                (the last dimension is asserted to be 3).
            point_features (torch.Tensor): Features of input points whose
                shape is (B, N, C).
            boxes3d (B, M, 7), Input bounding boxes whose shape is
                (B, M, 7).
            num_sampled_points (int, optional): The num of sampled points.
                Default: 512.

        Returns:
            pooled_features (torch.Tensor): The output pooled features whose
                shape is (B, M, num_sampled_points, 3 + C).
            pooled_empty_flag (torch.Tensor): Empty flag whose shape is
                (B, M).
        """
        assert len(points.shape) == 3 and points.shape[2] == 3

        batch_size = points.shape[0]
        boxes_num = boxes3d.shape[1]
        feature_len = point_features.shape[2]

        pooled_boxes3d = boxes3d.view(batch_size, -1, 7)

        # Output buffers are filled in place by the extension op.
        feature_shape = (batch_size, boxes_num, num_sampled_points,
                         3 + feature_len)
        pooled_features = point_features.new_zeros(feature_shape)
        pooled_empty_flag = point_features.new_zeros(
            (batch_size, boxes_num)).int()

        ext_module.roipoint_pool3d_forward(points.contiguous(),
                                           pooled_boxes3d.contiguous(),
                                           point_features.contiguous(),
                                           pooled_features, pooled_empty_flag)

        return pooled_features, pooled_empty_flag

    @staticmethod
    def backward(ctx, grad_out):
        """Not supported; always raises ``NotImplementedError``."""
        raise NotImplementedError
lavis/common/annotator/uniformer/mmcv/ops/saconv.py
0 → 100644
View file @
c04f261a
# Copyright (c) OpenMMLab. All rights reserved.
import
torch
import
torch.nn
as
nn
import
torch.nn.functional
as
F
from
annotator.uniformer.mmcv.cnn
import
CONV_LAYERS
,
ConvAWS2d
,
constant_init
from
annotator.uniformer.mmcv.ops.deform_conv
import
deform_conv2d
from
annotator.uniformer.mmcv.utils
import
TORCH_VERSION
,
digit_version
@CONV_LAYERS.register_module(name='SAC')
class SAConv2d(ConvAWS2d):
    """SAC (Switchable Atrous Convolution)

    This is an implementation of SAC in DetectoRS
    (https://arxiv.org/pdf/2006.02334.pdf).

    The layer runs the same convolution twice — once with the configured
    padding/dilation and once with both multiplied by 3 and the weight
    offset by ``weight_diff`` — and blends the two results with a
    per-location ``switch`` map. A global-average "context" term is added
    before and after.

    Args:
        in_channels (int): Number of channels in the input image
        out_channels (int): Number of channels produced by the convolution
        kernel_size (int or tuple): Size of the convolving kernel
        stride (int or tuple, optional): Stride of the convolution. Default: 1
        padding (int or tuple, optional): Zero-padding added to both sides of
            the input. Default: 0
        dilation (int or tuple, optional): Spacing between kernel elements.
            Default: 1
        groups (int, optional): Number of blocked connections from input
            channels to output channels. Default: 1
        bias (bool, optional): Forwarded to the parent constructor.
            Default: ``True``. NOTE(review): ``forward`` below never reads
            ``self.bias`` (a zero bias is passed on torch >= 1.8.0) —
            confirm whether the parent applies it elsewhere.
        use_deform: If ``True``, replace convolution with deformable
            convolution. Default: ``False``.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 padding=0,
                 dilation=1,
                 groups=1,
                 bias=True,
                 use_deform=False):
        super().__init__(
            in_channels,
            out_channels,
            kernel_size,
            stride=stride,
            padding=padding,
            dilation=dilation,
            groups=groups,
            bias=bias)
        self.use_deform = use_deform
        # 1x1 conv producing the single-channel blend map; it uses the same
        # stride as the main conv so the map matches the output resolution.
        self.switch = nn.Conv2d(
            self.in_channels, 1, kernel_size=1, stride=stride, bias=True)
        # Additive weight offset for the large-dilation branch; allocated
        # uninitialized here and zeroed in init_weights().
        self.weight_diff = nn.Parameter(torch.Tensor(self.weight.size()))
        # 1x1 convs applied to the globally pooled input / output.
        self.pre_context = nn.Conv2d(
            self.in_channels, self.in_channels, kernel_size=1, bias=True)
        self.post_context = nn.Conv2d(
            self.out_channels, self.out_channels, kernel_size=1, bias=True)
        if self.use_deform:
            # One offset predictor per branch (small / large dilation).
            # NOTE(review): 18 output channels presumably means 2 offsets
            # for each tap of a 3x3 kernel — confirm kernel_size is 3 when
            # use_deform is set.
            self.offset_s = nn.Conv2d(
                self.in_channels,
                18,
                kernel_size=3,
                padding=1,
                stride=stride,
                bias=True)
            self.offset_l = nn.Conv2d(
                self.in_channels,
                18,
                kernel_size=3,
                padding=1,
                stride=stride,
                bias=True)
        self.init_weights()

    def init_weights(self):
        """Initialize the auxiliary layers created in ``__init__``.

        The switch gets weight 0 and bias 1, ``weight_diff`` is zeroed, and
        the context/offset convs are initialized through ``constant_init``
        with value 0.
        """
        constant_init(self.switch, 0, bias=1)
        self.weight_diff.data.zero_()
        constant_init(self.pre_context, 0)
        constant_init(self.post_context, 0)
        if self.use_deform:
            constant_init(self.offset_s, 0)
            constant_init(self.offset_l, 0)

    def forward(self, x):
        """Apply switchable atrous convolution to ``x``.

        Args:
            x (torch.Tensor): Input feature map (4-D; it is padded and
                pooled with 2-D ops below).

        Returns:
            torch.Tensor: Blended convolution output with post-context added.
        """
        # pre-context
        avg_x = F.adaptive_avg_pool2d(x, output_size=1)
        avg_x = self.pre_context(avg_x)
        avg_x = avg_x.expand_as(x)
        x = x + avg_x
        # switch
        # Reflect-pad by 2 then 5x5 average pool with stride 1: the smoothed
        # map keeps the spatial size of ``x``.
        avg_x = F.pad(x, pad=(2, 2, 2, 2), mode='reflect')
        avg_x = F.avg_pool2d(avg_x, kernel_size=5, stride=1, padding=0)
        switch = self.switch(avg_x)
        # sac
        # _get_weight comes from the parent class (not visible here).
        weight = self._get_weight(self.weight)
        zero_bias = torch.zeros(
            self.out_channels, device=weight.device, dtype=weight.dtype)

        # Small-dilation branch: uses the configured padding/dilation.
        if self.use_deform:
            offset = self.offset_s(avg_x)
            out_s = deform_conv2d(x, offset, weight, self.stride, self.padding,
                                  self.dilation, self.groups, 1)
        else:
            # The parent conv helper changed name/signature across torch
            # versions, hence the three-way dispatch.
            if (TORCH_VERSION == 'parrots'
                    or digit_version(TORCH_VERSION) < digit_version('1.5.0')):
                out_s = super().conv2d_forward(x, weight)
            elif digit_version(TORCH_VERSION) >= digit_version('1.8.0'):
                # bias is a required argument of _conv_forward in torch 1.8.0
                out_s = super()._conv_forward(x, weight, zero_bias)
            else:
                out_s = super()._conv_forward(x, weight)
        # Large-dilation branch: temporarily triple padding and dilation on
        # the module itself because the parent helpers read them from
        # ``self``. They are restored after blending; an exception raised in
        # between would leave the module with the tripled values.
        ori_p = self.padding
        ori_d = self.dilation
        self.padding = tuple(3 * p for p in self.padding)
        self.dilation = tuple(3 * d for d in self.dilation)
        weight = weight + self.weight_diff
        if self.use_deform:
            offset = self.offset_l(avg_x)
            out_l = deform_conv2d(x, offset, weight, self.stride, self.padding,
                                  self.dilation, self.groups, 1)
        else:
            if (TORCH_VERSION == 'parrots'
                    or digit_version(TORCH_VERSION) < digit_version('1.5.0')):
                out_l = super().conv2d_forward(x, weight)
            elif digit_version(TORCH_VERSION) >= digit_version('1.8.0'):
                # bias is a required argument of _conv_forward in torch 1.8.0
                out_l = super()._conv_forward(x, weight, zero_bias)
            else:
                out_l = super()._conv_forward(x, weight)

        # Per-location blend of the two branches.
        out = switch * out_s + (1 - switch) * out_l
        self.padding = ori_p
        self.dilation = ori_d
        # post-context
        avg_x = F.adaptive_avg_pool2d(out, output_size=1)
        avg_x = self.post_context(avg_x)
        avg_x = avg_x.expand_as(out)
        out = out + avg_x
        return out
lavis/common/annotator/uniformer/mmcv/ops/scatter_points.py
0 → 100644
View file @
c04f261a
# Copyright (c) OpenMMLab. All rights reserved.
import
torch
from
torch
import
nn
from
torch.autograd
import
Function
from
..utils
import
ext_loader
# Handle returned by ext_loader.load_ext for the '_ext' extension; the listed
# names are the ops _DynamicScatter calls in forward and backward.
# NOTE(review): presumably load_ext checks that these ops are present in the
# compiled extension — confirm against ..utils.ext_loader.
ext_module = ext_loader.load_ext(
    '_ext',
    ['dynamic_point_to_voxel_forward', 'dynamic_point_to_voxel_backward'])
class _DynamicScatter(Function):
    """Autograd function reducing point features into voxel features."""

    @staticmethod
    def forward(ctx, feats, coors, reduce_type='max'):
        """convert kitti points(N, >=3) to voxels.

        Args:
            feats (torch.Tensor): [N, C]. Points features to be reduced
                into voxels.
            coors (torch.Tensor): [N, ndim]. Corresponding voxel coordinates
                (specifically multi-dim voxel index) of each points.
            reduce_type (str, optional): Reduce op. support 'max', 'sum' and
                'mean'. Default: 'max'.

        Returns:
            voxel_feats (torch.Tensor): [M, C]. Reduced features, input
                features that shares the same voxel coordinates are reduced
                to one row.
            voxel_coors (torch.Tensor): [M, ndim]. Voxel coordinates.
        """
        (voxel_feats, voxel_coors, point2voxel_map,
         voxel_points_count) = ext_module.dynamic_point_to_voxel_forward(
             feats, coors, reduce_type)

        ctx.reduce_type = reduce_type
        ctx.save_for_backward(feats, voxel_feats, point2voxel_map,
                              voxel_points_count)
        # Coordinates are indices, so no gradient is propagated to them.
        ctx.mark_non_differentiable(voxel_coors)
        return voxel_feats, voxel_coors

    @staticmethod
    def backward(ctx, grad_voxel_feats, grad_voxel_coors=None):
        """Return the gradient w.r.t. ``feats``; other inputs get ``None``."""
        saved = ctx.saved_tensors
        feats, voxel_feats, point2voxel_map, voxel_points_count = saved

        grad_feats = torch.zeros_like(feats)
        # TODO: whether to use index put or use cuda_backward
        # To use index put, need point to voxel index
        ext_module.dynamic_point_to_voxel_backward(
            grad_feats, grad_voxel_feats.contiguous(), feats, voxel_feats,
            point2voxel_map, voxel_points_count, ctx.reduce_type)
        return grad_feats, None, None
# Functional alias for _DynamicScatter.apply; arguments follow
# _DynamicScatter.forward without ``ctx``: (feats, coors, reduce_type).
dynamic_scatter = _DynamicScatter.apply
class DynamicScatter(nn.Module):
    """Scatters points into voxels, used in the voxel encoder with dynamic
    voxelization.

    Note:
        The CPU and GPU implementation get the same output, but have
        numerical difference after summation and division (e.g., 5e-7).

    Args:
        voxel_size (list): list [x, y, z] size of three dimension.
        point_cloud_range (list): The coordinate range of points, [x_min,
            y_min, z_min, x_max, y_max, z_max].
        average_points (bool): whether to use avg pooling to scatter points
            into voxel.
    """

    def __init__(self, voxel_size, point_cloud_range, average_points: bool):
        super().__init__()

        self.voxel_size = voxel_size
        self.point_cloud_range = point_cloud_range
        self.average_points = average_points

    def forward_single(self, points, coors):
        """Scatters points into voxels.

        Args:
            points (torch.Tensor): Points to be reduced into voxels.
            coors (torch.Tensor): Corresponding voxel coordinates
                (specifically multi-dim voxel index) of each points.

        Returns:
            voxel_feats (torch.Tensor): Reduced features, input features
                that shares the same voxel coordinates are reduced to one
                row.
            voxel_coors (torch.Tensor): Voxel coordinates.
        """
        if self.average_points:
            reduce_mode = 'mean'
        else:
            reduce_mode = 'max'
        return dynamic_scatter(points.contiguous(), coors.contiguous(),
                               reduce_mode)

    def forward(self, points, coors):
        """Scatters points/features into voxels.

        Args:
            points (torch.Tensor): Points to be reduced into voxels.
            coors (torch.Tensor): Corresponding voxel coordinates
                (specifically multi-dim voxel index) of each points. With 3
                columns it is handled as a single sample; otherwise column 0
                is read as the batch index.

        Returns:
            voxel_feats (torch.Tensor): Reduced features, input features
                that shares the same voxel coordinates are reduced to one
                row.
            voxel_coors (torch.Tensor): Voxel coordinates.
        """
        # Unbatched input: coordinates carry no batch column.
        if coors.size(-1) == 3:
            return self.forward_single(points, coors)

        # The batch count is taken from the last row's batch index.
        batch_size = coors[-1, 0] + 1
        per_sample_feats = []
        per_sample_coors = []
        for i in range(batch_size):
            inds = torch.where(coors[:, 0] == i)
            sample_points = points[inds]
            sample_coors = coors[inds][:, 1:]
            voxel, voxel_coor = self.forward_single(sample_points,
                                                    sample_coors)
            # Re-attach the batch index as the leading coordinate column.
            padded_coor = nn.functional.pad(
                voxel_coor, (1, 0), mode='constant', value=i)
            per_sample_coors.append(padded_coor)
            per_sample_feats.append(voxel)

        features = torch.cat(per_sample_feats, dim=0)
        feature_coors = torch.cat(per_sample_coors, dim=0)
        return features, feature_coors

    def __repr__(self):
        fields = [
            'voxel_size=' + str(self.voxel_size),
            'point_cloud_range=' + str(self.point_cloud_range),
            'average_points=' + str(self.average_points),
        ]
        return self.__class__.__name__ + '(' + ', '.join(fields) + ')'
lavis/common/annotator/uniformer/mmcv/ops/sync_bn.py
0 → 100644
View file @
c04f261a
# Copyright (c) OpenMMLab. All rights reserved.
import
torch
import
torch.distributed
as
dist
import
torch.nn.functional
as
F
from
torch.autograd
import
Function
from
torch.autograd.function
import
once_differentiable
from
torch.nn.modules.module
import
Module
from
torch.nn.parameter
import
Parameter
from
annotator.uniformer.mmcv.cnn
import
NORM_LAYERS
from
..utils
import
ext_loader
# Handle returned by ext_loader.load_ext for the '_ext' extension; the listed
# names are the five ops SyncBatchNormFunction calls (three in forward, two
# in backward).
# NOTE(review): presumably load_ext checks that these ops are present in the
# compiled extension — confirm against ..utils.ext_loader.
ext_module = ext_loader.load_ext('_ext', [
    'sync_bn_forward_mean', 'sync_bn_forward_var', 'sync_bn_forward_output',
    'sync_bn_backward_param', 'sync_bn_backward_data'
])
class SyncBatchNormFunction(Function):
    """Autograd function for batch norm with statistics reduced across a
    process group.

    Note that ``forward`` and ``backward`` name their first parameter
    ``self``; because both are static methods, it is the autograd context
    object (it is used for ``save_for_backward`` / ``saved_tensors``).
    """

    @staticmethod
    def symbolic(g, input, running_mean, running_var, weight, bias, momentum,
                 eps, group, group_size, stats_mode):
        """Emit the ``mmcv::MMCVSyncBatchNorm`` node for ONNX export."""
        # NOTE(review): every other attribute carries a type suffix
        # (``_f`` / ``_i``) but ``stats_mode`` does not — confirm whether
        # the exporter accepts an un-suffixed attribute name here.
        return g.op(
            'mmcv::MMCVSyncBatchNorm',
            input,
            running_mean,
            running_var,
            weight,
            bias,
            momentum_f=momentum,
            eps_f=eps,
            group_i=group,
            group_size_i=group_size,
            stats_mode=stats_mode)

    @staticmethod
    def forward(self, input, running_mean, running_var, weight, bias, momentum,
                eps, group, group_size, stats_mode):
        """Normalize ``input`` using statistics reduced over ``group``.

        Args:
            self: Autograd context (see class docstring).
            input (torch.Tensor): Half or float tensor, CPU or CUDA; dims
                from index 2 onward are flattened for the kernels.
            running_mean, running_var: Passed to the output kernel.
            weight, bias: Passed to the output kernel; ``weight`` is also
                saved for backward.
            momentum (float): Scaled to 0 when the total batch over all
                ranks is empty.
            eps (float): Passed to the output kernel.
            group: Process group handed to ``dist.all_reduce``.
            group_size (int): All-reduce is skipped unless this is > 1.
            stats_mode (str): ``'default'`` divides the reduced statistics by
                ``group_size``; ``'N'`` weights them by batch size and
                divides by the total batch.

        Returns:
            torch.Tensor: Tensor with the same shape as ``input``.

        Raises:
            NotImplementedError: If ``stats_mode`` is neither ``'default'``
                nor ``'N'``.
        """
        self.momentum = momentum
        self.eps = eps
        self.group = group
        self.group_size = group_size
        self.stats_mode = stats_mode

        assert isinstance(
                   input, (torch.HalfTensor, torch.FloatTensor,
                           torch.cuda.HalfTensor, torch.cuda.FloatTensor)), \
               f'only support Half or Float Tensor, but {input.type()}'
        output = torch.zeros_like(input)
        # 3-D views (batch, channel, rest); output3d shares storage with
        # ``output`` so the kernel writes land in the returned tensor.
        input3d = input.flatten(start_dim=2)
        output3d = output.view_as(input3d)
        num_channels = input3d.size(1)

        # ensure mean/var/norm/std are initialized as zeros
        # ``torch.empty()`` does not guarantee that
        mean = torch.zeros(
            num_channels, dtype=torch.float, device=input3d.device)
        var = torch.zeros(
            num_channels, dtype=torch.float, device=input3d.device)
        norm = torch.zeros_like(
            input3d, dtype=torch.float, device=input3d.device)
        std = torch.zeros(
            num_channels, dtype=torch.float, device=input3d.device)

        batch_size = input3d.size(0)
        if batch_size > 0:
            ext_module.sync_bn_forward_mean(input3d, mean)
            batch_flag = torch.ones([1], device=mean.device, dtype=mean.dtype)
        else:
            # skip updating mean and leave it as zeros when the input is empty
            batch_flag = torch.zeros([1], device=mean.device, dtype=mean.dtype)

        # synchronize mean and the batch flag
        # Both travel in one vector so a single all_reduce covers them.
        vec = torch.cat([mean, batch_flag])
        if self.stats_mode == 'N':
            # Weight by local batch size; the flag slot then holds the local
            # batch size, so its reduced value is the total batch.
            vec *= batch_size
        if self.group_size > 1:
            dist.all_reduce(vec, group=self.group)
        total_batch = vec[-1].detach()
        mean = vec[:num_channels]

        if self.stats_mode == 'default':
            mean = mean / self.group_size
        elif self.stats_mode == 'N':
            # clamp avoids division by zero when every rank is empty
            mean = mean / total_batch.clamp(min=1)
        else:
            raise NotImplementedError

        # leave var as zeros when the input is empty
        if batch_size > 0:
            ext_module.sync_bn_forward_var(input3d, mean, var)

        if self.stats_mode == 'N':
            var *= batch_size
        if self.group_size > 1:
            dist.all_reduce(var, group=self.group)

        if self.stats_mode == 'default':
            var /= self.group_size
        elif self.stats_mode == 'N':
            var /= total_batch.clamp(min=1)
        else:
            raise NotImplementedError

        # if the total batch size over all the ranks is zero,
        # we should not update the statistics in the current batch
        update_flag = total_batch.clamp(max=1)
        momentum = update_flag * self.momentum
        ext_module.sync_bn_forward_output(
            input3d,
            mean,
            var,
            weight,
            bias,
            running_mean,
            running_var,
            norm,
            std,
            output3d,
            eps=self.eps,
            momentum=momentum,
            group_size=self.group_size)
        self.save_for_backward(norm, std, weight)
        return output

    @staticmethod
    @once_differentiable
    def backward(self, grad_output):
        """Compute gradients for ``input``, ``weight`` and ``bias``.

        Args:
            self: Autograd context populated by ``forward``.
            grad_output (torch.Tensor): Gradient of the output.

        Returns:
            tuple: Ten entries matching ``forward``'s inputs; only positions
            0 (input), 3 (weight) and 4 (bias) are non-``None``.
        """
        norm, std, weight = self.saved_tensors
        grad_weight = torch.zeros_like(weight)
        grad_bias = torch.zeros_like(weight)
        grad_input = torch.zeros_like(grad_output)
        grad_output3d = grad_output.flatten(start_dim=2)
        # View sharing storage with grad_input, written by the data kernel.
        grad_input3d = grad_input.view_as(grad_output3d)

        batch_size = grad_input3d.size(0)
        if batch_size > 0:
            ext_module.sync_bn_backward_param(grad_output3d, norm, grad_weight,
                                              grad_bias)

        # all reduce
        # Runs even when the local batch is empty (the zero-initialized
        # gradients are reduced in that case).
        if self.group_size > 1:
            dist.all_reduce(grad_weight, group=self.group)
            dist.all_reduce(grad_bias, group=self.group)
            grad_weight /= self.group_size
            grad_bias /= self.group_size

        if batch_size > 0:
            ext_module.sync_bn_backward_data(grad_output3d, weight,
                                             grad_weight, grad_bias, norm, std,
                                             grad_input3d)

        return grad_input, None, None, grad_weight, grad_bias, \
            None, None, None, None, None
@
NORM_LAYERS
.
register_module
(
name
=
'MMSyncBN'
)
class
SyncBatchNorm
(
Module
):
"""Synchronized Batch Normalization.
Args:
num_features (int): number of features/channels in input tensor
eps (float, optional): a value added to the denominator for numerical
stability. Defaults to 1e-5.
momentum (float, optional): the value used for the running_mean and
running_var computation. Defaults to 0.1.
affine (bool, optional): whether to use learnable affine parameters.
Defaults to True.
track_running_stats (bool, optional): whether to track the running
mean and variance during training. When set to False, this
module does not track such statistics, and initializes statistics
buffers ``running_mean`` and ``running_var`` as ``None``. When
these buffers are ``None``, this module always uses batch
statistics in both training and eval modes. Defaults to True.
group (int, optional): synchronization of stats happen within
each process group individually. By default it is synchronization
across the whole world. Defaults to None.
stats_mode (str, optional): The statistical mode. Available options
includes ``'default'`` and ``'N'``. Defaults to 'default'.
When ``stats_mode=='default'``, it computes the overall statistics
using those from each worker with equal weight, i.e., the
statistics are synchronized and simply divided by ``group``. This
mode will produce inaccurate statistics when empty tensors occur.
When ``stats_mode=='N'``, it computes the overall statistics using
the total number of batches in each worker ignoring the number of
group, i.e., the statistics are synchronized and then divided by
the total batch ``N``. This mode is beneficial when empty tensors
occur during training, as it averages the total mean by the real
number of batch.
"""
def
__init__
(
self
,
num_features
,
eps
=
1e-5
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
,
group
=
None
,
stats_mode
=
'default'
):
super
(
SyncBatchNorm
,
self
).
__init__
()
self
.
num_features
=
num_features
self
.
eps
=
eps
self
.
momentum
=
momentum
self
.
affine
=
affine
self
.
track_running_stats
=
track_running_stats
group
=
dist
.
group
.
WORLD
if
group
is
None
else
group
self
.
group
=
group
self
.
group_size
=
dist
.
get_world_size
(
group
)
assert
stats_mode
in
[
'default'
,
'N'
],
\
f
'"stats_mode" only accepts "default" and "N", got "
{
stats_mode
}
"'
self
.
stats_mode
=
stats_mode
if
self
.
affine
:
self
.
weight
=
Parameter
(
torch
.
Tensor
(
num_features
))
self
.
bias
=
Parameter
(
torch
.
Tensor
(
num_features
))
else
:
self
.
register_parameter
(
'weight'
,
None
)
self
.
register_parameter
(
'bias'
,
None
)
if
self
.
track_running_stats
:
self
.
register_buffer
(
'running_mean'
,
torch
.
zeros
(
num_features
))
self
.
register_buffer
(
'running_var'
,
torch
.
ones
(
num_features
))
self
.
register_buffer
(
'num_batches_tracked'
,
torch
.
tensor
(
0
,
dtype
=
torch
.
long
))
else
:
self
.
register_buffer
(
'running_mean'
,
None
)
self
.
register_buffer
(
'running_var'
,
None
)
self
.
register_buffer
(
'num_batches_tracked'
,
None
)
self
.
reset_parameters
()
def
reset_running_stats
(
self
):
if
self
.
track_running_stats
:
self
.
running_mean
.
zero_
()
self
.
running_var
.
fill_
(
1
)
self
.
num_batches_tracked
.
zero_
()
def
reset_parameters
(
self
):
self
.
reset_running_stats
()
if
self
.
affine
:
self
.
weight
.
data
.
uniform_
()
# pytorch use ones_()
self
.
bias
.
data
.
zero_
()
def
forward
(
self
,
input
):
if
input
.
dim
()
<
2
:
raise
ValueError
(
f
'expected at least 2D input, got
{
input
.
dim
()
}
D input'
)
if
self
.
momentum
is
None
:
exponential_average_factor
=
0.0
else
:
exponential_average_factor
=
self
.
momentum
if
self
.
training
and
self
.
track_running_stats
:
if
self
.
num_batches_tracked
is
not
None
:
self
.
num_batches_tracked
+=
1
if
self
.
momentum
is
None
:
# use cumulative moving average
exponential_average_factor
=
1.0
/
float
(
self
.
num_batches_tracked
)
else
:
# use exponential moving average
exponential_average_factor
=
self
.
momentum
if
self
.
training
or
not
self
.
track_running_stats
:
return
SyncBatchNormFunction
.
apply
(
input
,
self
.
running_mean
,
self
.
running_var
,
self
.
weight
,
self
.
bias
,
exponential_average_factor
,
self
.
eps
,
self
.
group
,
self
.
group_size
,
self
.
stats_mode
)
else
:
return
F
.
batch_norm
(
input
,
self
.
running_mean
,
self
.
running_var
,
self
.
weight
,
self
.
bias
,
False
,
exponential_average_factor
,
self
.
eps
)
def
__repr__
(
self
):
s
=
self
.
__class__
.
__name__
s
+=
f
'(
{
self
.
num_features
}
, '
s
+=
f
'eps=
{
self
.
eps
}
, '
s
+=
f
'momentum=
{
self
.
momentum
}
, '
s
+=
f
'affine=
{
self
.
affine
}
, '
s
+=
f
'track_running_stats=
{
self
.
track_running_stats
}
, '
s
+=
f
'group_size=
{
self
.
group_size
}
,'
s
+=
f
'stats_mode=
{
self
.
stats_mode
}
)'
return
s
lavis/common/annotator/uniformer/mmcv/ops/three_interpolate.py
0 → 100644
View file @
c04f261a
from
typing
import
Tuple
import
torch
from
torch.autograd
import
Function
from
..utils
import
ext_loader
# Extension handle returned by ``ext_loader.load_ext`` for ``'_ext'``; the
# list names the ops that this module calls on it.
ext_module = ext_loader.load_ext(
    '_ext', ['three_interpolate_forward', 'three_interpolate_backward'])
class ThreeInterpolate(Function):
    """Performs weighted linear interpolation on 3 features.

    Please refer to `Paper of PointNet++ <https://arxiv.org/abs/1706.02413>`_
    for more details.
    """

    @staticmethod
    def forward(ctx, features: torch.Tensor, indices: torch.Tensor,
                weight: torch.Tensor) -> torch.Tensor:
        """
        Args:
            features (Tensor): (B, C, M) Features descriptors to be
                interpolated.
            indices (Tensor): (B, n, 3) index three nearest neighbors
                of the target features in features.
            weight (Tensor): (B, n, 3) weights of interpolation.

        Returns:
            Tensor: (B, C, n) tensor of the interpolated features.
        """
        assert features.is_contiguous()
        assert indices.is_contiguous()
        assert weight.is_contiguous()

        B, c, m = features.size()
        n = indices.size(1)
        ctx.three_interpolate_for_backward = (indices, weight, m)

        # Allocate next to ``features`` (same device and dtype) rather than
        # on whatever CUDA device happens to be current.
        output = features.new_empty((B, c, n))

        ext_module.three_interpolate_forward(
            features, indices, weight, output, b=B, c=c, m=m, n=n)
        return output

    @staticmethod
    def backward(
            ctx,
            grad_out: torch.Tensor) -> Tuple[torch.Tensor, None, None]:
        """
        Args:
            grad_out (Tensor): (B, C, N) tensor with gradients of outputs.

        Returns:
            tuple: (B, C, M) tensor with gradients of features, followed by
            ``None`` for ``indices`` and ``weight``, which receive no
            gradient.
        """
        idx, weight, m = ctx.three_interpolate_for_backward
        B, c, n = grad_out.size()

        # Zero-initialized, on the device and with the dtype of the
        # incoming gradient.
        grad_features = grad_out.new_zeros((B, c, m))
        grad_out_data = grad_out.data.contiguous()

        ext_module.three_interpolate_backward(
            grad_out_data, idx, weight, grad_features.data, b=B, c=c, n=n,
            m=m)
        return grad_features, None, None


three_interpolate = ThreeInterpolate.apply
lavis/common/annotator/uniformer/mmcv/ops/three_nn.py
0 → 100644
View file @
c04f261a
from
typing
import
Tuple
import
torch
from
torch.autograd
import
Function
from
..utils
import
ext_loader
# Extension handle returned by ``ext_loader.load_ext`` for ``'_ext'``; the
# list names the op that this module calls on it.
ext_module = ext_loader.load_ext('_ext', ['three_nn_forward'])
class ThreeNN(Function):
    """Find the top-3 nearest neighbors of the target set from the source set.

    Please refer to `Paper of PointNet++ <https://arxiv.org/abs/1706.02413>`_
    for more details.
    """

    @staticmethod
    def forward(ctx, target: torch.Tensor,
                source: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
        """
        Args:
            target (Tensor): shape (B, N, 3), points set that needs to
                find the nearest neighbors.
            source (Tensor): shape (B, M, 3), points set that is used
                to find the nearest neighbors of points in target set.

        Returns:
            tuple[Tensor, Tensor]:
                - shape (B, N, 3), L2 distance of each point in target
                  set to their corresponding nearest neighbors.
                - shape (B, N, 3), int32 indices filled in by the
                  extension op (marked non-differentiable).
        """
        target = target.contiguous()
        source = source.contiguous()

        B, N, _ = target.size()
        m = source.size(1)

        # Allocate the outputs next to ``target`` instead of on whatever
        # CUDA device happens to be current.
        dist2 = target.new_empty((B, N, 3))
        idx = target.new_empty((B, N, 3), dtype=torch.int32)

        ext_module.three_nn_forward(target, source, dist2, idx, b=B, n=N, m=m)
        if torch.__version__ != 'parrots':
            ctx.mark_non_differentiable(idx)

        return torch.sqrt(dist2), idx

    @staticmethod
    def backward(ctx, a=None, b=None):
        """No gradient is propagated to ``target`` or ``source``."""
        return None, None


three_nn = ThreeNN.apply
lavis/common/annotator/uniformer/mmcv/ops/tin_shift.py
0 → 100644
View file @
c04f261a
# Copyright (c) OpenMMLab. All rights reserved.
# Code reference from "Temporal Interlacing Network"
# https://github.com/deepcs233/TIN/blob/master/cuda_shift/rtc_wrap.py
# Hao Shao, Shengju Qian, Yu Liu
# shaoh19@mails.tsinghua.edu.cn, sjqian@cse.cuhk.edu.hk, yuliu@ee.cuhk.edu.hk
import
torch
import
torch.nn
as
nn
from
torch.autograd
import
Function
from
..utils
import
ext_loader
# Extension handle returned by ``ext_loader.load_ext`` for ``'_ext'``; the
# list names the ops that this module calls on it.
ext_module = ext_loader.load_ext('_ext',
                                 ['tin_shift_forward', 'tin_shift_backward'])
class TINShiftFunction(Function):
    """Autograd function wrapping the ``tin_shift`` extension ops."""

    @staticmethod
    def forward(ctx, input, shift):
        """Shift ``input`` according to ``shift``.

        Raises:
            ValueError: If ``input.size(2)`` is smaller than, or not a
                multiple of, ``shift.size(1)``.
        """
        C = input.size(2)
        num_segments = shift.size(1)
        is_multiple = C // num_segments > 0 and C % num_segments == 0
        if not is_multiple:
            raise ValueError('C should be a multiple of num_segments, '
                             f'but got C={C} and num_segments={num_segments}.')

        ctx.save_for_backward(shift)

        out = torch.zeros_like(input)
        ext_module.tin_shift_forward(input, shift, out)
        return out

    @staticmethod
    def backward(ctx, grad_output):
        """Return the input gradient and an all-zero gradient for shift."""
        shift, = ctx.saved_tensors

        data_grad_input = grad_output.new_zeros(grad_output.size())
        # The extension op never writes to this tensor, so the gradient
        # reported for ``shift`` is all zeros.
        shift_grad_input = shift.new_zeros(shift.size())
        ext_module.tin_shift_backward(grad_output, shift, data_grad_input)

        return data_grad_input, shift_grad_input


tin_shift = TINShiftFunction.apply
class TINShift(nn.Module):
    """Temporal Interlace Shift.

    Temporal Interlace shift is a differentiable temporal-wise frame shifting
    which is proposed in "Temporal Interlacing Network".

    Please refer to https://arxiv.org/abs/2001.06499 for more details.
    Code is modified from https://github.com/mit-han-lab/temporal-shift-module
    """

    def forward(self, input, shift):
        """Perform temporal interlace shift.

        Args:
            input (Tensor): Feature map with shape [N, num_segments, C, H * W].
            shift (Tensor): Shift tensor with shape [N, num_segments].

        Returns:
            Feature map after temporal interlace shift.
        """
        shifted = tin_shift(input, shift)
        return shifted
lavis/common/annotator/uniformer/mmcv/ops/upfirdn2d.py
0 → 100644
View file @
c04f261a
# modified from https://github.com/rosinality/stylegan2-pytorch/blob/master/op/upfirdn2d.py # noqa:E501
# Copyright (c) 2021, NVIDIA Corporation. All rights reserved.
# NVIDIA Source Code License for StyleGAN2 with Adaptive Discriminator
# Augmentation (ADA)
# =======================================================================
# 1. Definitions
# "Licensor" means any person or entity that distributes its Work.
# "Software" means the original work of authorship made available under
# this License.
# "Work" means the Software and any additions to or derivative works of
# the Software that are made available under this License.
# The terms "reproduce," "reproduction," "derivative works," and
# "distribution" have the meaning as provided under U.S. copyright law;
# provided, however, that for the purposes of this License, derivative
# works shall not include works that remain separable from, or merely
# link (or bind by name) to the interfaces of, the Work.
# Works, including the Software, are "made available" under this License
# by including in or with the Work either (a) a copyright notice
# referencing the applicability of this License to the Work, or (b) a
# copy of this License.
# 2. License Grants
# 2.1 Copyright Grant. Subject to the terms and conditions of this
# License, each Licensor grants to you a perpetual, worldwide,
# non-exclusive, royalty-free, copyright license to reproduce,
# prepare derivative works of, publicly display, publicly perform,
# sublicense and distribute its Work and any resulting derivative
# works in any form.
# 3. Limitations
# 3.1 Redistribution. You may reproduce or distribute the Work only
# if (a) you do so under this License, (b) you include a complete
# copy of this License with your distribution, and (c) you retain
# without modification any copyright, patent, trademark, or
# attribution notices that are present in the Work.
# 3.2 Derivative Works. You may specify that additional or different
# terms apply to the use, reproduction, and distribution of your
# derivative works of the Work ("Your Terms") only if (a) Your Terms
# provide that the use limitation in Section 3.3 applies to your
# derivative works, and (b) you identify the specific derivative
# works that are subject to Your Terms. Notwithstanding Your Terms,
# this License (including the redistribution requirements in Section
# 3.1) will continue to apply to the Work itself.
# 3.3 Use Limitation. The Work and any derivative works thereof only
# may be used or intended for use non-commercially. Notwithstanding
# the foregoing, NVIDIA and its affiliates may use the Work and any
# derivative works commercially. As used herein, "non-commercially"
# means for research or evaluation purposes only.
# 3.4 Patent Claims. If you bring or threaten to bring a patent claim
# against any Licensor (including any claim, cross-claim or
# counterclaim in a lawsuit) to enforce any patents that you allege
# are infringed by any Work, then your rights under this License from
# such Licensor (including the grant in Section 2.1) will terminate
# immediately.
# 3.5 Trademarks. This License does not grant any rights to use any
# Licensor’s or its affiliates’ names, logos, or trademarks, except
# as necessary to reproduce the notices described in this License.
# 3.6 Termination. If you violate any term of this License, then your
# rights under this License (including the grant in Section 2.1) will
# terminate immediately.
# 4. Disclaimer of Warranty.
# THE WORK IS PROVIDED "AS IS" WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WARRANTIES OR CONDITIONS OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE OR
# NON-INFRINGEMENT. YOU BEAR THE RISK OF UNDERTAKING ANY ACTIVITIES UNDER
# THIS LICENSE.
# 5. Limitation of Liability.
# EXCEPT AS PROHIBITED BY APPLICABLE LAW, IN NO EVENT AND UNDER NO LEGAL
# THEORY, WHETHER IN TORT (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE
# SHALL ANY LICENSOR BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY DIRECT,
# INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF
# OR RELATED TO THIS LICENSE, THE USE OR INABILITY TO USE THE WORK
# (INCLUDING BUT NOT LIMITED TO LOSS OF GOODWILL, BUSINESS INTERRUPTION,
# LOST PROFITS OR DATA, COMPUTER FAILURE OR MALFUNCTION, OR ANY OTHER
# COMMERCIAL DAMAGES OR LOSSES), EVEN IF THE LICENSOR HAS BEEN ADVISED OF
# THE POSSIBILITY OF SUCH DAMAGES.
# =======================================================================
import
torch
from
torch.autograd
import
Function
from
torch.nn
import
functional
as
F
from
annotator.uniformer.mmcv.utils
import
to_2tuple
from
..utils
import
ext_loader
# Extension handle returned by ``ext_loader.load_ext`` for ``'_ext'``; the
# list names the op that this module calls on it.
upfirdn2d_ext = ext_loader.load_ext('_ext', ['upfirdn2d'])
class UpFirDn2dBackward(Function):
    """Backward pass of ``UpFirDn2d``, itself written as a Function.

    Its forward computes the input gradient by running the extension op
    with the up and down factors swapped, the kernel given as
    ``grad_kernel`` and the padding ``g_pad``; its backward re-applies the
    original operation to the incoming second-order gradient.
    """

    @staticmethod
    def forward(ctx, grad_output, kernel, grad_kernel, up, down, pad, g_pad,
                in_size, out_size):
        up_x, up_y = up
        down_x, down_y = down
        g_pad_x0, g_pad_x1, g_pad_y0, g_pad_y1 = g_pad

        flat_grad = grad_output.reshape(-1, out_size[0], out_size[1], 1)

        # Up and down factors trade places for the gradient computation.
        grad_input = upfirdn2d_ext.upfirdn2d(
            flat_grad,
            grad_kernel,
            up_x=down_x,
            up_y=down_y,
            down_x=up_x,
            down_y=up_y,
            pad_x0=g_pad_x0,
            pad_x1=g_pad_x1,
            pad_y0=g_pad_y0,
            pad_y1=g_pad_y1)
        grad_input = grad_input.view(in_size[0], in_size[1], in_size[2],
                                     in_size[3])

        ctx.save_for_backward(kernel)

        # Keep the forward-direction settings for the double backward.
        ctx.pad_x0, ctx.pad_x1, ctx.pad_y0, ctx.pad_y1 = pad
        ctx.up_x, ctx.up_y = up_x, up_y
        ctx.down_x, ctx.down_y = down_x, down_y
        ctx.in_size = in_size
        ctx.out_size = out_size

        return grad_input

    @staticmethod
    def backward(ctx, gradgrad_input):
        kernel, = ctx.saved_tensors
        in_size, out_size = ctx.in_size, ctx.out_size

        flat_gradgrad = gradgrad_input.reshape(-1, in_size[2], in_size[3], 1)

        gradgrad_out = upfirdn2d_ext.upfirdn2d(
            flat_gradgrad,
            kernel,
            up_x=ctx.up_x,
            up_y=ctx.up_y,
            down_x=ctx.down_x,
            down_y=ctx.down_y,
            pad_x0=ctx.pad_x0,
            pad_x1=ctx.pad_x1,
            pad_y0=ctx.pad_y0,
            pad_y1=ctx.pad_y1)
        gradgrad_out = gradgrad_out.view(in_size[0], in_size[1], out_size[0],
                                         out_size[1])

        # Only the first forward argument (grad_output) gets a gradient.
        return (gradgrad_out, ) + (None, ) * 8
class UpFirDn2d(Function):
    """Autograd function around the ``upfirdn2d`` extension op."""

    @staticmethod
    def forward(ctx, input, kernel, up, down, pad):
        """Run the op on an (n, c, h, w) ``input``.

        ``up`` and ``down`` are ``(x, y)`` pairs and ``pad`` is
        ``(pad_x0, pad_x1, pad_y0, pad_y1)``. The settings required by the
        backward pass are stored on ``ctx``.
        """
        up_x, up_y = up
        down_x, down_y = down
        pad_x0, pad_x1, pad_y0, pad_y1 = pad

        kernel_h, kernel_w = kernel.shape
        _, channel, in_h, in_w = input.shape
        ctx.in_size = input.shape

        # Batch and channel are folded into one leading axis.
        flat_input = input.reshape(-1, in_h, in_w, 1)

        # The flipped kernel is what the backward pass filters with.
        ctx.save_for_backward(kernel, torch.flip(kernel, [0, 1]))

        out_h = (in_h * up_y + pad_y0 + pad_y1 - kernel_h) // down_y + 1
        out_w = (in_w * up_x + pad_x0 + pad_x1 - kernel_w) // down_x + 1

        ctx.out_size = (out_h, out_w)
        ctx.up = (up_x, up_y)
        ctx.down = (down_x, down_y)
        ctx.pad = (pad_x0, pad_x1, pad_y0, pad_y1)

        # Padding used when computing the input gradient.
        ctx.g_pad = (
            kernel_w - pad_x0 - 1,
            in_w * up_x - out_w * down_x + pad_x0 - up_x + 1,
            kernel_h - pad_y0 - 1,
            in_h * up_y - out_h * down_y + pad_y0 - up_y + 1,
        )

        out = upfirdn2d_ext.upfirdn2d(
            flat_input,
            kernel,
            up_x=up_x,
            up_y=up_y,
            down_x=down_x,
            down_y=down_y,
            pad_x0=pad_x0,
            pad_x1=pad_x1,
            pad_y0=pad_y0,
            pad_y1=pad_y1)
        return out.view(-1, channel, out_h, out_w)

    @staticmethod
    def backward(ctx, grad_output):
        """Return the input gradient; the other arguments get ``None``."""
        kernel, grad_kernel = ctx.saved_tensors

        grad_input = UpFirDn2dBackward.apply(
            grad_output,
            kernel,
            grad_kernel,
            ctx.up,
            ctx.down,
            ctx.pad,
            ctx.g_pad,
            ctx.in_size,
            ctx.out_size,
        )

        return (grad_input, ) + (None, ) * 4
def upfirdn2d(input, kernel, up=1, down=1, pad=(0, 0)):
    """UpFIRDn for 2d features.

    UpFIRDn is short for upsample, apply FIR filter and downsample. More
    details can be found in:
    https://www.mathworks.com/help/signal/ref/upfirdn.html

    Args:
        input (Tensor): Tensor with shape of (n, c, h, w).
        kernel (Tensor): Filter kernel.
        up (int | tuple[int], optional): Upsampling factor. If given a number,
            we will use this factor for the both height and width side.
            Defaults to 1.
        down (int | tuple[int], optional): Downsampling factor. If given a
            number, we will use this factor for the both height and width side.
            Defaults to 1.
        pad (tuple[int], optional): Padding for tensors, either
            (pad_0, pad_1), which is expanded to
            (pad_0, pad_1, pad_0, pad_1), or the full
            (x_pad_0, x_pad_1, y_pad_0, y_pad_1). Defaults to (0, 0).

    Returns:
        Tensor: Tensor after UpFIRDn.

    Raises:
        ValueError: If ``pad`` has neither 2 nor 4 entries.
    """
    # Normalize the padding once, before branching on the device, so that
    # both code paths accept exactly the same inputs.
    if len(pad) == 2:
        pad = (pad[0], pad[1], pad[0], pad[1])
    elif len(pad) != 4:
        raise ValueError(
            f'pad should contain 2 or 4 values, but got {len(pad)}.')

    up = to_2tuple(up)
    down = to_2tuple(down)

    if input.device.type == 'cpu':
        # No extension op on CPU: use the pure PyTorch implementation.
        return upfirdn2d_native(input, kernel, up[0], up[1], down[0],
                                down[1], pad[0], pad[1], pad[2], pad[3])

    return UpFirDn2d.apply(input, kernel, up, down, pad)
def upfirdn2d_native(input, kernel, up_x, up_y, down_x, down_y, pad_x0, pad_x1,
                     pad_y0, pad_y1):
    """Pure PyTorch implementation of upsample, FIR filter and downsample.

    Args:
        input (Tensor): Tensor with shape of (n, c, h, w).
        kernel (Tensor): 2D filter kernel of shape (kernel_h, kernel_w).
        up_x (int): Upsampling factor along the width.
        up_y (int): Upsampling factor along the height.
        down_x (int): Downsampling factor along the width.
        down_y (int): Downsampling factor along the height.
        pad_x0 (int): Padding before the first column; negative crops.
        pad_x1 (int): Padding after the last column; negative crops.
        pad_y0 (int): Padding before the first row; negative crops.
        pad_y1 (int): Padding after the last row; negative crops.

    Returns:
        Tensor: Result reshaped to (-1, c, out_h, out_w).
    """
    _, channel, in_h, in_w = input.shape
    kernel_h, kernel_w = kernel.shape

    # Fold batch and channel into one leading axis; the trailing "minor"
    # axis has size 1.
    planes = input.reshape(-1, in_h, in_w, 1)
    _, in_h, in_w, minor = planes.shape

    # Upsample by inserting zeros after every sample.
    stuffed = planes.view(-1, in_h, 1, in_w, 1, minor)
    stuffed = F.pad(stuffed, [0, 0, 0, up_x - 1, 0, 0, 0, up_y - 1])
    stuffed = stuffed.view(-1, in_h * up_y, in_w * up_x, minor)

    # Positive pads add zeros, negative pads crop.
    padded = F.pad(
        stuffed,
        [0, 0,
         max(pad_x0, 0),
         max(pad_x1, 0),
         max(pad_y0, 0),
         max(pad_y1, 0)])
    top = max(-pad_y0, 0)
    bottom = padded.shape[1] - max(-pad_y1, 0)
    left = max(-pad_x0, 0)
    right = padded.shape[2] - max(-pad_x1, 0)
    padded = padded[:, top:bottom, left:right, :]

    padded_h = in_h * up_y + pad_y0 + pad_y1
    padded_w = in_w * up_x + pad_x0 + pad_x1
    padded = padded.permute(0, 3, 1, 2)
    padded = padded.reshape([-1, 1, padded_h, padded_w])

    # conv2d computes a cross-correlation, so the kernel is flipped first.
    flipped = torch.flip(kernel, [0, 1]).view(1, 1, kernel_h, kernel_w)
    filtered = F.conv2d(padded, flipped)
    filtered = filtered.reshape(
        -1,
        minor,
        padded_h - kernel_h + 1,
        padded_w - kernel_w + 1,
    )
    filtered = filtered.permute(0, 2, 3, 1)

    # Downsample by keeping every down_y-th row and down_x-th column.
    filtered = filtered[:, ::down_y, ::down_x, :]

    out_h = (padded_h - kernel_h) // down_y + 1
    out_w = (padded_w - kernel_w) // down_x + 1

    return filtered.view(-1, channel, out_h, out_w)
lavis/common/annotator/uniformer/mmcv/ops/voxelize.py
0 → 100644
View file @
c04f261a
# Copyright (c) OpenMMLab. All rights reserved.
import
torch
from
torch
import
nn
from
torch.autograd
import
Function
from
torch.nn.modules.utils
import
_pair
from
..utils
import
ext_loader
# Extension handle returned by ``ext_loader.load_ext`` for ``'_ext'``; the
# list names the ops that this module calls on it.
ext_module = ext_loader.load_ext(
    '_ext', ['dynamic_voxelize_forward', 'hard_voxelize_forward'])
class _Voxelization(Function):
    """Autograd function wrapping the voxelization extension ops."""

    @staticmethod
    def forward(ctx,
                points,
                voxel_size,
                coors_range,
                max_points=35,
                max_voxels=20000):
        """Convert kitti points(N, >=3) to voxels.

        Args:
            points (torch.Tensor): [N, ndim]. Points[:, :3] contain xyz points
                and points[:, 3:] contain other information like reflectivity.
            voxel_size (tuple or float): The size of voxel with the shape of
                [3].
            coors_range (tuple or float): The coordinate range of voxel with
                the shape of [6].
            max_points (int, optional): maximum points contained in a voxel.
                Dynamic voxelization is used when either ``max_points`` or
                ``max_voxels`` is -1. Default: 35.
            max_voxels (int, optional): maximum voxels this function create.
                for second, 20000 is a good choice. Users should shuffle points
                before call this function because max_voxels may drop points.
                Default: 20000.

        Returns:
            In the dynamic case a single int tensor of coordinates with the
            shape of [N, 3]. Otherwise a tuple of:

            voxels_out (torch.Tensor): Output voxels with the shape of [M,
                max_points, ndim].
            coors_out (torch.Tensor): Output coordinates with the shape of
                [M, 3].
            num_points_per_voxel_out (torch.Tensor): Num points per voxel with
                the shape of [M].
        """
        use_dynamic = max_points == -1 or max_voxels == -1
        if use_dynamic:
            coors = points.new_zeros(
                size=(points.size(0), 3), dtype=torch.int)
            ext_module.dynamic_voxelize_forward(points, coors, voxel_size,
                                                coors_range, 3)
            return coors

        # Buffers sized for the worst case; trimmed to the count returned
        # by the op below.
        voxels = points.new_zeros(
            size=(max_voxels, max_points, points.size(1)))
        coors = points.new_zeros(size=(max_voxels, 3), dtype=torch.int)
        num_points_per_voxel = points.new_zeros(
            size=(max_voxels, ), dtype=torch.int)

        voxel_num = ext_module.hard_voxelize_forward(
            points, voxels, coors, num_points_per_voxel, voxel_size,
            coors_range, max_points, max_voxels, 3)

        # select the valid voxels
        return (voxels[:voxel_num], coors[:voxel_num],
                num_points_per_voxel[:voxel_num])


voxelization = _Voxelization.apply
class Voxelization(nn.Module):
    """Convert kitti points(N, >=3) to voxels.

    Please refer to `PVCNN <https://arxiv.org/abs/1907.03739>`_ for more
    details.

    Args:
        voxel_size (tuple or float): The size of voxel with the shape of [3].
        point_cloud_range (tuple or float): The coordinate range of voxel with
            the shape of [6].
        max_num_points (int): maximum points contained in a voxel. if
            max_points=-1, it means using dynamic_voxelize.
        max_voxels (int | tuple, optional): maximum voxels this function
            create. A tuple holds the (training, testing) limits; any other
            value is expanded with ``_pair``. for second, 20000 is a good
            choice. Users should shuffle points before call this function
            because max_voxels may drop points. Default: 20000.
    """

    def __init__(self,
                 voxel_size,
                 point_cloud_range,
                 max_num_points,
                 max_voxels=20000):
        super().__init__()

        self.voxel_size = voxel_size
        self.point_cloud_range = point_cloud_range
        self.max_num_points = max_num_points
        self.max_voxels = (
            max_voxels if isinstance(max_voxels, tuple) else _pair(max_voxels))

        range_tensor = torch.tensor(point_cloud_range, dtype=torch.float32)
        size_tensor = torch.tensor(voxel_size, dtype=torch.float32)
        extent = range_tensor[3:] - range_tensor[:3]
        grid_size = torch.round(extent / size_tensor).long()
        self.grid_size = grid_size

        # the origin shape is as [x-len, y-len, z-len]
        # [w, h, d] -> [d, h, w]
        pcd_shape = list(grid_size[:2]) + [1]
        pcd_shape.reverse()
        self.pcd_shape = pcd_shape

    def forward(self, input):
        """Voxelize ``input`` using the limit of the current mode."""
        mode_index = 0 if self.training else 1
        return voxelization(input, self.voxel_size, self.point_cloud_range,
                            self.max_num_points, self.max_voxels[mode_index])

    def __repr__(self):
        fields = (
            ('voxel_size', self.voxel_size),
            ('point_cloud_range', self.point_cloud_range),
            ('max_num_points', self.max_num_points),
            ('max_voxels', self.max_voxels),
        )
        listing = ', '.join(
            label + '=' + str(value) for label, value in fields)
        return self.__class__.__name__ + '(' + listing + ')'
lavis/common/annotator/uniformer/mmcv/parallel/__init__.py
0 → 100644
View file @
c04f261a
# Copyright (c) OpenMMLab. All rights reserved.
from
.collate
import
collate
from
.data_container
import
DataContainer
from
.data_parallel
import
MMDataParallel
from
.distributed
import
MMDistributedDataParallel
from
.registry
import
MODULE_WRAPPERS
from
.scatter_gather
import
scatter
,
scatter_kwargs
from
.utils
import
is_module_wrapper
# Names exported by ``from <package> import *``; all of them are imported
# above.
__all__ = [
    'collate', 'DataContainer', 'MMDataParallel', 'MMDistributedDataParallel',
    'scatter', 'scatter_kwargs', 'is_module_wrapper', 'MODULE_WRAPPERS'
]
lavis/common/annotator/uniformer/mmcv/parallel/_functions.py
0 → 100644
View file @
c04f261a
# Copyright (c) OpenMMLab. All rights reserved.
import
torch
from
torch.nn.parallel._functions
import
_get_stream
def scatter(input, devices, streams=None):
    """Scatters tensor across multiple GPUs.

    Args:
        input (list | Tensor): A tensor, or an arbitrarily nested list of
            tensors. List items are dealt out to ``devices`` in contiguous
            chunks.
        devices (list[int]): Target device ids; ``[-1]`` keeps the data on
            the CPU.
        streams (list, optional): One stream per device used for the copy.
            Defaults to None, meaning ``None`` for every device.

    Returns:
        list | Tensor: Same nesting as ``input``. With ``devices == [-1]``
        each tensor gains a leading dimension of size 1.

    Raises:
        Exception: If ``input`` is neither a list nor a tensor.
    """
    if streams is None:
        streams = [None] * len(devices)

    if isinstance(input, list):
        # Ceil division: number of consecutive items sharing one device.
        chunk_size = (len(input) - 1) // len(devices) + 1
        outputs = []
        for index, item in enumerate(input):
            slot = index // chunk_size
            outputs.append(scatter(item, [devices[slot]], [streams[slot]]))
        return outputs

    if isinstance(input, torch.Tensor):
        output = input.contiguous()
        # TODO: copy to a pinned buffer first (if copying from CPU)
        stream = None if output.numel() <= 0 else streams[0]
        if devices == [-1]:
            # unsqueeze the first dimension thus the tensor's shape is the
            # same as those scattered with GPU.
            return output.unsqueeze(0)
        with torch.cuda.device(devices[0]), torch.cuda.stream(stream):
            output = output.cuda(devices[0], non_blocking=True)
        return output

    raise Exception(f'Unknown type {type(input)}.')
def synchronize_stream(output, devices, streams):
    """Make the current stream of each device wait for its copy stream.

    List items are assigned to devices with the same ceil-division
    chunking that ``scatter`` uses, so every output is paired with the
    device and stream it was copied with, including when the number of
    outputs is not a multiple of the number of devices.

    Args:
        output (list | Tensor): Scattered outputs, a tensor or an
            arbitrarily nested list of tensors.
        devices (list[int]): Device ids the outputs were scattered to.
        streams (list): The copy stream used for each device.

    Raises:
        Exception: If ``output`` is neither a list nor a tensor.
    """
    if isinstance(output, list):
        # Ceil division, matching the item-to-device mapping of scatter().
        chunk_size = (len(output) - 1) // len(devices) + 1
        for index, item in enumerate(output):
            slot = index // chunk_size
            synchronize_stream(item, [devices[slot]], [streams[slot]])
    elif isinstance(output, torch.Tensor):
        # Empty tensors are skipped: nothing is waited on or recorded.
        if output.numel() != 0:
            with torch.cuda.device(devices[0]):
                main_stream = torch.cuda.current_stream()
                main_stream.wait_stream(streams[0])
                # Tell the allocator the tensor is in use on this stream.
                output.record_stream(main_stream)
    else:
        raise Exception(f'Unknown type {type(output)}.')
def get_input_device(input):
    """Return the CUDA device id of the first CUDA tensor in ``input``.

    Args:
        input (list | Tensor): A tensor or an arbitrarily nested list of
            tensors.

    Returns:
        int: Device id of the first CUDA tensor found, or -1 when there is
        none (including for an empty list).

    Raises:
        Exception: If an element is neither a list nor a tensor.
    """
    if isinstance(input, torch.Tensor):
        if input.is_cuda:
            return input.get_device()
        return -1

    if isinstance(input, list):
        # Lazily walk the items and stop at the first one on a GPU.
        found = (get_input_device(item) for item in input)
        return next((device for device in found if device != -1), -1)

    raise Exception(f'Unknown type {type(input)}.')
class Scatter:
    """Scatter inputs to the target devices, copying on background streams."""

    @staticmethod
    def _copy_stream(device):
        """Return the background copy stream for the integer ``device``.

        ``_get_stream`` is a private PyTorch helper. Older releases take
        the integer device id; newer ones (2.1 and later, to our
        knowledge) expect a ``torch.device`` and fail with AttributeError
        on an int, in which case the call is repeated with a CUDA device
        object.
        """
        try:
            return _get_stream(device)
        except AttributeError:
            return _get_stream(torch.device('cuda', device))

    @staticmethod
    def forward(target_gpus, input):
        """Scatter ``input`` to ``target_gpus``.

        Args:
            target_gpus (list[int]): Target device ids; ``[-1]`` means CPU.
            input (list | Tensor): A tensor or a nested list of tensors.

        Returns:
            tuple: The scattered outputs.
        """
        input_device = get_input_device(input)
        streams = None
        if input_device == -1 and target_gpus != [-1]:
            # Perform CPU to GPU copies in a background stream
            streams = [Scatter._copy_stream(device) for device in target_gpus]

        outputs = scatter(input, target_gpus, streams)
        # Synchronize with the copy stream
        if streams is not None:
            synchronize_stream(outputs, target_gpus, streams)

        return tuple(outputs)
lavis/common/annotator/uniformer/mmcv/parallel/collate.py
0 → 100644
View file @
c04f261a
# Copyright (c) OpenMMLab. All rights reserved.
from
collections.abc
import
Mapping
,
Sequence
import
torch
import
torch.nn.functional
as
F
from
torch.utils.data.dataloader
import
default_collate
from
.data_container
import
DataContainer
def _pad_to_common_shape(group):
    """Pad the last ``pad_dims`` axes of each sample to the group maximum.

    ``group`` is a non-empty sequence of tensor-backed ``DataContainer``
    objects; ``pad_dims`` is taken from the first one. Returns the list of
    padded tensors.
    """
    head = group[0]
    pad_dims = head.pad_dims
    ndim = head.dim()
    assert ndim > pad_dims

    max_shape = [head.size(-dim) for dim in range(1, pad_dims + 1)]
    for sample in group:
        # The leading (non-padded) dimensions must agree exactly.
        for dim in range(ndim - pad_dims):
            assert head.size(dim) == sample.size(dim)
        for dim in range(1, pad_dims + 1):
            max_shape[dim - 1] = max(max_shape[dim - 1], sample.size(-dim))

    padded_samples = []
    for sample in group:
        # F.pad takes (before, after) pairs starting from the last axis;
        # only the "after" entries are filled in.
        pad = [0] * (pad_dims * 2)
        for dim in range(1, pad_dims + 1):
            pad[2 * dim - 1] = max_shape[dim - 1] - sample.size(-dim)
        padded_samples.append(
            F.pad(sample.data, pad, value=sample.padding_value))
    return padded_samples


def collate(batch, samples_per_gpu=1):
    """Puts each data field into a tensor/DataContainer with outer dimension
    batch size.

    Extend default_collate to add support for
    :type:`~mmcv.parallel.DataContainer`. There are 3 cases.

    1. cpu_only = True, e.g., meta data
    2. cpu_only = False, stack = True, e.g., images tensors
    3. cpu_only = False, stack = False, e.g., gt bboxes

    Args:
        batch (Sequence): Samples to collate.
        samples_per_gpu (int): Number of consecutive samples that form one
            group. Defaults to 1.

    Raises:
        TypeError: If ``batch`` is not a sequence.
    """
    if not isinstance(batch, Sequence):
        # Most non-sequences have no ``dtype``; fall back to the type name
        # so that the intended TypeError is raised, not AttributeError.
        described = getattr(batch, 'dtype', type(batch).__name__)
        raise TypeError(f'{described} is not supported.')

    first = batch[0]

    if isinstance(first, DataContainer):
        groups = [
            batch[i:i + samples_per_gpu]
            for i in range(0, len(batch), samples_per_gpu)
        ]
        if first.cpu_only:
            stacked = [[sample.data for sample in group] for group in groups]
            return DataContainer(
                stacked, first.stack, first.padding_value, cpu_only=True)

        if first.stack:
            stacked = []
            for group in groups:
                assert isinstance(group[0].data, torch.Tensor)
                if group[0].pad_dims is not None:
                    tensors = _pad_to_common_shape(group)
                else:
                    tensors = [sample.data for sample in group]
                stacked.append(default_collate(tensors))
        else:
            stacked = [[sample.data for sample in group] for group in groups]
        return DataContainer(stacked, first.stack, first.padding_value)

    if isinstance(first, (str, bytes)):
        # Strings are sequences of strings: transposing them like the
        # generic Sequence case below would recurse forever.
        return default_collate(batch)

    if isinstance(first, Sequence):
        return [collate(samples, samples_per_gpu) for samples in zip(*batch)]

    if isinstance(first, Mapping):
        return {
            key: collate([d[key] for d in batch], samples_per_gpu)
            for key in first
        }

    return default_collate(batch)
lavis/common/annotator/uniformer/mmcv/parallel/data_container.py
0 → 100644
View file @
c04f261a
# Copyright (c) OpenMMLab. All rights reserved.
import
functools
import
torch
def assert_tensor_type(func):
    """Decorator restricting a method to tensor-backed objects.

    The wrapped method runs only when ``args[0].data`` is a
    ``torch.Tensor``; otherwise an ``AttributeError`` naming the method and
    ``args[0].datatype`` is raised.
    """

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        owner = args[0]
        if isinstance(owner.data, torch.Tensor):
            return func(*args, **kwargs)
        raise AttributeError(
            f'{owner.__class__.__name__} has no attribute '
            f'{func.__name__} for type {owner.datatype}')

    return wrapper
class DataContainer:
    """A container for any type of objects.

    Typically tensors will be stacked in the collate function and sliced along
    some dimension in the scatter function. This behavior has some limitations.

    1. All tensors have to be the same size.
    2. Types are limited (numpy array or Tensor).

    We design `DataContainer` and `MMDataParallel` to overcome these
    limitations. The behavior can be either of the following.

    - copy to GPU, pad all tensors to the same size and stack them
    - copy to GPU without stacking
    - leave the objects as is and pass it to the model
    - pad_dims specifies the number of last few dimensions to do padding
    """

    def __init__(self,
                 data,
                 stack=False,
                 padding_value=0,
                 cpu_only=False,
                 pad_dims=2):
        assert pad_dims in (None, 1, 2, 3)
        self._data = data
        self._cpu_only = cpu_only
        self._stack = stack
        self._padding_value = padding_value
        self._pad_dims = pad_dims

    def __repr__(self):
        return f'{type(self).__name__}({self.data!r})'

    def __len__(self):
        return len(self._data)

    @property
    def data(self):
        """The wrapped object."""
        return self._data

    @property
    def datatype(self):
        """``data.type()`` for tensors, ``type(data)`` otherwise."""
        payload = self.data
        if isinstance(payload, torch.Tensor):
            return payload.type()
        return type(payload)

    @property
    def cpu_only(self):
        """The ``cpu_only`` flag given at construction."""
        return self._cpu_only

    @property
    def stack(self):
        """The ``stack`` flag given at construction."""
        return self._stack

    @property
    def padding_value(self):
        """The ``padding_value`` given at construction."""
        return self._padding_value

    @property
    def pad_dims(self):
        """The ``pad_dims`` given at construction (None, 1, 2 or 3)."""
        return self._pad_dims

    @assert_tensor_type
    def size(self, *args, **kwargs):
        """Forward to ``data.size``; only valid for tensor data."""
        return self.data.size(*args, **kwargs)

    @assert_tensor_type
    def dim(self):
        """Forward to ``data.dim``; only valid for tensor data."""
        return self.data.dim()
lavis/common/annotator/uniformer/mmcv/parallel/data_parallel.py
0 → 100644
View file @
c04f261a
# Copyright (c) OpenMMLab. All rights reserved.
from
itertools
import
chain
from
torch.nn.parallel
import
DataParallel
from
.scatter_gather
import
scatter_kwargs
class MMDataParallel(DataParallel):
    """The DataParallel module that supports DataContainer.

    MMDataParallel has two main differences with PyTorch DataParallel:

    - It supports a custom type :class:`DataContainer` which allows more
      flexible control of input data during both GPU and CPU inference.
    - It implements two more APIs ``train_step()`` and ``val_step()``.

    Args:
        module (:class:`nn.Module`): Module to be encapsulated.
        device_ids (list[int]): Device IDS of modules to be scattered to.
            Defaults to None when GPU is not available.
        output_device (str | int): Device ID for output. Defaults to None.
        dim (int): Dimension used to scatter the data. Defaults to 0.
    """

    def __init__(self, *args, dim=0, **kwargs):
        super(MMDataParallel, self).__init__(*args, dim=dim, **kwargs)
        # Set explicitly so ``scatter`` can rely on it on every code path.
        self.dim = dim

    def forward(self, *inputs, **kwargs):
        """Override the original forward function.

        The main difference lies in the CPU inference where the data in
        :class:`DataContainers` will still be gathered.
        """
        if not self.device_ids:
            # Scatter with the pseudo device id -1 so the module receives
            # gathered/converted data containers, as in GPU inference.
            inputs, kwargs = self.scatter(inputs, kwargs, [-1])
            return self.module(*inputs[0], **kwargs[0])
        else:
            return super().forward(*inputs, **kwargs)

    def scatter(self, inputs, kwargs, device_ids):
        """Scatter ``inputs``/``kwargs`` via ``scatter_kwargs`` along
        ``self.dim``."""
        return scatter_kwargs(inputs, kwargs, device_ids, dim=self.dim)

    def _run_step(self, step_name, inputs, kwargs):
        """Shared implementation of ``train_step`` and ``val_step``.

        Args:
            step_name (str): Name of the method to call on ``self.module``.
            inputs (tuple): Positional arguments given to the public method.
            kwargs (dict): Keyword arguments given to the public method.

        Returns:
            Whatever ``self.module.<step_name>`` returns.

        Raises:
            AssertionError: If more than one device id is configured.
            RuntimeError: If a parameter or buffer of the module is not on
                ``self.src_device_obj``.
        """
        if not self.device_ids:
            # Scatter with the pseudo device id -1 so the module receives
            # gathered/converted data containers, as in GPU inference.
            inputs, kwargs = self.scatter(inputs, kwargs, [-1])
            return getattr(self.module, step_name)(*inputs[0], **kwargs[0])

        # Kept as an assert so callers that catch AssertionError still work.
        assert len(self.device_ids) == 1, \
            ('MMDataParallel only supports single GPU training, if you need to'
             ' train with multiple GPUs, please use MMDistributedDataParallel'
             ' instead.')

        for t in chain(self.module.parameters(), self.module.buffers()):
            if t.device != self.src_device_obj:
                raise RuntimeError(
                    'module must have its parameters and buffers '
                    f'on device {self.src_device_obj} (device_ids[0]) but '
                    f'found one of them on device: {t.device}')

        inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids)
        return getattr(self.module, step_name)(*inputs[0], **kwargs[0])

    def train_step(self, *inputs, **kwargs):
        """Scatter the inputs and call ``self.module.train_step``."""
        return self._run_step('train_step', inputs, kwargs)

    def val_step(self, *inputs, **kwargs):
        """Scatter the inputs and call ``self.module.val_step``."""
        return self._run_step('val_step', inputs, kwargs)
lavis/common/annotator/uniformer/mmcv/parallel/distributed.py
0 → 100644
View file @
c04f261a
# Copyright (c) OpenMMLab. All rights reserved.
import
torch
from
torch.nn.parallel.distributed
import
(
DistributedDataParallel
,
_find_tensors
)
from
annotator.uniformer.mmcv
import
print_log
from
annotator.uniformer.mmcv.utils
import
TORCH_VERSION
,
digit_version
from
.scatter_gather
import
scatter_kwargs
class MMDistributedDataParallel(DistributedDataParallel):
    """The DDP module that supports DataContainer.

    MMDDP has two main differences with PyTorch DDP:

    - It supports a custom type :class:`DataContainer` which allows more
      flexible control of input data.
    - It implements two APIs ``train_step()`` and ``val_step()``.
    """

    def to_kwargs(self, inputs, kwargs, device_id):
        # Use `self.to_kwargs` instead of `self.scatter` in pytorch1.8
        # to move all tensors to device_id
        return scatter_kwargs(inputs, kwargs, [device_id], dim=self.dim)

    def scatter(self, inputs, kwargs, device_ids):
        """Scatter ``inputs``/``kwargs`` via ``scatter_kwargs`` along
        ``self.dim``."""
        return scatter_kwargs(inputs, kwargs, device_ids, dim=self.dim)

    def _call_module_step(self, step_name, inputs, kwargs):
        """Shared implementation of ``train_step`` and ``val_step``.

        Runs the same sequence as the two public methods did individually;
        only the method looked up on ``self.module`` differs.

        Args:
            step_name (str): Name of the method to call on ``self.module``.
            inputs (tuple): Positional arguments given to the public method.
            kwargs (dict): Keyword arguments given to the public method.

        Returns:
            The output of the step (or the gathered outputs when more than
            one device id is configured).
        """
        # In PyTorch >= 1.7, ``reducer._rebuild_buckets()`` is moved from the
        # end of backward to the beginning of forward. The condition order
        # matters: the reducer is only touched when the version checks pass.
        if ('parrots' not in TORCH_VERSION
                and digit_version(TORCH_VERSION) >= digit_version('1.7')
                and self.reducer._rebuild_buckets()):
            print_log(
                'Reducer buckets have been rebuilt in this iteration.',
                logger='mmcv')

        if getattr(self, 'require_forward_param_sync', True):
            self._sync_params()

        if self.device_ids:
            inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids)
            if len(self.device_ids) == 1:
                output = getattr(self.module, step_name)(*inputs[0],
                                                         **kwargs[0])
            else:
                # NOTE(review): this branch goes through ``parallel_apply``
                # on the module copies rather than looking up ``step_name``;
                # which method the replicas execute cannot be determined
                # from this file — confirm before relying on it.
                outputs = self.parallel_apply(
                    self._module_copies[:len(inputs)], inputs, kwargs)
                output = self.gather(outputs, self.output_device)
        else:
            output = getattr(self.module, step_name)(*inputs, **kwargs)

        if torch.is_grad_enabled() and getattr(
                self, 'require_backward_grad_sync', True):
            if self.find_unused_parameters:
                self.reducer.prepare_for_backward(list(_find_tensors(output)))
            else:
                self.reducer.prepare_for_backward([])
        else:
            if ('parrots' not in TORCH_VERSION
                    and digit_version(TORCH_VERSION) > digit_version('1.2')):
                self.require_forward_param_sync = False
        return output

    def train_step(self, *inputs, **kwargs):
        """train_step() API for module wrapped by DistributedDataParallel.

        This method is basically the same as
        ``DistributedDataParallel.forward()``, while replacing
        ``self.module.forward()`` with ``self.module.train_step()``.

        NOTE(review): the original docstring stated compatibility with
        PyTorch 1.1 - 1.5, yet the implementation also branches on
        PyTorch >= 1.7 — confirm the supported range.
        """
        return self._call_module_step('train_step', inputs, kwargs)

    def val_step(self, *inputs, **kwargs):
        """val_step() API for module wrapped by DistributedDataParallel.

        This method is basically the same as
        ``DistributedDataParallel.forward()``, while replacing
        ``self.module.forward()`` with ``self.module.val_step()``.

        NOTE(review): the original docstring stated compatibility with
        PyTorch 1.1 - 1.5, yet the implementation also branches on
        PyTorch >= 1.7 — confirm the supported range.
        """
        return self._call_module_step('val_step', inputs, kwargs)
lavis/common/annotator/uniformer/mmcv/parallel/distributed_deprecated.py
0 → 100644
View file @
c04f261a
# Copyright (c) OpenMMLab. All rights reserved.
import
torch
import
torch.distributed
as
dist
import
torch.nn
as
nn
from
torch._utils
import
(
_flatten_dense_tensors
,
_take_tensors
,
_unflatten_dense_tensors
)
from
annotator.uniformer.mmcv.utils
import
TORCH_VERSION
,
digit_version
from
.registry
import
MODULE_WRAPPERS
from
.scatter_gather
import
scatter_kwargs
@MODULE_WRAPPERS.register_module()
class MMDistributedDataParallel(nn.Module):
    """Module wrapper that broadcasts module state and scatters inputs.

    On construction the wrapped module's state (and, optionally, its
    buffers) is broadcast from rank 0 in coalesced buckets. ``forward``,
    ``train_step`` and ``val_step`` scatter their arguments to the current
    CUDA device before delegating to the wrapped module.

    Args:
        module (nn.Module): Module to wrap.
        dim (int): Dimension passed to ``scatter_kwargs``. Defaults to 0.
        broadcast_buffers (bool): Whether ``_sync_params`` also broadcasts
            the module buffers. Defaults to True.
        bucket_cap_mb (int): Bucket size in MiB used when coalescing
            tensors for broadcast. Defaults to 25.
    """

    def __init__(self,
                 module,
                 dim=0,
                 broadcast_buffers=True,
                 bucket_cap_mb=25):
        super(MMDistributedDataParallel, self).__init__()
        self.module = module
        self.dim = dim
        self.broadcast_buffers = broadcast_buffers

        # Bucket capacity converted from MiB to bytes.
        self.broadcast_bucket_size = bucket_cap_mb * 1024 * 1024
        self._sync_params()

    def _dist_broadcast_coalesced(self, tensors, buffer_size):
        """Broadcast ``tensors`` from rank 0, ``buffer_size`` bytes at a
        time, copying the received values back in place."""
        for bucket in _take_tensors(tensors, buffer_size):
            flat = _flatten_dense_tensors(bucket)
            dist.broadcast(flat, 0)
            received = _unflatten_dense_tensors(flat, bucket)
            for target, value in zip(bucket, received):
                target.copy_(value)

    def _sync_params(self):
        """Broadcast the module's state dict values and, if enabled, its
        buffers."""
        module_states = list(self.module.state_dict().values())
        if module_states:
            self._dist_broadcast_coalesced(module_states,
                                           self.broadcast_bucket_size)

        if not self.broadcast_buffers:
            return

        # Torch < 1.0 uses the private ``_all_buffers`` accessor.
        if (TORCH_VERSION != 'parrots'
                and digit_version(TORCH_VERSION) < digit_version('1.0')):
            buffer_iter = self.module._all_buffers()
        else:
            buffer_iter = self.module.buffers()
        buffers = [b.data for b in buffer_iter]
        if buffers:
            self._dist_broadcast_coalesced(buffers,
                                           self.broadcast_bucket_size)

    def scatter(self, inputs, kwargs, device_ids):
        """Scatter ``inputs``/``kwargs`` via ``scatter_kwargs`` along
        ``self.dim``."""
        return scatter_kwargs(inputs, kwargs, device_ids, dim=self.dim)

    def forward(self, *inputs, **kwargs):
        """Scatter to the current CUDA device and call the module."""
        device = torch.cuda.current_device()
        inputs, kwargs = self.scatter(inputs, kwargs, [device])
        return self.module(*inputs[0], **kwargs[0])

    def train_step(self, *inputs, **kwargs):
        """Scatter to the current CUDA device and call
        ``self.module.train_step``."""
        device = torch.cuda.current_device()
        inputs, kwargs = self.scatter(inputs, kwargs, [device])
        return self.module.train_step(*inputs[0], **kwargs[0])

    def val_step(self, *inputs, **kwargs):
        """Scatter to the current CUDA device and call
        ``self.module.val_step``."""
        device = torch.cuda.current_device()
        inputs, kwargs = self.scatter(inputs, kwargs, [device])
        return self.module.val_step(*inputs[0], **kwargs[0])
lavis/common/annotator/uniformer/mmcv/parallel/registry.py
0 → 100644
View file @
c04f261a
# Copyright (c) OpenMMLab. All rights reserved.
from
torch.nn.parallel
import
DataParallel
,
DistributedDataParallel
from
annotator.uniformer.mmcv.utils
import
Registry
# Registry named 'module wrapper'; the two stock PyTorch parallel wrappers
# are registered right after it is created.
MODULE_WRAPPERS = Registry('module wrapper')
MODULE_WRAPPERS.register_module(module=DataParallel)
MODULE_WRAPPERS.register_module(module=DistributedDataParallel)
Prev
1
…
10
11
12
13
14
15
16
17
18
…
22
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment