Unverified commit 8538177b, authored by ChaimZhu, committed by GitHub

[Feature] Add MonoFlex Head (#1044)

parent 4590418e
@@ -81,16 +81,16 @@ class MonoFlexCoder(BaseBBoxCoder):
            torch.Tensor: Targets of orientations.
        """
        local_yaw = gt_bboxes_3d.local_yaw
        # encode local yaw (-pi ~ pi) to multibin format
-        encode_local_yaw = np.zeros(self.num_dir_bins * 2)
+        encode_local_yaw = local_yaw.new_zeros(
+            [local_yaw.shape[0], self.num_dir_bins * 2])
        bin_size = 2 * np.pi / self.num_dir_bins
        margin_size = bin_size * self.bin_margin
-        bin_centers = self.bin_centers
+        bin_centers = local_yaw.new_tensor(self.bin_centers)
        range_size = bin_size / 2 + margin_size
-        offsets = local_yaw - bin_centers.unsqueeze(0)
+        offsets = local_yaw.unsqueeze(1) - bin_centers.unsqueeze(0)
        offsets[offsets > np.pi] = offsets[offsets > np.pi] - 2 * np.pi
        offsets[offsets < -np.pi] = offsets[offsets < -np.pi] + 2 * np.pi
@@ -98,7 +98,7 @@ class MonoFlexCoder(BaseBBoxCoder):
            offset = offsets[:, i]
            inds = abs(offset) < range_size
            encode_local_yaw[inds, i] = 1
-            encode_local_yaw[inds, i + self.num_dir_bins] = offset
+            encode_local_yaw[inds, i + self.num_dir_bins] = offset[inds]
        orientation_target = encode_local_yaw
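The rewritten encoding above vectorizes the multibin targets over a whole batch of local yaws instead of a single angle. A minimal standalone sketch of the scheme, with num_dir_bins=4 and bin_margin=np.pi / 6 as in the test configs below (the input tensor is made up):

import numpy as np
import torch

num_dir_bins = 4
bin_margin = np.pi / 6
local_yaw = torch.tensor([0.1, 2.0, -3.0])  # (N,) angles in (-pi, pi]
bin_centers = local_yaw.new_tensor([0, np.pi / 2, np.pi, -np.pi / 2])

bin_size = 2 * np.pi / num_dir_bins
margin_size = bin_size * bin_margin
range_size = bin_size / 2 + margin_size

# signed offset of every yaw to every bin center, wrapped back to (-pi, pi)
offsets = local_yaw.unsqueeze(1) - bin_centers.unsqueeze(0)
offsets[offsets > np.pi] -= 2 * np.pi
offsets[offsets < -np.pi] += 2 * np.pi

# first num_dir_bins columns are bin hits, the rest are in-bin offsets
encoding = local_yaw.new_zeros([local_yaw.shape[0], num_dir_bins * 2])
for i in range(num_dir_bins):
    inds = offsets[:, i].abs() < range_size
    encoding[inds, i] = 1
    encoding[inds, i + num_dir_bins] = offsets[inds, i]

assert encoding.shape == (3, num_dir_bins * 2)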
@@ -164,7 +164,7 @@ class MonoFlexCoder(BaseBBoxCoder):
        pred_direct_depth_uncertainty = bbox[:, 49:50].squeeze(-1)
        # 2 dimension of offsets x keypoints (8 corners + top/bottom center)
-        pred_keypoints2d = bbox[:, 6:26]
+        pred_keypoints2d = bbox[:, 6:26].reshape(-1, 10, 2)
        # 1 dimension for depth offsets
        pred_direct_depth_offsets = bbox[:, 48:49].squeeze(-1)
@@ -273,11 +273,11 @@ class MonoFlexCoder(BaseBBoxCoder):
            raise NotImplementedError
        # (N, 3)
        centers2d_img = \
-            torch.cat(centers2d_img, depths.unsqueeze(-1), dim=1)
+            torch.cat((centers2d_img, depths.unsqueeze(-1)), dim=1)
        # (N, 4, 1)
        centers2d_extend = \
            torch.cat((centers2d_img, centers2d_img.new_ones(N, 1)),
-                      dim=1).unqueeze(-1)
+                      dim=1).unsqueeze(-1)
        locations = torch.matmul(cam2imgs_inv, centers2d_extend).squeeze(-1)
        return locations[:, :3]
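The two fixes above (tuple argument for torch.cat, unqueeze -> unsqueeze) make decode_location a plain pinhole unprojection: append the predicted depth and a homogeneous 1, then multiply by the inverted intrinsics. A hedged sketch with made-up values and identity intrinsics, under the assumption that the centers are already depth-scaled, so the homogeneous vector comes back unchanged:

import torch

N = 2
# assumed to already be depth-scaled pixel coordinates (u * d, v * d)
centers2d_img = torch.tensor([[3200., 2400.], [2500., 1500.]])
depths = torch.tensor([10., 25.])
cam2imgs_inv = torch.inverse(torch.eye(4).repeat(N, 1, 1))

# (N, 3): (u * d, v * d, d)
centers2d_img = torch.cat((centers2d_img, depths.unsqueeze(-1)), dim=1)
# (N, 4, 1): homogeneous column vectors
centers2d_extend = torch.cat(
    (centers2d_img, centers2d_img.new_ones(N, 1)), dim=1).unsqueeze(-1)
locations = torch.matmul(cam2imgs_inv, centers2d_extend).squeeze(-1)

assert locations[:, :3].shape == (N, 3)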
@@ -450,15 +450,15 @@ class MonoFlexCoder(BaseBBoxCoder):
            local_yaws = orientations
        yaws = local_yaws + rays
-        larger_idx = (yaws > np.pi).nonzero()
-        small_idx = (yaws < -np.pi).nonzero()
+        larger_idx = (yaws > np.pi).nonzero(as_tuple=False)
+        small_idx = (yaws < -np.pi).nonzero(as_tuple=False)
        if len(larger_idx) != 0:
            yaws[larger_idx] -= 2 * np.pi
        if len(small_idx) != 0:
            yaws[small_idx] += 2 * np.pi
-        larger_idx = (local_yaws > np.pi).nonzero()
-        small_idx = (local_yaws < -np.pi).nonzero()
+        larger_idx = (local_yaws > np.pi).nonzero(as_tuple=False)
+        small_idx = (local_yaws < -np.pi).nonzero(as_tuple=False)
        if len(larger_idx) != 0:
            local_yaws[larger_idx] -= 2 * np.pi
        if len(small_idx) != 0:
@@ -491,7 +491,7 @@ class MonoFlexCoder(BaseBBoxCoder):
        return bboxes2d

-    def combine_depths(depth, depth_uncertainty):
+    def combine_depths(self, depth, depth_uncertainty):
        """Combine all the predicted depths with depth uncertainty.

        Args:
...
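The hunk above only adds the missing `self` to the signature; the body of combine_depths is elided in this diff. Purely as a hedged illustration of the idea (an assumed inverse-uncertainty weighting, not necessarily the exact implementation), combining several depth estimates per instance can look like:

import torch

def combine_depths_sketch(depth, depth_uncertainty):
    # weight each estimate by its inverse uncertainty, normalized per row
    weights = 1 / depth_uncertainty.clamp(min=1e-6)
    weights = weights / weights.sum(dim=1, keepdim=True)
    return (depth * weights).sum(dim=1)

depth = torch.rand(100, 4)                   # e.g. direct + 3 keypoint depths
depth_uncertainty = torch.rand(100, 4) + 0.1
assert combine_depths_sketch(depth, depth_uncertainty).shape == (100, )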
@@ -324,8 +324,11 @@ def yaw2local(yaw, loc):
        torch.Tensor: local yaw (alpha in kitti).
    """
    local_yaw = yaw - torch.atan2(loc[:, 0], loc[:, 2])
-    while local_yaw > np.pi:
-        local_yaw -= np.pi * 2
-    while local_yaw < -np.pi:
-        local_yaw += np.pi * 2
+    larger_idx = (local_yaw > np.pi).nonzero(as_tuple=False)
+    small_idx = (local_yaw < -np.pi).nonzero(as_tuple=False)
+    if len(larger_idx) != 0:
+        local_yaw[larger_idx] -= 2 * np.pi
+    if len(small_idx) != 0:
+        local_yaw[small_idx] += 2 * np.pi

    return local_yaw
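This rewrite matters because yaw2local now receives batched tensors: a Python `while` on a multi-element tensor raises "Boolean value of Tensor ... is ambiguous", whereas boolean indexing wraps every element at once. A quick check with made-up inputs:

import numpy as np
import torch

yaw = torch.tensor([3.0, -3.0, 0.5])
loc = torch.tensor([[-1., 0., 1.], [1., 0., 1.], [0., 0., 1.]])
local_yaw = yaw - torch.atan2(loc[:, 0], loc[:, 2])  # [3.79, -3.79, 0.5]
local_yaw[local_yaw > np.pi] -= 2 * np.pi
local_yaw[local_yaw < -np.pi] += 2 * np.pi
assert (local_yaw.abs() <= np.pi).all()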
# Copyright (c) OpenMMLab. All rights reserved.
from .array_converter import ArrayConverter, array_converter
-from .gaussian import draw_heatmap_gaussian, gaussian_2d, gaussian_radius
+from .gaussian import (draw_heatmap_gaussian, ellip_gaussian2D, gaussian_2d,
+                       gaussian_radius, get_ellip_gaussian_2D)

__all__ = [
    'gaussian_2d', 'gaussian_radius', 'draw_heatmap_gaussian',
-    'ArrayConverter', 'array_converter'
+    'ArrayConverter', 'array_converter', 'ellip_gaussian2D',
+    'get_ellip_gaussian_2D'
]
@@ -84,3 +84,75 @@ def gaussian_radius(det_size, min_overlap=0.5):
    sq3 = torch.sqrt(b3**2 - 4 * a3 * c3)
    r3 = (b3 + sq3) / 2
    return min(r1, r2, r3)

def get_ellip_gaussian_2D(heatmap, center, radius_x, radius_y, k=1):
    """Generate 2D ellipse gaussian heatmap.

    Args:
        heatmap (Tensor): Input heatmap, the gaussian kernel will cover on
            it and maintain the max value.
        center (list[int]): Coord of gaussian kernel's center.
        radius_x (int): X-axis radius of gaussian kernel.
        radius_y (int): Y-axis radius of gaussian kernel.
        k (int, optional): Coefficient of gaussian kernel. Default: 1.

    Returns:
        out_heatmap (Tensor): Updated heatmap covered by gaussian kernel.
    """
    diameter_x, diameter_y = 2 * radius_x + 1, 2 * radius_y + 1
    gaussian_kernel = ellip_gaussian2D((radius_x, radius_y),
                                       sigma_x=diameter_x / 6,
                                       sigma_y=diameter_y / 6,
                                       dtype=heatmap.dtype,
                                       device=heatmap.device)

    x, y = int(center[0]), int(center[1])
    height, width = heatmap.shape[0:2]

    left, right = min(x, radius_x), min(width - x, radius_x + 1)
    top, bottom = min(y, radius_y), min(height - y, radius_y + 1)

    masked_heatmap = heatmap[y - top:y + bottom, x - left:x + right]
    masked_gaussian = gaussian_kernel[radius_y - top:radius_y + bottom,
                                      radius_x - left:radius_x + right]
    out_heatmap = heatmap
    torch.max(
        masked_heatmap,
        masked_gaussian * k,
        out=out_heatmap[y - top:y + bottom, x - left:x + right])

    return out_heatmap

def ellip_gaussian2D(radius,
                     sigma_x,
                     sigma_y,
                     dtype=torch.float32,
                     device='cpu'):
    """Generate 2D ellipse gaussian kernel.

    Args:
        radius (tuple(int)): Ellipse radius (radius_x, radius_y) of gaussian
            kernel.
        sigma_x (int): X-axis sigma of gaussian function.
        sigma_y (int): Y-axis sigma of gaussian function.
        dtype (torch.dtype, optional): Dtype of gaussian tensor.
            Default: torch.float32.
        device (str, optional): Device of gaussian tensor.
            Default: 'cpu'.

    Returns:
        h (Tensor): Gaussian kernel with a
            ``(2 * radius_y + 1) * (2 * radius_x + 1)`` shape.
    """
    x = torch.arange(
        -radius[0], radius[0] + 1, dtype=dtype, device=device).view(1, -1)
    y = torch.arange(
        -radius[1], radius[1] + 1, dtype=dtype, device=device).view(-1, 1)

    h = (-(x * x) / (2 * sigma_x * sigma_x) - (y * y) /
         (2 * sigma_y * sigma_y)).exp()
    h[h < torch.finfo(h.dtype).eps * h.max()] = 0

    return h
@@ -7,6 +7,7 @@ from .centerpoint_head import CenterHead
from .fcos_mono3d_head import FCOSMono3DHead
from .free_anchor3d_head import FreeAnchor3DHead
from .groupfree3d_head import GroupFree3DHead
+from .monoflex_head import MonoFlexHead
from .parta2_rpn_head import PartA2RPNHead
from .pgd_head import PGDHead
from .point_rpn_head import PointRPNHead
@@ -19,5 +20,6 @@ __all__ = [
    'Anchor3DHead', 'FreeAnchor3DHead', 'PartA2RPNHead', 'VoteHead',
    'SSD3DHead', 'BaseConvBboxHead', 'CenterHead', 'ShapeAwareHead',
    'BaseMono3DDenseHead', 'AnchorFreeMono3DHead', 'FCOSMono3DHead',
-    'GroupFree3DHead', 'PointRPNHead', 'SMOKEMono3DHead', 'PGDHead'
+    'GroupFree3DHead', 'PointRPNHead', 'SMOKEMono3DHead', 'PGDHead',
+    'MonoFlexHead'
]
# Copyright (c) OpenMMLab. All rights reserved.
+from .edge_fusion_module import EdgeFusionModule
from .transformer import GroupFree3DMHA
from .vote_module import VoteModule

-__all__ = ['VoteModule', 'GroupFree3DMHA']
+__all__ = ['VoteModule', 'GroupFree3DMHA', 'EdgeFusionModule']
from mmcv.cnn import ConvModule
from mmcv.runner import BaseModule
from torch import nn as nn
from torch.nn import functional as F


class EdgeFusionModule(BaseModule):
    """Edge Fusion Module for feature map.

    Args:
        out_channels (int): The number of output channels.
        feat_channels (int): The number of channels in feature map
            during edge feature fusion.
        kernel_size (int, optional): Kernel size of convolution.
            Default: 3.
        act_cfg (dict, optional): Config of activation.
            Default: dict(type='ReLU').
        norm_cfg (dict, optional): Config of normalization.
            Default: dict(type='BN1d').
    """

    def __init__(self,
                 out_channels,
                 feat_channels,
                 kernel_size=3,
                 act_cfg=dict(type='ReLU'),
                 norm_cfg=dict(type='BN1d')):
        super().__init__()
        self.edge_convs = nn.Sequential(
            ConvModule(
                feat_channels,
                feat_channels,
                kernel_size=kernel_size,
                padding=kernel_size // 2,
                conv_cfg=dict(type='Conv1d'),
                norm_cfg=norm_cfg,
                act_cfg=act_cfg),
            nn.Conv1d(feat_channels, out_channels, kernel_size=1))
        self.feat_channels = feat_channels

    def forward(self, features, fused_features, edge_indices, edge_lens,
                output_h, output_w):
        """Forward pass.

        Args:
            features (torch.Tensor): Different representative features
                for fusion.
            fused_features (torch.Tensor): Different representative
                features to be fused.
            edge_indices (torch.Tensor): Batch image edge indices.
            edge_lens (list[int]): List of edge length of each image.
            output_h (int): Height of output feature map.
            output_w (int): Width of output feature map.

        Returns:
            torch.Tensor: Fused feature maps.
        """
        batch_size = features.shape[0]
        # normalize pixel indices to [-1, 1] for grid_sample
        grid_edge_indices = edge_indices.view(batch_size, -1, 1, 2).float()
        grid_edge_indices[..., 0] = \
            grid_edge_indices[..., 0] / (output_w - 1) * 2 - 1
        grid_edge_indices[..., 1] = \
            grid_edge_indices[..., 1] / (output_h - 1) * 2 - 1

        # sample edge pixels, run 1D convs on them, then add the result
        # back at the original edge locations
        edge_features = F.grid_sample(
            features, grid_edge_indices, align_corners=True).squeeze(-1)
        edge_output = self.edge_convs(edge_features)

        for k in range(batch_size):
            edge_indice_k = edge_indices[k, :edge_lens[k]]
            fused_features[k, :, edge_indice_k[:, 1],
                           edge_indice_k[:, 0]] += edge_output[
                               k, :, :edge_lens[k]]

        return fused_features
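A hypothetical smoke test for the module above; the channel sizes and edge indices are made up, and the shapes follow the forward() docstring:

import torch

module = EdgeFusionModule(out_channels=4, feat_channels=8)
batch, num_edge, out_h, out_w = 2, 10, 16, 16
features = torch.rand(batch, 8, out_h, out_w)
fused_features = torch.rand(batch, 4, out_h, out_w)
edge_indices = torch.randint(0, out_w, (batch, num_edge, 2))
edge_lens = [10, 8]

out = module(features, fused_features, edge_indices, edge_lens,
             out_h, out_w)
assert out.shape == (batch, 4, out_h, out_w)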
@@ -4,6 +4,7 @@ import torch

def get_edge_indices(img_metas,
+                    downsample_ratio,
                     step=1,
                     pad_mode='default',
                     dtype=np.float32,
@@ -17,6 +18,7 @@ def get_edge_indices(img_metas,
    Args:
        img_metas (list[dict]): Meta information of each image, e.g.,
            image size, scaling factor, etc.
+        downsample_ratio (int): Downsample ratio of output feature map.
        step (int, optional): Step size used for generating
            edge indices. Default: 1.
        pad_mode (str, optional): Padding mode during data pipeline.
edge_indices_list = [] edge_indices_list = []
for i in range(len(img_metas)): for i in range(len(img_metas)):
img_shape = img_metas[i]['img_shape'] img_shape = img_metas[i]['img_shape']
pad_shape = img_metas[i]['pad_shape']
h, w = img_shape[:2] h, w = img_shape[:2]
pad_h, pad_w = pad_shape
edge_indices = [] edge_indices = []
if pad_mode == 'default': if pad_mode == 'default':
x_min = 0 x_min = 0
y_min = 0 y_min = 0
x_max, y_max = w - 1, h - 1 x_max = (w - 1) // downsample_ratio
y_max = (h - 1) // downsample_ratio
elif pad_mode == 'center':
x_min = np.ceil((pad_w - w) / 2 * downsample_ratio)
y_min = np.ceil((pad_h - h) / 2 * downsample_ratio)
x_max = x_min + w // downsample_ratio
y_max = y_min + h // downsample_ratio
else: else:
raise NotImplementedError raise NotImplementedError
......
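A quick sanity check of the 'default' branch above: with step=1, the returned indices trace the border of the valid image region on the downsampled map, so their count is that region's perimeter (assuming each border pixel is visited once). For the shapes used in the updated test below:

# 110 x 110 image at downsample_ratio 4 -> indices 0..27, a 28 x 28 region
nx = (110 - 1) // 4 + 1
ny = (110 - 1) // 4 + 1
print(2 * nx + 2 * ny - 4)  # 108

# 98 x 110 image -> a 28 x 25 region
nx, ny = (110 - 1) // 4 + 1, (98 - 1) // 4 + 1
print(2 * nx + 2 * ny - 4)  # 102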
@@ -1505,3 +1505,62 @@ def test_pgd_head():
    assert results[0][2].shape == torch.Size([20])
    assert results[0][3] is None
    assert results[0][4].shape == torch.Size([20, 5])

def test_monoflex_head():
    head_cfg = dict(
        type='MonoFlexHead',
        num_classes=3,
        in_channels=64,
        use_edge_fusion=True,
        edge_fusion_inds=[(1, 0)],
        edge_heatmap_ratio=1 / 8,
        stacked_convs=0,
        feat_channels=64,
        use_direction_classifier=False,
        diff_rad_by_sin=False,
        pred_attrs=False,
        pred_velo=False,
        dir_offset=0,
        strides=None,
        group_reg_dims=((4, ), (2, ), (20, ), (3, ), (3, ), (8, 8), (1, ),
                        (1, )),
        cls_branch=(256, ),
        reg_branch=((256, ), (256, ), (256, ), (256, ), (256, ), (256, ),
                    (256, ), (256, )),
        num_attrs=0,
        bbox_code_size=7,
        dir_branch=(),
        attr_branch=(),
        bbox_coder=dict(
            type='MonoFlexCoder',
            depth_mode='exp',
            base_depth=(26.494627, 16.05988),
            depth_range=[0.1, 100],
            combine_depth=True,
            uncertainty_range=[-10, 10],
            base_dims=((3.8840, 1.5261, 1.6286, 0.4259, 0.1367, 0.1022),
                       (0.8423, 1.7607, 0.6602, 0.2349, 0.1133, 0.1427),
                       (1.7635, 1.7372, 0.5968, 0.1766, 0.0948, 0.1242)),
            dims_mode='linear',
            multibin=True,
            num_dir_bins=4,
            bin_centers=[0, np.pi / 2, np.pi, -np.pi / 2],
            bin_margin=np.pi / 6,
            code_size=7),
        conv_bias=True,
        dcn_on_last_conv=False)

    self = build_head(head_cfg)

    feats = [torch.rand([2, 64, 32, 32], dtype=torch.float32)]
    input_metas = [
        dict(img_shape=(110, 110), pad_shape=(128, 128)),
        dict(img_shape=(98, 110), pad_shape=(128, 128))
    ]
    cls_score, out_reg = self(feats, input_metas)

    assert cls_score[0].shape == torch.Size([2, 3, 32, 32])
    assert out_reg[0].shape == torch.Size([2, 50, 32, 32])
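As a cross-check of the asserted output shapes: the 3 classification channels come from num_classes, and the 50 regression channels are the sum of group_reg_dims, which matches the slice layout in the coder hunks above (channels 6:26 are the 10 x 2 keypoint offsets):

group_reg_dims = ((4, ), (2, ), (20, ), (3, ), (3, ), (8, 8), (1, ), (1, ))
assert sum(sum(group) for group in group_reg_dims) == 50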
# Copyright (c) OpenMMLab. All rights reserved.
+import numpy as np
import torch
from mmcv.cnn import Scale
from torch import nn as nn
@@ -596,3 +597,69 @@ def test_smoke_bbox_coder():
    locations = torch.tensor([[15., 2., 1.], [15., 2., -1.]])
    orientations = bbox_coder._decode_orientation(ori_vector, locations)
    assert orientations.shape == torch.Size([2, 1])

def test_monoflex_bbox_coder():
    bbox_coder_cfg = dict(
        type='MonoFlexCoder',
        depth_mode='exp',
        base_depth=(26.494627, 16.05988),
        depth_range=[0.1, 100],
        combine_depth=True,
        uncertainty_range=[-10, 10],
        base_dims=((3.8840, 1.5261, 1.6286, 0.4259, 0.1367, 0.1022),
                   (0.8423, 1.7607, 0.6602, 0.2349, 0.1133, 0.1427),
                   (1.7635, 1.7372, 0.5968, 0.1766, 0.0948, 0.1242)),
        dims_mode='linear',
        multibin=True,
        num_dir_bins=4,
        bin_centers=[0, np.pi / 2, np.pi, -np.pi / 2],
        bin_margin=np.pi / 6,
        code_size=7)
    bbox_coder = build_bbox_coder(bbox_coder_cfg)
    gt_bboxes_3d = CameraInstance3DBoxes(torch.rand([6, 7]))
    orientation_target = bbox_coder.encode(gt_bboxes_3d)
    assert orientation_target.shape == torch.Size([6, 8])

    regression = torch.rand([100, 50])
    base_centers2d = torch.rand([100, 2])
    labels = torch.ones([100])
    downsample_ratio = 4
    cam2imgs = torch.rand([100, 4, 4])

    preds = bbox_coder.decode(regression, base_centers2d, labels,
                              downsample_ratio, cam2imgs)

    assert preds['bboxes2d'].shape == torch.Size([100, 4])
    assert preds['dimensions'].shape == torch.Size([100, 3])
    assert preds['offsets2d'].shape == torch.Size([100, 2])
    assert preds['keypoints2d'].shape == torch.Size([100, 10, 2])
    assert preds['orientations'].shape == torch.Size([100, 16])
    assert preds['direct_depth'].shape == torch.Size([100])
    assert preds['keypoints_depth'].shape == torch.Size([100, 3])
    assert preds['combined_depth'].shape == torch.Size([100])
    assert preds['direct_depth_uncertainty'].shape == torch.Size([100])
    assert preds['keypoints_depth_uncertainty'].shape == torch.Size([100, 3])

    offsets_2d = torch.randn([100, 2])
    depths = torch.randn([100])
    locations = bbox_coder.decode_location(base_centers2d, offsets_2d, depths,
                                           cam2imgs, downsample_ratio)
    assert locations.shape == torch.Size([100, 3])

    orientations = torch.randn([100, 16])
    yaws, local_yaws = bbox_coder.decode_orientation(orientations, locations)
    assert yaws.shape == torch.Size([100])
    assert local_yaws.shape == torch.Size([100])
@@ -195,11 +195,15 @@ def test_points_img2cam():

def test_generate_edge_indices():
-    img_metas = [dict(img_shape=[300, 400]), dict(img_shape=[500, 450])]
-    edge_indices_list = get_edge_indices(img_metas)
+    input_metas = [
+        dict(img_shape=(110, 110), pad_shape=(128, 128)),
+        dict(img_shape=(98, 110), pad_shape=(128, 128))
+    ]
+    downsample_ratio = 4
+    edge_indices_list = get_edge_indices(input_metas, downsample_ratio)

-    assert edge_indices_list[0].shape[0] == 1396
-    assert edge_indices_list[1].shape[0] == 1896
+    assert edge_indices_list[0].shape[0] == 108
+    assert edge_indices_list[1].shape[0] == 102


def test_truncation_hanlde():
...