"docs/git@developer.sourcefind.cn:change/sglang.git" did not exist on "c9fc4a9d26b48639f1848168327c26321dc7c126"
Unverified commit 8538177b authored by ChaimZhu, committed by GitHub

[Feature] Add MonoFlex Head (#1044)

parent 4590418e
@@ -81,16 +81,16 @@ class MonoFlexCoder(BaseBBoxCoder):
            torch.Tensor: Targets of orientations.
        """
        local_yaw = gt_bboxes_3d.local_yaw
        # encode local yaw (-pi ~ pi) to multibin format
-        encode_local_yaw = np.zeros(self.num_dir_bins * 2)
+        encode_local_yaw = local_yaw.new_zeros(
+            [local_yaw.shape[0], self.num_dir_bins * 2])
        bin_size = 2 * np.pi / self.num_dir_bins
        margin_size = bin_size * self.bin_margin
-        bin_centers = self.bin_centers
+        bin_centers = local_yaw.new_tensor(self.bin_centers)
        range_size = bin_size / 2 + margin_size
-        offsets = local_yaw - bin_centers.unsqueeze(0)
+        offsets = local_yaw.unsqueeze(1) - bin_centers.unsqueeze(0)
        offsets[offsets > np.pi] = offsets[offsets > np.pi] - 2 * np.pi
        offsets[offsets < -np.pi] = offsets[offsets < -np.pi] + 2 * np.pi
@@ -98,7 +98,7 @@ class MonoFlexCoder(BaseBBoxCoder):
            offset = offsets[:, i]
            inds = abs(offset) < range_size
            encode_local_yaw[inds, i] = 1
-            encode_local_yaw[inds, i + self.num_dir_bins] = offset
+            encode_local_yaw[inds, i + self.num_dir_bins] = offset[inds]

        orientation_target = encode_local_yaw
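These two hunks vectorize the multibin orientation encoding so it works on a batch of ground-truth yaws at once: each of the `num_dir_bins` overlapping bins gets a binary indicator plus the residual offset of the local yaw from the bin center, hence the `num_dir_bins * 2` target width. A minimal standalone sketch of the same scheme, using the bin configuration from the tests at the end of this diff (`encode_multibin` is an illustrative name, not the coder's API):

```python
import numpy as np
import torch


def encode_multibin(local_yaw, bin_centers, bin_margin=np.pi / 6):
    """Sketch: per-bin indicator + offset to the bin center."""
    num_bins = len(bin_centers)
    bin_size = 2 * np.pi / num_bins
    # bins overlap by a margin, so several indicators can be active at once
    range_size = bin_size / 2 + bin_size * bin_margin
    centers = local_yaw.new_tensor(bin_centers)
    offsets = local_yaw.unsqueeze(1) - centers.unsqueeze(0)
    offsets[offsets > np.pi] -= 2 * np.pi   # wrap offsets to (-pi, pi]
    offsets[offsets < -np.pi] += 2 * np.pi
    target = local_yaw.new_zeros([local_yaw.shape[0], num_bins * 2])
    for i in range(num_bins):
        inds = offsets[:, i].abs() < range_size
        target[inds, i] = 1                            # bin indicator
        target[inds, i + num_bins] = offsets[inds, i]  # residual offset
    return target


local_yaw = torch.tensor([0.1, np.pi / 2 - 0.05, -3.0])
target = encode_multibin(local_yaw, [0, np.pi / 2, np.pi, -np.pi / 2])
print(target.shape)  # torch.Size([3, 8]), cf. the (6, 8) assertion in the tests
```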
@@ -164,7 +164,7 @@ class MonoFlexCoder(BaseBBoxCoder):
        pred_direct_depth_uncertainty = bbox[:, 49:50].squeeze(-1)
        # 2 dimensions of offsets x 10 keypoints (8 corners + top/bottom center)
-        pred_keypoints2d = bbox[:, 6:26]
+        pred_keypoints2d = bbox[:, 6:26].reshape(-1, 10, 2)
        # 1 dimension for depth offsets
        pred_direct_depth_offsets = bbox[:, 48:49].squeeze(-1)
@@ -273,11 +273,11 @@ class MonoFlexCoder(BaseBBoxCoder):
            raise NotImplementedError
        # (N, 3)
        centers2d_img = \
-            torch.cat(centers2d_img, depths.unsqueeze(-1), dim=1)
+            torch.cat((centers2d_img, depths.unsqueeze(-1)), dim=1)
        # (N, 4, 1)
        centers2d_extend = \
            torch.cat((centers2d_img, centers2d_img.new_ones(N, 1)),
-                      dim=1).unqueeze(-1)
+                      dim=1).unsqueeze(-1)
        locations = torch.matmul(cam2imgs_inv, centers2d_extend).squeeze(-1)

        return locations[:, :3]
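Both fixes in this hunk sit in the 2D-to-3D lifting step of `decode_location`: the first restores the tuple argument that `torch.cat` requires (passing two tensors positionally raises a `TypeError`), the second corrects the `unqueeze` typo. The underlying geometry is the standard pinhole unprojection through a homogeneous 4x4 camera matrix, `[u*d, v*d, d, 1]^T = cam2img @ [x, y, z, 1]^T`. A hedged sketch of that relation, mirroring the shapes used in the tests below but not necessarily the coder's exact argument handling:

```python
import torch


def lift_centers(centers2d_img, depths, cam2imgs):
    """Sketch: lift pixel centers (u, v) with depth d to camera coordinates.

    centers2d_img: (N, 2) pixel coords, depths: (N,), cam2imgs: (N, 4, 4).
    """
    N = centers2d_img.shape[0]
    # (N, 3): [u * d, v * d, d]
    scaled = torch.cat(
        (centers2d_img * depths.unsqueeze(-1), depths.unsqueeze(-1)), dim=1)
    # (N, 4, 1): homogeneous coordinates, then back through the inverse matrix
    extended = torch.cat((scaled, scaled.new_ones(N, 1)), dim=1).unsqueeze(-1)
    locations = torch.matmul(torch.inverse(cam2imgs), extended).squeeze(-1)
    return locations[:, :3]


cam2imgs = torch.eye(4).repeat(5, 1, 1)  # identity intrinsics for the demo
print(lift_centers(torch.rand(5, 2), torch.rand(5) + 1, cam2imgs).shape)
# torch.Size([5, 3])
```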
@@ -450,15 +450,15 @@ class MonoFlexCoder(BaseBBoxCoder):
        local_yaws = orientations
        yaws = local_yaws + rays

-        larger_idx = (yaws > np.pi).nonzero()
-        small_idx = (yaws < -np.pi).nonzero()
+        larger_idx = (yaws > np.pi).nonzero(as_tuple=False)
+        small_idx = (yaws < -np.pi).nonzero(as_tuple=False)
        if len(larger_idx) != 0:
            yaws[larger_idx] -= 2 * np.pi
        if len(small_idx) != 0:
            yaws[small_idx] += 2 * np.pi

-        larger_idx = (local_yaws > np.pi).nonzero()
-        small_idx = (local_yaws < -np.pi).nonzero()
+        larger_idx = (local_yaws > np.pi).nonzero(as_tuple=False)
+        small_idx = (local_yaws < -np.pi).nonzero(as_tuple=False)
        if len(larger_idx) != 0:
            local_yaws[larger_idx] -= 2 * np.pi
        if len(small_idx) != 0:
@@ -491,7 +491,7 @@ class MonoFlexCoder(BaseBBoxCoder):

        return bboxes2d

-    def combine_depths(depth, depth_uncertainty):
+    def combine_depths(self, depth, depth_uncertainty):
        """Combine all the predicted depths with depth uncertainty.

        Args:
......
@@ -324,8 +324,11 @@ def yaw2local(yaw, loc):
        torch.Tensor: Local yaw (alpha in KITTI).
    """
    local_yaw = yaw - torch.atan2(loc[:, 0], loc[:, 2])
-    while local_yaw > np.pi:
-        local_yaw -= np.pi * 2
-    while local_yaw < -np.pi:
-        local_yaw += np.pi * 2
+    larger_idx = (local_yaw > np.pi).nonzero(as_tuple=False)
+    small_idx = (local_yaw < -np.pi).nonzero(as_tuple=False)
+    if len(larger_idx) != 0:
+        local_yaw[larger_idx] -= 2 * np.pi
+    if len(small_idx) != 0:
+        local_yaw[small_idx] += 2 * np.pi

    return local_yaw
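The scalar `while` loops only worked for a single yaw: on a batched tensor, `local_yaw > np.pi` is a boolean tensor, and using it as a loop condition raises `RuntimeError: Boolean value of Tensor with more than one element is ambiguous`. The replacement wraps per element, and `as_tuple=False` (here and in the coder hunks above) also silences the deprecation warning that bare `.nonzero()` emits on recent PyTorch. The relation itself is `alpha = yaw - atan2(x, z)` (local/allocentric vs. global/egocentric yaw); a quick standalone check of the patched function's wrap-around:

```python
import numpy as np
import torch


def yaw2local(yaw, loc):
    """The patched function, restated as a runnable unit."""
    local_yaw = yaw - torch.atan2(loc[:, 0], loc[:, 2])
    larger_idx = (local_yaw > np.pi).nonzero(as_tuple=False)
    small_idx = (local_yaw < -np.pi).nonzero(as_tuple=False)
    if len(larger_idx) != 0:
        local_yaw[larger_idx] -= 2 * np.pi
    if len(small_idx) != 0:
        local_yaw[small_idx] += 2 * np.pi
    return local_yaw


yaw = torch.tensor([3.0, -3.0])
loc = torch.tensor([[-1.0, 0.0, 1.0], [1.0, 0.0, 1.0]])
alpha = yaw2local(yaw, loc)  # both raw values fall outside (-pi, pi]
assert ((alpha >= -np.pi) & (alpha <= np.pi)).all()
```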
# Copyright (c) OpenMMLab. All rights reserved.
from .array_converter import ArrayConverter, array_converter
-from .gaussian import draw_heatmap_gaussian, gaussian_2d, gaussian_radius
+from .gaussian import (draw_heatmap_gaussian, ellip_gaussian2D, gaussian_2d,
+                       gaussian_radius, get_ellip_gaussian_2D)

__all__ = [
    'gaussian_2d', 'gaussian_radius', 'draw_heatmap_gaussian',
-    'ArrayConverter', 'array_converter'
+    'ArrayConverter', 'array_converter', 'ellip_gaussian2D',
+    'get_ellip_gaussian_2D'
]
@@ -84,3 +84,75 @@ def gaussian_radius(det_size, min_overlap=0.5):
    sq3 = torch.sqrt(b3**2 - 4 * a3 * c3)
    r3 = (b3 + sq3) / 2
    return min(r1, r2, r3)
+
+
+def get_ellip_gaussian_2D(heatmap, center, radius_x, radius_y, k=1):
+    """Generate a 2D elliptical gaussian heatmap.
+
+    Args:
+        heatmap (Tensor): Input heatmap; the gaussian kernel is drawn on
+            it, keeping the maximum value.
+        center (list[int]): Coordinates of the gaussian kernel's center.
+        radius_x (int): X-axis radius of the gaussian kernel.
+        radius_y (int): Y-axis radius of the gaussian kernel.
+        k (int, optional): Coefficient of the gaussian kernel. Default: 1.
+
+    Returns:
+        out_heatmap (Tensor): Updated heatmap covered by the gaussian kernel.
+    """
+    diameter_x, diameter_y = 2 * radius_x + 1, 2 * radius_y + 1
+    gaussian_kernel = ellip_gaussian2D((radius_x, radius_y),
+                                       sigma_x=diameter_x / 6,
+                                       sigma_y=diameter_y / 6,
+                                       dtype=heatmap.dtype,
+                                       device=heatmap.device)
+
+    x, y = int(center[0]), int(center[1])
+    height, width = heatmap.shape[0:2]
+
+    left, right = min(x, radius_x), min(width - x, radius_x + 1)
+    top, bottom = min(y, radius_y), min(height - y, radius_y + 1)
+
+    masked_heatmap = heatmap[y - top:y + bottom, x - left:x + right]
+    masked_gaussian = gaussian_kernel[radius_y - top:radius_y + bottom,
+                                      radius_x - left:radius_x + right]
+    out_heatmap = heatmap
+    torch.max(
+        masked_heatmap,
+        masked_gaussian * k,
+        out=out_heatmap[y - top:y + bottom, x - left:x + right])
+
+    return out_heatmap
+
+
+def ellip_gaussian2D(radius,
+                     sigma_x,
+                     sigma_y,
+                     dtype=torch.float32,
+                     device='cpu'):
+    """Generate a 2D elliptical gaussian kernel.
+
+    Args:
+        radius (tuple(int)): Ellipse radius (radius_x, radius_y) of the
+            gaussian kernel.
+        sigma_x (int): X-axis sigma of the gaussian function.
+        sigma_y (int): Y-axis sigma of the gaussian function.
+        dtype (torch.dtype, optional): Dtype of the gaussian tensor.
+            Default: torch.float32.
+        device (str, optional): Device of the gaussian tensor.
+            Default: 'cpu'.
+
+    Returns:
+        h (Tensor): Gaussian kernel with a
+            ``(2 * radius_y + 1) * (2 * radius_x + 1)`` shape.
+    """
+    x = torch.arange(
+        -radius[0], radius[0] + 1, dtype=dtype, device=device).view(1, -1)
+    y = torch.arange(
+        -radius[1], radius[1] + 1, dtype=dtype, device=device).view(-1, 1)
+
+    h = (-(x * x) / (2 * sigma_x * sigma_x) - (y * y) /
+         (2 * sigma_y * sigma_y)).exp()
+    h[h < torch.finfo(h.dtype).eps * h.max()] = 0
+
+    return h
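A quick usage sketch for the two helpers just added, tying them to the `sigma = diameter / 6` convention above (coordinates and radii here are illustrative):

```python
import torch

heatmap = torch.zeros(32, 32)
# elliptical gaussian at (x=10, y=20), wider along x (radius 6) than y (radius 3)
out = get_ellip_gaussian_2D(heatmap, center=[10, 20], radius_x=6, radius_y=3)
assert out[20, 10] == 1.0          # peak value k=1 at the center; indexed [y, x]
assert out[20, 16] > out[26, 10]   # decays slower along the long (x) axis
```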
@@ -7,6 +7,7 @@ from .centerpoint_head import CenterHead
from .fcos_mono3d_head import FCOSMono3DHead
from .free_anchor3d_head import FreeAnchor3DHead
from .groupfree3d_head import GroupFree3DHead
+from .monoflex_head import MonoFlexHead
from .parta2_rpn_head import PartA2RPNHead
from .pgd_head import PGDHead
from .point_rpn_head import PointRPNHead
@@ -19,5 +20,6 @@ __all__ = [
    'Anchor3DHead', 'FreeAnchor3DHead', 'PartA2RPNHead', 'VoteHead',
    'SSD3DHead', 'BaseConvBboxHead', 'CenterHead', 'ShapeAwareHead',
    'BaseMono3DDenseHead', 'AnchorFreeMono3DHead', 'FCOSMono3DHead',
-    'GroupFree3DHead', 'PointRPNHead', 'SMOKEMono3DHead', 'PGDHead'
+    'GroupFree3DHead', 'PointRPNHead', 'SMOKEMono3DHead', 'PGDHead',
+    'MonoFlexHead'
]
# Copyright (c) OpenMMLab. All rights reserved.
+from .edge_fusion_module import EdgeFusionModule
from .transformer import GroupFree3DMHA
from .vote_module import VoteModule

-__all__ = ['VoteModule', 'GroupFree3DMHA']
+__all__ = ['VoteModule', 'GroupFree3DMHA', 'EdgeFusionModule']
from mmcv.cnn import ConvModule
from mmcv.runner import BaseModule
from torch import nn as nn
from torch.nn import functional as F


class EdgeFusionModule(BaseModule):
    """Edge Fusion Module for feature maps.

    Args:
        out_channels (int): The number of output channels.
        feat_channels (int): The number of channels in the feature map
            during edge feature fusion.
        kernel_size (int, optional): Kernel size of the convolution.
            Default: 3.
        act_cfg (dict, optional): Config of activation.
            Default: dict(type='ReLU').
        norm_cfg (dict, optional): Config of normalization.
            Default: dict(type='BN1d').
    """

    def __init__(self,
                 out_channels,
                 feat_channels,
                 kernel_size=3,
                 act_cfg=dict(type='ReLU'),
                 norm_cfg=dict(type='BN1d')):
        super().__init__()
        self.edge_convs = nn.Sequential(
            ConvModule(
                feat_channels,
                feat_channels,
                kernel_size=kernel_size,
                padding=kernel_size // 2,
                conv_cfg=dict(type='Conv1d'),
                norm_cfg=norm_cfg,
                act_cfg=act_cfg),
            nn.Conv1d(feat_channels, out_channels, kernel_size=1))
        self.feat_channels = feat_channels

    def forward(self, features, fused_features, edge_indices, edge_lens,
                output_h, output_w):
        """Forward pass.

        Args:
            features (torch.Tensor): Different representative features
                for fusion.
            fused_features (torch.Tensor): Different representative
                features to be fused.
            edge_indices (torch.Tensor): Batched image edge indices.
            edge_lens (list[int]): List of edge lengths for each image.
            output_h (int): Height of the output feature map.
            output_w (int): Width of the output feature map.

        Returns:
            torch.Tensor: Fused feature maps.
        """
        batch_size = features.shape[0]
        # normalize edge coordinates to [-1, 1] for F.grid_sample
        grid_edge_indices = edge_indices.view(batch_size, -1, 1, 2).float()
        grid_edge_indices[..., 0] = \
            grid_edge_indices[..., 0] / (output_w - 1) * 2 - 1
        grid_edge_indices[..., 1] = \
            grid_edge_indices[..., 1] / (output_h - 1) * 2 - 1

        # sample border features and apply edge fusion
        edge_features = F.grid_sample(
            features, grid_edge_indices, align_corners=True).squeeze(-1)
        edge_output = self.edge_convs(edge_features)

        # add the processed edge features back at their border positions
        for k in range(batch_size):
            edge_indice_k = edge_indices[k, :edge_lens[k]]
            fused_features[k, :, edge_indice_k[:, 1],
                           edge_indice_k[:, 0]] += edge_output[
                               k, :, :edge_lens[k]]

        return fused_features
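A shape-level sketch of how the module is exercised: border features are sampled with `F.grid_sample`, refined by the 1D convolutions, and added back at their border positions. The tensors below are dummies; the channel counts mirror the head test later in this diff:

```python
import torch

module = EdgeFusionModule(out_channels=3, feat_channels=64)
batch, h, w, num_edge = 2, 32, 32, 100
features = torch.rand(batch, 64, h, w)       # representative features
fused_features = torch.rand(batch, 3, h, w)  # maps to be fused (e.g. cls logits)
edge_indices = torch.randint(0, w, (batch, num_edge, 2))  # (x, y) border points
edge_lens = [num_edge, num_edge]

out = module(features, fused_features, edge_indices, edge_lens, h, w)
assert out.shape == (batch, 3, h, w)
```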
@@ -4,6 +4,7 @@ import torch

def get_edge_indices(img_metas,
                     downsample_ratio,
                     step=1,
+                    pad_mode='default',
                     dtype=np.float32,
@@ -17,6 +18,7 @@ def get_edge_indices(img_metas,
    Args:
        img_metas (list[dict]): Meta information of each image, e.g.,
            image size, scaling factor, etc.
+        downsample_ratio (int): Downsample ratio of the output feature map.
        step (int, optional): Step size used for generating
            edge indices. Default: 1.
        pad_mode (str, optional): Padding mode during the data pipeline.
@@ -32,13 +34,21 @@ def get_edge_indices(img_metas,
    edge_indices_list = []
    for i in range(len(img_metas)):
        img_shape = img_metas[i]['img_shape']
+        pad_shape = img_metas[i]['pad_shape']
        h, w = img_shape[:2]
+        pad_h, pad_w = pad_shape
        edge_indices = []

-        x_min = 0
-        y_min = 0
-        x_max, y_max = w - 1, h - 1
+        if pad_mode == 'default':
+            x_min = 0
+            y_min = 0
+            x_max = (w - 1) // downsample_ratio
+            y_max = (h - 1) // downsample_ratio
+        elif pad_mode == 'center':
+            x_min = np.ceil((pad_w - w) / 2 * downsample_ratio)
+            y_min = np.ceil((pad_h - h) / 2 * downsample_ratio)
+            x_max = x_min + w // downsample_ratio
+            y_max = y_min + h // downsample_ratio
+        else:
+            raise NotImplementedError
......
@@ -1505,3 +1505,62 @@ def test_pgd_head():
    assert results[0][2].shape == torch.Size([20])
    assert results[0][3] is None
    assert results[0][4].shape == torch.Size([20, 5])
+
+
+def test_monoflex_head():
+    head_cfg = dict(
+        type='MonoFlexHead',
+        num_classes=3,
+        in_channels=64,
+        use_edge_fusion=True,
+        edge_fusion_inds=[(1, 0)],
+        edge_heatmap_ratio=1 / 8,
+        stacked_convs=0,
+        feat_channels=64,
+        use_direction_classifier=False,
+        diff_rad_by_sin=False,
+        pred_attrs=False,
+        pred_velo=False,
+        dir_offset=0,
+        strides=None,
+        group_reg_dims=((4, ), (2, ), (20, ), (3, ), (3, ), (8, 8), (1, ),
+                        (1, )),
+        cls_branch=(256, ),
+        reg_branch=((256, ), (256, ), (256, ), (256, ), (256, ), (256, ),
+                    (256, ), (256, )),
+        num_attrs=0,
+        bbox_code_size=7,
+        dir_branch=(),
+        attr_branch=(),
+        bbox_coder=dict(
+            type='MonoFlexCoder',
+            depth_mode='exp',
+            base_depth=(26.494627, 16.05988),
+            depth_range=[0.1, 100],
+            combine_depth=True,
+            uncertainty_range=[-10, 10],
+            base_dims=((3.8840, 1.5261, 1.6286, 0.4259, 0.1367, 0.1022),
+                       (0.8423, 1.7607, 0.6602, 0.2349, 0.1133, 0.1427),
+                       (1.7635, 1.7372, 0.5968, 0.1766, 0.0948, 0.1242)),
+            dims_mode='linear',
+            multibin=True,
+            num_dir_bins=4,
+            bin_centers=[0, np.pi / 2, np.pi, -np.pi / 2],
+            bin_margin=np.pi / 6,
+            code_size=7),
+        conv_bias=True,
+        dcn_on_last_conv=False)
+    self = build_head(head_cfg)
+    feats = [torch.rand([2, 64, 32, 32], dtype=torch.float32)]
+    input_metas = [
+        dict(img_shape=(110, 110), pad_shape=(128, 128)),
+        dict(img_shape=(98, 110), pad_shape=(128, 128))
+    ]
+    cls_score, out_reg = self(feats, input_metas)
+    assert cls_score[0].shape == torch.Size([2, 3, 32, 32])
+    assert out_reg[0].shape == torch.Size([2, 50, 32, 32])
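The asserted 50 regression channels are simply the flattened sum of `group_reg_dims`. Judging from the coder hunks above, the 20-channel group is the keypoint offsets reshaped to `(N, 10, 2)`, and the trailing `(1, ), (1, )` groups line up with the direct depth offset and its uncertainty (slices `48:49` and `49:50`). A quick check:

```python
group_reg_dims = ((4, ), (2, ), (20, ), (3, ), (3, ), (8, 8), (1, ), (1, ))
assert sum(sum(group) for group in group_reg_dims) == 50  # matches out_reg
```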
# Copyright (c) OpenMMLab. All rights reserved.
import numpy as np
import torch
from mmcv.cnn import Scale
from torch import nn as nn
@@ -596,3 +597,69 @@ def test_smoke_bbox_coder():
    locations = torch.tensor([[15., 2., 1.], [15., 2., -1.]])
    orientations = bbox_coder._decode_orientation(ori_vector, locations)
    assert orientations.shape == torch.Size([2, 1])
+
+
+def test_monoflex_bbox_coder():
+    bbox_coder_cfg = dict(
+        type='MonoFlexCoder',
+        depth_mode='exp',
+        base_depth=(26.494627, 16.05988),
+        depth_range=[0.1, 100],
+        combine_depth=True,
+        uncertainty_range=[-10, 10],
+        base_dims=((3.8840, 1.5261, 1.6286, 0.4259, 0.1367, 0.1022),
+                   (0.8423, 1.7607, 0.6602, 0.2349, 0.1133, 0.1427),
+                   (1.7635, 1.7372, 0.5968, 0.1766, 0.0948, 0.1242)),
+        dims_mode='linear',
+        multibin=True,
+        num_dir_bins=4,
+        bin_centers=[0, np.pi / 2, np.pi, -np.pi / 2],
+        bin_margin=np.pi / 6,
+        code_size=7)
+    bbox_coder = build_bbox_coder(bbox_coder_cfg)
+    gt_bboxes_3d = CameraInstance3DBoxes(torch.rand([6, 7]))
+    orientation_target = bbox_coder.encode(gt_bboxes_3d)
+    assert orientation_target.shape == torch.Size([6, 8])
+
+    regression = torch.rand([100, 50])
+    base_centers2d = torch.rand([100, 2])
+    labels = torch.ones([100])
+    downsample_ratio = 4
+    cam2imgs = torch.rand([100, 4, 4])
+    preds = bbox_coder.decode(regression, base_centers2d, labels,
+                              downsample_ratio, cam2imgs)
+    assert preds['bboxes2d'].shape == torch.Size([100, 4])
+    assert preds['dimensions'].shape == torch.Size([100, 3])
+    assert preds['offsets2d'].shape == torch.Size([100, 2])
+    assert preds['keypoints2d'].shape == torch.Size([100, 10, 2])
+    assert preds['orientations'].shape == torch.Size([100, 16])
+    assert preds['direct_depth'].shape == torch.Size([100])
+    assert preds['keypoints_depth'].shape == torch.Size([100, 3])
+    assert preds['combined_depth'].shape == torch.Size([100])
+    assert preds['direct_depth_uncertainty'].shape == torch.Size([100])
+    assert preds['keypoints_depth_uncertainty'].shape == torch.Size([100, 3])
+
+    offsets_2d = torch.randn([100, 2])
+    depths = torch.randn([100])
+    locations = bbox_coder.decode_location(base_centers2d, offsets_2d, depths,
+                                           cam2imgs, downsample_ratio)
+    assert locations.shape == torch.Size([100, 3])
+
+    orientations = torch.randn([100, 16])
+    yaws, local_yaws = bbox_coder.decode_orientation(orientations, locations)
+    assert yaws.shape == torch.Size([100])
+    assert local_yaws.shape == torch.Size([100])
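For reference, the 16-wide orientation vector decoded above is the multibin counterpart of the 8-wide encode target: with `num_dir_bins=4`, a common layout (a hedged reading of this coder, not a verbatim excerpt) is 4 x 2 bin-classification logits followed by 4 x (sin, cos) offsets, and decoding picks the most confident bin and adds `atan2(sin, cos)` to its center:

```python
import numpy as np
import torch

num_bins = 4
bin_centers = torch.tensor([0, np.pi / 2, np.pi, -np.pi / 2])
vector = torch.randn(100, 16)  # stand-in for the head's orientation branch

# per-bin confidence from the first 8 channels
bin_cls = vector[:, :num_bins * 2].view(-1, num_bins, 2).softmax(dim=2)[..., 1]
best_bin = bin_cls.argmax(dim=1)
local_yaws = vector.new_zeros(vector.shape[0])
for i in range(num_bins):
    mask = best_bin == i
    start = num_bins * 2 + i * 2
    offset = vector[mask, start:start + 2]  # (sin, cos) for this bin
    local_yaws[mask] = torch.atan2(offset[:, 0], offset[:, 1]) + bin_centers[i]
```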
@@ -195,11 +195,15 @@ def test_points_img2cam():

def test_generate_edge_indices():
-    img_metas = [dict(img_shape=[300, 400]), dict(img_shape=[500, 450])]
-    edge_indices_list = get_edge_indices(img_metas)
+    input_metas = [
+        dict(img_shape=(110, 110), pad_shape=(128, 128)),
+        dict(img_shape=(98, 110), pad_shape=(128, 128))
+    ]
+    downsample_ratio = 4
+    edge_indices_list = get_edge_indices(input_metas, downsample_ratio)

-    assert edge_indices_list[0].shape[0] == 1396
-    assert edge_indices_list[1].shape[0] == 1896
+    assert edge_indices_list[0].shape[0] == 108
+    assert edge_indices_list[1].shape[0] == 102


def test_truncation_hanlde():
......
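The updated counts fall out of walking the border of the downsampled grid with `step=1`: for `pad_mode='default'` each axis spans `[0, (dim - 1) // downsample_ratio]`, and a rectangle border with corners counted once has `2 * (x_max + y_max)` points. Assuming that counting convention:

```python
downsample_ratio = 4
# img_shape (110, 110): x_max = y_max = 109 // 4 = 27
assert 2 * (27 + 27) == 108
# img_shape (98, 110): h, w = 98, 110 -> y_max = 97 // 4 = 24, x_max = 27
assert 2 * (27 + 24) == 102
```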