Commit 19472568 authored by 雍大凯
Browse files

将子模块转换为普通目录

parent 51e55208
# ---------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
# ---------------------------------------------
# Modified by Zhiqi Li
# ---------------------------------------------
from mmcv.ops.multi_scale_deform_attn import multi_scale_deformable_attn_pytorch
import warnings
import torch
import torch.nn as nn
import torch.nn.functional as F
from mmcv.cnn import xavier_init, constant_init
from mmcv.cnn.bricks.registry import (ATTENTION,
TRANSFORMER_LAYER,
TRANSFORMER_LAYER_SEQUENCE)
from mmcv.cnn.bricks.transformer import build_attention
import math
from mmcv.runner import force_fp32, auto_fp16
from mmcv.runner.base_module import BaseModule, ModuleList, Sequential
from mmcv.utils import ext_loader
from .multi_scale_deformable_attn_function import MultiScaleDeformableAttnFunction_fp32, \
MultiScaleDeformableAttnFunction_fp16
from projects.mmdet3d_plugin.models.utils.bricks import run_time
# Load the compiled `_ext` extension, requesting the multi-scale deformable
# attention forward/backward ops by name. `ext_module` is not referenced again
# in the visible code.
ext_module = ext_loader.load_ext(
'_ext', ['ms_deform_attn_backward', 'ms_deform_attn_forward'])
@ATTENTION.register_module()
class SpatialCrossAttention(BaseModule):
    """Spatial cross-attention between BEV queries and per-camera features.

    For every camera, the BEV queries flagged by ``bev_mask`` are gathered
    into a zero-padded per-camera batch, passed through
    ``deformable_attention`` and scattered back. Each query's result is then
    averaged over the cameras that see it, projected, and added to the
    residual.

    Args:
        embed_dims (int): The embedding dimension of Attention.
            Default: 256.
        num_cams (int): The number of cameras. Default: 6.
        pc_range: Stored on the module as-is; not read by this class.
            Default: None.
        dropout (float): Dropout probability applied to the projected
            output before the residual is added. Default: 0.1.
        init_cfg (obj:`mmcv.ConfigDict`): The Config for initialization.
            Default: None.
        batch_first (bool): Stored on the module; ``forward`` does not
            branch on it. Default: False.
        deformable_attention (dict): The config for the deformable
            attention used in SCA.
    """

    def __init__(self,
                 embed_dims=256,
                 num_cams=6,
                 pc_range=None,
                 dropout=0.1,
                 init_cfg=None,
                 batch_first=False,
                 deformable_attention=dict(
                     type='MSDeformableAttention3D',
                     embed_dims=256,
                     num_levels=4),
                 **kwargs
                 ):
        super(SpatialCrossAttention, self).__init__(init_cfg)

        self.init_cfg = init_cfg
        self.dropout = nn.Dropout(dropout)
        self.pc_range = pc_range
        self.fp16_enabled = False
        self.deformable_attention = build_attention(deformable_attention)
        self.embed_dims = embed_dims
        self.num_cams = num_cams
        self.output_proj = nn.Linear(embed_dims, embed_dims)
        self.batch_first = batch_first
        self.init_weight()

    def init_weight(self):
        """Default initialization for Parameters of Module."""
        xavier_init(self.output_proj, distribution='uniform', bias=0.)

    @force_fp32(apply_to=('query', 'key', 'value', 'query_pos', 'reference_points_cam'))
    def forward(self,
                query,
                key,
                value,
                residual=None,
                query_pos=None,
                key_padding_mask=None,
                reference_points=None,
                spatial_shapes=None,
                reference_points_cam=None,
                bev_mask=None,
                level_start_index=None,
                flag='encoder',
                **kwargs):
        """Forward function of SpatialCrossAttention.

        Args:
            query (Tensor): BEV queries, unpacked here as
                (bs, num_query, embed_dims).
            key (Tensor): Camera features, unpacked here as
                (num_cams, num_key, bs, embed_dims). If None, ``query``
                is used.
            value (Tensor): Same layout as ``key``. If None, ``key`` is
                used.
            residual (Tensor): The tensor added to the output, with the
                same shape as ``query``. Default None. If None, ``query``
                (before ``query_pos`` is added) is used.
            query_pos (Tensor): The positional encoding for ``query``.
                Default: None.
            key_padding_mask: Unused by this method.
            reference_points: Unused by this method.
            spatial_shapes (Tensor): Forwarded unchanged to the deformable
                attention module.
            reference_points_cam (Tensor): Per-camera 2D reference points,
                indexed here as (num_cams, bs, num_query, D, 2).
            bev_mask (Tensor): Per-camera validity mask, indexed here as
                (num_cams, bs, num_query, D) -- the last axis presumably
                matches ``D`` of ``reference_points_cam``; TODO confirm.
            level_start_index (Tensor): Forwarded unchanged to the
                deformable attention module.
            flag (str): Unused by this method.

        Returns:
            Tensor: Result with the same shape as ``query``,
            (bs, num_query, embed_dims).
        """
        if key is None:
            key = query
        if value is None:
            value = key

        # Bind these unconditionally: previously both were only assigned
        # when ``residual`` was None, so passing a residual raised an
        # unbound-local error further down.
        inp_residual = query if residual is None else residual
        slots = torch.zeros_like(query)
        if query_pos is not None:
            query = query + query_pos

        bs, num_query, _ = query.size()
        D = reference_points_cam.size(3)

        # Per camera: indices of the BEV queries with at least one valid
        # reference point. Only the first sample's mask is consulted.
        indexes = []
        for mask_per_img in bev_mask:
            index_query_per_img = mask_per_img[0].sum(-1).nonzero().squeeze(-1)
            indexes.append(index_query_per_img)
        max_len = max(len(each) for each in indexes)

        # each camera only interacts with its corresponding BEV queries.
        # This step can greatly save GPU memory.
        queries_rebatch = query.new_zeros(
            [bs, self.num_cams, max_len, self.embed_dims])
        reference_points_rebatch = reference_points_cam.new_zeros(
            [bs, self.num_cams, max_len, D, 2])

        for j in range(bs):
            for i, reference_points_per_img in enumerate(reference_points_cam):
                index_query_per_img = indexes[i]
                num_hit = len(index_query_per_img)
                queries_rebatch[j, i, :num_hit] = \
                    query[j, index_query_per_img]
                reference_points_rebatch[j, i, :num_hit] = \
                    reference_points_per_img[j, index_query_per_img]

        # Fold the camera axis into the batch axis for the attention call.
        _, num_key, bs, _ = key.shape
        key = key.permute(2, 0, 1, 3).reshape(
            bs * self.num_cams, num_key, self.embed_dims)
        value = value.permute(2, 0, 1, 3).reshape(
            bs * self.num_cams, num_key, self.embed_dims)

        queries = self.deformable_attention(
            query=queries_rebatch.view(
                bs * self.num_cams, max_len, self.embed_dims),
            key=key,
            value=value,
            reference_points=reference_points_rebatch.view(
                bs * self.num_cams, max_len, D, 2),
            spatial_shapes=spatial_shapes,
            level_start_index=level_start_index).view(
                bs, self.num_cams, max_len, self.embed_dims)

        # Scatter the per-camera results back to their BEV slots.
        for j in range(bs):
            for i, index_query_per_img in enumerate(indexes):
                slots[j, index_query_per_img] += \
                    queries[j, i, :len(index_query_per_img)]

        # Average over the number of cameras that hit each query; the clamp
        # avoids dividing by zero for queries no camera sees.
        count = bev_mask.sum(-1) > 0
        count = count.permute(1, 2, 0).sum(-1)
        count = torch.clamp(count, min=1.0)
        slots = slots / count[..., None]
        slots = self.output_proj(slots)

        return self.dropout(slots) + inp_residual
@ATTENTION.register_module()
class MSDeformableAttention3D(BaseModule):
    """Multi-scale deformable attention with several reference points
    (Z anchors) per query, used in BEVFormer.

    Based on `Deformable DETR: Deformable Transformers for End-to-End
    Object Detection <https://arxiv.org/pdf/2010.04159.pdf>`_.

    Args:
        embed_dims (int): The embedding dimension of Attention.
            Default: 256.
        num_heads (int): Parallel attention heads. Default: 8.
        num_levels (int): The number of feature maps used in
            Attention. Default: 4.
        num_points (int): The number of sampling points for each query
            in each head and level (shared across all Z anchors).
            Default: 8.
        im2col_step (int): The step used in image_to_column.
            Default: 64.
        dropout (float): Accepted but not used by this class.
            Default: 0.1.
        batch_first (bool): Whether query/value are
            (batch, n, embed_dim) rather than (n, batch, embed_dim).
            Default: True.
        norm_cfg (dict): Stored on the module; not otherwise used here.
            Default: None.
        init_cfg (obj:`mmcv.ConfigDict`): The Config for initialization.
            Default: None.
    """

    def __init__(self,
                 embed_dims=256,
                 num_heads=8,
                 num_levels=4,
                 num_points=8,
                 im2col_step=64,
                 dropout=0.1,
                 batch_first=True,
                 norm_cfg=None,
                 init_cfg=None):
        super().__init__(init_cfg)
        if embed_dims % num_heads != 0:
            raise ValueError(f'embed_dims must be divisible by num_heads, '
                             f'but got {embed_dims} and {num_heads}')
        head_dim = embed_dims // num_heads

        self.norm_cfg = norm_cfg
        self.batch_first = batch_first
        self.output_proj = None
        self.fp16_enabled = False

        def _is_power_of_2(n):
            """Return True when ``n`` is a positive integer power of two."""
            valid = isinstance(n, int) and n >= 0
            if not valid:
                raise ValueError(
                    'invalid input for _is_power_of_2: {} (type: {})'.format(
                        n, type(n)))
            return (n & (n - 1) == 0) and n != 0

        # A power-of-two head dimension is more efficient in the CUDA op.
        if not _is_power_of_2(head_dim):
            warnings.warn(
                "You'd better set embed_dims in "
                'MultiScaleDeformAttention to make '
                'the dimension of each attention head a power of 2 '
                'which is more efficient in our CUDA implementation.')

        self.im2col_step = im2col_step
        self.embed_dims = embed_dims
        self.num_levels = num_levels
        self.num_heads = num_heads
        self.num_points = num_points

        per_query = num_heads * num_levels * num_points
        self.sampling_offsets = nn.Linear(embed_dims, per_query * 2)
        self.attention_weights = nn.Linear(embed_dims, per_query)
        self.value_proj = nn.Linear(embed_dims, embed_dims)

        self.init_weights()

    def init_weights(self):
        """Default initialization for Parameters of Module."""
        constant_init(self.sampling_offsets, 0.)

        # One direction per head, evenly spread around the circle and
        # scaled so its largest absolute component is 1.
        angle_step = 2.0 * math.pi / self.num_heads
        thetas = torch.arange(self.num_heads, dtype=torch.float32) * angle_step
        directions = torch.stack([thetas.cos(), thetas.sin()], -1)
        directions = directions / directions.abs().max(-1, keepdim=True)[0]
        grid_init = directions.view(self.num_heads, 1, 1, 2).repeat(
            1, self.num_levels, self.num_points, 1)
        # The i-th sampling point starts (i + 1) steps along its direction.
        for point_idx in range(self.num_points):
            grid_init[:, :, point_idx, :] *= point_idx + 1
        self.sampling_offsets.bias.data = grid_init.view(-1)

        constant_init(self.attention_weights, val=0., bias=0.)
        xavier_init(self.value_proj, distribution='uniform', bias=0.)
        # ``output_proj`` is None in this class; the call is kept as in the
        # original. NOTE(review): presumably a no-op -- confirm against
        # mmcv's ``xavier_init``.
        xavier_init(self.output_proj, distribution='uniform', bias=0.)
        self._is_init = True

    def forward(self,
                query,
                key=None,
                value=None,
                identity=None,
                query_pos=None,
                key_padding_mask=None,
                reference_points=None,
                spatial_shapes=None,
                level_start_index=None,
                **kwargs):
        """Forward function of MSDeformableAttention3D.

        Args:
            query (Tensor): Query with shape (bs, num_query, embed_dims)
                when ``batch_first`` is True, otherwise
                (num_query, bs, embed_dims).
            key (Tensor): Unused by this method.
            value (Tensor): Value tensor in the same layout as ``query``
                with ``num_value`` entries. If None, ``query`` is used.
            identity (Tensor): Accepted for interface compatibility; no
                residual is added by this method.
            query_pos (Tensor): The positional encoding for ``query``.
                Default: None.
            key_padding_mask (Tensor): Mask with shape (bs, num_value);
                masked positions of the projected value are zeroed.
            reference_points (Tensor): Reference points with shape
                (bs, num_query, num_Z_anchors, 2). A last dimension of 4
                is not supported.
            spatial_shapes (Tensor): Spatial shape of features in
                different levels, (num_levels, 2), last dimension (h, w).
            level_start_index (Tensor): The start index of each level,
                forwarded to the CUDA attention function.

        Returns:
            Tensor: Output of the deformable attention op, permuted back
            to (num_query, bs, ...) when ``batch_first`` is False. No
            output projection, dropout or residual is applied here.
        """
        if value is None:
            value = query
        if query_pos is not None:
            query = query + query_pos

        if not self.batch_first:
            # change to (bs, num_query, embed_dims)
            query = query.permute(1, 0, 2)
            value = value.permute(1, 0, 2)

        num_query = query.shape[1]
        bs, num_value, _ = value.shape
        assert (spatial_shapes[:, 0] * spatial_shapes[:, 1]).sum() == num_value

        value = self.value_proj(value)
        if key_padding_mask is not None:
            value = value.masked_fill(key_padding_mask[..., None], 0.0)
        value = value.view(bs, num_value, self.num_heads, -1)

        sampling_offsets = self.sampling_offsets(query).view(
            bs, num_query, self.num_heads, self.num_levels, self.num_points, 2)

        # Softmax jointly over levels and points, then split the axes again.
        flat_weights = self.attention_weights(query).view(
            bs, num_query, self.num_heads, self.num_levels * self.num_points)
        attention_weights = flat_weights.softmax(-1).view(
            bs, num_query, self.num_heads, self.num_levels, self.num_points)

        last_dim = reference_points.shape[-1]
        if last_dim == 2:
            # For each BEV query, it owns `num_Z_anchors` in 3D space that
            # have different heights. After projecting, each BEV query has
            # `num_Z_anchors` reference points in each 2D image. The
            # `num_points` offsets are split evenly across those anchors.
            offset_normalizer = torch.stack(
                [spatial_shapes[..., 1], spatial_shapes[..., 0]], -1)
            _, _, num_z_anchors, _ = reference_points.shape
            anchors = reference_points[:, :, None, None, None, :, :]

            normalized = sampling_offsets / \
                offset_normalizer[None, None, None, :, None, :]
            b, q, heads, levels, total_points, xy = normalized.shape
            grouped = normalized.view(
                b, q, heads, levels,
                total_points // num_z_anchors, num_z_anchors, xy)

            sampling_locations = anchors + grouped
            (b, q, heads, levels,
             points_per_anchor, num_z_anchors, xy) = sampling_locations.shape
            assert total_points == points_per_anchor * num_z_anchors

            sampling_locations = sampling_locations.view(
                b, q, heads, levels, total_points, xy)
        elif last_dim == 4:
            assert False
        else:
            raise ValueError(
                f'Last dim of reference_points must be'
                f' 2 or 4, but get {reference_points.shape[-1]} instead.')

        # sampling_locations: bs, num_query, num_heads, num_levels, num_all_points, 2
        # attention_weights:  bs, num_query, num_heads, num_levels, num_all_points
        if torch.cuda.is_available() and value.is_cuda:
            # The fp32 function is used for every dtype, fp16 included.
            output = MultiScaleDeformableAttnFunction_fp32.apply(
                value, spatial_shapes, level_start_index, sampling_locations,
                attention_weights, self.im2col_step)
        else:
            output = multi_scale_deformable_attn_pytorch(
                value, spatial_shapes, sampling_locations, attention_weights)

        if not self.batch_first:
            output = output.permute(1, 0, 2)
        return output
@ATTENTION.register_module()
class MSIPM3D(BaseModule):
    """Multi-scale sampling with fixed offsets and uniform weights.

    Structured like multi-scale deformable attention (`Deformable DETR
    <https://arxiv.org/pdf/2010.04159.pdf>`_), but the sampling offsets are
    a fixed, non-trainable grid and every sampling point gets the same
    attention weight; only ``value_proj`` is learned.

    Args:
        embed_dims (int): The embedding dimension of Attention.
            Default: 256.
        num_heads (int): Parallel attention heads. Default: 8.
        num_levels (int): The number of feature maps used in
            Attention. Default: 4.
        num_points (int): The number of sampling points for each query
            in each head and level (shared across all Z anchors).
            Default: 8.
        im2col_step (int): The step used in image_to_column.
            Default: 64.
        dropout (float): Accepted but not used by this class.
            Default: 0.1.
        batch_first (bool): Whether query/value are
            (batch, n, embed_dim) rather than (n, batch, embed_dim).
            Default: True.
        norm_cfg (dict): Stored on the module; not otherwise used here.
            Default: None.
        init_cfg (obj:`mmcv.ConfigDict`): The Config for initialization.
            Default: None.
    """

    def __init__(self,
                 embed_dims=256,
                 num_heads=8,
                 num_levels=4,
                 num_points=8,
                 im2col_step=64,
                 dropout=0.1,
                 batch_first=True,
                 norm_cfg=None,
                 init_cfg=None):
        super().__init__(init_cfg)
        if embed_dims % num_heads != 0:
            raise ValueError(f'embed_dims must be divisible by num_heads, '
                             f'but got {embed_dims} and {num_heads}')
        head_dim = embed_dims // num_heads

        self.norm_cfg = norm_cfg
        self.batch_first = batch_first
        self.output_proj = None
        self.fp16_enabled = False

        def _is_power_of_2(n):
            """Return True when ``n`` is a positive integer power of two."""
            valid = isinstance(n, int) and n >= 0
            if not valid:
                raise ValueError(
                    'invalid input for _is_power_of_2: {} (type: {})'.format(
                        n, type(n)))
            return (n & (n - 1) == 0) and n != 0

        # A power-of-two head dimension is more efficient in the CUDA op.
        if not _is_power_of_2(head_dim):
            warnings.warn(
                "You'd better set embed_dims in "
                'MultiScaleDeformAttention to make '
                'the dimension of each attention head a power of 2 '
                'which is more efficient in our CUDA implementation.')

        self.im2col_step = im2col_step
        self.embed_dims = embed_dims
        self.num_levels = num_levels
        self.num_heads = num_heads
        self.num_points = num_points

        # No learned sampling-offset or attention-weight layers here.
        self.value_proj = nn.Linear(embed_dims, embed_dims)

        self.init_weights()

    def init_weights(self):
        """Default initialization for Parameters of Module."""
        # One direction per head, evenly spread around the circle and
        # scaled so its largest absolute component is 1.
        angle_step = 2.0 * math.pi / self.num_heads
        thetas = torch.arange(self.num_heads, dtype=torch.float32) * angle_step
        directions = torch.stack([thetas.cos(), thetas.sin()], -1)
        directions = directions / directions.abs().max(-1, keepdim=True)[0]
        grid_init = directions.view(self.num_heads, 1, 1, 2).repeat(
            1, self.num_levels, self.num_points, 1)
        # The i-th sampling point sits (i + 1) steps along its direction.
        for point_idx in range(self.num_points):
            grid_init[:, :, point_idx, :] *= point_idx + 1

        # Frozen parameter: follows the module across devices and is saved
        # in the state dict, but receives no gradient.
        self.fixed_sampling_offsets = nn.Parameter(
            grid_init.view(-1), requires_grad=False)

        xavier_init(self.value_proj, distribution='uniform', bias=0.)
        # ``output_proj`` is None in this class; the call is kept as in the
        # original. NOTE(review): presumably a no-op -- confirm against
        # mmcv's ``xavier_init``.
        xavier_init(self.output_proj, distribution='uniform', bias=0.)
        self._is_init = True

    def forward(self,
                query,
                key=None,
                value=None,
                identity=None,
                query_pos=None,
                key_padding_mask=None,
                reference_points=None,
                spatial_shapes=None,
                level_start_index=None,
                **kwargs):
        """Forward function of MSIPM3D.

        Args:
            query (Tensor): Query with shape (bs, num_query, embed_dims)
                when ``batch_first`` is True, otherwise
                (num_query, bs, embed_dims). Only its shape, dtype and
                device influence the result; sampling does not depend on
                its content.
            key (Tensor): Unused by this method.
            value (Tensor): Value tensor in the same layout as ``query``
                with ``num_value`` entries. If None, ``query`` is used.
            identity (Tensor): Accepted for interface compatibility; no
                residual is added by this method.
            query_pos (Tensor): The positional encoding for ``query``.
                Default: None.
            key_padding_mask (Tensor): Mask with shape (bs, num_value);
                masked positions of the projected value are zeroed.
            reference_points (Tensor): Reference points with shape
                (bs, num_query, num_Z_anchors, 2). A last dimension of 4
                is not supported.
            spatial_shapes (Tensor): Spatial shape of features in
                different levels, (num_levels, 2), last dimension (h, w).
            level_start_index (Tensor): The start index of each level,
                forwarded to the CUDA attention function.

        Returns:
            Tensor: Output of the deformable attention op, permuted back
            to (num_query, bs, ...) when ``batch_first`` is False. No
            output projection, dropout or residual is applied here.
        """
        if value is None:
            value = query
        if query_pos is not None:
            query = query + query_pos

        if not self.batch_first:
            # change to (bs, num_query, embed_dims)
            query = query.permute(1, 0, 2)
            value = value.permute(1, 0, 2)

        num_query = query.shape[1]
        bs, num_value, _ = value.shape
        assert (spatial_shapes[:, 0] * spatial_shapes[:, 1]).sum() == num_value

        value = self.value_proj(value)
        if key_padding_mask is not None:
            value = value.masked_fill(key_padding_mask[..., None], 0.0)
        value = value.view(bs, num_value, self.num_heads, -1)

        # Broadcast the fixed offset grid to every batch element and query.
        offset_grid = self.fixed_sampling_offsets.view(
            1, 1, self.num_heads, self.num_levels, self.num_points, 2)
        sampling_offsets = offset_grid.repeat(bs, num_query, 1, 1, 1, 1)

        # Softmax over a constant tensor: every point gets equal weight.
        flat_weights = query.new_ones(
            (bs, num_query, self.num_heads,
             self.num_levels * self.num_points))
        attention_weights = flat_weights.softmax(-1).view(
            bs, num_query, self.num_heads, self.num_levels, self.num_points)

        last_dim = reference_points.shape[-1]
        if last_dim == 2:
            # For each BEV query, it owns `num_Z_anchors` in 3D space that
            # have different heights. After projecting, each BEV query has
            # `num_Z_anchors` reference points in each 2D image. The
            # `num_points` offsets are split evenly across those anchors.
            offset_normalizer = torch.stack(
                [spatial_shapes[..., 1], spatial_shapes[..., 0]], -1)
            _, _, num_z_anchors, _ = reference_points.shape
            anchors = reference_points[:, :, None, None, None, :, :]

            normalized = sampling_offsets / \
                offset_normalizer[None, None, None, :, None, :]
            b, q, heads, levels, total_points, xy = normalized.shape
            grouped = normalized.view(
                b, q, heads, levels,
                total_points // num_z_anchors, num_z_anchors, xy)

            sampling_locations = anchors + grouped
            (b, q, heads, levels,
             points_per_anchor, num_z_anchors, xy) = sampling_locations.shape
            assert total_points == points_per_anchor * num_z_anchors

            sampling_locations = sampling_locations.view(
                b, q, heads, levels, total_points, xy)
        elif last_dim == 4:
            assert False
        else:
            raise ValueError(
                f'Last dim of reference_points must be'
                f' 2 or 4, but get {reference_points.shape[-1]} instead.')

        # sampling_locations: bs, num_query, num_heads, num_levels, num_all_points, 2
        # attention_weights:  bs, num_query, num_heads, num_levels, num_all_points
        if torch.cuda.is_available() and value.is_cuda:
            # The fp32 function is used for every dtype, fp16 included.
            output = MultiScaleDeformableAttnFunction_fp32.apply(
                value, spatial_shapes, level_start_index, sampling_locations,
                attention_weights, self.im2col_step)
        else:
            output = multi_scale_deformable_attn_pytorch(
                value, spatial_shapes, sampling_locations, attention_weights)

        if not self.batch_first:
            output = output.permute(1, 0, 2)
        return output
# ---------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
# ---------------------------------------------
# Modified by Zhiqi Li
# ---------------------------------------------
from projects.mmdet3d_plugin.models.utils.bricks import run_time
from .multi_scale_deformable_attn_function import MultiScaleDeformableAttnFunction_fp32
from mmcv.ops.multi_scale_deform_attn import multi_scale_deformable_attn_pytorch
import warnings
import torch
import torch.nn as nn
from mmcv.cnn import xavier_init, constant_init
from mmcv.cnn.bricks.registry import ATTENTION
import math
from mmcv.runner.base_module import BaseModule, ModuleList, Sequential
from mmcv.utils import (ConfigDict, build_from_cfg, deprecated_api_warning,
to_2tuple)
from mmcv.utils import ext_loader
# Load the compiled `_ext` extension, requesting the multi-scale deformable
# attention forward/backward ops by name. `ext_module` is not referenced again
# in the visible code.
ext_module = ext_loader.load_ext(
'_ext', ['ms_deform_attn_backward', 'ms_deform_attn_forward'])
@ATTENTION.register_module()
class TemporalSelfAttention(BaseModule):
    """Temporal self-attention over a queue of BEV maps, used in BEVFormer.

    Based on `Deformable DETR: Deformable Transformers for End-to-End
    Object Detection <https://arxiv.org/pdf/2010.04159.pdf>`_. Sampling
    offsets and attention weights are predicted from the concatenation of
    the first queue entry and the current query; the attention outputs of
    all queue entries are averaged.

    Args:
        embed_dims (int): The embedding dimension of Attention.
            Default: 256.
        num_heads (int): Parallel attention heads. Default: 8.
        num_levels (int): The number of feature maps used in
            Attention. Default: 4.
        num_points (int): The number of sampling points for
            each query in each head. Default: 4.
        num_bev_queue (int): Length of the BEV queue. ``forward`` asserts
            it is 2 (one history BEV and one current BEV). Default: 2.
        im2col_step (int): The step used in image_to_column.
            Default: 64.
        dropout (float): Dropout probability applied to the projected
            output before the identity is added. Default: 0.1.
        batch_first (bool): Whether query/value are
            (batch, n, embed_dim) rather than (n, batch, embed_dim).
            Default: True.
        norm_cfg (dict): Stored on the module; not otherwise used here.
            Default: None.
        init_cfg (obj:`mmcv.ConfigDict`): The Config for initialization.
            Default: None.
    """

    def __init__(self,
                 embed_dims=256,
                 num_heads=8,
                 num_levels=4,
                 num_points=4,
                 num_bev_queue=2,
                 im2col_step=64,
                 dropout=0.1,
                 batch_first=True,
                 norm_cfg=None,
                 init_cfg=None):
        super().__init__(init_cfg)
        if embed_dims % num_heads != 0:
            raise ValueError(f'embed_dims must be divisible by num_heads, '
                             f'but got {embed_dims} and {num_heads}')
        dim_per_head = embed_dims // num_heads
        self.norm_cfg = norm_cfg
        self.dropout = nn.Dropout(dropout)
        self.batch_first = batch_first
        self.fp16_enabled = False

        # you'd better set dim_per_head to a power of 2
        # which is more efficient in the CUDA implementation
        def _is_power_of_2(n):
            """Return True when ``n`` is a positive integer power of two."""
            if (not isinstance(n, int)) or (n < 0):
                raise ValueError(
                    'invalid input for _is_power_of_2: {} (type: {})'.format(
                        n, type(n)))
            return (n & (n - 1) == 0) and n != 0

        if not _is_power_of_2(dim_per_head):
            warnings.warn(
                "You'd better set embed_dims in "
                'MultiScaleDeformAttention to make '
                'the dimension of each attention head a power of 2 '
                'which is more efficient in our CUDA implementation.')

        self.im2col_step = im2col_step
        self.embed_dims = embed_dims
        self.num_levels = num_levels
        self.num_heads = num_heads
        self.num_points = num_points
        self.num_bev_queue = num_bev_queue
        # Both heads take the concatenated [history, current] features.
        self.sampling_offsets = nn.Linear(
            embed_dims * self.num_bev_queue,
            num_bev_queue * num_heads * num_levels * num_points * 2)
        self.attention_weights = nn.Linear(
            embed_dims * self.num_bev_queue,
            num_bev_queue * num_heads * num_levels * num_points)
        self.value_proj = nn.Linear(embed_dims, embed_dims)
        self.output_proj = nn.Linear(embed_dims, embed_dims)
        self.init_weights()

    def init_weights(self):
        """Default initialization for Parameters of Module."""
        constant_init(self.sampling_offsets, 0.)
        # One direction per head, evenly spread around the circle and
        # scaled so its largest absolute component is 1.
        thetas = torch.arange(
            self.num_heads,
            dtype=torch.float32) * (2.0 * math.pi / self.num_heads)
        grid_init = torch.stack([thetas.cos(), thetas.sin()], -1)
        grid_init = (grid_init /
                     grid_init.abs().max(-1, keepdim=True)[0]).view(
            self.num_heads, 1, 1,
            2).repeat(1, self.num_levels * self.num_bev_queue,
                      self.num_points, 1)
        # The i-th sampling point starts (i + 1) steps along its direction.
        for i in range(self.num_points):
            grid_init[:, :, i, :] *= i + 1

        self.sampling_offsets.bias.data = grid_init.view(-1)
        constant_init(self.attention_weights, val=0., bias=0.)
        xavier_init(self.value_proj, distribution='uniform', bias=0.)
        xavier_init(self.output_proj, distribution='uniform', bias=0.)
        self._is_init = True

    def forward(self,
                query,
                key=None,
                value=None,
                identity=None,
                query_pos=None,
                key_padding_mask=None,
                reference_points=None,
                spatial_shapes=None,
                level_start_index=None,
                flag='decoder',
                **kwargs):
        """Forward function of TemporalSelfAttention.

        Args:
            query (Tensor): Query with shape (bs, num_query, embed_dims)
                when ``batch_first`` is True, otherwise
                (num_query, bs, embed_dims).
            key (Tensor): Unused by this method.
            value (Tensor): BEV queue with shape
                (bs * num_bev_queue, num_value, embed_dims) when
                ``batch_first`` is True, laid out sample-major (all queue
                entries of sample 0, then sample 1, ...). If None,
                ``query`` is duplicated for both queue entries, which
                requires ``batch_first``.
            identity (Tensor): The tensor added to the output, with the
                same shape as ``query``. Default None. If None, ``query``
                (before ``query_pos`` is added) is used.
            query_pos (Tensor): The positional encoding for ``query``.
                Default: None.
            key_padding_mask (Tensor): Mask broadcast against the
                projected value; masked positions are zeroed.
            reference_points (Tensor): Reference points whose last
                dimension is 2 (points) or 4 (boxes, with the extra two
                values scaling the offsets). Indexed here as
                (batch, num_query, num_levels, 2 or 4); the batch axis
                must broadcast against bs * num_bev_queue.
            spatial_shapes (Tensor): Spatial shape of features in
                different levels, (num_levels, 2), last dimension (h, w).
            level_start_index (Tensor): The start index of each level,
                forwarded to the CUDA attention function.
            flag (str): Unused by this method.

        Returns:
            Tensor: Result with the same shape as ``identity``.
        """
        if value is None:
            assert self.batch_first
            bs, len_bev, c = query.shape
            value = torch.stack([query, query], 1).reshape(bs * 2, len_bev, c)

        if identity is None:
            identity = query
        if query_pos is not None:
            query = query + query_pos
        if not self.batch_first:
            # change to (bs, num_query ,embed_dims)
            query = query.permute(1, 0, 2)
            value = value.permute(1, 0, 2)
        bs, num_query, embed_dims = query.shape
        _, num_value, _ = value.shape
        assert (spatial_shapes[:, 0] * spatial_shapes[:, 1]).sum() == num_value
        assert self.num_bev_queue == 2

        # ``value`` is sample-major over (bs, num_bev_queue), so the history
        # entry of sample b lives at row b * num_bev_queue. The former
        # ``value[:bs]`` matched that only for bs == 1 and paired queries
        # with other samples' rows for larger batches.
        history = value.reshape(
            bs, self.num_bev_queue, num_value, -1)[:, 0]
        query = torch.cat([history, query], -1)
        value = self.value_proj(value)

        if key_padding_mask is not None:
            value = value.masked_fill(key_padding_mask[..., None], 0.0)

        value = value.reshape(bs * self.num_bev_queue,
                              num_value, self.num_heads, -1)

        sampling_offsets = self.sampling_offsets(query)
        sampling_offsets = sampling_offsets.view(
            bs, num_query, self.num_heads, self.num_bev_queue,
            self.num_levels, self.num_points, 2)
        # Softmax jointly over levels and points, per queue entry.
        attention_weights = self.attention_weights(query).view(
            bs, num_query, self.num_heads, self.num_bev_queue,
            self.num_levels * self.num_points)
        attention_weights = attention_weights.softmax(-1)

        attention_weights = attention_weights.view(bs, num_query,
                                                   self.num_heads,
                                                   self.num_bev_queue,
                                                   self.num_levels,
                                                   self.num_points)

        # Move the queue axis next to the batch axis and fold them together
        # so each queue entry is attended as its own batch element.
        attention_weights = attention_weights.permute(0, 3, 1, 2, 4, 5)\
            .reshape(bs * self.num_bev_queue, num_query, self.num_heads,
                     self.num_levels, self.num_points).contiguous()
        sampling_offsets = sampling_offsets.permute(0, 3, 1, 2, 4, 5, 6)\
            .reshape(bs * self.num_bev_queue, num_query, self.num_heads,
                     self.num_levels, self.num_points, 2)

        if reference_points.shape[-1] == 2:
            offset_normalizer = torch.stack(
                [spatial_shapes[..., 1], spatial_shapes[..., 0]], -1)
            sampling_locations = reference_points[:, :, None, :, None, :] \
                + sampling_offsets \
                / offset_normalizer[None, None, None, :, None, :]
        elif reference_points.shape[-1] == 4:
            sampling_locations = reference_points[:, :, None, :, None, :2] \
                + sampling_offsets / self.num_points \
                * reference_points[:, :, None, :, None, 2:] \
                * 0.5
        else:
            raise ValueError(
                f'Last dim of reference_points must be'
                f' 2 or 4, but get {reference_points.shape[-1]} instead.')

        if torch.cuda.is_available() and value.is_cuda:
            # using fp16 deformable attention is unstable because it
            # performs many sum operations, so the fp32 function is used
            # for every dtype.
            output = MultiScaleDeformableAttnFunction_fp32.apply(
                value, spatial_shapes, level_start_index, sampling_locations,
                attention_weights, self.im2col_step)
        else:
            output = multi_scale_deformable_attn_pytorch(
                value, spatial_shapes, sampling_locations, attention_weights)

        # output shape (bs*num_bev_queue, num_query, embed_dims)
        # (bs*num_bev_queue, num_query, embed_dims)
        #   -> (num_query, embed_dims, bs*num_bev_queue)
        output = output.permute(1, 2, 0)

        # fuse history value and current value
        # (num_query, embed_dims, bs*num_bev_queue)
        #   -> (num_query, embed_dims, bs, num_bev_queue)
        output = output.view(num_query, embed_dims, bs, self.num_bev_queue)
        output = output.mean(-1)

        # (num_query, embed_dims, bs) -> (bs, num_query, embed_dims)
        output = output.permute(2, 0, 1)

        output = self.output_proj(output)

        if not self.batch_first:
            output = output.permute(1, 0, 2)

        return self.dropout(output) + identity
# ---------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
# ---------------------------------------------
# Modified by Zhiqi Li
# ---------------------------------------------
import numpy as np
import torch
import torch.nn as nn
from mmcv.cnn import xavier_init
from mmcv.cnn.bricks.transformer import build_transformer_layer_sequence
from mmcv.runner.base_module import BaseModule
from mmdet.models.utils.builder import TRANSFORMER
from torch.nn.init import normal_
from projects.mmdet3d_plugin.models.utils.visual import save_tensor
from mmcv.runner.base_module import BaseModule
from torchvision.transforms.functional import rotate
from .temporal_self_attention import TemporalSelfAttention
from .spatial_cross_attention import MSDeformableAttention3D
from .decoder import CustomMSDeformableAttention
from projects.mmdet3d_plugin.models.utils.bricks import run_time
from mmcv.runner import force_fp32, auto_fp16
@TRANSFORMER.register_module()
class PerceptionTransformer(BaseModule):
    """Encode multi-camera features into a BEV embedding, then decode
    object queries against that embedding.

    Args:
        num_feature_levels (int): Number of rows of the learnable level
            embedding. Default: 4.
        num_cams (int): Number of rows of the learnable camera embedding.
            Default: 6.
        two_stage_num_proposals (int): Only stored on the instance; it is
            not read anywhere else in this class. Default: 300.
        encoder (dict): Config handed to
            `build_transformer_layer_sequence` to build the BEV encoder.
        decoder (dict): Config handed to
            `build_transformer_layer_sequence` to build the decoder.
        embed_dims (int): Embedding width. Default: 256.
        rotate_prev_bev (bool): Rotate `prev_bev` per sample before it is
            handed to the encoder. Default: True.
        use_shift (bool): Multiplied into the ego-motion shift, so False
            zeroes the shift. Default: True.
        use_can_bus (bool): Multiplied into the CAN-bus embedding before it
            is added to the BEV queries. Default: True.
        can_bus_norm (bool): Append a LayerNorm to the CAN-bus MLP.
            Default: True.
        use_cams_embeds (bool): Add the camera embedding to the image
            features. Default: True.
        rotate_center (list[int]): `center` argument used when rotating
            `prev_bev`. Default: [100, 100].
    """

    def __init__(self,
                 num_feature_levels=4,
                 num_cams=6,
                 two_stage_num_proposals=300,
                 encoder=None,
                 decoder=None,
                 embed_dims=256,
                 rotate_prev_bev=True,
                 use_shift=True,
                 use_can_bus=True,
                 can_bus_norm=True,
                 use_cams_embeds=True,
                 rotate_center=[100, 100],
                 **kwargs):
        super().__init__(**kwargs)
        # Sub-networks are registered first, in the same order as before.
        self.encoder = build_transformer_layer_sequence(encoder)
        self.decoder = build_transformer_layer_sequence(decoder)

        # Plain configuration values.
        self.embed_dims = embed_dims
        self.num_feature_levels = num_feature_levels
        self.num_cams = num_cams
        self.two_stage_num_proposals = two_stage_num_proposals
        self.fp16_enabled = False

        # Behaviour switches.
        self.rotate_prev_bev = rotate_prev_bev
        self.rotate_center = rotate_center
        self.use_shift = use_shift
        self.use_can_bus = use_can_bus
        self.can_bus_norm = can_bus_norm
        self.use_cams_embeds = use_cams_embeds

        self.init_layers()

    def init_layers(self):
        """Create the learnable embeddings, the reference-point head and
        the CAN-bus MLP."""
        dims = self.embed_dims
        hidden = dims // 2

        self.level_embeds = nn.Parameter(
            torch.Tensor(self.num_feature_levels, dims))
        self.cams_embeds = nn.Parameter(torch.Tensor(self.num_cams, dims))
        self.reference_points = nn.Linear(dims, 3)

        # 18 is the length of the CAN-bus vector fed to the MLP.
        self.can_bus_mlp = nn.Sequential(
            nn.Linear(18, hidden),
            nn.ReLU(inplace=True),
            nn.Linear(hidden, dims),
            nn.ReLU(inplace=True),
        )
        if self.can_bus_norm:
            self.can_bus_mlp.add_module('norm', nn.LayerNorm(dims))

    def init_weights(self):
        """Initialize the transformer weights."""
        # Xavier-uniform for every parameter with more than one dimension.
        for param in self.parameters():
            if param.dim() > 1:
                nn.init.xavier_uniform_(param)

        # Attention modules then run their own initializer; both spellings
        # of the method name are tried.
        attention_types = (MSDeformableAttention3D,
                           TemporalSelfAttention,
                           CustomMSDeformableAttention)
        for module in self.modules():
            if isinstance(module, attention_types):
                try:
                    module.init_weight()
                except AttributeError:
                    module.init_weights()

        normal_(self.level_embeds)
        normal_(self.cams_embeds)
        xavier_init(self.reference_points, distribution='uniform', bias=0.)
        xavier_init(self.can_bus_mlp, distribution='uniform', bias=0.)

    @auto_fp16(apply_to=('mlvl_feats', 'bev_queries', 'prev_bev', 'bev_pos'))
    def get_bev_features(
            self,
            mlvl_feats,
            bev_queries,
            bev_h,
            bev_w,
            grid_length=[0.512, 0.512],
            bev_pos=None,
            prev_bev=None,
            **kwargs):
        """Run the encoder and return the BEV embedding.

        Args:
            mlvl_feats (list[Tensor]): Per-level image features; each one
                is unpacked as (bs, num_cam, c, h, w).
            bev_queries (Tensor): BEV queries; a batch axis is inserted at
                dim 1 and repeated `bs` times.
            bev_h (int): BEV grid height.
            bev_w (int): BEV grid width.
            grid_length (list[float]): (y, x) size of one BEV cell, used to
                scale the ego-motion shift.
            bev_pos (Tensor): BEV positional encoding. It is flattened from
                dim 2 on and permuted, so None is not actually supported.
            prev_bev (Tensor | None): Previous BEV embedding. When given
                and `rotate_prev_bev` is set it is rotated in place.
            **kwargs: Must contain `img_metas`, a per-sample list of dicts
                with a `can_bus` entry; everything is forwarded to the
                encoder.

        Returns:
            Whatever the encoder returns for the assembled inputs.
        """
        bs = mlvl_feats[0].size(0)
        bev_queries = bev_queries.unsqueeze(1).repeat(1, bs, 1)
        bev_pos = bev_pos.flatten(2).permute(2, 0, 1)
        img_metas = kwargs['img_metas']

        # Ego motion -> normalized BEV shift.
        delta_x = np.array([meta['can_bus'][0] for meta in img_metas])
        delta_y = np.array([meta['can_bus'][1] for meta in img_metas])
        ego_angle = np.array(
            [meta['can_bus'][-2] / np.pi * 180 for meta in img_metas])
        grid_length_y = grid_length[0]
        grid_length_x = grid_length[1]

        translation_length = np.sqrt(delta_x ** 2 + delta_y ** 2)
        translation_angle = np.arctan2(delta_y, delta_x) / np.pi * 180
        bev_angle = ego_angle - translation_angle
        bev_angle_rad = bev_angle / 180 * np.pi

        shift_y = translation_length * \
            np.cos(bev_angle_rad) / grid_length_y / bev_h
        shift_x = translation_length * \
            np.sin(bev_angle_rad) / grid_length_x / bev_w
        # use_shift acts as a 0/1 multiplier.
        shift_y = shift_y * self.use_shift
        shift_x = shift_x * self.use_shift
        # (xy, bs) -> (bs, xy)
        shift = bev_queries.new_tensor([shift_x, shift_y]).permute(1, 0)

        if prev_bev is not None:
            if prev_bev.shape[1] == bev_h * bev_w:
                prev_bev = prev_bev.permute(1, 0, 2)
            if self.rotate_prev_bev:
                for sample_idx in range(bs):
                    angle = img_metas[sample_idx]['can_bus'][-1]
                    # (bev_h*bev_w, c) -> (c, bev_h, bev_w) for `rotate`.
                    bev_map = prev_bev[:, sample_idx].reshape(
                        bev_h, bev_w, -1).permute(2, 0, 1)
                    bev_map = rotate(
                        bev_map, angle, center=self.rotate_center)
                    bev_map = bev_map.permute(1, 2, 0).reshape(
                        bev_h * bev_w, 1, -1)
                    prev_bev[:, sample_idx] = bev_map[:, 0]

        # Inject the CAN-bus signal into every BEV query.
        can_bus = bev_queries.new_tensor(
            [meta['can_bus'] for meta in img_metas])
        can_bus = self.can_bus_mlp(can_bus)[None, :, :]
        bev_queries = bev_queries + can_bus * self.use_can_bus

        # Flatten each level and tag it with camera / level embeddings.
        flattened_levels = []
        level_shapes = []
        for lvl, feat in enumerate(mlvl_feats):
            _, _, _, h, w = feat.shape
            feat = feat.flatten(3).permute(1, 0, 3, 2)
            if self.use_cams_embeds:
                feat = feat + \
                    self.cams_embeds[:, None, None, :].to(feat.dtype)
            level_embed = self.level_embeds[None, None, lvl:lvl + 1, :]
            feat = feat + level_embed.to(feat.dtype)
            level_shapes.append((h, w))
            flattened_levels.append(feat)

        feat_flatten = torch.cat(flattened_levels, 2)
        spatial_shapes = torch.as_tensor(
            level_shapes, dtype=torch.long, device=bev_pos.device)
        level_start_index = torch.cat((
            spatial_shapes.new_zeros((1,)),
            spatial_shapes.prod(1).cumsum(0)[:-1]))

        # (num_cam, H*W, bs, embed_dims)
        feat_flatten = feat_flatten.permute(0, 2, 1, 3)

        return self.encoder(
            bev_queries,
            feat_flatten,
            feat_flatten,
            bev_h=bev_h,
            bev_w=bev_w,
            bev_pos=bev_pos,
            spatial_shapes=spatial_shapes,
            level_start_index=level_start_index,
            prev_bev=prev_bev,
            shift=shift,
            **kwargs
        )

    @auto_fp16(apply_to=('mlvl_feats', 'bev_queries', 'object_query_embed', 'prev_bev', 'bev_pos'))
    def forward(self,
                mlvl_feats,
                bev_queries,
                object_query_embed,
                bev_h,
                bev_w,
                grid_length=[0.512, 0.512],
                bev_pos=None,
                reg_branches=None,
                cls_branches=None,
                prev_bev=None,
                **kwargs):
        """Encode BEV features, then decode the object queries.

        Args:
            mlvl_feats (list[Tensor]): Per-level image features, each
                unpacked as (bs, num_cams, embed_dims, h, w).
            bev_queries (Tensor): BEV queries, presumably
                (bev_h*bev_w, c) - TODO confirm against the caller.
            object_query_embed (Tensor): Decoder query embedding. It is
                split along dim 1 into two chunks of `embed_dims`: the
                first is the positional part, the second the content part.
            bev_h (int): BEV grid height.
            bev_w (int): BEV grid width.
            grid_length (list[float]): Forwarded to `get_bev_features`.
            bev_pos (Tensor): BEV positional encoding, forwarded to
                `get_bev_features`.
            reg_branches: Forwarded unchanged to the decoder.
            cls_branches: Forwarded unchanged to the decoder.
            prev_bev (Tensor | None): Previous BEV embedding.
            **kwargs: Forwarded to both the encoder and the decoder.

        Returns:
            tuple: `(bev_embed, inter_states, init_reference_out,
            inter_references_out)` where

            - bev_embed: encoder output after `permute(1, 0, 2)`.
            - inter_states: first value returned by the decoder.
            - init_reference_out: sigmoid of the reference-point head
              applied to the positional queries.
            - inter_references_out: second value returned by the decoder.
        """
        # Encoder output is presumably (bs, bev_h*bev_w, embed_dims).
        bev_embed = self.get_bev_features(
            mlvl_feats,
            bev_queries,
            bev_h,
            bev_w,
            grid_length=grid_length,
            bev_pos=bev_pos,
            prev_bev=prev_bev,
            **kwargs)

        bs = mlvl_feats[0].size(0)
        query_pos, query = torch.split(
            object_query_embed, self.embed_dims, dim=1)
        # Broadcast the shared queries over the batch.
        query_pos = query_pos.unsqueeze(0).expand(bs, -1, -1)
        query = query.unsqueeze(0).expand(bs, -1, -1)

        reference_points = self.reference_points(query_pos).sigmoid()
        init_reference_out = reference_points

        # Swap the first two axes of everything handed to the decoder.
        query = query.permute(1, 0, 2)
        query_pos = query_pos.permute(1, 0, 2)
        bev_embed = bev_embed.permute(1, 0, 2)

        bev_shape = torch.tensor([[bev_h, bev_w]], device=query.device)
        bev_start = torch.tensor([0], device=query.device)
        inter_states, inter_references = self.decoder(
            query=query,
            key=None,
            value=bev_embed,
            query_pos=query_pos,
            reference_points=reference_points,
            reg_branches=reg_branches,
            cls_branches=cls_branches,
            spatial_shapes=bev_shape,
            level_start_index=bev_start,
            **kwargs)

        return bev_embed, inter_states, init_reference_out, inter_references
from .epoch_based_runner import EpochBasedRunner_video
\ No newline at end of file
# Copyright (c) OpenMMLab. All rights reserved.
# ---------------------------------------------
# Modified by Zhiqi Li
# ---------------------------------------------
import os.path as osp
import torch
import mmcv
from mmcv.runner.base_runner import BaseRunner
from mmcv.runner.epoch_based_runner import EpochBasedRunner
from mmcv.runner.builder import RUNNERS
from mmcv.runner.checkpoint import save_checkpoint
from mmcv.runner.utils import get_host_info
from pprint import pprint
from mmcv.parallel.data_container import DataContainer
@RUNNERS.register_module()
class EpochBasedRunner_video(EpochBasedRunner):
    """Epoch-based runner that trains on the last frame of a sequence.

    Basic logic::

        input_sequence = [a, b, c]  # given a sequence of samples
        prev_bev = None
        for each in input_sequence[:-1]:
            prev_bev = eval_model(each, prev_bev)  # inference only.
        model(input_sequence[-1], prev_bev)  # train the last sample.

    Args:
        model: Model whose `train_step` is run on the last frame.
        eval_model: Model whose `val_step` produces `prev_bev` for the
            earlier frames. It is switched to eval mode on construction,
            so passing None fails immediately.
        batch_processor: Forwarded to the parent runner; `run_iter`
            refuses to run when it is set.
        optimizer: Forwarded to the parent runner and to both step calls.
        work_dir: Forwarded to the parent runner.
        logger: Forwarded to the parent runner.
        meta: Forwarded to the parent runner.
        keys (list[str]): Entries of `data_batch` passed to the models.
            `'img_metas'` is always appended; the given list is copied,
            never modified.
        max_iters: Forwarded to the parent runner.
        max_epochs: Forwarded to the parent runner.
    """

    def __init__(self,
                 model,
                 eval_model=None,
                 batch_processor=None,
                 optimizer=None,
                 work_dir=None,
                 logger=None,
                 meta=None,
                 keys=['gt_bboxes_3d', 'gt_labels_3d', 'img'],
                 max_iters=None,
                 max_epochs=None):
        super().__init__(model,
                         batch_processor,
                         optimizer,
                         work_dir,
                         logger,
                         meta,
                         max_iters,
                         max_epochs)
        # Copy before extending: appending to `keys` itself would grow the
        # shared default list (and any caller-owned list) on every
        # instantiation.
        self.keys = list(keys) + ['img_metas']
        self.eval_model = eval_model
        self.eval_model.eval()

    def run_iter(self, data_batch, train_mode, **kwargs):
        """Run one training iteration over a multi-frame batch.

        Every frame but the last goes through `eval_model.val_step` under
        `torch.no_grad()`, each call receiving the previous call's result
        as `prev_bev`. The last frame goes through `model.train_step`
        with the final `prev_bev`.

        Args:
            data_batch (dict): Batch of `DataContainer` objects. The frame
                axis is presumably dim 1 of `data_batch['img'].data[0]`
                - TODO confirm against the dataset.
            train_mode (bool): Must be True; validation is not supported.
            **kwargs: Forwarded to `val_step` and `train_step`.

        Raises:
            AssertionError: If a batch processor is configured, if
                `train_mode` is False, or if `self.keys` contains
                `'points'`.
            TypeError: If `train_step` does not return a dict.
        """
        # Explicit raises instead of `assert False`, so the guards survive
        # `python -O`.
        if self.batch_processor is not None:
            raise AssertionError(
                'batch_processor is not supported by EpochBasedRunner_video')
        if not train_mode:
            raise AssertionError(
                'EpochBasedRunner_video only supports train mode')

        num_samples = data_batch['img'].data[0].size(1)
        data_list = []
        prev_bev = None
        for i in range(num_samples):
            data = {}
            for key in self.keys:
                if key == 'img':
                    # Slice frame i out of the stacked image tensor.
                    data['img'] = DataContainer(
                        data=[data_batch['img'].data[0][:, i]],
                        cpu_only=data_batch['img'].cpu_only,
                        stack=True)
                elif key == 'img_metas':
                    data['img_metas'] = DataContainer(
                        data=[[each[i] for each in
                               data_batch['img_metas'].data[0]]],
                        cpu_only=data_batch['img_metas'].cpu_only)
                elif key == 'points':
                    raise AssertionError(
                        "per-frame slicing of 'points' is not implemented")
                else:
                    # Anything else is shared by all frames.
                    data[key] = data_batch[key]
            data_list.append(data)

        # History frames: inference only, chaining prev_bev.
        with torch.no_grad():
            for i in range(num_samples - 1):
                if i > 0:
                    data_list[i]['prev_bev'] = DataContainer(
                        data=[prev_bev], cpu_only=False)
                prev_bev = self.eval_model.val_step(
                    data_list[i], self.optimizer, **kwargs)

        # Last frame: the only one that is trained on.
        data_list[-1]['prev_bev'] = DataContainer(
            data=[prev_bev], cpu_only=False)
        outputs = self.model.train_step(
            data_list[-1], self.optimizer, **kwargs)

        if not isinstance(outputs, dict):
            raise TypeError('"batch_processor()" or "model.train_step()"'
                            'and "model.val_step()" must return a dict')
        if 'log_vars' in outputs:
            self.log_buffer.update(outputs['log_vars'], outputs['num_samples'])
        self.outputs = outputs
\ No newline at end of file
from .hungarian_assigner_3d import HungarianAssigner3D
__all__ = ['HungarianAssigner3D']
import torch
from mmdet.core.bbox.builder import BBOX_ASSIGNERS
from mmdet.core.bbox.assigners import AssignResult
from mmdet.core.bbox.assigners import BaseAssigner
from mmdet.core.bbox.match_costs import build_match_cost
from mmdet.models.utils.transformer import inverse_sigmoid
from projects.mmdet3d_plugin.core.bbox.util import normalize_bbox
try:
from scipy.optimize import linear_sum_assignment
except ImportError:
linear_sum_assignment = None
@BBOX_ASSIGNERS.register_module()
class HungarianAssigner3D(BaseAssigner):
    """One-to-one Hungarian matching between predictions and ground truth.

    The matching cost is the sum of a classification cost and a regression
    cost. The regression cost compares the first 8 channels of the
    predictions with the first 8 channels of the ground-truth boxes after
    `normalize_bbox`. An IoU cost object is built from the config but is
    not added to the matching cost.

    After matching, each prediction carries

    - 0: negative sample, no assigned gt
    - positive integer: positive sample, 1-based index of the assigned gt

    Args:
        cls_cost (dict): Config of the classification cost.
        reg_cost (dict): Config of the regression cost.
        iou_cost (dict): Config of the IoU cost (built, not used in
            `assign`).
        pc_range (list[float] | None): Passed to `normalize_bbox`.
    """

    def __init__(self,
                 cls_cost=dict(type='ClassificationCost', weight=1.),
                 reg_cost=dict(type='BBoxL1Cost', weight=1.0),
                 iou_cost=dict(type='IoUCost', weight=0.0),
                 pc_range=None):
        self.pc_range = pc_range
        self.cls_cost = build_match_cost(cls_cost)
        self.reg_cost = build_match_cost(reg_cost)
        self.iou_cost = build_match_cost(iou_cost)

    def assign(self,
               bbox_pred,
               cls_pred,
               gt_bboxes,
               gt_labels,
               gt_bboxes_ignore=None,
               eps=1e-7):
        """Match every prediction to a ground-truth box or to background.

        Steps:

        1. Start with every prediction at -1.
        2. Compute the classification + regression cost.
        3. Run Hungarian matching on CPU.
        4. Set everything to 0 (background), then write the 1-based gt
           index and the gt label into each matched prediction.

        Args:
            bbox_pred (Tensor): Predicted boxes, one row per query; only
                the first 8 channels enter the cost.
            cls_pred (Tensor): Classification predictions, one row per
                query.
            gt_bboxes (Tensor): Ground-truth boxes, one row per gt, in the
                layout expected by `normalize_bbox`.
            gt_labels (Tensor): Labels of `gt_bboxes`.
            gt_bboxes_ignore (Tensor, optional): Must be None.
            eps (int | float, optional): Unused here. Default 1e-7.

        Returns:
            :obj:`AssignResult`: The assigned result.

        Raises:
            ImportError: If scipy is not installed.
        """
        assert gt_bboxes_ignore is None, \
            'Only case when gt_bboxes_ignore is None is supported.'
        num_gts = gt_bboxes.size(0)
        num_bboxes = bbox_pred.size(0)

        # 1. everything starts as "don't care"
        assigned_gt_inds = bbox_pred.new_full(
            (num_bboxes, ), -1, dtype=torch.long)
        assigned_labels = bbox_pred.new_full(
            (num_bboxes, ), -1, dtype=torch.long)

        nothing_to_match = num_gts == 0 or num_bboxes == 0
        if nothing_to_match:
            if num_gts == 0:
                # without ground truth every prediction is background
                assigned_gt_inds[:] = 0
            return AssignResult(
                num_gts, assigned_gt_inds, None, labels=assigned_labels)

        # 2. classification cost + L1 regression cost on normalized boxes
        cls_cost = self.cls_cost(cls_pred, gt_labels)
        normalized_gt_bboxes = normalize_bbox(gt_bboxes, self.pc_range)
        reg_cost = self.reg_cost(
            bbox_pred[:, :8], normalized_gt_bboxes[:, :8])
        cost = (cls_cost + reg_cost).detach().cpu()

        # 3. Hungarian matching on CPU
        if linear_sum_assignment is None:
            raise ImportError('Please run "pip install scipy" '
                              'to install scipy first.')
        row_inds, col_inds = linear_sum_assignment(cost)
        target_device = bbox_pred.device
        matched_row_inds = torch.from_numpy(row_inds).to(target_device)
        matched_col_inds = torch.from_numpy(col_inds).to(target_device)

        # 4. background first, then overwrite the matched predictions
        assigned_gt_inds[:] = 0
        assigned_gt_inds[matched_row_inds] = matched_col_inds + 1
        assigned_labels[matched_row_inds] = gt_labels[matched_col_inds]
        return AssignResult(
            num_gts, assigned_gt_inds, None, labels=assigned_labels)
\ No newline at end of file
from .nms_free_coder import NMSFreeCoder, MapTRNMSFreeCoder
__all__ = ['NMSFreeCoder', 'MapTRNMSFreeCoder']
import torch
from mmdet.core.bbox import BaseBBoxCoder
from mmdet.core.bbox.builder import BBOX_CODERS
from projects.mmdet3d_plugin.core.bbox.util import denormalize_bbox
import numpy as np
from mmdet.core.bbox.transforms import bbox_xyxy_to_cxcywh, bbox_cxcywh_to_xyxy
def denormalize_3d_pts(pts, pc_range):
    """Map the first three channels of `pts` from [0, 1] back to metric
    coordinates.

    Args:
        pts (Tensor): Points whose last axis starts with (x, y, z).
        pc_range (sequence): (x_min, y_min, z_min, x_max, y_max, z_max).

    Returns:
        Tensor: A copy of `pts` with channel k replaced by
        `pts[k] * (max_k - min_k) + min_k` for k in 0..2; further channels
        are copied unchanged.
    """
    restored = pts.clone()
    for axis in range(3):
        low = pc_range[axis]
        high = pc_range[axis + 3]
        restored[..., axis:axis + 1] = \
            pts[..., axis:axis + 1] * (high - low) + low
    return restored
def normalize_3d_pts(pts, pc_range):
    """Scale metric (x, y, z) points into the unit cube given by
    `pc_range`.

    Args:
        pts (Tensor): Points whose last axis is (x, y, z).
        pc_range (sequence): (x_min, y_min, z_min, x_max, y_max, z_max).

    Returns:
        Tensor: `(pts - min) / (max - min)` per axis, as a new tensor.
    """
    lows = (pc_range[0], pc_range[1], pc_range[2])
    spans = [pc_range[3] - pc_range[0],
             pc_range[4] - pc_range[1],
             pc_range[5] - pc_range[2]]

    shifted = pts.clone()
    for axis, low in enumerate(lows):
        shifted[..., axis:axis + 1] = pts[..., axis:axis + 1] - low
    return shifted / pts.new_tensor(spans)
def normalize_2d_bbox(bboxes, pc_range):
    """Convert xyxy boxes to cxcywh and scale them by the BEV extent.

    Args:
        bboxes (Tensor): Boxes passed to `bbox_xyxy_to_cxcywh`.
        pc_range (sequence): (x_min, y_min, z_min, x_max, y_max, z_max);
            only the x and y entries are used.

    Returns:
        Tensor: Boxes as (cx, cy, w, h). The centre is shifted by
        (x_min, y_min), then all four channels are divided by
        (x extent, y extent, x extent, y extent).
    """
    x_min, y_min = pc_range[0], pc_range[1]
    span_x = pc_range[3] - x_min
    span_y = pc_range[4] - y_min

    boxes = bbox_xyxy_to_cxcywh(bboxes)
    boxes[..., 0:1] = boxes[..., 0:1] - x_min
    boxes[..., 1:2] = boxes[..., 1:2] - y_min

    scale = bboxes.new_tensor([span_x, span_y, span_x, span_y])
    return boxes / scale
def normalize_2d_pts(pts, pc_range):
    """Scale metric (x, y) points into the unit square given by
    `pc_range`.

    Args:
        pts (Tensor): Points whose last axis is (x, y).
        pc_range (sequence): (x_min, y_min, z_min, x_max, y_max, z_max);
            only the x and y entries are used.

    Returns:
        Tensor: `(pts - min) / (max - min)` per axis, as a new tensor.
    """
    x_min, y_min = pc_range[0], pc_range[1]
    extent = pts.new_tensor([pc_range[3] - x_min, pc_range[4] - y_min])

    shifted = pts.clone()
    shifted[..., 0:1] = pts[..., 0:1] - x_min
    shifted[..., 1:2] = pts[..., 1:2] - y_min
    return shifted / extent
def denormalize_2d_bbox(bboxes, pc_range):
    """Convert normalized cxcywh boxes to metric xyxy boxes.

    Args:
        bboxes (Tensor): Boxes passed to `bbox_cxcywh_to_xyxy`.
        pc_range (sequence): (x_min, y_min, z_min, x_max, y_max, z_max);
            only the x and y entries are used.

    Returns:
        Tensor: xyxy boxes whose even channels are scaled by the x extent
        and offset by x_min, and whose odd channels are scaled by the y
        extent and offset by y_min.
    """
    x_min, y_min = pc_range[0], pc_range[1]
    xyxy = bbox_cxcywh_to_xyxy(bboxes)
    xyxy[..., 0::2] = xyxy[..., 0::2] * (pc_range[3] - x_min) + x_min
    xyxy[..., 1::2] = xyxy[..., 1::2] * (pc_range[4] - y_min) + y_min
    return xyxy
def denormalize_2d_pts(pts, pc_range):
    """Map the first two channels of `pts` from [0, 1] back to metric
    coordinates.

    Args:
        pts (Tensor): Points whose last axis starts with (x, y).
        pc_range (sequence): (x_min, y_min, z_min, x_max, y_max, z_max);
            only the x and y entries are used.

    Returns:
        Tensor: A copy of `pts` with x and y rescaled; further channels
        are copied unchanged.
    """
    restored = pts.clone()
    for axis in range(2):
        low = pc_range[axis]
        high = pc_range[axis + 3]
        restored[..., axis:axis + 1] = \
            pts[..., axis:axis + 1] * (high - low) + low
    return restored
@BBOX_CODERS.register_module()
class NMSFreeCoder(BaseBBoxCoder):
    """Bbox coder for NMS-free detector.

    Args:
        pc_range (list[float]): Range of point cloud.
        voxel_size (list[float], optional): Only stored on the instance.
            Default: None.
        post_center_range (list[float]): Limit of the decoded box centre
            as (x_min, y_min, z_min, x_max, y_max, z_max). Decoding raises
            when it is None. Default: None.
        max_num (int): Max number of boxes to keep. Default: 100.
        score_threshold (float): Threshold to filter boxes based on score.
            Default: None.
        num_classes (int): Number of classes, used to split a flattened
            score index into (query, class). Default: 10.
    """

    def __init__(self,
                 pc_range,
                 voxel_size=None,
                 post_center_range=None,
                 max_num=100,
                 score_threshold=None,
                 num_classes=10):
        self.pc_range = pc_range
        self.voxel_size = voxel_size
        self.post_center_range = post_center_range
        self.max_num = max_num
        self.score_threshold = score_threshold
        self.num_classes = num_classes

    def encode(self):
        """Encoding is not needed; intentionally a no-op."""
        pass

    def decode_single(self, cls_scores, bbox_preds):
        """Decode the boxes of one sample.

        Args:
            cls_scores (Tensor): Classification logits, one row per query
                and one column per class.
            bbox_preds (Tensor): Regression outputs in the normalized
                layout (cx, cy, w, l, cz, h, rot_sine, rot_cosine, vx, vy)
                that `denormalize_bbox` expects, one row per query.

        Returns:
            dict: `bboxes`, `scores` and `labels` of the kept boxes.

        Raises:
            NotImplementedError: If `post_center_range` is None.
        """
        flat_scores = cls_scores.sigmoid().view(-1)
        # top-k raises when k exceeds the number of candidates.
        max_num = min(self.max_num, flat_scores.numel())
        scores, indexs = flat_scores.topk(max_num)
        # A flat index encodes (query, class) as query * num_classes + class.
        labels = indexs % self.num_classes
        bbox_index = indexs // self.num_classes
        bbox_preds = bbox_preds[bbox_index]

        final_box_preds = denormalize_bbox(bbox_preds, self.pc_range)
        final_scores = scores
        final_preds = labels

        # Score threshold, relaxed by 10% per step until something passes
        # or the threshold falls below 0.01 (then everything is kept).
        thresh_mask = None
        if self.score_threshold is not None:
            thresh_mask = final_scores > self.score_threshold
            tmp_score = self.score_threshold
            while thresh_mask.sum() == 0:
                tmp_score *= 0.9
                if tmp_score < 0.01:
                    thresh_mask = final_scores > -1
                    break
                thresh_mask = final_scores >= tmp_score

        if self.post_center_range is None:
            raise NotImplementedError(
                'Need to reorganize output as a batch, only '
                'support post_center_range is not None for now!')

        # Use a local tensor so the configured value is not overwritten
        # and not re-wrapped with torch.tensor() on every call.
        post_center_range = torch.as_tensor(
            self.post_center_range, device=scores.device)
        mask = (final_box_preds[..., :3] >=
                post_center_range[:3]).all(1)
        mask &= (final_box_preds[..., :3] <=
                 post_center_range[3:]).all(1)
        # Apply the threshold whenever it was computed, including 0.
        if thresh_mask is not None:
            mask &= thresh_mask

        predictions_dict = {
            'bboxes': final_box_preds[mask],
            'scores': final_scores[mask],
            'labels': final_preds[mask]
        }
        return predictions_dict

    def decode(self, preds_dicts):
        """Decode the boxes of a batch.

        Args:
            preds_dicts (dict): Must contain `all_cls_scores` and
                `all_bbox_preds`. Only the last entry along their first
                axis is decoded; the batch axis comes next.

        Returns:
            list[dict]: One `decode_single` result per sample.
        """
        all_cls_scores = preds_dicts['all_cls_scores'][-1]
        all_bbox_preds = preds_dicts['all_bbox_preds'][-1]
        batch_size = all_cls_scores.size()[0]
        return [
            self.decode_single(all_cls_scores[i], all_bbox_preds[i])
            for i in range(batch_size)
        ]
@BBOX_CODERS.register_module()
class MapTRNMSFreeCoder(BaseBBoxCoder):
    """Bbox and point-set coder for NMS-free map element prediction.

    Args:
        pc_range (list[float]): Range of point cloud.
        z_cfg (dict): When `gt_z_flag` is set the predicted points are
            denormalized as 3D points, otherwise as 2D points.
        voxel_size (list[float], optional): Only stored on the instance.
            Default: None.
        post_center_range (list[float]): Eight values; the first four are
            lower bounds and the last four upper bounds for the four
            decoded box channels. Decoding raises when it is None.
            Default: None.
        max_num (int): Max number of elements to keep. Default: 100.
        score_threshold (float): Threshold to filter elements based on
            score. Default: None.
        num_classes (int): Number of classes, used to split a flattened
            score index into (query, class). Default: 10.
    """

    def __init__(self,
                 pc_range,
                 z_cfg=dict(
                     pred_z_flag=False,
                     gt_z_flag=False,
                 ),
                 voxel_size=None,
                 post_center_range=None,
                 max_num=100,
                 score_threshold=None,
                 num_classes=10):
        self.pc_range = pc_range
        self.voxel_size = voxel_size
        self.post_center_range = post_center_range
        self.max_num = max_num
        self.score_threshold = score_threshold
        self.num_classes = num_classes
        self.z_cfg = z_cfg

    def encode(self):
        """Encoding is not needed; intentionally a no-op."""
        pass

    def decode_single(self, cls_scores, bbox_preds, pts_preds):
        """Decode the boxes and point sets of one sample.

        Args:
            cls_scores (Tensor): Classification logits, one row per query
                and one column per class.
            bbox_preds (Tensor): Normalized boxes, one row per query, in
                the cxcywh layout that `denormalize_2d_bbox` expects.
            pts_preds (Tensor): Normalized points per query, presumably
                (num_query, fixed_num_pts, 2 or 3) - TODO confirm.

        Returns:
            dict: `bboxes`, `scores`, `labels` and `pts` of the kept
            elements.

        Raises:
            NotImplementedError: If `post_center_range` is None.
        """
        flat_scores = cls_scores.sigmoid().view(-1)
        # top-k raises when k exceeds the number of candidates.
        max_num = min(self.max_num, flat_scores.numel())
        scores, indexs = flat_scores.topk(max_num)
        # A flat index encodes (query, class) as query * num_classes + class.
        labels = indexs % self.num_classes
        bbox_index = indexs // self.num_classes
        bbox_preds = bbox_preds[bbox_index]
        pts_preds = pts_preds[bbox_index]

        final_box_preds = denormalize_2d_bbox(bbox_preds, self.pc_range)
        if self.z_cfg['gt_z_flag']:
            final_pts_preds = denormalize_3d_pts(pts_preds, self.pc_range)
        else:
            final_pts_preds = denormalize_2d_pts(pts_preds, self.pc_range)
        final_scores = scores
        final_preds = labels

        # Score threshold, relaxed by 10% per step until something passes
        # or the threshold falls below 0.01 (then everything is kept).
        thresh_mask = None
        if self.score_threshold is not None:
            thresh_mask = final_scores > self.score_threshold
            tmp_score = self.score_threshold
            while thresh_mask.sum() == 0:
                tmp_score *= 0.9
                if tmp_score < 0.01:
                    thresh_mask = final_scores > -1
                    break
                thresh_mask = final_scores >= tmp_score

        if self.post_center_range is None:
            raise NotImplementedError(
                'Need to reorganize output as a batch, only '
                'support post_center_range is not None for now!')

        # Use a local tensor so the configured value is not overwritten
        # and not re-wrapped with torch.tensor() on every call.
        post_center_range = torch.as_tensor(
            self.post_center_range, device=scores.device)
        mask = (final_box_preds[..., :4] >=
                post_center_range[:4]).all(1)
        mask &= (final_box_preds[..., :4] <=
                 post_center_range[4:]).all(1)
        # Apply the threshold whenever it was computed, including 0.
        if thresh_mask is not None:
            mask &= thresh_mask

        predictions_dict = {
            'bboxes': final_box_preds[mask],
            'scores': final_scores[mask],
            'labels': final_preds[mask],
            'pts': final_pts_preds[mask],
        }
        return predictions_dict

    def decode(self, preds_dicts):
        """Decode the boxes and point sets of a batch.

        Args:
            preds_dicts (dict): Must contain `all_cls_scores`,
                `all_bbox_preds` and `all_pts_preds`. Only the last entry
                along their first axis is decoded; the batch axis comes
                next.

        Returns:
            list[dict]: One `decode_single` result per sample.
        """
        all_cls_scores = preds_dicts['all_cls_scores'][-1]
        all_bbox_preds = preds_dicts['all_bbox_preds'][-1]
        all_pts_preds = preds_dicts['all_pts_preds'][-1]
        batch_size = all_cls_scores.size()[0]
        return [
            self.decode_single(
                all_cls_scores[i], all_bbox_preds[i], all_pts_preds[i])
            for i in range(batch_size)
        ]
from mmdet.core.bbox.match_costs import build_match_cost
from .match_cost import BBox3DL1Cost
__all__ = ['build_match_cost', 'BBox3DL1Cost']
\ No newline at end of file
import torch
from mmdet.core.bbox.match_costs.builder import MATCH_COST
@MATCH_COST.register_module()
class BBox3DL1Cost:
    """Pairwise L1 matching cost between predicted and ground-truth boxes.

    Args:
        weight (int | float, optional): Factor applied to the cost.
            Default: 1.
    """

    def __init__(self, weight=1.):
        self.weight = weight

    def __call__(self, bbox_pred, gt_bboxes):
        """Compute the weighted L1 distance of every (prediction, gt) pair.

        Args:
            bbox_pred (Tensor): Predicted boxes, one row per query.
            gt_bboxes (Tensor): Ground-truth boxes, one row per gt, with
                the same number of channels as `bbox_pred`.

        Returns:
            torch.Tensor: `torch.cdist(bbox_pred, gt_bboxes, p=1)` scaled
            by `weight`.
        """
        pairwise_l1 = torch.cdist(bbox_pred, gt_bboxes, p=1)
        return pairwise_l1 * self.weight
\ No newline at end of file
import torch
def normalize_bbox(bboxes, pc_range):
    """Re-encode boxes into the regression target layout.

    Args:
        bboxes (Tensor): Boxes whose last axis is
            (cx, cy, cz, w, l, h, rot[, vx, vy]).
        pc_range: Unused; kept for interface compatibility.

    Returns:
        Tensor: Boxes as
        (cx, cy, log w, log l, cz, log h, sin rot, cos rot[, vx, vy]).
        The velocity channels are appended only when the input has more
        than 7 channels.
    """
    center_x = bboxes[..., 0:1]
    center_y = bboxes[..., 1:2]
    center_z = bboxes[..., 2:3]
    log_w = bboxes[..., 3:4].log()
    log_l = bboxes[..., 4:5].log()
    log_h = bboxes[..., 5:6].log()
    yaw = bboxes[..., 6:7]

    parts = [center_x, center_y, log_w, log_l, center_z, log_h,
             yaw.sin(), yaw.cos()]
    has_velocity = bboxes.size(-1) > 7
    if has_velocity:
        parts.append(bboxes[..., 7:8])
        parts.append(bboxes[..., 8:9])
    return torch.cat(parts, dim=-1)
def denormalize_bbox(normalized_bboxes, pc_range):
    """Decode boxes from the regression target layout.

    Args:
        normalized_bboxes (Tensor): Boxes whose last axis is
            (cx, cy, log w, log l, cz, log h, sin rot, cos rot[, vx, vy]).
            Any number of leading axes is supported.
        pc_range: Unused; kept for interface compatibility.

    Returns:
        Tensor: Boxes as (cx, cy, cz, w, l, h, rot[, vx, vy]). The
        velocity channels are appended only when the input has more than
        8 channels.
    """
    # rotation
    rot_sine = normalized_bboxes[..., 6:7]
    rot_cosine = normalized_bboxes[..., 7:8]
    rot = torch.atan2(rot_sine, rot_cosine)

    # center in the bev
    cx = normalized_bboxes[..., 0:1]
    cy = normalized_bboxes[..., 1:2]
    cz = normalized_bboxes[..., 4:5]

    # size is stored as a logarithm
    w = normalized_bboxes[..., 2:3].exp()
    l = normalized_bboxes[..., 3:4].exp()
    h = normalized_bboxes[..., 5:6].exp()

    parts = [cx, cy, cz, w, l, h, rot]
    if normalized_bboxes.size(-1) > 8:
        # velocity: index the last axis like every other field, so inputs
        # with extra leading axes are sliced correctly
        parts.append(normalized_bboxes[..., 8:9])
        parts.append(normalized_bboxes[..., 9:10])
    return torch.cat(parts, dim=-1)
\ No newline at end of file
from .eval_hooks import CustomDistEvalHook
\ No newline at end of file
# Note: Considering that MMCV's EvalHook updated its interface in V1.3.16,
# in order to avoid strong version dependency, we did not directly
# inherit EvalHook but BaseDistEvalHook.
import bisect
import os.path as osp
import mmcv
import torch.distributed as dist
from mmcv.runner import DistEvalHook as BaseDistEvalHook
from mmcv.runner import EvalHook as BaseEvalHook
from torch.nn.modules.batchnorm import _BatchNorm
from mmdet.core.evaluation.eval_hooks import DistEvalHook
def _calc_dynamic_intervals(start_interval, dynamic_interval_list):
    """Split (milestone, interval) pairs into two parallel lists.

    Args:
        start_interval: Interval in effect from progress 0.
        dynamic_interval_list (list[tuple]): `(milestone, interval)`
            pairs.

    Returns:
        tuple[list, list]: The milestones prefixed with 0 and the
        intervals prefixed with `start_interval`.
    """
    assert mmcv.is_list_of(dynamic_interval_list, tuple)
    milestones = [0] + [pair[0] for pair in dynamic_interval_list]
    intervals = [start_interval] + [pair[1] for pair in dynamic_interval_list]
    return milestones, intervals
class CustomDistEvalHook(BaseDistEvalHook):
    """Distributed eval hook with optional dynamic evaluation intervals
    that evaluates through `custom_multi_gpu_test`.

    Args:
        *args: Forwarded to the parent hook.
        dynamic_intervals (list[tuple] | None): `(milestone, interval)`
            pairs; None keeps a fixed interval.
        **kwargs: Forwarded to the parent hook.
    """

    def __init__(self, *args, dynamic_intervals=None, **kwargs):
        super().__init__(*args, **kwargs)
        self.use_dynamic_intervals = dynamic_intervals is not None
        if self.use_dynamic_intervals:
            milestones, intervals = _calc_dynamic_intervals(
                self.interval, dynamic_intervals)
            self.dynamic_milestones = milestones
            self.dynamic_intervals = intervals

    def _decide_interval(self, runner):
        """Select the interval that applies to the runner's progress."""
        if not self.use_dynamic_intervals:
            return
        progress = runner.epoch if self.by_epoch else runner.iter
        step = bisect.bisect(self.dynamic_milestones, (progress + 1))
        # Dynamically modify the evaluation interval
        self.interval = self.dynamic_intervals[step - 1]

    def before_train_epoch(self, runner):
        """Refresh the interval, then defer to the parent hook."""
        self._decide_interval(runner)
        super().before_train_epoch(runner)

    def before_train_iter(self, runner):
        """Refresh the interval, then defer to the parent hook."""
        self._decide_interval(runner)
        super().before_train_iter(runner)

    def _do_evaluate(self, runner):
        """Perform evaluation and save the checkpoint."""
        # DDP does not synchronize BatchNorm buffers (running_mean and
        # running_var), which can make ranks disagree, so rank 0's
        # buffers are broadcast to every other rank first.
        if self.broadcast_bn_buffer:
            for _, module in runner.model.named_modules():
                is_tracked_bn = isinstance(
                    module, _BatchNorm) and module.track_running_stats
                if is_tracked_bn:
                    dist.broadcast(module.running_var, 0)
                    dist.broadcast(module.running_mean, 0)

        if not self._should_evaluate(runner):
            return

        tmpdir = self.tmpdir
        if tmpdir is None:
            tmpdir = osp.join(runner.work_dir, '.eval_hook')

        # Imported here to avoid a circular import.
        from projects.mmdet3d_plugin.bevformer.apis.test import custom_multi_gpu_test
        results = custom_multi_gpu_test(
            runner.model,
            self.dataloader,
            tmpdir=tmpdir,
            gpu_collect=self.gpu_collect)

        # Only rank 0 scores the results and saves the best checkpoint.
        if runner.rank == 0:
            print('\n')
            runner.log_buffer.output['eval_iter_num'] = len(self.dataloader)
            key_score = self.evaluate(runner, results)
            if self.save_best:
                self._save_ckpt(runner, key_score)
# Copyright (c) OpenMMLab. All rights reserved.
r"""Adapted from `Waymo to KITTI converter
<https://github.com/caizhongang/waymo_kitti_converter>`_.
"""
try:
from waymo_open_dataset import dataset_pb2 as open_dataset
import mmcv
import numpy as np
import tensorflow as tf
from glob import glob
from os.path import join
from waymo_open_dataset import label_pb2
from waymo_open_dataset.protos import metrics_pb2
except ImportError:
#pass
raise ImportError(
'Please run "pip install waymo-open-dataset-tf-2-1-0==1.2.0" '
'to install the official devkit first.')
class KITTI2Waymo(object):
    """KITTI predictions to Waymo converter.

    This class serves as the converter to change predictions from KITTI to
    Waymo format.

    Args:
        kitti_result_files (list[dict]): Predictions in KITTI format.
        waymo_tfrecords_dir (str): Directory to load waymo raw data.
        waymo_results_save_dir (str): Directory to save converted predictions
            in waymo format (.bin files).
        waymo_results_final_path (str): Path to save combined
            predictions in waymo format (.bin file), like 'a/b/c.bin'.
        prefix (str): Prefix of filename. In general, 0 for training, 1 for
            validation and 2 for testing.
        workers (int): Number of parallel processes; the value is passed
            through ``int()``. Default: 64.
    """

    def __init__(self,
                 kitti_result_files,
                 waymo_tfrecords_dir,
                 waymo_results_save_dir,
                 waymo_results_final_path,
                 prefix,
                 workers=64):
        self.kitti_result_files = kitti_result_files
        self.waymo_tfrecords_dir = waymo_tfrecords_dir
        self.waymo_results_save_dir = waymo_results_save_dir
        self.waymo_results_final_path = waymo_results_final_path
        self.prefix = prefix
        self.workers = int(workers)

        # Look-up from the first sample index of every non-empty result
        # (as a string) to the position of that result in the input list.
        self.name2idx = {}
        for idx, result in enumerate(kitti_result_files):
            if len(result['sample_idx']) > 0:
                self.name2idx[str(result['sample_idx'][0])] = idx

        # turn on eager execution for older tensorflow versions
        if int(tf.__version__.split('.')[0]) < 2:
            tf.enable_eager_execution()

        self.k2w_cls_map = {
            'Car': label_pb2.Label.TYPE_VEHICLE,
            'Pedestrian': label_pb2.Label.TYPE_PEDESTRIAN,
            'Sign': label_pb2.Label.TYPE_SIGN,
            'Cyclist': label_pb2.Label.TYPE_CYCLIST,
        }

        # Fixed axis permutation applied before the front-camera extrinsic
        # when building the kitti -> waymo transform in `convert_one`.
        self.T_ref_to_front_cam = np.array([[0.0, 0.0, 1.0, 0.0],
                                            [-1.0, 0.0, 0.0, 0.0],
                                            [0.0, -1.0, 0.0, 0.0],
                                            [0.0, 0.0, 0.0, 1.0]])

        self.get_file_names()
        self.create_folder()

    def get_file_names(self):
        """Get file names of waymo raw data."""
        self.waymo_tfrecord_pathnames = sorted(
            glob(join(self.waymo_tfrecords_dir, '*.tfrecord')))
        print(len(self.waymo_tfrecord_pathnames), 'tfrecords found.')

    def create_folder(self):
        """Create folder for data conversion."""
        mmcv.mkdir_or_exist(self.waymo_results_save_dir)

    def parse_objects(self, kitti_result, T_k2w, context_name,
                      frame_timestamp_micros):
        """Parse one prediction with several instances in kitti format and
        convert them to `Object` proto.

        Args:
            kitti_result (dict): Predictions in kitti format.

                - name (np.ndarray): Class labels of predictions.
                - dimensions (np.ndarray): Box sizes; columns 0, 1 and 2
                  are read as length, height and width respectively.
                - location (np.ndarray): Bottom center of boxes (x, y, z).
                - rotation_y (np.ndarray): Orientation of boxes.
                - score (np.ndarray): Scores of predictions.
            T_k2w (np.ndarray): Transformation matrix from kitti to waymo.
            context_name (str): Context name of the frame.
            frame_timestamp_micros (int): Frame timestamp.

        Returns:
            :obj:`Object`: Predictions in waymo dataset Object proto.
        """

        def parse_one_object(instance_idx):
            """Parse one instance in kitti format and convert them to `Object`
            proto.

            Args:
                instance_idx (int): Index of the instance to be converted.

            Returns:
                :obj:`Object`: Predicted instance in waymo dataset \
                    Object proto.
            """
            cls = kitti_result['name'][instance_idx]
            length = round(kitti_result['dimensions'][instance_idx, 0], 4)
            height = round(kitti_result['dimensions'][instance_idx, 1], 4)
            width = round(kitti_result['dimensions'][instance_idx, 2], 4)
            x = round(kitti_result['location'][instance_idx, 0], 4)
            y = round(kitti_result['location'][instance_idx, 1], 4)
            z = round(kitti_result['location'][instance_idx, 2], 4)
            rotation_y = round(kitti_result['rotation_y'][instance_idx], 4)
            score = round(kitti_result['score'][instance_idx], 4)

            # y: downwards; move box origin from bottom center (kitti) to
            # true center (waymo)
            y -= height / 2

            # frame transformation: kitti -> waymo
            x, y, z = self.transform(T_k2w, x, y, z)

            # different conventions; wrap the heading into [-pi, pi]
            heading = -(rotation_y + np.pi / 2)
            while heading < -np.pi:
                heading += 2 * np.pi
            while heading > np.pi:
                heading -= 2 * np.pi

            box = label_pb2.Label.Box()
            box.center_x = x
            box.center_y = y
            box.center_z = z
            box.length = length
            box.width = width
            box.height = height
            box.heading = heading

            o = metrics_pb2.Object()
            o.object.box.CopyFrom(box)
            o.object.type = self.k2w_cls_map[cls]
            o.score = score

            o.context_name = context_name
            o.frame_timestamp_micros = frame_timestamp_micros

            return o

        objects = metrics_pb2.Objects()

        for instance_idx in range(len(kitti_result['name'])):
            o = parse_one_object(instance_idx)
            objects.objects.append(o)

        return objects

    def convert_one(self, file_idx):
        """Convert action for single file.

        Every frame of the tfrecord is written to its own
        ``<prefix><file_idx:03d><frame_num:03d>.bin`` file inside
        ``waymo_results_save_dir``; frames without a matching prediction
        produce an empty ``Objects`` message.

        Args:
            file_idx (int): Index of the file to be converted.

        Raises:
            RuntimeError: If no FRONT camera calibration has been seen by the
                time a frame needs the kitti -> waymo transform.
        """
        file_pathname = self.waymo_tfrecord_pathnames[file_idx]
        file_data = tf.data.TFRecordDataset(file_pathname, compression_type='')

        # Initialised once per file (not per frame) so that, exactly as
        # before, a calibration found in an earlier frame keeps being used;
        # the explicit None makes a missing calibration detectable.
        T_front_cam_to_vehicle = None

        for frame_num, frame_data in enumerate(file_data):
            frame = open_dataset.Frame()
            frame.ParseFromString(bytearray(frame_data.numpy()))

            filename = f'{self.prefix}{file_idx:03d}{frame_num:03d}'

            for camera in frame.context.camera_calibrations:
                # FRONT = 1, see dataset.proto for details
                if camera.name == 1:
                    T_front_cam_to_vehicle = np.array(
                        camera.extrinsic.transform).reshape(4, 4)

            if T_front_cam_to_vehicle is None:
                raise RuntimeError(
                    f'No FRONT camera calibration found in {file_pathname} '
                    f'(frame {frame_num}).')

            T_k2w = T_front_cam_to_vehicle @ self.T_ref_to_front_cam

            context_name = frame.context.name
            frame_timestamp_micros = frame.timestamp_micros

            if filename in self.name2idx:
                kitti_result = \
                    self.kitti_result_files[self.name2idx[filename]]
                objects = self.parse_objects(kitti_result, T_k2w, context_name,
                                             frame_timestamp_micros)
            else:
                print(filename, 'not found.(bevformer)')
                objects = metrics_pb2.Objects()

            with open(
                    join(self.waymo_results_save_dir, f'{filename}.bin'),
                    'wb') as f:
                f.write(objects.SerializeToString())

    def convert(self):
        """Convert action."""
        print('Start converting ...')
        mmcv.track_parallel_progress(self.convert_one, range(len(self)),
                                     self.workers)
        print('\nFinished ...')

        # combine all files into one .bin
        pathnames = sorted(glob(join(self.waymo_results_save_dir, '*.bin')))
        combined = self.combine(pathnames)

        with open(self.waymo_results_final_path, 'wb') as f:
            f.write(combined.SerializeToString())

    def __len__(self):
        """Length of the filename list."""
        return len(self.waymo_tfrecord_pathnames)

    def transform(self, T, x, y, z):
        """Transform the coordinates with matrix T.

        Args:
            T (np.ndarray): Transformation matrix.
            x(float): Coordinate in x axis.
            y(float): Coordinate in y axis.
            z(float): Coordinate in z axis.

        Returns:
            list: Coordinates after transformation.
        """
        # Homogeneous column vector so a 4x4 matrix can be applied directly.
        pt_bef = np.array([x, y, z, 1.0]).reshape(4, 1)
        pt_aft = np.matmul(T, pt_bef)
        return pt_aft[:3].flatten().tolist()

    def combine(self, pathnames):
        """Combine predictions in waymo format for each sample together.

        Args:
            pathnames (list[str]): Paths of the per-frame prediction files
                to read and merge.

        Returns:
            :obj:`Objects`: Combined predictions in Objects proto.
        """
        combined = metrics_pb2.Objects()

        for pathname in pathnames:
            objects = metrics_pb2.Objects()
            with open(pathname, 'rb') as f:
                objects.ParseFromString(f.read())
            for o in objects.objects:
                combined.objects.append(o)

        return combined
\ No newline at end of file
from .nuscenes_dataset import CustomNuScenesDataset
from .builder import custom_build_dataset
from .nuscenes_map_dataset import CustomNuScenesLocalMapDataset
from .av2_map_dataset import CustomAV2LocalMapDataset
from .nuscenes_offlinemap_dataset import CustomNuScenesOfflineLocalMapDataset
from .av2_offlinemap_dataset import CustomAV2OfflineLocalMapDataset
# Names exported by a star-import of this package. Only two of the dataset
# classes imported above are listed here; the other imported names are not
# re-exported through `__all__`.
__all__ = [
'CustomNuScenesDataset','CustomNuScenesLocalMapDataset'
]
import copy
import numpy as np
from mmdet.datasets import DATASETS
from mmdet3d.datasets import NuScenesDataset
import mmcv
import os
from os import path as osp
from mmdet.datasets import DATASETS
import torch
import numpy as np
from nuscenes.eval.common.utils import quaternion_yaw, Quaternion
from .nuscnes_eval import NuScenesEval_custom
from projects.mmdet3d_plugin.models.utils.visual import save_tensor
from mmcv.parallel import DataContainer as DC
import random
from .nuscenes_dataset import CustomNuScenesDataset
from nuscenes.map_expansion.map_api import NuScenesMap, NuScenesMapExplorer
from nuscenes.eval.common.utils import quaternion_yaw, Quaternion
from shapely import affinity, ops
from shapely.geometry import Polygon, LineString, box, MultiPolygon, MultiLineString
from mmdet.datasets.pipelines import to_tensor
import json
from pathlib import Path
from av2.datasets.sensor.av2_sensor_dataloader import AV2SensorDataLoader
from av2.map.lane_segment import LaneMarkType, LaneSegment
from av2.map.map_api import ArgoverseStaticMap
from av2.geometry.se3 import SE3
import av2.geometry.interpolate as interp_utils
class LiDARInstanceLines(object):
    """Line instances in LiDAR coordinates.

    Wraps a list of shapely ``LineString`` objects and exposes tensor views
    of them (end points, bounding boxes, resampled points, and several
    "shifted" orderings of the resampled points). In every view the x / y
    columns are clamped to ``[-patch_size[1] / 2, patch_size[1] / 2]`` and
    ``[-patch_size[0] / 2, patch_size[0] / 2]`` respectively.

    Args:
        instance_line_list (list[LineString]): The line instances.
        sample_dist: Stored as ``self.sample_dist``; not read by this class.
        num_samples: Stored as ``self.num_samples``; not read by this class.
        padding: Stored as ``self.padding``; not read by this class.
        fixed_num (int): Number of points every line is resampled to.
        padding_value: Fill value of the padded rows in the shift views.
        patch_size: Two-element size; index 0 bounds y, index 1 bounds x.
            Required (must not be None).
    """

    def __init__(self,
                 instance_line_list,
                 sample_dist=1,
                 num_samples=250,
                 padding=False,
                 fixed_num=-1,
                 padding_value=-10000,
                 patch_size=None):
        assert isinstance(instance_line_list, list)
        assert patch_size is not None
        if len(instance_line_list) != 0:
            assert isinstance(instance_line_list[0], LineString)
        self.patch_size = patch_size
        self.max_x = self.patch_size[1] / 2
        self.max_y = self.patch_size[0] / 2
        self.sample_dist = sample_dist
        self.num_samples = num_samples
        self.padding = padding
        self.fixed_num = fixed_num
        self.padding_value = padding_value

        self.instance_list = instance_line_list

    # ------------------------------------------------------------------
    # private helpers shared by the properties below
    # ------------------------------------------------------------------
    def _clamp_xy(self, tensor, x_cols=(0,), y_cols=(1,)):
        """Clamp the listed last-axis columns to the patch extent in place.

        Returns the same tensor so the call can be chained.
        """
        for col in x_cols:
            tensor[..., col].clamp_(min=-self.max_x, max=self.max_x)
        for col in y_cols:
            tensor[..., col].clamp_(min=-self.max_y, max=self.max_y)
        return tensor

    @staticmethod
    def _sample_along(line, distances, num_coords):
        """Interpolate ``line`` at every distance; returns [len, num_coords]."""
        return np.array(
            [list(line.interpolate(distance).coords) for distance in distances]
        ).reshape(-1, num_coords)

    def _resample_rolled(self, ring_pts, num_shifts, distances, num_coords):
        """Resample every cyclic shift of an open ring after re-closing it."""
        sampled = []
        for shift_right_i in range(num_shifts):
            rolled = np.roll(ring_pts, shift_right_i, axis=0)
            # repeat the first vertex at the end so the ring is closed again
            closed = np.concatenate((rolled, rolled[:1]), axis=0)
            sampled.append(
                self._sample_along(LineString(closed), distances, num_coords))
        return sampled

    def _pad_shifts(self, shifts, target_num):
        """Append ``padding_value`` rows until dim 0 equals ``target_num``."""
        filler = shifts.new_full(
            (target_num - shifts.shape[0], shifts.shape[1], shifts.shape[-1]),
            self.padding_value)
        return torch.cat([shifts, filler], dim=0)

    def _resampled_points(self):
        """Resample every instance to ``fixed_num`` points along its length.

        Returns a float32 tensor [N, fixed_num, C] with C = 3 for instances
        with ``has_z`` and 2 otherwise, x / y clamped to the patch.
        """
        assert len(self.instance_list) != 0
        instance_points_list = []
        for instance in self.instance_list:
            distances = np.linspace(0, instance.length, self.fixed_num)
            num_coords = 3 if instance.has_z else 2
            instance_points_list.append(
                self._sample_along(instance, distances, num_coords))
        instance_points_tensor = to_tensor(np.array(instance_points_list))
        instance_points_tensor = instance_points_tensor.to(dtype=torch.float32)
        return self._clamp_xy(instance_points_tensor)

    def _roll_or_flip_shifts(self, sampled_points):
        """Shift view used by the plain and the ``_torch`` shift properties.

        A point set whose first and last points are equal is treated as a
        polygon and contributes all ``fixed_num`` cyclic rolls; any other
        point set contributes itself and its reverse, padded with
        ``padding_value`` rows up to ``fixed_num`` rows.
        """
        instances_list = []
        for fixed_num_pts in sampled_points:
            # [fixed_num, C]
            is_poly = fixed_num_pts[0].equal(fixed_num_pts[-1])
            fixed_num = fixed_num_pts.shape[0]
            if is_poly:
                variants = [fixed_num_pts.roll(shift_right_i, 0)
                            for shift_right_i in range(fixed_num)]
            else:
                variants = [fixed_num_pts, fixed_num_pts.flip(0)]
            shift_pts = self._clamp_xy(torch.stack(variants, dim=0))
            if not is_poly:
                shift_pts = self._pad_shifts(shift_pts, fixed_num)
            instances_list.append(shift_pts)
        return torch.stack(instances_list, dim=0).to(dtype=torch.float32)

    def _closed_ring_shifts(self, sampled_points, both_directions):
        """Shift view used by the ``_v1`` and ``_v4`` properties.

        For polygons the duplicated closing point is dropped, the remaining
        ``fixed_num - 1`` points are rolled (and, with ``both_directions``,
        rolled again in reversed order) and every variant is re-closed.
        Lines contribute themselves and their reverse, padded to the same
        number of rows a polygon would produce.
        """
        instances_list = []
        for fixed_num_pts in sampled_points:
            # [fixed_num, C]
            is_poly = fixed_num_pts[0].equal(fixed_num_pts[-1])
            pts_num = fixed_num_pts.shape[0]
            shift_num = pts_num - 1
            if is_poly:
                pts_to_shift = fixed_num_pts[:-1, :]
                variants = [pts_to_shift.roll(shift_right_i, 0)
                            for shift_right_i in range(shift_num)]
                if both_directions:
                    flip_pts_to_shift = pts_to_shift.flip(0)
                    variants += [flip_pts_to_shift.roll(shift_right_i, 0)
                                 for shift_right_i in range(shift_num)]
                shift_pts = torch.stack(variants, dim=0)
                # re-close each ring by repeating its first point at the end
                shift_pts = torch.cat([shift_pts, shift_pts[:, :1, :]], dim=1)
            else:
                shift_pts = torch.stack(
                    [fixed_num_pts, fixed_num_pts.flip(0)], dim=0)

            self._clamp_xy(shift_pts)

            if not is_poly:
                target = shift_num * 2 if both_directions else shift_num
                shift_pts = self._pad_shifts(shift_pts, target)
            instances_list.append(shift_pts)
        return torch.stack(instances_list, dim=0).to(dtype=torch.float32)

    def _resampled_ring_shifts(self, both_directions):
        """Shift view used by the ``_v2`` and ``_v3`` properties.

        Polygons are shifted at vertex level (before resampling); when more
        shifts exist than the target count a random subset is drawn with
        ``np.random.choice``. Lines contribute themselves and their reverse.
        Every instance is padded with ``padding_value`` rows up to
        ``fixed_num - 1`` rows (twice that with ``both_directions``).
        """
        assert len(self.instance_list) != 0
        instances_list = []
        final_shift_num = self.fixed_num - 1
        target = 2 * final_shift_num if both_directions else final_shift_num
        for instance in self.instance_list:
            distances = np.linspace(0, instance.length, self.fixed_num)
            poly_pts = np.array(list(instance.coords))
            is_poly = np.equal(poly_pts[0], poly_pts[-1]).all()
            pts_num, coords_num = poly_pts.shape
            shift_num = pts_num - 1
            if is_poly:
                pts_to_shift = poly_pts[:-1, :]
                shift_pts_list = self._resample_rolled(
                    pts_to_shift, shift_num, distances, coords_num)
                if both_directions:
                    shift_pts_list += self._resample_rolled(
                        np.flip(pts_to_shift, axis=0), shift_num, distances,
                        coords_num)
            else:
                sampled_points = self._sample_along(
                    instance, distances, coords_num)
                shift_pts_list = [sampled_points,
                                  np.flip(sampled_points, axis=0)]

            multi_shifts_pts = np.stack(shift_pts_list, axis=0)
            shifts_num = multi_shifts_pts.shape[0]

            if shifts_num > target:
                if both_directions:
                    # pick the same shifts in the forward and reversed halves
                    index = np.random.choice(
                        shift_num, final_shift_num, replace=False)
                    multi_shifts_pts = np.concatenate(
                        (multi_shifts_pts[index],
                         multi_shifts_pts[index + shift_num]), axis=0)
                else:
                    index = np.random.choice(
                        shifts_num, final_shift_num, replace=False)
                    multi_shifts_pts = multi_shifts_pts[index]

            multi_shifts_pts_tensor = to_tensor(multi_shifts_pts)
            multi_shifts_pts_tensor = multi_shifts_pts_tensor.to(
                dtype=torch.float32)
            self._clamp_xy(multi_shifts_pts_tensor)

            if multi_shifts_pts_tensor.shape[0] < target:
                multi_shifts_pts_tensor = self._pad_shifts(
                    multi_shifts_pts_tensor, target)
            instances_list.append(multi_shifts_pts_tensor)
        return torch.stack(instances_list, dim=0).to(dtype=torch.float32)

    # ------------------------------------------------------------------
    # public views
    # ------------------------------------------------------------------
    @property
    def start_end_points(self):
        """
        return torch.Tensor([N,4]), in xstart, ystart, xend, yend form
        """
        assert len(self.instance_list) != 0
        instance_se_points_list = []
        for instance in self.instance_list:
            se_points = []
            se_points.extend(instance.coords[0])
            se_points.extend(instance.coords[-1])
            instance_se_points_list.append(se_points)
        instance_se_points_tensor = to_tensor(
            np.array(instance_se_points_list))
        instance_se_points_tensor = instance_se_points_tensor.to(
            dtype=torch.float32)
        return self._clamp_xy(
            instance_se_points_tensor, x_cols=(0, 2), y_cols=(1, 3))

    @property
    def bbox(self):
        """
        return torch.Tensor([N,4]), in xmin, ymin, xmax, ymax form
        """
        assert len(self.instance_list) != 0
        # bounds is bbox: [xmin, ymin, xmax, ymax]
        instance_bbox_list = [
            instance.bounds for instance in self.instance_list]
        instance_bbox_tensor = to_tensor(np.array(instance_bbox_list))
        instance_bbox_tensor = instance_bbox_tensor.to(dtype=torch.float32)
        return self._clamp_xy(
            instance_bbox_tensor, x_cols=(0, 2), y_cols=(1, 3))

    @property
    def fixed_num_sampled_points(self):
        """
        return torch.Tensor([N,fixed_num,C]) of points sampled evenly along
            each instance; C is 3 for instances with z and 2 otherwise
        N means the num of instances
        """
        return self._resampled_points()

    @property
    def fixed_num_sampled_points_ambiguity(self):
        """
        return torch.Tensor([N,1,fixed_num,C]): the evenly sampled points
            with an extra singleton axis inserted after the instance axis
        N means the num of instances
        """
        # The original body additionally evaluated
        # ``tensor if is_3d else tensor[:, :, :2]`` with ``is_3d`` never
        # defined, so the property always raised NameError. That expression
        # is an identity for both 2-D and 3-D points, hence it is dropped.
        return self._resampled_points().unsqueeze(1)

    @property
    def fixed_num_sampled_points_torch(self):
        """
        return torch.Tensor([N,fixed_num,C]) obtained by linearly
            interpolating each instance's vertex sequence to fixed_num
            entries with torch (by vertex index, not by arc length)
        N means the num of instances
        """
        assert len(self.instance_list) != 0
        instance_points_list = []
        for instance in self.instance_list:
            poly_pts = to_tensor(np.array(list(instance.coords)))
            # [num_vertices, C] -> [1, C, num_vertices] for 1-D interpolation
            poly_pts = poly_pts.unsqueeze(0).permute(0, 2, 1)
            sampled_pts = torch.nn.functional.interpolate(
                poly_pts, size=(self.fixed_num), mode='linear',
                align_corners=True)
            sampled_pts = sampled_pts.permute(0, 2, 1).squeeze(0)
            instance_points_list.append(sampled_pts)
        instance_points_tensor = torch.stack(instance_points_list, dim=0)
        instance_points_tensor = instance_points_tensor.to(
            dtype=torch.float32)
        return self._clamp_xy(instance_points_tensor)

    @property
    def shift_fixed_num_sampled_points(self):
        """
        return [instances_num, num_shifts, fixed_num, C]
        """
        return self._roll_or_flip_shifts(self.fixed_num_sampled_points)

    @property
    def shift_fixed_num_sampled_points_v1(self):
        """
        return [instances_num, num_shifts, fixed_num, C]
        """
        return self._closed_ring_shifts(
            self.fixed_num_sampled_points, both_directions=False)

    @property
    def shift_fixed_num_sampled_points_v2(self):
        """
        return [instances_num, num_shifts, fixed_num, C]
        """
        return self._resampled_ring_shifts(both_directions=False)

    @property
    def shift_fixed_num_sampled_points_v3(self):
        """
        return [instances_num, num_shifts, fixed_num, C]
        """
        return self._resampled_ring_shifts(both_directions=True)

    @property
    def shift_fixed_num_sampled_points_v4(self):
        """
        return [instances_num, num_shifts, fixed_num, C]
        """
        return self._closed_ring_shifts(
            self.fixed_num_sampled_points, both_directions=True)

    @property
    def shift_fixed_num_sampled_points_torch(self):
        """
        return [instances_num, num_shifts, fixed_num, C]
        """
        return self._roll_or_flip_shifts(self.fixed_num_sampled_points_torch)
# @property
# def polyline_points(self):
# """
# return [[x0,y0],[x1,y1],...]
# """
# assert len(self.instance_list) != 0
# for instance in self.instance_list:
class VectorizedAV2LocalMap(object):
CLASS2LABEL = {
'road_divider': 0,
'lane_divider': 0,
'divider': 0,
'ped_crossing': 1,
'boundary': 2,
'others': -1
}
def __init__(self,
             dataroot,
             patch_size,
             test_mode=False,
             map_classes=['divider','ped_crossing','boundary'],
             line_classes=['road_divider', 'lane_divider'],
             ped_crossing_classes=['ped_crossing'],
             contour_classes=['road_segment', 'lane'],
             sample_dist=1,
             num_samples=250,
             padding=False,
             fixed_ptsnum_per_line=-1,
             padding_value=-10000,):
    '''
    Store the map-vectorization settings and open the AV2 sensor loader.

    Args:
        dataroot: Dataset root; "val" (test mode) or "train" is appended
            to form `self.data_root`, while the loader itself is opened
            on `dataroot` directly.
        patch_size: Stored as `self.patch_size`.
        test_mode: Selects the "val" sub-directory when truthy.
        map_classes: Stored as `self.vec_classes`.
        line_classes: Stored as `self.line_classes`.
        ped_crossing_classes: Stored as `self.ped_crossing_classes`.
        contour_classes: Stored as `self.polygon_classes`.
        sample_dist, num_samples, padding, padding_value: Stored under
            the same names.
        fixed_ptsnum_per_line: Stored as `self.fixed_num`
            (-1 : no fixed num).
    '''
    super().__init__()
    self.test_mode = test_mode
    split_dir = "val" if self.test_mode else "train"
    self.data_root = osp.join(dataroot, split_dir)

    root_path = Path(dataroot)
    self.loader = AV2SensorDataLoader(data_dir=root_path, labels_dir=Path(dataroot))

    # class groupings
    self.vec_classes = map_classes
    self.line_classes = line_classes
    self.ped_crossing_classes = ped_crossing_classes
    self.polygon_classes = contour_classes

    # sampling / padding configuration
    self.patch_size = patch_size
    self.sample_dist = sample_dist
    self.num_samples = num_samples
    self.padding = padding
    self.fixed_num = fixed_ptsnum_per_line
    self.padding_value = padding_value
def gen_vectorized_samples(self, location, map_elements, lidar2global_translation, lidar2global_rotation):
    '''
    Build the vectorized ground-truth map elements around one pose.

    use lidar2global to get gt map layers
    av2 lidar2global the same as ego2global
    location the same as log_id

    Args:
        location: Not read by this method.
        map_elements: Indexed by the class names in `self.vec_classes`;
            each entry is handed to the matching `get_map_*_geom` method.
        lidar2global_translation: Its first two entries are the patch
            centre; the full value is passed to `SE3`.
        lidar2global_rotation: Rotation passed to
            `Quaternion._from_matrix` and to `SE3`.

    Returns:
        dict: `gt_vecs_pts_loc` (a `LiDARInstanceLines`) and
        `gt_vecs_label` (list of labels, one per kept instance).

    Raises:
        ValueError: If `self.vec_classes` contains a name other than
            'divider', 'ped_crossing' or 'boundary'.
    '''
    map_pose = lidar2global_translation[:2]
    rotation = Quaternion._from_matrix(lidar2global_rotation)

    patch_box = (map_pose[0], map_pose[1], self.patch_size[0], self.patch_size[1])
    # yaw converted from radians to degrees
    patch_angle = quaternion_yaw(rotation) / np.pi * 180

    city_SE2_ego = SE3(lidar2global_rotation, lidar2global_translation)
    ego_SE3_city = city_SE2_ego.inverse()

    vectors = []
    for vec_class in self.vec_classes:
        if vec_class == 'divider':
            line_geom = self.get_map_divider_geom(patch_box, patch_angle, map_elements[vec_class], ego_SE3_city)
            instances = self.line_geoms_to_instances(line_geom)
        elif vec_class == 'ped_crossing':
            ped_geom = self.get_map_ped_geom(patch_box, patch_angle, map_elements[vec_class], ego_SE3_city)
            instances = self.ped_poly_geoms_to_instances(ped_geom)
        elif vec_class == 'boundary':
            polygon_geom = self.get_map_boundary_geom(patch_box, patch_angle, map_elements[vec_class], ego_SE3_city)
            instances = self.bound_poly_geoms_to_instances(polygon_geom)
        else:
            raise ValueError(f'WRONG vec_class: {vec_class}')
        label = self.CLASS2LABEL.get(vec_class, -1)
        vectors.extend((instance, label) for instance in instances)

    # filter out -1
    kept = [(instance, label) for instance, label in vectors if label != -1]
    gt_instance = [instance for instance, _ in kept]
    gt_labels = [label for _, label in kept]

    gt_instance = LiDARInstanceLines(gt_instance,self.sample_dist,
                    self.num_samples, self.padding, self.fixed_num,self.padding_value, patch_size=self.patch_size)

    anns_results = dict(
        gt_vecs_pts_loc=gt_instance,
        gt_vecs_label=gt_labels,
    )
    return anns_results
def proc_polygon(self, polygon, ego_SE3_city):
    """Rebuild `polygon` from its rings after `ego_SE3_city` is applied.

    The exterior and every interior ring are passed through
    `ego_SE3_city.transform_point_cloud` and only the first two columns
    of the result are used for the returned `Polygon`.
    """
    def _ring_to_xy(ring):
        ring_pts = np.array(list(ring.coords))
        return ego_SE3_city.transform_point_cloud(ring_pts)[:, :2]

    shell_xy = _ring_to_xy(polygon.exterior)
    holes_xy = [_ring_to_xy(hole) for hole in polygon.interiors]
    return Polygon(shell_xy, holes_xy)
def get_map_boundary_geom(self, patch_box, patch_angle, avm, ego_SE3_city):
    """Clip boundary polygons to the patch and transform them.

    Args:
        patch_box: Patch description forwarded to
            `NuScenesMapExplorer.get_patch_coord`.
        patch_angle: Patch orientation forwarded to the same call.
        avm: Iterable of coordinate sequences; each must be a closed
            ring (first entry equal to the last).
        ego_SE3_city: Transform forwarded to `proc_polygon`.

    Returns:
        list: A single `('boundary', polygon_list)` tuple in which every
        entry of `polygon_list` is a `MultiPolygon`.

    Raises:
        ValueError: If a coordinate sequence is not closed, or if the
            clipped geometry is neither a Polygon nor a MultiPolygon.
    """
    patch = NuScenesMapExplorer.get_patch_coord(patch_box, patch_angle)
    polygon_list = []
    for exterior_coords in avm:
        # An open coordinate sequence is a line, which is not accepted as a
        # boundary. (The interactive pdb breakpoint that used to sit here
        # has been removed; the error is raised directly.)
        if not np.array_equal(exterior_coords[0], exterior_coords[-1]):
            raise ValueError('WRONG type: line in boundary')

        polygon = Polygon(exterior_coords, [])
        if not polygon.is_valid:
            continue
        new_polygon = polygon.intersection(patch)
        if new_polygon.is_empty:
            continue

        # Geometry type names are compared with ==; identity (`is`) on
        # string literals only works by interning accident.
        geom_type = new_polygon.geom_type
        if geom_type == 'Polygon':
            if not new_polygon.is_valid:
                continue
            new_polygon = self.proc_polygon(new_polygon, ego_SE3_city)
            if not new_polygon.is_valid:
                continue
            # single polygons are wrapped so every list entry is a MultiPolygon
            new_polygon = MultiPolygon([new_polygon])
        elif geom_type == 'MultiPolygon':
            polygons = []
            for single_polygon in new_polygon.geoms:
                if not single_polygon.is_valid or single_polygon.is_empty:
                    continue
                new_single_polygon = self.proc_polygon(single_polygon, ego_SE3_city)
                if not new_single_polygon.is_valid:
                    continue
                polygons.append(new_single_polygon)
            if len(polygons) == 0:
                continue
            new_polygon = MultiPolygon(polygons)
            if not new_polygon.is_valid:
                continue
        else:
            raise ValueError('{} is not valid'.format(geom_type))

        polygon_list.append(new_polygon)

    return [('boundary', polygon_list)]
def get_map_ped_geom(self, patch_box, patch_angle, avm, ego_SE3_city):
    """Clip pedestrian-crossing polygons to the patch and transform them.

    Args:
        patch_box: Patch description forwarded to
            `NuScenesMapExplorer.get_patch_coord`.
        patch_angle: Patch orientation forwarded to the same call.
        avm: Iterable of coordinate sequences, each used as a polygon
            exterior.
        ego_SE3_city: Transform forwarded to `proc_polygon`.

    Returns:
        list: A single `('ped_crossing', polygon_list)` tuple in which
        every entry of `polygon_list` is a `MultiPolygon`.

    Raises:
        ValueError: If a clipped geometry is neither a Polygon nor a
            MultiPolygon.
    """
    patch = NuScenesMapExplorer.get_patch_coord(patch_box, patch_angle)
    polygon_list = []
    for exterior_coords in avm:
        polygon = Polygon(exterior_coords, [])
        if not polygon.is_valid:
            continue
        new_polygon = polygon.intersection(patch)
        if new_polygon.is_empty:
            continue

        # Geometry type names are compared with ==; identity (`is`) on
        # string literals only works by interning accident.
        geom_type = new_polygon.geom_type
        if geom_type == 'Polygon':
            if not new_polygon.is_valid:
                continue
            new_polygon = self.proc_polygon(new_polygon, ego_SE3_city)
            if not new_polygon.is_valid:
                continue
            # single polygons are wrapped so every list entry is a MultiPolygon
            new_polygon = MultiPolygon([new_polygon])
        elif geom_type == 'MultiPolygon':
            polygons = []
            for single_polygon in new_polygon.geoms:
                if not single_polygon.is_valid or single_polygon.is_empty:
                    continue
                new_single_polygon = self.proc_polygon(single_polygon, ego_SE3_city)
                if not new_single_polygon.is_valid:
                    continue
                polygons.append(new_single_polygon)
            if len(polygons) == 0:
                continue
            new_polygon = MultiPolygon(polygons)
            if not new_polygon.is_valid:
                continue
        else:
            raise ValueError('{} is not valid'.format(geom_type))

        polygon_list.append(new_polygon)

    return [('ped_crossing', polygon_list)]
def proc_line(self, line,ego_SE3_city):
    """Return a new `LineString` from `line` after `ego_SE3_city` is applied.

    The line's coordinates go through `ego_SE3_city.transform_point_cloud`
    and only the first two columns of the result are kept.
    """
    source_pts = np.array(list(line.coords))
    moved_pts = ego_SE3_city.transform_point_cloud(source_pts)
    return LineString(moved_pts[:, :2])  # TODO
def get_map_divider_geom(self, patch_box, patch_angle, avm, ego_SE3_city):
    """Clip divider polylines to the patch and transform them.

    Args:
        patch_box: Patch description forwarded to
            `NuScenesMapExplorer.get_patch_coord`.
        patch_angle: Patch orientation forwarded to the same call.
        avm: Iterable of coordinate sequences, one per polyline.
        ego_SE3_city: Transform forwarded to `proc_line`.

    Returns:
        list: A single `('divider', line_list)` tuple.
    """
    patch = NuScenesMapExplorer.get_patch_coord(patch_box, patch_angle)
    line_list = []
    for coords in avm:
        polyline = LineString(coords)
        if polyline.is_empty:  # Skip lines without nodes.
            continue
        clipped = polyline.intersection(patch)
        if clipped.is_empty:
            continue
        if clipped.geom_type == 'MultiLineString':
            # keep every non-empty piece of a line split by the patch
            pieces = [piece for piece in clipped.geoms if not piece.is_empty]
        else:
            pieces = [clipped]
        for piece in pieces:
            line_list.append(self.proc_line(piece, ego_SE3_city))
    return [('divider', line_list)]
def _one_type_line_geom_to_instances(self, line_geom):
line_instances = []
for line in line_geom:
if not line.is_empty:
if line.geom_type == 'MultiLineString':
for single_line in line.geoms:
line_instances.append(single_line)
elif line.geom_type == 'LineString':
line_instances.append(line)
else:
raise NotImplementedError
return line_instances
def ped_poly_geoms_to_instances(self, ped_geom):
    """Turn pedestrian-crossing polygons into boundary line instances.

    Every polygon contributes its exterior ring (forced clockwise) and its
    interior rings (forced counter-clockwise). Each ring is clipped to a box
    that is 0.2 larger than the patch on every side, multi-part clips are
    merged with ``ops.linemerge``, and the results are flattened by
    ``self._one_type_line_geom_to_instances``.

    Args:
        ped_geom: Sequence whose first entry is ``(name, polygons)``; only
            ``ped_geom[0][1]`` is read.

    Returns:
        list: Line instances, exteriors first, then interiors.
    """
    # Local import: the ring type is needed only here. Geometries are
    # immutable in Shapely 2.0, so a reversed ring is built as a new object
    # instead of assigning to ``ring.coords``.
    from shapely.geometry import LinearRing

    ped = ped_geom[0][1]
    max_x = self.patch_size[1] / 2
    max_y = self.patch_size[0] / 2
    local_patch = box(-max_x - 0.2, -max_y - 0.2, max_x + 0.2, max_y + 0.2)

    exteriors = []
    interiors = []
    for segments in ped:
        if segments.geom_type != 'MultiPolygon':
            segments = MultiPolygon([segments])
        for poly in segments.geoms:
            exteriors.append(poly.exterior)
            interiors.extend(poly.interiors)

    results = []
    for ext in exteriors:
        if ext.is_ccw:
            ext = LinearRing(list(ext.coords)[::-1])
        lines = ext.intersection(local_patch)
        if isinstance(lines, MultiLineString):
            lines = ops.linemerge(lines)
        results.append(lines)
    for inter in interiors:
        if not inter.is_ccw:
            inter = LinearRing(list(inter.coords)[::-1])
        lines = inter.intersection(local_patch)
        if isinstance(lines, MultiLineString):
            lines = ops.linemerge(lines)
        results.append(lines)
    return self._one_type_line_geom_to_instances(results)
def bound_poly_geoms_to_instances(self, polygon_geom):
    """Turn boundary polygons into boundary line instances.

    The polygons are merged with ``ops.unary_union``. Every resulting
    polygon contributes its exterior ring (forced clockwise) and its
    interior rings (forced counter-clockwise). Each ring is clipped to a box
    that is 0.2 smaller than the patch on every side, multi-part clips are
    merged with ``ops.linemerge``, and the results are flattened by
    ``self._one_type_line_geom_to_instances``.

    Args:
        polygon_geom: Sequence whose first entry is ``(name, polygons)``;
            only ``polygon_geom[0][1]`` is read.

    Returns:
        list: Line instances, exteriors first, then interiors.
    """
    # Local import: the ring type is needed only here. Geometries are
    # immutable in Shapely 2.0, so a reversed ring is built as a new object
    # instead of assigning to ``ring.coords``.
    from shapely.geometry import LinearRing

    bounds = polygon_geom[0][1]
    union_segments = ops.unary_union(bounds)
    max_x = self.patch_size[1] / 2
    max_y = self.patch_size[0] / 2
    local_patch = box(-max_x + 0.2, -max_y + 0.2, max_x - 0.2, max_y - 0.2)

    if union_segments.geom_type != 'MultiPolygon':
        union_segments = MultiPolygon([union_segments])
    exteriors = []
    interiors = []
    for poly in union_segments.geoms:
        exteriors.append(poly.exterior)
        interiors.extend(poly.interiors)

    results = []
    for ext in exteriors:
        if ext.is_ccw:
            ext = LinearRing(list(ext.coords)[::-1])
        lines = ext.intersection(local_patch)
        if isinstance(lines, MultiLineString):
            lines = ops.linemerge(lines)
        results.append(lines)
    for inter in interiors:
        if not inter.is_ccw:
            inter = LinearRing(list(inter.coords)[::-1])
        lines = inter.intersection(local_patch)
        if isinstance(lines, MultiLineString):
            lines = ops.linemerge(lines)
        results.append(lines)
    return self._one_type_line_geom_to_instances(results)
def line_geoms_to_instances(self, line_geom):
    """Union and merge line geometries until the part count stops changing.

    Args:
        line_geom: Sequence whose first entry is ``(name, lines)``; only
            ``line_geom[0][1]`` is read.

    Returns:
        list: Line instances produced by
        ``self._one_type_line_geom_to_instances`` from the merged geometry.
    """
    merged = ops.unary_union(MultiLineString(line_geom[0][1]))
    if merged.geom_type == 'LineString':
        return self._one_type_line_geom_to_instances([merged])

    prev_count = len(merged.geoms)
    merged = ops.linemerge(merged)
    # Alternate union / merge passes; stop as soon as everything collapses
    # into one line or a pass leaves the number of parts unchanged.
    while merged.geom_type != 'LineString':
        cur_count = len(merged.geoms)
        if cur_count == prev_count:
            break
        prev_count = cur_count
        merged = ops.unary_union(merged)
        if merged.geom_type == 'LineString':
            break
        merged = ops.linemerge(merged)
    return self._one_type_line_geom_to_instances([merged])
def sample_pts_from_line(self, line):
    """Sample 2D points along ``line``.

    With ``self.fixed_num < 0`` points are taken every ``self.sample_dist``
    along the line; otherwise ``self.fixed_num`` evenly spaced points are
    taken. Padding / truncation to ``self.num_samples`` only happens for
    distance-based sampling with ``self.padding`` enabled.

    Args:
        line: Geometry exposing ``length`` and ``interpolate``.

    Returns:
        tuple: ``(points, num_valid)`` where ``points`` is an ``(N, 2)``
        array and ``num_valid`` counts the non-padding rows.
    """
    by_distance = self.fixed_num < 0
    if by_distance:
        stations = np.arange(0, line.length, self.sample_dist)
    else:
        # fixed number of points, so distance is line.length / self.fixed_num
        stations = np.linspace(0, line.length, self.fixed_num)
    pts = np.array(
        [list(line.interpolate(station).coords) for station in stations]
    ).reshape(-1, 2)
    count = len(pts)

    if self.fixed_num > 0 or not self.padding:
        # fixed num sample can return now!
        return pts, count

    # fixed distance sampling need padding!
    if by_distance:
        missing = self.num_samples - count
        if missing > 0:
            pts = np.concatenate([pts, np.zeros((missing, 2))], axis=0)
        else:
            pts = pts[:self.num_samples, :]
            count = self.num_samples
    return pts, count
@DATASETS.register_module()
class CustomAV2LocalMapDataset(CustomNuScenesDataset):
r"""NuScenes-style dataset for Argoverse 2 (AV2) local maps.
This dataset adds static map elements.
"""
MAPCLASSES = ('divider',)
def __init__(self,
             map_ann_file=None,
             queue_length=4,
             code_size=2,
             bev_size=(200, 200),
             pc_range=[-51.2, -51.2, -5.0, 51.2, 51.2, 3.0],
             overlap_test=False,
             fixed_ptsnum_per_line=-1,
             eval_use_same_gt_sample_num_flag=False,
             padding_value=-10000,
             map_classes=None,
             *args,
             **kwargs):
    """Store the map settings and build the vectorized-map helper.

    Args:
        map_ann_file: Path of the ground-truth map annotation file used by
            the formatting / evaluation methods.
        queue_length: Number of earlier indices considered in
            ``prepare_train_data``.
        code_size: Number of coordinate columns kept when ground truth is
            exported.
        bev_size: Stored as ``self.bev_size``.
        pc_range: Six range values; indices 0/3 and 1/4 give the patch
            width and height.
        overlap_test: Stored as ``self.overlap_test``.
        fixed_ptsnum_per_line: Points per line, stored as ``self.fixed_num``
            and forwarded to ``VectorizedAV2LocalMap``.
        eval_use_same_gt_sample_num_flag: Forwarded to the evaluation
            helper.
        padding_value: Forwarded to ``VectorizedAV2LocalMap``.
        map_classes: Resolved through ``get_map_classes``.
        *args, **kwargs: Passed to the parent constructor; ``kwargs`` must
            contain ``'data_root'``.
    """
    super().__init__(*args, **kwargs)

    # Values stored as given.
    self.map_ann_file = map_ann_file
    self.code_size = code_size
    self.queue_length = queue_length
    self.overlap_test = overlap_test
    self.bev_size = bev_size

    # Map classes.
    self.MAPCLASSES = self.get_map_classes(map_classes)
    self.NUM_MAPCLASSES = len(self.MAPCLASSES)

    # Patch extent, stored as (height, width).
    self.pc_range = pc_range
    self.patch_size = (pc_range[4] - pc_range[1], pc_range[3] - pc_range[0])

    self.padding_value = padding_value
    self.fixed_num = fixed_ptsnum_per_line
    self.eval_use_same_gt_sample_num_flag = eval_use_same_gt_sample_num_flag

    self.vector_map = VectorizedAV2LocalMap(
        kwargs['data_root'],
        patch_size=self.patch_size,
        test_mode=self.test_mode,
        map_classes=self.MAPCLASSES,
        fixed_ptsnum_per_line=fixed_ptsnum_per_line,
        padding_value=self.padding_value)
    self.is_vis_on_test = False
def load_annotations(self, ann_file):
    """Load annotations from ann_file.

    Also stores ``data['id2map']`` on ``self.id2map`` and resets
    ``self.metadata`` / ``self.version`` to ``None``.

    Args:
        ann_file (str): Path of the annotation file.

    Returns:
        list[dict]: Samples sorted by ``'timestamp'``, subsampled with
        ``self.load_interval``.
    """
    raw = mmcv.load(ann_file)
    ordered = sorted(raw['samples'], key=lambda sample: sample['timestamp'])
    data_infos = ordered[::self.load_interval]
    self.id2map = raw['id2map']
    self.metadata = None
    self.version = None
    return data_infos
@classmethod
def get_map_classes(cls, map_classes=None):
    """Get map class names of current dataset.

    Args:
        map_classes (Sequence[str] | str | None): If None, use the default
            ``MAPCLASSES`` defined on the class. If a string, take it as a
            file name whose lines are read with ``mmcv.list_from_file``.
            If a tuple or list, it is returned unchanged.

    Return:
        Sequence[str]: The class names.

    Raises:
        ValueError: If ``map_classes`` has any other type.
    """
    if map_classes is None:
        return cls.MAPCLASSES
    if isinstance(map_classes, str):
        # take it as a file path
        return mmcv.list_from_file(map_classes)
    if isinstance(map_classes, (tuple, list)):
        return map_classes
    raise ValueError(f'Unsupported type {type(map_classes)} of map classes.')
def vectormap_pipeline(self, example, input_dict):
    """Attach vectorized map ground truth to ``example``.

    The map elements of the sample's log are looked up in ``self.id2map``
    and vectorized by ``self.vector_map.gen_vectorized_samples``. The
    result is stored as:

    * ``example['gt_labels_3d']``: ``DataContainer`` (``cpu_only=False``)
      holding the class indices as a tensor.
    * ``example['gt_bboxes_3d']``: ``DataContainer`` (``cpu_only=True``)
      holding either the ``LiDARInstanceLines`` object or a float32 tensor
      flattened from dimension 1.

    Args:
        example (dict): Sample dict to extend; modified in place.
        input_dict (dict): Must provide ``'log_id'``, ``'e2g_translation'``
            and ``'e2g_rotation'``.

    Returns:
        dict: The same ``example`` object.
    """
    location = input_dict['log_id']
    e2g_translation = input_dict['e2g_translation']
    e2g_rotation = input_dict['e2g_rotation']
    map_elements = self.id2map[location]
    # anns_results keys used below: 'gt_vecs_label' (class index per
    # vector) and 'gt_vecs_pts_loc' (the vectors themselves).
    anns_results = self.vector_map.gen_vectorized_samples(
        location, map_elements, e2g_translation, e2g_rotation)

    gt_vecs_label = to_tensor(anns_results['gt_vecs_label'])
    gt_vecs_pts_loc = anns_results['gt_vecs_pts_loc']
    if not isinstance(gt_vecs_pts_loc, LiDARInstanceLines):
        gt_vecs_pts_loc = to_tensor(gt_vecs_pts_loc)
        try:
            gt_vecs_pts_loc = gt_vecs_pts_loc.flatten(1).to(dtype=torch.float32)
        except (IndexError, RuntimeError):
            # An empty tensor has no dimension 1 to flatten. It is skipped
            # during training but deliberately kept as-is for testing.
            pass

    example['gt_labels_3d'] = DC(gt_vecs_label, cpu_only=False)
    example['gt_bboxes_3d'] = DC(gt_vecs_pts_loc, cpu_only=True)
    return example
def prepare_train_data(self, index):
    """
    Training data preparation.

    Args:
        index (int): Index for accessing the target data.

    Returns:
        dict | None: Training data of the corresponding index, or None when
        the sample has no data info or (with ``self.filter_empty_gt``) no
        label different from -1.
    """
    # temporal aug: the shuffled list is never consumed afterwards, but the
    # shuffle is kept so the global random stream advances as before.
    earlier_indices = list(range(index - self.queue_length, index))
    random.shuffle(earlier_indices)

    input_dict = self.get_data_info(index)
    if input_dict is None:
        return None
    # Not used below; the reads are kept so a missing key still fails here.
    _timestamp = input_dict['timestamp']
    _log_id = input_dict['log_id']

    self.pre_pipeline(input_dict)
    example = self.pipeline(input_dict)
    example = self.vectormap_pipeline(example, input_dict)
    if self.filter_empty_gt and \
            (example is None or ~(example['gt_labels_3d']._data != -1).any()):
        return None
    return self.union2one([example])
def union2one(self, queue):
    """
    Convert a sample queue into one single sample.

    The ``can_bus`` entries of the frame metas are rewritten in place: the
    first frame gets zeros in ``[:3]`` and ``[-1]``, every later frame gets
    the difference to the previous frame's original values. The last queue
    element receives the stacked images and the per-frame metas and is
    returned.
    """
    imgs_list = [each['img'].data for each in queue]
    metas_map = {}
    prev_pos = None
    prev_angle = None
    for i, each in enumerate(queue):
        meta = each['img_metas'].data
        metas_map[i] = meta
        meta['prev_bev'] = i != 0
        can_bus = meta['can_bus']
        # Remember the untouched pose before it is overwritten below.
        cur_pos = copy.deepcopy(can_bus[:3])
        cur_angle = copy.deepcopy(can_bus[-1])
        if i == 0:
            can_bus[:3] = 0
            can_bus[-1] = 0
        else:
            can_bus[:3] -= prev_pos
            can_bus[-1] -= prev_angle
        prev_pos, prev_angle = cur_pos, cur_angle

    merged = queue[-1]
    merged['img'] = DC(torch.stack(imgs_list),
                       cpu_only=False, stack=True)
    merged['img_metas'] = DC(metas_map, cpu_only=True)
    return merged
def get_data_info(self, index):
    """Get data info according to the given index.

    Args:
        index (int): Index of the sample data to get.

    Returns:
        dict: Data information that will be passed to the data
        preprocessing pipelines. It includes the following keys:

            - timestamp, e2g_translation, e2g_rotation, log_id: copied
              from the sample info.
            - scene_token: same value as ``log_id``.
            - img_filename, lidar2img, cam_intrinsic, lidar2cam, cam_type
              (only when ``self.modality['use_camera']``): one entry per
              camera.
            - ann_info (only when not in test mode): an empty dict.
            - can_bus: array of 18 values; ``[:3]`` translation, ``[3:7]``
              rotation quaternion, ``[-2]`` yaw in radians, ``[-1]`` yaw in
              degrees within [0, 360). The other entries stay 1.
    """
    info = self.data_infos[index]
    # standard protocal modified from SECOND.Pytorch
    input_dict = dict(
        timestamp=info['timestamp'],
        e2g_translation=info['e2g_translation'],
        e2g_rotation=info['e2g_rotation'],
        log_id=info['log_id'],
        scene_token=info['log_id'],
    )

    if self.modality['use_camera']:
        image_paths = []
        cam_intrinsics = []
        lidar2img_rts = []
        lidar2cam_rts = []
        cam_types = []
        for cam_type, cam_info in info['cams'].items():
            image_paths.append(cam_info['img_fpath'])
            # ego2img, ego = lidar
            lidar2cam_rt = cam_info['extrinsics']
            intrinsic = cam_info['intrinsics']
            # Embed the intrinsics into a 4x4 identity so they can be
            # chained with the 4x4 extrinsics.
            viewpad = np.eye(4)
            viewpad[:intrinsic.shape[0], :intrinsic.shape[1]] = intrinsic
            lidar2img_rts.append(viewpad @ lidar2cam_rt)
            lidar2cam_rts.append(lidar2cam_rt)
            cam_intrinsics.append(viewpad)
            cam_types.append(cam_type)
        input_dict.update(
            dict(
                img_filename=image_paths,
                lidar2img=lidar2img_rts,
                cam_intrinsic=cam_intrinsics,
                lidar2cam=lidar2cam_rts,
                cam_type=cam_types,
            ))

    if not self.test_mode:
        input_dict['ann_info'] = dict()

    can_bus = np.ones(18)
    can_bus[:3] = input_dict['e2g_translation']
    rotation = Quaternion._from_matrix(input_dict['e2g_rotation'])
    can_bus[3:7] = rotation
    # Yaw in degrees, wrapped into [0, 360).
    patch_angle = quaternion_yaw(rotation) / np.pi * 180
    if patch_angle < 0:
        patch_angle += 360
    can_bus[-2] = patch_angle / 180 * np.pi
    can_bus[-1] = patch_angle
    input_dict['can_bus'] = can_bus
    return input_dict
def prepare_test_data(self, index):
    """Prepare data for testing.

    Args:
        index (int): Index for accessing the target data.

    Returns:
        dict: Testing data dict of the corresponding index. Map ground
        truth is attached only when ``self.is_vis_on_test`` is set.
    """
    input_dict = self.get_data_info(index)
    self.pre_pipeline(input_dict)
    example = self.pipeline(input_dict)
    if not self.is_vis_on_test:
        return example
    return self.vectormap_pipeline(example, input_dict)
def __getitem__(self, idx):
"""Get item from infos according to the given index.
Returns:
dict: Data dictionary of the corresponding index.
"""
if self.test_mode:
return self.prepare_test_data(idx)
while True:
data = self.prepare_train_data(idx)
if data is None:
idx = self._rand_another(idx)
continue
return data
def _format_gt(self):
    """Write the ground-truth map vectors to ``self.map_ann_file``.

    Nothing is written when the file already exists. Otherwise every sample
    is run through ``vectormap_pipeline`` and its vectors are dumped as
    ``{'GTs': [...]}``, one entry per sample with its ``sample_token`` and
    ``vectors``.
    """
    print('Start to convert gt map format...')
    assert self.map_ann_file is not None
    if os.path.exists(self.map_ann_file):
        print(f'{self.map_ann_file} exist, not update')
        return

    gt_annos = []
    dataset_length = len(self)
    prog_bar = mmcv.ProgressBar(dataset_length)
    class_names = self.MAPCLASSES
    for sample_id in range(dataset_length):
        info = self.data_infos[sample_id]
        sample_token = info['token']
        gt_sample = self.vectormap_pipeline({}, info)
        gt_labels = gt_sample['gt_labels_3d'].data.numpy()
        gt_vecs = gt_sample['gt_bboxes_3d'].data.instance_list

        vectors = []
        for gt_label, gt_vec in zip(gt_labels, gt_vecs):
            name = class_names[gt_label]
            coords = list(gt_vec.coords)
            vectors.append(dict(
                # Keep only the first code_size coordinate columns.
                pts=np.array(coords)[:, :self.code_size],
                pts_num=len(coords),
                cls_name=name,
                type=gt_label,
            ))
        gt_annos.append({'sample_token': sample_token, 'vectors': vectors})
        prog_bar.update()

    print('\n GT anns writes to', self.map_ann_file)
    mmcv.dump({'GTs': gt_annos}, self.map_ann_file)
def _format_bbox(self, results, jsonfile_prefix=None):
    """Convert the results to the standard format.

    Also triggers ``_format_gt`` when the ground-truth file does not exist
    yet.

    Args:
        results (list[dict]): Testing results of the dataset.
        jsonfile_prefix (str): Directory the output json file is written
            to; it is created if missing.

    Returns:
        str: Path of the output json file.
    """
    assert self.map_ann_file is not None
    class_names = self.MAPCLASSES
    print('Start to convert map detection format...')

    pred_annos = []
    for sample_id, det in enumerate(mmcv.track_iter_progress(results)):
        vecs = output_to_vecs(det)
        sample_token = self.data_infos[sample_id]['token']
        vectors = []
        for vec in vecs:
            name = class_names[vec['label']]
            vectors.append(dict(
                pts=vec['pts'],
                pts_num=len(vec['pts']),
                cls_name=name,
                type=vec['label'],
                confidence_level=vec['score']))
        pred_annos.append({'sample_token': sample_token, 'vectors': vectors})

    if os.path.exists(self.map_ann_file):
        print(f'{self.map_ann_file} exist, not update')
    else:
        self._format_gt()

    nusc_submissions = {
        'meta': self.modality,
        'results': pred_annos,
    }
    mmcv.mkdir_or_exist(jsonfile_prefix)
    res_path = osp.join(jsonfile_prefix, 'nuscmap_results.json')
    print('Results writes to', res_path)
    mmcv.dump(nusc_submissions, res_path)
    return res_path
def to_gt_vectors(self,
                  gt_dict):
    """Convert a ground-truth sample into a list of sampled vectors.

    Args:
        gt_dict (dict): Must provide ``'gt_labels_3d'`` (object whose
            ``data`` iterates over labels) and ``'gt_bboxes_3d'`` (object
            whose ``data.instance_list`` iterates over line instances).

    Returns:
        list[dict]: One dict per instance with ``'pts'`` and ``'pts_num'``
        as returned by the module-level ``sample_pts_from_line`` and
        ``'type'`` as the integer label.
    """
    gt_labels = gt_dict['gt_labels_3d'].data
    gt_instances = gt_dict['gt_bboxes_3d'].data.instance_list
    gt_vectors = []
    for gt_instance, gt_label in zip(gt_instances, gt_labels):
        pts, pts_num = sample_pts_from_line(gt_instance, patch_size=self.patch_size)
        gt_vectors.append({
            'pts': pts,
            'pts_num': pts_num,
            'type': int(gt_label)
        })
    # No per-class grouping is built here: its result was never returned or
    # stored, so only the flat vector list is produced.
    return gt_vectors
def _evaluate_single(self,
                     result_path,
                     logger=None,
                     metric='chamfer',
                     result_name='pts_bbox'):
    """Evaluate one result file against the ground-truth map file.

    Args:
        result_path (str): Path of the result file.
        logger (logging.Logger | str | None): Forwarded to ``eval_map``.
            Default: None.
        metric (str | list[str]): ``'chamfer'`` and/or ``'iou'``.
            Default: 'chamfer'.
        result_name (str): Not used by this method.
            Default: 'pts_bbox'.

    Returns:
        dict: Per-class AP, mAP and per-threshold AP entries keyed as
        ``'AV2Map_<metric>/...'``.

    Raises:
        KeyError: If a requested metric is not supported.
    """
    from projects.mmdet3d_plugin.datasets.map_utils.mean_ap import eval_map
    from projects.mmdet3d_plugin.datasets.map_utils.mean_ap import format_res_gt_by_classes
    result_path = osp.abspath(result_path)
    detail = dict()

    print('Formating results & gts by classes')
    with open(result_path, 'r') as f:
        gen_results = json.load(f)['results']
    with open(self.map_ann_file, 'r') as ann_f:
        annotations = json.load(ann_f)['GTs']
    cls_gens, cls_gts = format_res_gt_by_classes(
        result_path,
        gen_results,
        annotations,
        cls_names=self.MAPCLASSES,
        num_pred_pts_per_instance=self.fixed_num,
        eval_use_same_gt_sample_num_flag=self.eval_use_same_gt_sample_num_flag,
        pc_range=self.pc_range)

    metrics = metric if isinstance(metric, list) else [metric]
    allowed_metrics = ['chamfer', 'iou']
    # Reject unsupported metrics before any evaluation starts.
    for metric in metrics:
        if metric not in allowed_metrics:
            raise KeyError(f'metric {metric} is not supported')

    for metric in metrics:
        print('-*'*10+f'use metric:{metric}'+'-*'*10)
        if metric == 'chamfer':
            thresholds = [0.5,1.0,1.5]
        elif metric == 'iou':
            thresholds= np.linspace(.5, 0.95, int(np.round((0.95 - .5) / .05)) + 1, endpoint=True)

        # Rows: thresholds, columns: map classes.
        cls_aps = np.zeros((len(thresholds), self.NUM_MAPCLASSES))
        for i, thr in enumerate(thresholds):
            print('-*'*10+f'threshhold:{thr}'+'-*'*10)
            mAP, cls_ap = eval_map(
                gen_results,
                annotations,
                cls_gens,
                cls_gts,
                threshold=thr,
                cls_names=self.MAPCLASSES,
                logger=logger,
                num_pred_pts_per_instance=self.fixed_num,
                pc_range=self.pc_range,
                metric=metric)
            for j in range(self.NUM_MAPCLASSES):
                cls_aps[i, j] = cls_ap[j]['ap']

        mean_aps = cls_aps.mean(0)
        for i, name in enumerate(self.MAPCLASSES):
            print('{}: {}'.format(name, mean_aps[i]))
            detail['AV2Map_{}/{}_AP'.format(metric,name)] = mean_aps[i]
        print('map: {}'.format(mean_aps.mean()))
        detail['AV2Map_{}/mAP'.format(metric)] = mean_aps.mean()

        for i, name in enumerate(self.MAPCLASSES):
            for j, thr in enumerate(thresholds):
                # chamfer reports every threshold, iou only 0.5 and 0.75.
                if metric == 'chamfer' or (metric == 'iou' and thr in (0.5, 0.75)):
                    detail['AV2Map_{}/{}_AP_thr_{}'.format(metric,name,thr)]=cls_aps[j][i]
    return detail
def evaluate(self,
             results,
             metric='bbox',
             logger=None,
             jsonfile_prefix=None,
             result_names=['pts_bbox'],
             show=False,
             out_dir=None,
             pipeline=None):
    """Format the results and evaluate them.

    Args:
        results (list[dict]): Testing results of the dataset.
        metric (str | list[str]): Metrics forwarded to
            ``_evaluate_single``.
        logger (logging.Logger | str | None): Accepted but not forwarded
            by this method. Default: None.
        jsonfile_prefix (str | None): Forwarded to ``format_results``.
            Default: None.
        result_names (list[str]): Keys evaluated when ``format_results``
            returns a dict of files.
        show (bool): Whether to visualize.
            Default: False.
        out_dir (str): Path to save the visualization results.
            Default: None.
        pipeline (list[dict], optional): raw data loading for showing.
            Default: None.

    Returns:
        dict[str, float]: Results of each evaluation metric.
    """
    result_files, tmp_dir = self.format_results(results, jsonfile_prefix)

    if isinstance(result_files, dict):
        results_dict = dict()
        for name in result_names:
            print('Evaluating bboxes of {}'.format(name))
            results_dict.update(
                self._evaluate_single(result_files[name], metric=metric))
    elif isinstance(result_files, str):
        results_dict = self._evaluate_single(result_files, metric=metric)

    if tmp_dir is not None:
        tmp_dir.cleanup()
    if show:
        self.show(results, out_dir, pipeline=pipeline)
    return results_dict
def output_to_vecs(detection):
    """Split a batched detection dict into one dict per predicted vector.

    Args:
        detection (dict): Holds ``'boxes_3d'``, ``'scores_3d'``,
            ``'labels_3d'`` and ``'pts_3d'``; each value must offer
            ``.numpy()``.

    Returns:
        list[dict]: One dict per row of ``boxes_3d`` with keys ``bbox``
        (xyxy according to the original comment), ``label``, ``score`` and
        ``pts``.
    """
    boxes = detection['boxes_3d'].numpy()
    scores = detection['scores_3d'].numpy()
    labels = detection['labels_3d'].numpy()
    pts = detection['pts_3d'].numpy()
    return [
        dict(bbox=boxes[idx], label=labels[idx], score=scores[idx], pts=pts[idx])
        for idx in range(boxes.shape[0])
    ]
def sample_pts_from_line(line,
                         fixed_num=-1,
                         sample_dist=1,
                         normalize=False,
                         patch_size=None,
                         padding=False,
                         num_samples=250,):
    """Sample points along ``line``, optionally normalised and padded.

    With ``fixed_num < 0`` points are taken every ``sample_dist`` along the
    line; otherwise ``fixed_num`` evenly spaced points are taken.

    Args:
        line: Geometry exposing ``length``, ``has_z`` and ``interpolate``.
        fixed_num (int): Number of points, or a negative value for
            distance-based sampling.
        sample_dist (float): Spacing for distance-based sampling.
        normalize (bool): Divide x by ``patch_size[1]`` and y by
            ``patch_size[0]``. Applied exactly once.
        patch_size: Two-element size; required when ``normalize`` is set.
        padding (bool): For distance-based sampling, zero-pad or truncate
            the result to ``num_samples`` rows.
        num_samples (int): Row count used when ``padding`` is set.

    Returns:
        tuple: ``(points, num_valid)``. Without padding (or with
        ``fixed_num > 0``) ``points`` keeps every sampled column, including
        z for 3D lines. On the padding path only the x/y columns are
        returned. ``num_valid`` counts sampled rows and never includes
        zero padding.
    """
    if fixed_num < 0:
        distances = np.arange(0, line.length, sample_dist)
    else:
        # fixed number of points, so distance is line.length / fixed_num
        distances = np.linspace(0, line.length, fixed_num)
    num_coords = 3 if line.has_z else 2
    sampled_points = np.array(
        [list(line.interpolate(distance).coords) for distance in distances]
    ).reshape(-1, num_coords)

    if normalize:
        sampled_points[:, :2] = sampled_points[:, :2] / np.array([patch_size[1], patch_size[0]])

    num_valid = len(sampled_points)
    if not padding or fixed_num > 0:
        # fixed num sample can return now!
        return sampled_points, num_valid

    # fixed distance sampling need padding!
    if fixed_num < 0:
        if num_valid < num_samples:
            pad_rows = np.zeros((num_samples - num_valid, sampled_points.shape[-1]))
            sampled_points = np.concatenate([sampled_points, pad_rows], axis=0)
        else:
            sampled_points = sampled_points[:num_samples, :]
            num_valid = num_samples
    # Normalisation already happened above; repeating it here would scale
    # the points twice and num_valid must not be reset to the padded length.
    return sampled_points[:, :2], num_valid
\ No newline at end of file
import copy
import numpy as np
from mmdet.datasets import DATASETS
from mmdet3d.datasets import NuScenesDataset
import mmcv
import os
from os import path as osp
from mmdet.datasets import DATASETS
import torch
import numpy as np
from nuscenes.eval.common.utils import quaternion_yaw, Quaternion
from projects.mmdet3d_plugin.models.utils.visual import save_tensor
from mmcv.parallel import DataContainer as DC
import random
from .nuscenes_dataset import CustomNuScenesDataset
from nuscenes.map_expansion.map_api import NuScenesMap, NuScenesMapExplorer
from nuscenes.eval.common.utils import quaternion_yaw, Quaternion
from shapely import affinity, ops
from shapely.geometry import Polygon, LineString, box, MultiPolygon, MultiLineString
from mmdet.datasets.pipelines import to_tensor
import json
from pathlib import Path
from av2.datasets.sensor.av2_sensor_dataloader import AV2SensorDataLoader
from av2.map.lane_segment import LaneMarkType, LaneSegment
from av2.map.map_api import ArgoverseStaticMap
from av2.geometry.se3 import SE3
import av2.geometry.interpolate as interp_utils
import cv2
def perspective(cam_coords, proj_mat):
    """Project points with ``proj_mat`` and divide by the third row.

    Args:
        cam_coords: Matrix with one point per column.
        proj_mat: Projection matrix applied as ``proj_mat @ cam_coords``.

    Returns:
        Array with one ``(u, v)`` row per point whose projected third
        component is positive; the other points are dropped.
    """
    projected = proj_mat @ cam_coords
    in_front = projected[2, :] > 0
    kept = projected[:, in_front]
    # The small epsilon keeps the division finite for tiny depths.
    depth = kept[2, :] + 1e-7
    return (kept[:2, :] / depth).transpose(1, 0)
class LiDARInstanceLines(object):
"""Line instance in LIDAR coordinates
"""
def __init__(self,
             instance_line_list,
             instance_labels,
             sample_dist=1,
             num_samples=250,
             padding=False,
             fixed_num=-1,
             padding_value=-10000,
             patch_size=None,
             code_size=2,
             min_z=-5,
             max_z=3,):
    """Store the line instances and their sampling / clamping settings.

    Args:
        instance_line_list (list): Line instances; when non-empty the first
            one must be a ``LineString``.
        instance_labels: One label per instance.
        sample_dist, num_samples, padding, fixed_num, padding_value,
            code_size: Stored unchanged for the sampling properties.
        patch_size: ``(height, width)``; half the width / height become
            ``max_x`` / ``max_y``. Required.
        min_z, max_z: Clamp range for the z coordinate.
    """
    assert isinstance(instance_line_list, list)
    assert patch_size is not None
    if instance_line_list:
        assert isinstance(instance_line_list[0], LineString)

    self.patch_size = patch_size
    # x spans the patch width (index 1), y the patch height (index 0).
    self.max_x = self.patch_size[1] / 2
    self.max_y = self.patch_size[0] / 2
    self.min_z = min_z
    self.max_z = max_z

    self.sample_dist = sample_dist
    self.num_samples = num_samples
    self.padding = padding
    self.fixed_num = fixed_num
    self.padding_value = padding_value
    self.code_size = code_size

    self.instance_list = instance_line_list
    self.instance_labels = instance_labels
@property
def start_end_points(self):
    """
    return torch.Tensor([N,4]), in xstart, ystart, xend, yend form

    Only the x/y components of the first and last vertex are used, so lines
    that carry a z coordinate also yield four columns. x is clamped to
    ``[-max_x, max_x]`` and y to ``[-max_y, max_y]``.
    """
    assert len(self.instance_list) != 0
    instance_se_points_list = []
    for instance in self.instance_list:
        coords = instance.coords
        # Slice to x/y: a 3D vertex would otherwise shift the end point
        # out of columns 2 and 3.
        instance_se_points_list.append(
            list(coords[0][:2]) + list(coords[-1][:2]))
    instance_se_points_array = np.array(instance_se_points_list)
    instance_se_points_tensor = to_tensor(instance_se_points_array)
    instance_se_points_tensor = instance_se_points_tensor.to(
        dtype=torch.float32)
    instance_se_points_tensor[:,0] = torch.clamp(instance_se_points_tensor[:,0], min=-self.max_x,max=self.max_x)
    instance_se_points_tensor[:,1] = torch.clamp(instance_se_points_tensor[:,1], min=-self.max_y,max=self.max_y)
    instance_se_points_tensor[:,2] = torch.clamp(instance_se_points_tensor[:,2], min=-self.max_x,max=self.max_x)
    instance_se_points_tensor[:,3] = torch.clamp(instance_se_points_tensor[:,3], min=-self.max_y,max=self.max_y)
    return instance_se_points_tensor
@property
def bbox(self):
    """
    return torch.Tensor([N,4]), in xmin, ymin, xmax, ymax form

    x values are clamped to ``[-max_x, max_x]`` and y values to
    ``[-max_y, max_y]``.
    """
    assert len(self.instance_list) != 0
    # bounds is bbox: [xmin, ymin, xmax, ymax]
    bounds_array = np.array([instance.bounds for instance in self.instance_list])
    boxes = to_tensor(bounds_array).to(dtype=torch.float32)
    for x_col in (0, 2):
        boxes[:, x_col] = torch.clamp(boxes[:, x_col], min=-self.max_x, max=self.max_x)
    for y_col in (1, 3):
        boxes[:, y_col] = torch.clamp(boxes[:, y_col], min=-self.max_y, max=self.max_y)
    return boxes
@property
def fixed_num_sampled_points(self):
    """
    return torch.Tensor([N,fixed_num,C]) of points sampled evenly along
    each instance; C is 3 when the lines carry z, otherwise 2.
    N means the num of instances

    x / y are clamped to the patch half-extents; z, when present, is
    clamped to ``[min_z, max_z]``.
    """
    assert len(self.instance_list) != 0
    instance_points_list = []
    for instance in self.instance_list:
        distances = np.linspace(0, instance.length, self.fixed_num)
        sampled_points = np.array([list(instance.interpolate(distance).coords) for distance in distances])
        sampled_points = sampled_points.reshape(-1, 3 if instance.has_z else 2)
        instance_points_list.append(sampled_points)
    instance_points_array = np.array(instance_points_list)
    instance_points_tensor = to_tensor(instance_points_array)
    instance_points_tensor = instance_points_tensor.to(
        dtype=torch.float32)
    instance_points_tensor[:,:,0] = torch.clamp(instance_points_tensor[:,:,0], min=-self.max_x,max=self.max_x)
    instance_points_tensor[:,:,1] = torch.clamp(instance_points_tensor[:,:,1], min=-self.max_y,max=self.max_y)
    # 2D lines have no third column; clamping it would raise IndexError.
    if instance_points_tensor.shape[-1] > 2:
        instance_points_tensor[:,:,2] = torch.clamp(instance_points_tensor[:,:,2], min=self.min_z,max=self.max_z)
    return instance_points_tensor
@property
def fixed_num_sampled_points_ambiguity(self):
    """
    return torch.Tensor([N,1,fixed_num,C]) of points sampled evenly along
    each instance; C is 3 when the lines carry z, otherwise 2.
    N means the num of instances

    x / y are clamped to the patch half-extents; z, when present, is
    clamped to ``[min_z, max_z]``.
    """
    assert len(self.instance_list) != 0
    instance_points_list = []
    for instance in self.instance_list:
        distances = np.linspace(0, instance.length, self.fixed_num)
        sampled_points = np.array(
            [list(instance.interpolate(distance).coords) for distance in distances]
        ).reshape(-1, 3 if instance.has_z else 2)
        instance_points_list.append(sampled_points)
    instance_points_array = np.array(instance_points_list)
    instance_points_tensor = to_tensor(instance_points_array)
    instance_points_tensor = instance_points_tensor.to(
        dtype=torch.float32)
    instance_points_tensor[:,:,0] = torch.clamp(instance_points_tensor[:,:,0], min=-self.max_x,max=self.max_x)
    instance_points_tensor[:,:,1] = torch.clamp(instance_points_tensor[:,:,1], min=-self.max_y,max=self.max_y)
    # NOTE(review): 3D-ness is derived from the sampled data here, matching
    # the sibling sampling properties -- confirm this is the intended
    # meaning of "is_3d".
    is_3d = instance_points_tensor.shape[-1] > 2
    if is_3d:
        instance_points_tensor[:,:,2] = torch.clamp(instance_points_tensor[:,:,2], min=self.min_z,max=self.max_z)
    instance_points_tensor = instance_points_tensor if is_3d else instance_points_tensor[:,:,:2]
    instance_points_tensor = instance_points_tensor.unsqueeze(1)
    return instance_points_tensor
@property
def fixed_num_sampled_points_torch(self):
    """
    return torch.Tensor([N,fixed_num,C]) obtained by linearly resampling
    the vertex sequence of each instance to ``fixed_num`` entries; C is the
    number of coordinates per vertex.
    N means the num of instances

    x / y are clamped to the patch half-extents; z, when present, is
    clamped to ``[min_z, max_z]``.
    """
    assert len(self.instance_list) != 0
    instance_points_list = []
    for instance in self.instance_list:
        poly_pts = to_tensor(np.array(list(instance.coords)))
        # interpolate() expects (batch, channels, length): coordinates act
        # as channels, vertices as the length axis.
        poly_pts = poly_pts.unsqueeze(0).permute(0,2,1)
        sampled_pts = torch.nn.functional.interpolate(poly_pts,size=(self.fixed_num),mode='linear',align_corners=True)
        sampled_pts = sampled_pts.permute(0,2,1).squeeze(0)
        instance_points_list.append(sampled_pts)
    instance_points_tensor = torch.stack(instance_points_list,dim=0)
    instance_points_tensor = instance_points_tensor.to(
        dtype=torch.float32)
    instance_points_tensor[:,:,0] = torch.clamp(instance_points_tensor[:,:,0], min=-self.max_x,max=self.max_x)
    instance_points_tensor[:,:,1] = torch.clamp(instance_points_tensor[:,:,1], min=-self.max_y,max=self.max_y)
    # 2D lines have no third column; clamping it would raise IndexError.
    if instance_points_tensor.shape[-1] > 2:
        instance_points_tensor[:,:,2] = torch.clamp(instance_points_tensor[:,:,2], min=self.min_z,max=self.max_z)
    return instance_points_tensor
@property
def shift_fixed_num_sampled_points(self):
    """
    return [instances_num, num_shifts, fixed_num, C]

    For a closed instance (first point equals last point) every cyclic
    shift of the point sequence is emitted. For an open instance only the
    forward and reversed sequences are emitted and the remaining shift
    slots are filled with ``padding_value``. ``num_shifts`` equals the
    number of points per instance.
    """
    fixed_num_sampled_points = self.fixed_num_sampled_points
    instances_list = []
    for fixed_num_pts in fixed_num_sampled_points:
        # [fixed_num, C]
        is_poly = fixed_num_pts[0].equal(fixed_num_pts[-1])
        fixed_num = fixed_num_pts.shape[0]
        if is_poly:
            shift_pts_list = [fixed_num_pts.roll(shift_right_i, 0)
                              for shift_right_i in range(fixed_num)]
        else:
            shift_pts_list = [fixed_num_pts, fixed_num_pts.flip(0)]
        shift_pts = torch.stack(shift_pts_list, dim=0)

        shift_pts[:,:,0] = torch.clamp(shift_pts[:,:,0], min=-self.max_x,max=self.max_x)
        shift_pts[:,:,1] = torch.clamp(shift_pts[:,:,1], min=-self.max_y,max=self.max_y)
        # 2D points have no third column; clamping it would raise IndexError.
        if shift_pts.shape[-1] > 2:
            shift_pts[:,:,2] = torch.clamp(shift_pts[:,:,2], min=self.min_z,max=self.max_z)

        if not is_poly:
            # Explicit dtype / device: an integer fill value alone would
            # produce an integer tensor (or be rejected by some versions).
            padding = torch.full(
                [fixed_num - shift_pts.shape[0], fixed_num, shift_pts.shape[-1]],
                self.padding_value,
                dtype=shift_pts.dtype, device=shift_pts.device)
            shift_pts = torch.cat([shift_pts, padding], dim=0)
        instances_list.append(shift_pts)
    instances_tensor = torch.stack(instances_list, dim=0)
    instances_tensor = instances_tensor.to(
        dtype=torch.float32)
    return instances_tensor
@property
def shift_fixed_num_sampled_points_v1(self):
    """Enumerate point orderings, rolling closed instances without their
    duplicated end point.

    For a closed instance (first point equals last point) the last point is
    dropped, the remaining ring is rolled ``fixed_num - 1`` times, and each
    rolled ring is closed again by repeating its new first point. An open
    instance yields its forward and reversed ordering and is padded with
    rows of ``self.padding_value`` up to ``fixed_num - 1`` orderings.

    Returns:
        torch.Tensor: float32 tensor of shape
        [instances_num, fixed_num - 1, fixed_num, num_coords]. Coordinates
        0/1/2 of the real rows are clamped to the x/y/z limits on ``self``.
    """
    per_instance = []
    for pts in self.fixed_num_sampled_points:
        # pts: [fixed_num, num_coords]
        closed = pts[0].equal(pts[-1])
        num_pts = pts.shape[0]
        num_shifts = num_pts - 1

        if closed:
            ring = pts[:-1, :]
            rolled = torch.stack(
                [ring.roll(step, 0) for step in range(num_shifts)], dim=0)
            # Re-close every rolled ring with a copy of its first point.
            stacked = torch.cat([rolled, rolled[:, :1, :]], dim=1)
        else:
            stacked = torch.stack([pts, pts.flip(0)], dim=0)

        limits = (
            (-self.max_x, self.max_x),
            (-self.max_y, self.max_y),
            (self.min_z, self.max_z),
        )
        for axis, (low, high) in enumerate(limits):
            stacked[:, :, axis] = torch.clamp(stacked[:, :, axis], min=low, max=high)

        if not closed:
            filler = torch.full(
                [num_shifts - stacked.shape[0], num_pts, stacked.shape[-1]],
                self.padding_value)
            stacked = torch.cat([stacked, filler], dim=0)
        per_instance.append(stacked)

    return torch.stack(per_instance, dim=0).to(dtype=torch.float32)
@property
def shift_fixed_num_sampled_points_v2(self):
    """Resample every instance and enumerate its equivalent orderings.

    Each geometry in ``self.instance_list`` is resampled to
    ``self.fixed_num`` points at evenly spaced arc lengths:

    * label ``3``: only the forward sampling is kept;
    * closed geometry (first vertex equals last vertex): one resampling per
      start vertex, obtained by rolling the vertex ring;
    * open geometry: the forward sampling and its reverse.

    If more than ``fixed_num - 1`` orderings exist, a random subset of that
    size is drawn with ``np.random.choice``; if fewer exist, rows filled
    with ``self.padding_value`` are appended.

    Returns:
        torch.Tensor: float32 tensor of shape
        [instances_num, fixed_num - 1, fixed_num, coords], with the last
        dimension truncated to ``self.code_size``. Coordinates 0/1/2 of the
        real rows are clamped to the x/y/z limits on ``self``.
    """
    assert len(self.instance_list) != 0

    def resample(geom, arc_lengths, dim):
        # Interpolate one point per arc length and flatten to [n, dim].
        return np.array(
            [list(geom.interpolate(d).coords) for d in arc_lengths]
        ).reshape(-1, dim)

    per_instance = []
    for idx, line in enumerate(self.instance_list):
        label = self.instance_labels[idx]
        arc_lengths = np.linspace(0, line.length, self.fixed_num)
        vertices = np.array(list(line.coords))
        closed = np.equal(vertices[0], vertices[-1]).all()
        num_vertices, num_coords = vertices.shape
        num_rolls = num_vertices - 1
        target_shifts = self.fixed_num - 1

        if label == 3:
            orderings = [resample(line, arc_lengths, num_coords)]
        elif closed:
            ring = vertices[:-1, :]
            orderings = []
            for step in range(num_rolls):
                rolled = np.roll(ring, step, axis=0)
                # Close the rolled ring again before resampling it.
                closed_ring = np.concatenate((rolled, rolled[:1]), axis=0)
                orderings.append(
                    resample(LineString(closed_ring), arc_lengths, num_coords))
        else:
            forward = resample(line, arc_lengths, num_coords)
            orderings = [forward, np.flip(forward, axis=0)]

        candidates = np.stack(orderings, axis=0)
        if candidates.shape[0] > target_shifts:
            keep = np.random.choice(candidates.shape[0], target_shifts, replace=False)
            candidates = candidates[keep]

        shifts = to_tensor(candidates).to(dtype=torch.float32)
        limits = (
            (-self.max_x, self.max_x),
            (-self.max_y, self.max_y),
            (self.min_z, self.max_z),
        )
        for axis, (low, high) in enumerate(limits):
            shifts[:, :, axis] = torch.clamp(shifts[:, :, axis], min=low, max=high)

        if shifts.shape[0] < target_shifts:
            filler = torch.full(
                [target_shifts - shifts.shape[0], self.fixed_num, shifts.shape[-1]],
                self.padding_value)
            shifts = torch.cat([shifts, filler], dim=0)
        per_instance.append(shifts)

    stacked = torch.stack(per_instance, dim=0).to(dtype=torch.float32)
    return stacked[..., :self.code_size]
@property
def shift_fixed_num_sampled_points_v3(self):
    """Resample every instance and enumerate orderings in both directions.

    Each geometry in ``self.instance_list`` is resampled to
    ``self.fixed_num`` points at evenly spaced arc lengths. A closed
    geometry (first vertex equals last vertex) is resampled once per start
    vertex of its ring, first in the original direction and then in the
    reversed direction. An open geometry yields the forward sampling and
    its reverse.

    If more than ``2 * (fixed_num - 1)`` orderings exist, ``fixed_num - 1``
    start vertices are drawn with ``np.random.choice`` and the matching
    forward and reversed orderings are kept. If fewer exist, rows filled
    with ``self.padding_value`` are appended.

    Returns:
        torch.Tensor: float32 tensor of shape
        [instances_num, 2 * (fixed_num - 1), fixed_num, num_coords].
        Coordinates 0/1/2 of the real rows are clamped to the x/y/z limits
        on ``self``.
    """
    assert len(self.instance_list) != 0

    def resample(geom, arc_lengths, dim):
        # Interpolate one point per arc length and flatten to [n, dim].
        return np.array(
            [list(geom.interpolate(d).coords) for d in arc_lengths]
        ).reshape(-1, dim)

    per_instance = []
    for line in self.instance_list:
        arc_lengths = np.linspace(0, line.length, self.fixed_num)
        vertices = np.array(list(line.coords))
        closed = np.equal(vertices[0], vertices[-1]).all()
        num_vertices, num_coords = vertices.shape
        num_rolls = num_vertices - 1
        target_shifts = self.fixed_num - 1

        if closed:
            ring = vertices[:-1, :]
            orderings = []
            # Forward rolls first, then rolls of the reversed ring.
            for base in (ring, np.flip(ring, axis=0)):
                for step in range(num_rolls):
                    rolled = np.roll(base, step, axis=0)
                    closed_ring = np.concatenate((rolled, rolled[:1]), axis=0)
                    orderings.append(
                        resample(LineString(closed_ring), arc_lengths, num_coords))
        else:
            forward = resample(line, arc_lengths, num_coords)
            orderings = [forward, np.flip(forward, axis=0)]

        candidates = np.stack(orderings, axis=0)
        if candidates.shape[0] > 2 * target_shifts:
            keep = np.random.choice(num_rolls, target_shifts, replace=False)
            candidates = np.concatenate(
                (candidates[keep], candidates[keep + num_rolls]), axis=0)

        shifts = to_tensor(candidates).to(dtype=torch.float32)
        limits = (
            (-self.max_x, self.max_x),
            (-self.max_y, self.max_y),
            (self.min_z, self.max_z),
        )
        for axis, (low, high) in enumerate(limits):
            shifts[:, :, axis] = torch.clamp(shifts[:, :, axis], min=low, max=high)

        if shifts.shape[0] < 2 * target_shifts:
            filler = torch.full(
                [target_shifts * 2 - shifts.shape[0], self.fixed_num, shifts.shape[-1]],
                self.padding_value)
            shifts = torch.cat([shifts, filler], dim=0)
        per_instance.append(shifts)

    return torch.stack(per_instance, dim=0).to(dtype=torch.float32)
@property
def shift_fixed_num_sampled_points_v4(self):
    """Enumerate point orderings in both directions for every instance.

    For a closed instance (first point equals last point) the duplicated
    end point is dropped, the ring is rolled ``fixed_num - 1`` times in the
    original direction and again in the reversed direction, and every
    rolled ring is closed by repeating its first point. An open instance
    yields its forward and reversed ordering and is padded with rows of
    ``self.padding_value`` up to ``2 * (fixed_num - 1)`` orderings.

    Returns:
        torch.Tensor: float32 tensor of shape
        [instances_num, 2 * (fixed_num - 1), fixed_num, num_coords].
        Coordinates 0/1/2 of the real rows are clamped to the x/y/z limits
        on ``self``.
    """
    per_instance = []
    for pts in self.fixed_num_sampled_points:
        # pts: [fixed_num, num_coords]
        closed = pts[0].equal(pts[-1])
        num_pts = pts.shape[0]
        num_shifts = num_pts - 1

        if closed:
            ring = pts[:-1, :]
            reversed_ring = ring.flip(0)
            orderings = [ring.roll(step, 0) for step in range(num_shifts)]
            orderings += [reversed_ring.roll(step, 0) for step in range(num_shifts)]
            rolled = torch.stack(orderings, dim=0)
            # Re-close every rolled ring with a copy of its first point.
            stacked = torch.cat([rolled, rolled[:, :1, :]], dim=1)
        else:
            stacked = torch.stack([pts, pts.flip(0)], dim=0)

        limits = (
            (-self.max_x, self.max_x),
            (-self.max_y, self.max_y),
            (self.min_z, self.max_z),
        )
        for axis, (low, high) in enumerate(limits):
            stacked[:, :, axis] = torch.clamp(stacked[:, :, axis], min=low, max=high)

        if not closed:
            filler = torch.full(
                [num_shifts * 2 - stacked.shape[0], num_pts, stacked.shape[-1]],
                self.padding_value)
            stacked = torch.cat([stacked, filler], dim=0)
        per_instance.append(stacked)

    return torch.stack(per_instance, dim=0).to(dtype=torch.float32)
@property
def shift_fixed_num_sampled_points_torch(self):
    """Enumerate point orderings for ``self.fixed_num_sampled_points_torch``.

    An instance whose first and last points are equal is treated as closed
    and yields one cyclic roll per point. Any other instance yields its
    forward and reversed ordering and is then padded with rows of
    ``self.padding_value`` up to ``fixed_num`` orderings.

    Returns:
        torch.Tensor: float32 tensor of shape
        [instances_num, fixed_num, fixed_num, num_coords]. Coordinates
        0/1/2 of the real (non-padding) rows are clamped to
        [-max_x, max_x], [-max_y, max_y] and [min_z, max_z].
    """
    per_instance = []
    for pts in self.fixed_num_sampled_points_torch:
        # pts: [fixed_num, num_coords]
        closed = pts[0].equal(pts[-1])
        num_pts = pts.shape[0]
        if closed:
            orderings = [pts.roll(step, 0) for step in range(num_pts)]
        else:
            orderings = [pts, pts.flip(0)]
        stacked = torch.stack(orderings, dim=0)

        # Clamp before padding so the padding rows keep the sentinel value.
        limits = (
            (-self.max_x, self.max_x),
            (-self.max_y, self.max_y),
            (self.min_z, self.max_z),
        )
        for axis, (low, high) in enumerate(limits):
            stacked[:, :, axis] = torch.clamp(stacked[:, :, axis], min=low, max=high)

        if not closed:
            filler = torch.full(
                [num_pts - stacked.shape[0], num_pts, stacked.shape[-1]],
                self.padding_value)
            stacked = torch.cat([stacked, filler], dim=0)
        per_instance.append(stacked)

    return torch.stack(per_instance, dim=0).to(dtype=torch.float32)
# @property
# def polyline_points(self):
# """
# return [[x0,y0],[x1,y1],...]
# """
# assert len(self.instance_list) != 0
# for instance in self.instance_list:
class VectorizedAV2LocalMap(object):
    """Convert per-class polyline annotations into vectorized ground truth.

    ``gen_vectorized_samples`` wraps every annotated polyline in a shapely
    ``LineString``, attaches its class label and optionally rasterizes the
    lines into BEV and/or perspective-view segmentation masks.
    """

    # Class name -> integer label; -1 marks instances that are dropped.
    CLASS2LABEL = {
        'divider': 0,
        'ped_crossing': 1,
        'boundary': 2,
        'centerline': 3,
        'others': -1
    }

    def __init__(self,
                 canvas_size,
                 patch_size,
                 map_classes=['divider','ped_crossing','boundary'],
                 sample_dist=1,
                 num_samples=250,
                 padding=False,
                 fixed_ptsnum_per_line=-1,
                 padding_value=-10000,
                 code_size=2,
                 min_z=-2,
                 max_z=2,
                 thickness=3,
                 aux_seg=dict(
                     use_aux_seg=False,
                     bev_seg=False,
                     pv_seg=False,
                     seg_classes=1,
                     feat_down_sample=32)):
        '''
        Args:
            canvas_size: (rows, cols) of the BEV mask.
            patch_size: (height, width) of the map patch; together with
                ``canvas_size`` it defines the BEV rasterization scale.
            map_classes: class names whose annotations are read.
            sample_dist, num_samples, padding, padding_value, code_size:
                forwarded unchanged to ``LiDARInstanceLines``.
            fixed_ptsnum_per_line = -1 : no fixed num
            min_z, max_z: accepted but not stored or used by this class.
            thickness: line thickness used for the BEV mask.
            aux_seg: auxiliary segmentation switches. ``pv_thickness`` is
                read when ``pv_seg`` is enabled (falls back to 1).
        '''
        super().__init__()
        self.vec_classes = map_classes
        self.sample_dist = sample_dist
        self.num_samples = num_samples
        self.padding = padding
        self.fixed_num = fixed_ptsnum_per_line
        self.padding_value = padding_value

        # for semantic mask
        self.patch_size = patch_size
        self.canvas_size = canvas_size
        self.thickness = thickness
        self.scale_x = self.canvas_size[1] / self.patch_size[1]
        self.scale_y = self.canvas_size[0] / self.patch_size[0]
        self.aux_seg = aux_seg
        self.code_size = code_size

    def gen_vectorized_samples(self, map_annotation, example=None, feat_down_sample=32):
        '''Build vectorized ground truth (and optional masks) for one sample.

        Args:
            map_annotation: mapping from class name to a list of point
                arrays; arrays with fewer than two rows are skipped.
            example: pipeline output; only read when ``aux_seg['pv_seg']``
                is enabled, for ``img_metas.data['pad_shape']`` and
                ``img_metas.data['lidar2img']``.
            feat_down_sample: down-sampling factor between the padded image
                and the perspective-view mask.

        Returns:
            dict with keys ``gt_vecs_pts_loc`` (a ``LiDARInstanceLines``),
            ``gt_vecs_label`` (list of int), ``gt_semantic_mask`` and
            ``gt_pv_semantic_mask`` (uint8 arrays or ``None``).
        '''
        vectors = []
        for vec_class in self.vec_classes:
            label = self.CLASS2LABEL.get(vec_class, -1)
            for instance in map_annotation[vec_class]:
                if instance.shape[0] < 2:
                    continue
                vectors.append((LineString(np.array(instance)), label))

        gt_labels = []
        gt_instance = []
        gt_semantic_mask = None
        gt_pv_semantic_mask = None

        use_aux_seg = self.aux_seg['use_aux_seg']
        draw_bev = draw_pv = False
        single_channel = True
        if use_aux_seg:
            # One shared channel, or one channel per configured class.
            single_channel = self.aux_seg['seg_classes'] == 1
            num_channels = 1 if single_channel else len(self.vec_classes)
            draw_bev = bool(self.aux_seg['bev_seg'])
            draw_pv = bool(self.aux_seg['pv_seg'])
            if draw_bev:
                gt_semantic_mask = np.zeros(
                    (num_channels, self.canvas_size[0], self.canvas_size[1]),
                    dtype=np.uint8)
            if draw_pv:
                pad_shapes = example['img_metas'].data['pad_shape']
                num_cam = len(pad_shapes)
                # Previously undefined in the multi-class branch (NameError).
                img_shape = pad_shapes[0]
                gt_pv_semantic_mask = np.zeros(
                    (num_cam, num_channels,
                     img_shape[0] // feat_down_sample,
                     img_shape[1] // feat_down_sample),
                    dtype=np.uint8)
                lidar2img = example['img_metas'].data['lidar2img']
                # Scale pixel coordinates by the same factor that sizes the
                # mask (was hard-coded to 1/32).
                scale_factor = np.eye(4)
                scale_factor[0, 0] *= 1 / feat_down_sample
                scale_factor[1, 1] *= 1 / feat_down_sample
                lidar2feat = [scale_factor @ l2i for l2i in lidar2img]
                # Fall back to line_ego_to_pvmask's own default thickness.
                pv_thickness = self.aux_seg.get('pv_thickness', 1)

        for instance, instance_type in vectors:
            if instance_type == -1:
                continue
            gt_instance.append(instance)
            gt_labels.append(instance_type)
            if not use_aux_seg:
                continue
            if instance.geom_type != 'LineString':
                print(instance.geom_type)
                continue
            channel = 0 if single_channel else instance_type
            if draw_bev:
                self.line_ego_to_mask(instance, gt_semantic_mask[channel],
                                      color=1, thickness=self.thickness)
            if draw_pv:
                for cam_index in range(num_cam):
                    self.line_ego_to_pvmask(
                        instance, gt_pv_semantic_mask[cam_index][channel],
                        lidar2feat[cam_index], color=1, thickness=pv_thickness)

        gt_instance = LiDARInstanceLines(
            gt_instance, gt_labels, self.sample_dist,
            self.num_samples, self.padding, self.fixed_num, self.padding_value,
            patch_size=self.patch_size, code_size=self.code_size)

        anns_results = dict(
            gt_vecs_pts_loc=gt_instance,
            gt_vecs_label=gt_labels,
            gt_semantic_mask=gt_semantic_mask,
            gt_pv_semantic_mask=gt_pv_semantic_mask,
        )
        return anns_results

    def line_ego_to_pvmask(self,
                           line_ego,
                           mask,
                           lidar2feat,
                           color=1,
                           thickness=1):
        ''' Rasterize a single line into one perspective-view mask.

        Args:
            line_ego (LineString): line with 3-D coordinates (the samples
                are reshaped to (-1, 3)).
            mask (array): mask to paint on, modified in place.
            lidar2feat (array): 4x4 transform handed to ``perspective``.
            color (int): value drawn, default: 1
            thickness (int): thickness of rasterized lines, default: 1
        '''
        # Sample 200 points along the line.
        distances = np.linspace(0, line_ego.length, 200)
        coords = np.array([list(line_ego.interpolate(distance).coords) for distance in distances]).reshape(-1, 3)
        pts_num = coords.shape[0]
        # Homogeneous coordinates, laid out as (4, pts_num).
        ones = np.ones((pts_num, 1))
        lidar_coords = np.concatenate([coords, ones], axis=1).transpose(1, 0)
        # NOTE(review): `perspective` is defined elsewhere; presumably it
        # returns pixel coordinates in the feature map - confirm.
        pix_coords = perspective(lidar_coords, lidar2feat)
        cv2.polylines(mask, np.int32([pix_coords]), False, color=color, thickness=thickness)

    def line_ego_to_mask(self,
                         line_ego,
                         mask,
                         color=1,
                         thickness=3):
        ''' Rasterize a single line to mask.

        Args:
            line_ego (LineString): line
            mask (array): semantic mask to paint on
            color (int): positive label, default: 1
            thickness (int): thickness of rasterized lines, default: 3
        '''
        # Scale to canvas resolution, then shift the origin to the canvas centre.
        trans_x = self.canvas_size[1] / 2
        trans_y = self.canvas_size[0] / 2
        line_ego = affinity.scale(line_ego, self.scale_x, self.scale_y, origin=(0, 0))
        line_ego = affinity.affine_transform(line_ego, [1.0, 0.0, 0.0, 1.0, trans_x, trans_y])

        # Only x/y are drawn; any further coordinate is dropped.
        coords = np.array(list(line_ego.coords), dtype=np.int32)[:, :2]
        coords = coords.reshape((-1, 2))
        assert len(coords) >= 2
        cv2.polylines(mask, np.int32([coords]), False, color=color, thickness=thickness)
@DATASETS.register_module()
class CustomAV2OfflineLocalMapDataset(CustomNuScenesDataset):
r"""NuScenes Dataset.
This dataset adds static map elements
"""
MAPCLASSES = ('divider',)
def __init__(self,
             map_ann_file=None,
             queue_length=4,
             z_cfg=dict(
                 pred_z_flag=True,
                 gt_z_flag=True,
             ),
             bev_size=(200, 200),
             pc_range=[-51.2, -51.2, -5.0, 51.2, 51.2, 3.0],
             overlap_test=False,
             fixed_ptsnum_per_line=-1,
             eval_use_same_gt_sample_num_flag=False,
             padding_value=-10000,
             map_classes=None,
             aux_seg=dict(
                 use_aux_seg=False,
                 bev_seg=False,
                 pv_seg=False,
                 seg_classes=1,
                 feat_down_sample=32,
             ),
             *args,
             **kwargs):
    """Set up the dataset and its ``VectorizedAV2LocalMap`` helper.

    Args:
        map_ann_file: path of the formatted ground-truth file.
        queue_length: length of the temporal queue.
        z_cfg: ``gt_z_flag`` selects 3 (true) or 2 coordinates per point.
        bev_size: canvas size handed to the vectorizer.
        pc_range: [x_min, y_min, z_min, x_max, y_max, z_max]; the patch
            size and the z limits are derived from it.
        overlap_test, eval_use_same_gt_sample_num_flag: stored as given.
        fixed_ptsnum_per_line: fixed number of points per line.
        padding_value: fill value for padded points.
        map_classes: resolved through ``get_map_classes``.
        aux_seg: auxiliary segmentation settings.
        *args, **kwargs: forwarded to the parent dataset.
    """
    super().__init__(*args, **kwargs)

    # Plain configuration values.
    self.map_ann_file = map_ann_file
    self.z_cfg = z_cfg
    self.code_size = 3 if z_cfg['gt_z_flag'] else 2
    self.queue_length = queue_length
    self.overlap_test = overlap_test
    self.bev_size = bev_size

    self.MAPCLASSES = self.get_map_classes(map_classes)
    self.NUM_MAPCLASSES = len(self.MAPCLASSES)

    # Geometry derived from the point-cloud range.
    self.pc_range = pc_range
    x_min, y_min, z_min, x_max, y_max, z_max = (pc_range[i] for i in range(6))
    self.patch_size = (y_max - y_min, x_max - x_min)
    self.min_z = z_min
    self.max_z = z_max

    self.padding_value = padding_value
    self.fixed_num = fixed_ptsnum_per_line
    self.eval_use_same_gt_sample_num_flag = eval_use_same_gt_sample_num_flag
    self.aux_seg = aux_seg

    self.vector_map = VectorizedAV2LocalMap(
        canvas_size=bev_size,
        patch_size=self.patch_size,
        map_classes=self.MAPCLASSES,
        fixed_ptsnum_per_line=fixed_ptsnum_per_line,
        padding_value=self.padding_value,
        code_size=self.code_size,
        min_z=self.min_z,
        max_z=self.max_z,
        aux_seg=aux_seg)
    self.is_vis_on_test = False
def load_annotations(self, ann_file):
    """Read the pickled annotation file and return its samples.

    Args:
        ann_file (str): Path of the annotation file.

    Returns:
        list[dict]: Entries of ``data['samples']`` ordered by their
        ``timestamp`` and sub-sampled with step ``self.load_interval``.
    """
    content = mmcv.load(ann_file, file_format='pkl')
    ordered = sorted(content['samples'], key=lambda sample: sample['timestamp'])
    selected = ordered[::self.load_interval]
    # This annotation format carries no metadata or version.
    self.metadata = None
    self.version = None
    return selected
@classmethod
def get_map_classes(cls, map_classes=None):
    """Resolve the map class names used by the dataset.

    Args:
        map_classes (Sequence[str] | str | None): ``None`` selects
            ``cls.MAPCLASSES``. A string is taken as a file path and read
            with ``mmcv.list_from_file``. A tuple or list is returned
            unchanged.

    Return:
        list[str] | tuple[str]: The class names.

    Raises:
        ValueError: If ``map_classes`` has any other type.
    """
    if map_classes is None:
        return cls.MAPCLASSES
    if isinstance(map_classes, str):
        # take it as a file path
        return mmcv.list_from_file(map_classes)
    if isinstance(map_classes, (tuple, list)):
        return map_classes
    raise ValueError(f'Unsupported type {type(map_classes)} of map classes.')
def vectormap_pipeline(self, example, input_dict):
    '''Attach vectorized map ground truth to a pipeline output.

    Args:
        example (dict): pipeline output, updated in place. It is handed
            to ``gen_vectorized_samples``, which reads ``img_metas`` when
            perspective-view segmentation is enabled.
        input_dict (dict): sample info; the map annotation is read from
            ``'annotation'`` when present, otherwise from ``'ann_info'``.

    Returns:
        dict: ``example`` with ``gt_labels_3d`` and ``gt_bboxes_3d`` set,
        plus ``gt_seg_mask`` / ``gt_pv_seg_mask`` when the corresponding
        mask was generated.
    '''
    if 'annotation' in input_dict:
        map_annotation = input_dict['annotation']
    else:
        map_annotation = input_dict['ann_info']
    anns_results = self.vector_map.gen_vectorized_samples(
        map_annotation,
        example=example,
        feat_down_sample=self.aux_seg['feat_down_sample'])

    gt_vecs_label = to_tensor(anns_results['gt_vecs_label'])
    gt_vecs_pts_loc = anns_results['gt_vecs_pts_loc']
    if not isinstance(gt_vecs_pts_loc, LiDARInstanceLines):
        gt_vecs_pts_loc = to_tensor(gt_vecs_pts_loc)
        try:
            gt_vecs_pts_loc = gt_vecs_pts_loc.flatten(1).to(dtype=torch.float32)
        except (IndexError, RuntimeError):
            # An empty (1-D) tensor cannot be flattened from dim 1. It is
            # skipped during training but preserved unchanged for test.
            pass

    example['gt_labels_3d'] = DC(gt_vecs_label, cpu_only=False)
    example['gt_bboxes_3d'] = DC(gt_vecs_pts_loc, cpu_only=True)

    if anns_results['gt_semantic_mask'] is not None:
        example['gt_seg_mask'] = DC(to_tensor(anns_results['gt_semantic_mask']), cpu_only=False)
    if anns_results['gt_pv_semantic_mask'] is not None:
        example['gt_pv_seg_mask'] = DC(to_tensor(anns_results['gt_pv_semantic_mask']), cpu_only=False)
    return example
def prepare_train_data(self, index):
    """Build one training sample.

    Args:
        index (int): Index for accessing the target data.

    Returns:
        dict | None: The sample merged by ``union2one``. ``None`` when no
        data info is available, or when ``filter_empty_gt`` is set and no
        label differs from -1.
    """
    # temporal aug: the shuffled history indices are not consumed below,
    # but the shuffle still advances the global ``random`` state.
    prev_indexs_list = list(range(index - self.queue_length, index))
    random.shuffle(prev_indexs_list)
    prev_indexs_list = sorted(prev_indexs_list[1:], reverse=True)

    input_dict = self.get_data_info(index)
    if input_dict is None:
        return None
    frame_idx = input_dict['timestamp']
    scene_token = input_dict['log_id']

    self.pre_pipeline(input_dict)
    example = self.vectormap_pipeline(self.pipeline(input_dict), input_dict)

    if self.filter_empty_gt:
        if example is None:
            return None
        if ~(example['gt_labels_3d']._data != -1).any():
            return None

    return self.union2one([example])
def union2one(self, queue):
    """
    Merge a queue of samples into its last element.

    The images of all samples are stacked into ``queue[-1]['img']`` and
    the per-sample metas are collected in a dict keyed by queue position.
    The first sample gets ``prev_bev=False`` and its ``can_bus`` position
    (first three entries) and angle (last entry) are zeroed. Every later
    sample gets ``prev_bev=True`` and has them replaced by the difference
    to the previous sample. The metas are modified in place.
    """
    stacked_imgs = [sample['img'].data for sample in queue]
    metas_map = {}
    prev_pos = None
    prev_angle = None
    for i, sample in enumerate(queue):
        meta = sample['img_metas'].data
        metas_map[i] = meta
        can_bus = meta['can_bus']
        is_first = i == 0
        meta['prev_bev'] = not is_first
        # Remember the absolute pose before it is overwritten.
        cur_pos = copy.deepcopy(can_bus[:3])
        cur_angle = copy.deepcopy(can_bus[-1])
        if is_first:
            can_bus[:3] = 0
            can_bus[-1] = 0
        else:
            can_bus[:3] -= prev_pos
            can_bus[-1] -= prev_angle
        prev_pos = copy.deepcopy(cur_pos)
        prev_angle = copy.deepcopy(cur_angle)

    last = queue[-1]
    last['img'] = DC(torch.stack(stacked_imgs),
                     cpu_only=False, stack=True)
    last['img_metas'] = DC(metas_map, cpu_only=True)
    return last
def get_data_info(self, index):
    """Assemble the pipeline input for one sample.

    Args:
        index (int): Index of the sample data to get.

    Returns:
        dict: Data information passed to the preprocessing pipeline:

            - timestamp, pts_filename, lidar_path, log_id, scene_token
            - ego2global_translation, ego2global_rotation
            - ann_info: the sample's ``annotation`` entry
            - can_bus (np.ndarray): 18 values holding translation,
              rotation quaternion and yaw (radians and degrees)
            - when cameras are used: img_filename, lidar2img,
              camera_intrinsics, ego2cam, camera2ego, cam_type,
              lidar2ego and camego2global
    """
    info = self.data_infos[index]
    # standard protocol modified from SECOND.Pytorch
    input_dict = {
        'timestamp': info['timestamp'],
        'pts_filename': info['lidar_path'],
        'lidar_path': info['lidar_path'],
        'ego2global_translation': info['e2g_translation'],
        'ego2global_rotation': info['e2g_rotation'],
        'log_id': info['log_id'],
        'scene_token': info['log_id'],
    }

    if self.modality['use_camera']:
        image_paths = []
        cam_intrinsics = []
        ego2img_rts = []
        ego2cam_rts = []
        cam_types = []
        cam2ego_rts = []
        input_dict["camego2global"] = []
        for cam_type, cam_info in info['cams'].items():
            image_paths.append(cam_info['img_fpath'])

            # Not consumed below; the assignment still fails here if the
            # intrinsics cannot be broadcast into a 3x3 block.
            camera_intrinsics = np.eye(4).astype(np.float32)
            camera_intrinsics[:3, :3] = cam_info["intrinsics"]

            # ego2img, ego = lidar
            ego2cam_rt = cam_info['extrinsics']
            cam2ego_rts.append(np.matrix(ego2cam_rt).I)

            # Embed the intrinsics into a 4x4 matrix and chain it with
            # the extrinsics.
            intrinsic = cam_info['intrinsics']
            viewpad = np.eye(4)
            rows, cols = intrinsic.shape[0], intrinsic.shape[1]
            viewpad[:rows, :cols] = intrinsic
            ego2img_rts.append(viewpad @ ego2cam_rt)
            ego2cam_rts.append(ego2cam_rt)
            cam_intrinsics.append(viewpad)
            cam_types.append(cam_type)

            pose = np.eye(4, dtype=np.float32)
            pose[:3, :3] = cam_info['e2g_rotation']
            pose[:3, 3] = cam_info['e2g_translation']
            input_dict["camego2global"].append(torch.from_numpy(pose))

        lidar2ego = np.eye(4).astype(np.float32)
        input_dict.update(
            img_filename=image_paths,
            lidar2img=ego2img_rts,  # lidar and ego are treated as the same frame
            camera_intrinsics=cam_intrinsics,
            ego2cam=ego2cam_rts,
            camera2ego=cam2ego_rts,
            cam_type=cam_types,
            lidar2ego=lidar2ego,
        )

    input_dict['ann_info'] = info['annotation']

    # can_bus layout: [0:3] translation, [3:7] rotation quaternion,
    # [-2] yaw in radians, [-1] yaw in degrees; other entries stay 1.
    can_bus = np.ones(18)
    can_bus[:3] = input_dict['ego2global_translation']
    rotation = Quaternion._from_matrix(input_dict['ego2global_rotation'])
    can_bus[3:7] = rotation
    patch_angle = quaternion_yaw(rotation) / np.pi * 180
    if patch_angle < 0:
        patch_angle += 360
    can_bus[-2] = patch_angle / 180 * np.pi
    can_bus[-1] = patch_angle
    input_dict['can_bus'] = can_bus
    return input_dict
def prepare_test_data(self, index):
    """Build one test sample.

    Args:
        index (int): Index for accessing the target data.

    Returns:
        dict: Pipeline output for the sample. The vectorized map ground
        truth is attached only when ``self.is_vis_on_test`` is set.
    """
    input_dict = self.get_data_info(index)
    self.pre_pipeline(input_dict)
    example = self.pipeline(input_dict)
    if not self.is_vis_on_test:
        return example
    return self.vectormap_pipeline(example, input_dict)
def __getitem__(self, idx):
"""Get item from infos according to the given index.
Returns:
dict: Data dictionary of the corresponding index.
"""
if self.test_mode:
return self.prepare_test_data(idx)
while True:
data = self.prepare_train_data(idx)
if data is None:
idx = self._rand_another(idx)
continue
return data
def _format_gt(self):
gt_annos = []
# import ipdb;ipdb.set_trace()
print('Start to convert gt map format...')
assert self.map_ann_file is not None
if (not os.path.exists(self.map_ann_file)) :
dataset_length = len(self)
prog_bar = mmcv.ProgressBar(dataset_length)
mapped_class_names = self.MAPCLASSES
for sample_id in range(dataset_length):
sample_token = self.data_infos[sample_id]['token']
gt_anno = {}
gt_anno['sample_token'] = sample_token
# gt_sample_annos = []
gt_sample_dict = {}
gt_sample_dict = self.vectormap_pipeline(gt_sample_dict, self.data_infos[sample_id])
gt_labels = gt_sample_dict['gt_labels_3d'].data.numpy()
gt_vecs = gt_sample_dict['gt_bboxes_3d'].data.instance_list
# import pdb;pdb.set_trace()
gt_vec_list = []
for i, (gt_label, gt_vec) in enumerate(zip(gt_labels, gt_vecs)):
name = mapped_class_names[gt_label]
anno = dict(
pts=np.array(list(gt_vec.coords))[:,:self.code_size],
pts_num=len(list(gt_vec.coords)),
cls_name=name,
type=gt_label,
)
gt_vec_list.append(anno)
gt_anno['vectors']=gt_vec_list
gt_annos.append(gt_anno)
prog_bar.update()
nusc_submissions = {
'GTs': gt_annos
}
print('\n GT anns writes to', self.map_ann_file)
mmcv.dump(nusc_submissions, self.map_ann_file)
else:
print(f'{self.map_ann_file} exist, not update')
def _format_bbox(self, results, jsonfile_prefix=None):
    """Dump the predictions in the map-evaluation format.

    The ground-truth file is generated first when it does not exist.

    Args:
        results (list[dict]): Testing results of the dataset.
        jsonfile_prefix (str): Output directory; it is created if needed
            and the results go to ``av2map_results.json`` inside it.

    Returns:
        str: Path of the output json file.
    """
    assert self.map_ann_file is not None
    class_names = self.MAPCLASSES
    print('Start to convert map detection format...')

    pred_annos = []
    for sample_id, det in enumerate(mmcv.track_iter_progress(results)):
        vecs = output_to_vecs(det)
        pred_anno = {'sample_token': self.data_infos[sample_id]['token']}
        pred_vec_list = []
        for vec in vecs:
            cls_name = class_names[vec['label']]
            pred_vec_list.append(dict(
                pts=vec['pts'],
                pts_num=len(vec['pts']),
                cls_name=cls_name,
                type=vec['label'],
                confidence_level=vec['score']))
        pred_anno['vectors'] = pred_vec_list
        pred_annos.append(pred_anno)

    if os.path.exists(self.map_ann_file):
        print(f'{self.map_ann_file} exist, not update')
    else:
        self._format_gt()

    nusc_submissions = {
        'meta': self.modality,
        'results': pred_annos,
    }

    mmcv.mkdir_or_exist(jsonfile_prefix)
    res_path = osp.join(jsonfile_prefix, 'av2map_results.json')
    print('Results writes to', res_path)
    mmcv.dump(nusc_submissions, res_path)
    return res_path
def to_gt_vectors(self,
                  gt_dict):
    """Resample every ground-truth instance into a plain vector dict.

    Args:
        gt_dict (dict): holds ``gt_labels_3d`` (``.data`` iterates the
            labels) and ``gt_bboxes_3d`` (``.data.instance_list`` iterates
            the instance geometries).

    Returns:
        list[dict]: One ``{'pts', 'pts_num', 'type'}`` entry per instance,
        with ``pts``/``pts_num`` as returned by ``sample_pts_from_line``.
    """
    gt_labels = gt_dict['gt_labels_3d'].data
    gt_instances = gt_dict['gt_bboxes_3d'].data.instance_list

    gt_vectors = []
    for gt_instance, gt_label in zip(gt_instances, gt_labels):
        pts, pts_num = sample_pts_from_line(gt_instance, patch_size=self.patch_size)
        gt_vectors.append({
            'pts': pts,
            'pts_num': pts_num,
            'type': int(gt_label)
        })
    # A per-class grouping used to be built here, but it was never returned
    # and referenced an undefined name (`vector`), raising NameError for
    # any non-empty input. It has been removed.
    return gt_vectors
def _evaluate_single(self,
                     result_path,
                     logger=None,
                     metric='chamfer',
                     result_name='pts_bbox'):
    """Score one result file against the ground-truth annotation file.

    Args:
        result_path (str): Path of the JSON result file; its ``results``
            entry holds the predictions.
        logger (logging.Logger | str | None): Forwarded to ``eval_map``.
            Default: None.
        metric (str | list[str]): One of ``'chamfer'`` / ``'iou'`` or a
            list of them. Default: 'chamfer'.
        result_name (str): Accepted for interface compatibility; not read
            by this method. Default: 'pts_bbox'.

    Returns:
        dict: Maps ``AV2Map_<metric>/...`` keys to per-class AP, mAP and
        per-threshold AP values.

    Raises:
        KeyError: If a requested metric is neither 'chamfer' nor 'iou'.
    """
    from projects.mmdet3d_plugin.datasets.map_utils.mean_ap import eval_map
    from projects.mmdet3d_plugin.datasets.map_utils.mean_ap import format_res_gt_by_classes

    result_path = osp.abspath(result_path)
    detail = dict()

    print('Formating results & gts by classes')
    with open(result_path, 'r') as f:
        gen_results = json.load(f)['results']
    with open(self.map_ann_file, 'r') as ann_f:
        annotations = json.load(ann_f)['GTs']

    cls_gens, cls_gts = format_res_gt_by_classes(
        result_path,
        gen_results,
        annotations,
        cls_names=self.MAPCLASSES,
        num_pred_pts_per_instance=self.fixed_num,
        eval_use_same_gt_sample_num_flag=self.eval_use_same_gt_sample_num_flag,
        pc_range=self.pc_range,
        code_size=self.code_size)

    metrics = [metric] if not isinstance(metric, list) else metric
    # Reject unsupported metrics before any evaluation work is done.
    for requested in metrics:
        if requested not in ('chamfer', 'iou'):
            raise KeyError(f'metric {requested} is not supported')

    banner = '-*' * 10
    for metric_name in metrics:
        print(banner + f'use metric:{metric_name}' + banner)

        if metric_name == 'chamfer':
            thresholds = [0.5, 1.0, 1.5]
        elif metric_name == 'iou':
            thresholds = np.linspace(.5, 0.95, int(np.round((0.95 - .5) / .05)) + 1, endpoint=True)

        # Rows are thresholds, columns are map classes.
        cls_aps = np.zeros((len(thresholds), self.NUM_MAPCLASSES))
        for thr_idx, thr in enumerate(thresholds):
            print(banner + f'threshhold:{thr}' + banner)
            _, cls_ap = eval_map(
                gen_results,
                annotations,
                cls_gens,
                cls_gts,
                threshold=thr,
                cls_names=self.MAPCLASSES,
                logger=logger,
                num_pred_pts_per_instance=self.fixed_num,
                pc_range=self.pc_range,
                metric=metric_name,
                code_size=self.code_size)
            for cls_idx in range(self.NUM_MAPCLASSES):
                cls_aps[thr_idx, cls_idx] = cls_ap[cls_idx]['ap']

        # AP of every class averaged over all thresholds.
        per_class_ap = cls_aps.mean(0)
        for cls_idx, cls_name in enumerate(self.MAPCLASSES):
            print(f'{cls_name}: {per_class_ap[cls_idx]}')
            detail[f'AV2Map_{metric_name}/{cls_name}_AP'] = per_class_ap[cls_idx]

        mean_ap = per_class_ap.mean()
        print(f'map: {mean_ap}')
        detail[f'AV2Map_{metric_name}/mAP'] = mean_ap

        for cls_idx, cls_name in enumerate(self.MAPCLASSES):
            for thr_idx, thr in enumerate(thresholds):
                # 'chamfer' reports every threshold; 'iou' only 0.5 and 0.75.
                if metric_name == 'iou' and not (thr == 0.5 or thr == 0.75):
                    continue
                detail[f'AV2Map_{metric_name}/{cls_name}_AP_thr_{thr}'] = cls_aps[thr_idx][cls_idx]

    return detail
def evaluate(self,
             results,
             metric='bbox',
             logger=None,
             jsonfile_prefix=None,
             result_names=['pts_bbox'],
             show=False,
             out_dir=None,
             pipeline=None):
    """Format the results and evaluate them.

    Args:
        results (list[dict]): Testing results of the dataset.
        metric (str | list[str]): Metrics to be evaluated; forwarded
            unchanged to ``_evaluate_single``.
        logger (logging.Logger | str | None): Accepted for interface
            compatibility; not forwarded by this method. Default: None.
        jsonfile_prefix (str | None): The prefix of json files, passed to
            ``format_results``. Default: None.
        result_names (list[str]): Keys to evaluate when ``format_results``
            returns a dict of result files. The default list is never
            mutated here. Default: ['pts_bbox'].
        show (bool): Whether to visualize. Default: False.
        out_dir (str): Path to save the visualization results.
            Default: None.
        pipeline (list[dict], optional): raw data loading for showing.
            Default: None.

    Returns:
        dict[str, float]: Results of each evaluation metric.

    Raises:
        TypeError: If ``format_results`` returns something that is neither
            a dict nor a str.
    """
    result_files, tmp_dir = self.format_results(results, jsonfile_prefix)

    try:
        if isinstance(result_files, dict):
            results_dict = dict()
            for name in result_names:
                print('Evaluating bboxes of {}'.format(name))
                # Merge per name so an empty ``result_names`` yields an
                # empty dict instead of touching an unbound variable.
                results_dict.update(
                    self._evaluate_single(result_files[name], metric=metric))
        elif isinstance(result_files, str):
            results_dict = self._evaluate_single(result_files, metric=metric)
        else:
            raise TypeError(
                'result_files must be a dict or a str, but got '
                f'{type(result_files).__name__}')
    finally:
        # Always release the temporary directory, even when evaluation fails.
        if tmp_dir is not None:
            tmp_dir.cleanup()

    if show:
        self.show(results, out_dir, pipeline=pipeline)
    return results_dict
def output_to_vecs(detection):
    """Split a batched detection dict into one dict per predicted vector.

    Args:
        detection (dict): Holds ``boxes_3d``, ``scores_3d``, ``labels_3d``
            and ``pts_3d``; each value must offer ``.numpy()``.

    Returns:
        list[dict]: One entry per row of ``boxes_3d`` with the keys
        ``bbox``, ``label``, ``score`` and ``pts``.
    """
    boxes = detection['boxes_3d'].numpy()
    confidences = detection['scores_3d'].numpy()
    classes = detection['labels_3d'].numpy()
    points = detection['pts_3d'].numpy()

    # The number of boxes drives the iteration; the other arrays are indexed
    # with the same position.
    return [
        dict(
            bbox=boxes[idx],  # xyxy
            label=classes[idx],
            score=confidences[idx],
            pts=points[idx],
        )
        for idx in range(boxes.shape[0])
    ]
def sample_pts_from_line(line,
                         fixed_num=-1,
                         sample_dist=1,
                         normalize=False,
                         patch_size=None,
                         padding=False,
                         num_samples=250,):
    """Sample points along a line geometry.

    Args:
        line: Geometry exposing ``length``, ``has_z`` and
            ``interpolate(distance)`` whose result has ``coords``
            (presumably a shapely ``LineString`` - confirm against callers).
        fixed_num (int): If negative, sample every ``sample_dist`` along
            the line; otherwise sample ``fixed_num`` evenly spaced points
            including both ends. Default: -1.
        sample_dist (float): Spacing used when ``fixed_num`` is negative.
            Default: 1.
        normalize (bool): If True, divide x by ``patch_size[1]`` and y by
            ``patch_size[0]``. Default: False.
        patch_size (sequence | None): Only read when ``normalize`` is True.
        padding (bool): If True and ``fixed_num`` is not positive, pad with
            zero rows (or truncate) to ``num_samples`` rows. Default: False.
        num_samples (int): Row count of the padded output. Default: 250.

    Returns:
        tuple: ``(points, num_valid)``. Without padding (or with a positive
        ``fixed_num``) ``points`` keeps every coordinate column. With
        padding only the first two columns are returned and ``num_valid``
        is the number of real (non-padding) rows.
    """
    if fixed_num < 0:
        distances = np.arange(0, line.length, sample_dist)
    else:
        # fixed number of points, so distance is line.length / fixed_num
        distances = np.linspace(0, line.length, fixed_num)

    coord_dim = 3 if line.has_z else 2
    sampled_points = np.array(
        [list(line.interpolate(distance).coords) for distance in distances]
    ).reshape(-1, coord_dim)

    if normalize:
        sampled_points[:, :2] = sampled_points[:, :2] / np.array([patch_size[1], patch_size[0]])

    num_valid = len(sampled_points)

    if not padding or fixed_num > 0:
        # fixed num sample can return now!
        return sampled_points, num_valid

    # fixed distance sampling need padding!
    if fixed_num < 0:
        if num_valid < num_samples:
            pad_rows = np.zeros((num_samples - num_valid, sampled_points.shape[-1]))
            sampled_points = np.concatenate([sampled_points, pad_rows], axis=0)
        else:
            sampled_points = sampled_points[:num_samples, :]
            num_valid = num_samples
        # The points were already normalized above. The previous second
        # normalization here divided them by the patch size twice and reset
        # ``num_valid`` to the padded length, so it was dropped.

    return sampled_points[:, :2], num_valid
\ No newline at end of file
# Copyright (c) OpenMMLab. All rights reserved.
import copy
import platform
import random
from functools import partial
import numpy as np
from mmcv.parallel import collate
from mmcv.runner import get_dist_info
from mmcv.utils import Registry, build_from_cfg
from torch.utils.data import DataLoader
from mmdet.datasets.samplers import GroupSampler
from projects.mmdet3d_plugin.datasets.samplers.group_sampler import DistributedGroupSampler
from projects.mmdet3d_plugin.datasets.samplers.distributed_sampler import DistributedSampler
from projects.mmdet3d_plugin.datasets.samplers.sampler import build_sampler
def build_dataloader(dataset,
                     samples_per_gpu,
                     workers_per_gpu,
                     num_gpus=1,
                     dist=True,
                     shuffle=True,
                     seed=None,
                     shuffler_sampler=None,
                     nonshuffler_sampler=None,
                     **kwargs):
    """Build PyTorch DataLoader.

    In distributed training, each GPU/process has a dataloader.
    In non-distributed training, there is only one dataloader for all GPUs.

    Args:
        dataset (Dataset): A PyTorch dataset.
        samples_per_gpu (int): Number of training samples on each GPU, i.e.,
            batch size of each GPU.
        workers_per_gpu (int): How many subprocesses to use for data loading
            for each GPU.
        num_gpus (int): Number of GPUs. Only used in non-distributed training.
        dist (bool): Distributed training/test or not. Default: True.
        shuffle (bool): Whether to shuffle the data at every epoch.
            Default: True.
        seed (int | None): When given, it is passed to the distributed
            sampler and used to seed every worker. Default: None.
        shuffler_sampler (dict | None): Sampler config used when ``dist`` and
            ``shuffle`` are both set; falls back to
            ``dict(type='DistributedGroupSampler')``. Default: None.
        nonshuffler_sampler (dict | None): Sampler config used when ``dist``
            is set and ``shuffle`` is not; falls back to
            ``dict(type='DistributedSampler')``. Default: None.
        kwargs: any keyword argument to be used to initialize DataLoader

    Returns:
        DataLoader: A PyTorch dataloader.
    """
    rank, world_size = get_dist_info()

    if not dist:
        # assert False, 'not support in bevformer'
        print('WARNING!!!!, Only can be used for obtain inference speed!!!!')
        sampler = None
        if shuffle:
            sampler = GroupSampler(dataset, samples_per_gpu)
        batch_size = num_gpus * samples_per_gpu
        num_workers = num_gpus * workers_per_gpu
    else:
        # DistributedGroupSampler will definitely shuffle the data to satisfy
        # that images on each GPU are in the same group
        if shuffle:
            sampler_cfg = shuffler_sampler
            if sampler_cfg is None:
                sampler_cfg = dict(type='DistributedGroupSampler')
            sampler_args = dict(
                dataset=dataset,
                samples_per_gpu=samples_per_gpu,
                num_replicas=world_size,
                rank=rank,
                seed=seed)
        else:
            sampler_cfg = nonshuffler_sampler
            if sampler_cfg is None:
                sampler_cfg = dict(type='DistributedSampler')
            sampler_args = dict(
                dataset=dataset,
                num_replicas=world_size,
                rank=rank,
                shuffle=shuffle,
                seed=seed)
        sampler = build_sampler(sampler_cfg, sampler_args)
        batch_size = samples_per_gpu
        num_workers = workers_per_gpu

    # Workers are only seeded when the caller supplied a seed.
    init_fn = None
    if seed is not None:
        init_fn = partial(
            worker_init_fn, num_workers=num_workers, rank=rank, seed=seed)

    return DataLoader(
        dataset,
        batch_size=batch_size,
        sampler=sampler,
        num_workers=num_workers,
        collate_fn=partial(collate, samples_per_gpu=samples_per_gpu),
        pin_memory=True,
        worker_init_fn=init_fn,
        **kwargs)
def worker_init_fn(worker_id, num_workers, rank, seed):
    """Seed NumPy's and Python's random generators for one loader worker.

    The worker seed is ``num_workers * rank + worker_id + seed``.

    Args:
        worker_id (int): Index of the worker.
        num_workers (int): Number of workers per process.
        rank (int): Rank of the current process.
        seed (int): User supplied base seed.
    """
    offset = num_workers * rank + worker_id
    worker_seed = offset + seed
    for seed_fn in (np.random.seed, random.seed):
        seed_fn(worker_seed)
# Copyright (c) OpenMMLab. All rights reserved.
import platform
from mmcv.utils import Registry, build_from_cfg
from mmdet.datasets import DATASETS
from mmdet.datasets.builder import _concat_dataset
if platform.system() != 'Windows':
    # https://github.com/pytorch/pytorch/issues/973
    # On non-Windows platforms, set the soft limit on open file descriptors
    # to at least 4096, but never above the hard limit. This runs once, at
    # import time.
    import resource
    rlimit = resource.getrlimit(resource.RLIMIT_NOFILE)
    base_soft_limit = rlimit[0]
    hard_limit = rlimit[1]
    soft_limit = min(max(4096, base_soft_limit), hard_limit)
    resource.setrlimit(resource.RLIMIT_NOFILE, (soft_limit, hard_limit))

# Registry created with the name 'Object sampler'.
OBJECTSAMPLERS = Registry('Object sampler')
def custom_build_dataset(cfg, default_args=None):
    """Recursively build a dataset (or dataset wrapper) from a config.

    Args:
        cfg (dict | list | tuple): A dataset config, or a sequence of
            configs that is concatenated into one ``ConcatDataset``.
        default_args (dict | None): Passed through to every nested build.
            Default: None.

    Returns:
        The built dataset. Wrapper types (``ConcatDataset``,
        ``RepeatDataset``, ``ClassBalancedDataset``, ``CBGSDataset``) wrap
        the recursively built inner dataset(s); any other type is built
        from the ``DATASETS`` registry.
    """
    from mmdet3d.datasets.dataset_wrappers import CBGSDataset
    from mmdet.datasets.dataset_wrappers import (ClassBalancedDataset,
                                                 ConcatDataset, RepeatDataset)

    if isinstance(cfg, (list, tuple)):
        members = [custom_build_dataset(c, default_args) for c in cfg]
        return ConcatDataset(members)

    dataset_type = cfg['type']

    if dataset_type == 'ConcatDataset':
        members = [custom_build_dataset(c, default_args) for c in cfg['datasets']]
        return ConcatDataset(members, cfg.get('separate_eval', True))

    if dataset_type == 'RepeatDataset':
        inner = custom_build_dataset(cfg['dataset'], default_args)
        return RepeatDataset(inner, cfg['times'])

    if dataset_type == 'ClassBalancedDataset':
        inner = custom_build_dataset(cfg['dataset'], default_args)
        return ClassBalancedDataset(inner, cfg['oversample_thr'])

    if dataset_type == 'CBGSDataset':
        return CBGSDataset(custom_build_dataset(cfg['dataset'], default_args))

    # A config with several annotation files is handed to _concat_dataset.
    if isinstance(cfg.get('ann_file'), (list, tuple)):
        return _concat_dataset(cfg, default_args)

    return build_from_cfg(cfg, DATASETS, default_args)
# from .CD_loss import MyChamferDistance
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment