Commit e5b1ec11 authored by wuyuefeng, committed by zhangwenwei

Head docs

parent 7f7c73a9
@@ -28,7 +28,8 @@ class PartialBinBasedBBoxCoder(BaseBBoxCoder):
        """Encode ground truth to prediction targets.
        Args:
-            gt_bboxes_3d (BaseInstance3DBoxes): gt bboxes with shape (n, 7).
+            gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): gt bboxes with
+                shape (n, 7).
            gt_labels_3d (Tensor): gt classes.
        Returns:
......
@@ -213,7 +213,8 @@ def indoor_eval(gt_annos,
        dt_annos (list[dict]): Detection annotations. the dict
            includes the following keys
            - labels_3d (Tensor): Labels of boxes.
-            - boxes_3d (BaseInstance3DBoxes): 3d bboxes in Depth coordinate.
+            - boxes_3d (:obj:`BaseInstance3DBoxes`): 3d bboxes in
+                Depth coordinate.
            - scores_3d (Tensor): Scores of boxes.
        metric (list[float]): AP IoU thresholds.
        label2cat (dict): {label: cat}.
......
@@ -19,8 +19,8 @@ class Anchor3DHead(nn.Module, AnchorTrainMixin):
    Args:
        num_classes (int): Number of classes.
        in_channels (int): Number of channels in the input feature map.
-        train_cfg (dict): train configs
-        test_cfg (dict): test configs
+        train_cfg (dict): Train configs.
+        test_cfg (dict): Test configs.
        feat_channels (int): Number of channels of the feature map.
        use_direction_classifier (bool): Whether to add a direction classifier.
        anchor_generator(dict): Config dict of anchor generator.
@@ -29,11 +29,11 @@ class Anchor3DHead(nn.Module, AnchorTrainMixin):
        assign_per_class (bool): Whether to do assignment for each class.
        diff_rad_by_sin (bool): Whether to change the difference into sin
            difference for box regression loss.
-        dir_offset (float | int): The offset of BEV rotation angles
+        dir_offset (float | int): The offset of BEV rotation angles.
            (TODO: may be moved into box coder)
-        dirlimit_offset (float | int): The limited range of BEV rotation angles
-            (TODO: may be moved into box coder)
+        dir_limit_offset (float | int): The limited range of BEV
+            rotation angles. (TODO: may be moved into box coder)
-        box_coder (dict): Config dict of box coders.
+        bbox_coder (dict): Config dict of box coders.
        loss_cls (dict): Config of classification loss.
        loss_bbox (dict): Config of localization loss.
        loss_dir (dict): Config of direction classifier loss.
@@ -131,6 +131,15 @@ class Anchor3DHead(nn.Module, AnchorTrainMixin):
        normal_init(self.conv_reg, std=0.01)
    def forward_single(self, x):
"""Forward function on a single-scale feature map.
Args:
x (Tensor): Input features.
Returns:
tuple[Tensor]: Contain score of each class, bbox predictions
and class predictions of direction.
"""
cls_score = self.conv_cls(x) cls_score = self.conv_cls(x)
bbox_pred = self.conv_reg(x) bbox_pred = self.conv_reg(x)
dir_cls_preds = None dir_cls_preds = None
...@@ -139,6 +148,16 @@ class Anchor3DHead(nn.Module, AnchorTrainMixin): ...@@ -139,6 +148,16 @@ class Anchor3DHead(nn.Module, AnchorTrainMixin):
return cls_score, bbox_pred, dir_cls_preds return cls_score, bbox_pred, dir_cls_preds
def forward(self, feats): def forward(self, feats):
"""Forward pass.
Args:
feats (list[Tensor]): Multi-level features, e.g.,
features produced by FPN.
Returns:
tuple[list[Tensor]]: Multi-level class score, bbox
and direction predictions.
"""
return multi_apply(self.forward_single, feats) return multi_apply(self.forward_single, feats)
def get_anchors(self, featmap_sizes, input_metas, device='cuda'): def get_anchors(self, featmap_sizes, input_metas, device='cuda'):
...@@ -163,6 +182,24 @@ class Anchor3DHead(nn.Module, AnchorTrainMixin): ...@@ -163,6 +182,24 @@ class Anchor3DHead(nn.Module, AnchorTrainMixin):
def loss_single(self, cls_score, bbox_pred, dir_cls_preds, labels, def loss_single(self, cls_score, bbox_pred, dir_cls_preds, labels,
label_weights, bbox_targets, bbox_weights, dir_targets, label_weights, bbox_targets, bbox_weights, dir_targets,
dir_weights, num_total_samples): dir_weights, num_total_samples):
"""Calculate loss of Single-level results.
Args:
cls_score (Tensor): Class score in single-level.
bbox_pred (Tensor): Bbox prediction in single-level.
dir_cls_preds (Tensor): Predictions of direction class
in single-level.
labels (Tensor): Labels of class.
label_weights (Tensor): Weights of class loss.
bbox_targets (Tensor): Targets of bbox predictions.
bbox_weights (Tensor): Weights of bbox loss.
dir_targets (Tensor): Targets of direction predictions.
dir_weights (Tensor): Weights of direction loss.
num_total_samples (int): The number of valid samples.
Returns:
tuple[Tensor]: losses of class, bbox and direction, respectively.
"""
# classification loss # classification loss
if num_total_samples is None: if num_total_samples is None:
num_total_samples = int(cls_score.shape[0]) num_total_samples = int(cls_score.shape[0])
...@@ -235,6 +272,22 @@ class Anchor3DHead(nn.Module, AnchorTrainMixin): ...@@ -235,6 +272,22 @@ class Anchor3DHead(nn.Module, AnchorTrainMixin):
gt_labels, gt_labels,
input_metas, input_metas,
gt_bboxes_ignore=None): gt_bboxes_ignore=None):
"""Calculate losses.
Args:
cls_scores (list[Tensor]): Multi-level class scores.
bbox_preds (list[Tensor]): Multi-level bbox predictions.
dir_cls_preds (list[Tensor]): Multi-level direction
class predictions.
gt_bboxes (list[:obj:BaseInstance3DBoxes]): Gt bboxes
of each sample.
gt_labels (list[Tensor]): Gt labels of each sample.
input_metas (list[dict]): Contain pcd and img's meta info.
gt_bboxes_ignore (None | list[Tensor]): Specify which bounding.
Returns:
dict: Contain class, bbox and direction losses of each level.
"""
featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores] featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores]
assert len(featmap_sizes) == self.anchor_generator.num_levels assert len(featmap_sizes) == self.anchor_generator.num_levels
device = cls_scores[0].device device = cls_scores[0].device
...@@ -284,6 +337,20 @@ class Anchor3DHead(nn.Module, AnchorTrainMixin): ...@@ -284,6 +337,20 @@ class Anchor3DHead(nn.Module, AnchorTrainMixin):
input_metas, input_metas,
cfg=None, cfg=None,
rescale=False): rescale=False):
"""Get bboxes of anchor head.
Args:
cls_scores (list[Tensor]): Multi-level class scores.
bbox_preds (list[Tensor]): Multi-level bbox predictions.
dir_cls_preds (list[Tensor]): Multi-level direction
class predictions.
input_metas (list[dict]): Contain pcd and img's meta info.
cfg (None | ConfigDict): Training or testing config.
rescale (list[Tensor]): whether th rescale bbox.
Returns:
list[tuple]: prediction resultes of batches.
"""
assert len(cls_scores) == len(bbox_preds) assert len(cls_scores) == len(bbox_preds)
assert len(cls_scores) == len(dir_cls_preds) assert len(cls_scores) == len(dir_cls_preds)
num_levels = len(cls_scores) num_levels = len(cls_scores)
...@@ -322,6 +389,24 @@ class Anchor3DHead(nn.Module, AnchorTrainMixin): ...@@ -322,6 +389,24 @@ class Anchor3DHead(nn.Module, AnchorTrainMixin):
input_meta, input_meta,
cfg=None, cfg=None,
rescale=False): rescale=False):
"""Get bboxes of single branch.
Args:
cls_scores (Tensor): Class score in single batch.
bbox_preds (Tensor): Bbox prediction in single batch.
dir_cls_preds (Tensor): Predictions of direction class
in single batch.
mlvl_anchors (List[Tensor]): Multi-level anchors in single batch.
input_meta (list[dict]): Contain pcd and img's meta info.
cfg (None | ConfigDict): Training or testing config.
rescale (list[Tensor]): whether th rescale bbox.
Returns:
tuple: Contain predictions of single batch.
- bboxes (:obj:BaseInstance3DBoxes): Predicted 3d bboxes.
- scores (Tensor): Class score of each bbox.
- labels (Tensor): Label of each bbox.
"""
cfg = self.test_cfg if cfg is None else cfg cfg = self.test_cfg if cfg is None else cfg
assert len(cls_scores) == len(bbox_preds) == len(mlvl_anchors) assert len(cls_scores) == len(bbox_preds) == len(mlvl_anchors)
mlvl_bboxes = [] mlvl_bboxes = []
......
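The `forward`/`forward_single` docstrings above describe the usual multi-level pattern: `forward` simply maps `forward_single` over every feature level with `multi_apply` and regroups the per-level outputs. Below is a minimal, self-contained sketch of that pattern; `ToyAnchorHead`, its channel sizes and the local `multi_apply` re-implementation are illustrative assumptions, not the mmdetection3d code.

```python
import torch
from torch import nn


def multi_apply(func, *args):
    """Apply ``func`` to each level and regroup the per-level outputs
    into a tuple of lists (mirrors the helper the head relies on)."""
    map_results = map(func, *args)
    return tuple(map(list, zip(*map_results)))


class ToyAnchorHead(nn.Module):
    """Minimal stand-in for an anchor-based 3D head (names are illustrative)."""

    def __init__(self, in_channels=64, num_anchors=2, num_classes=3, box_code_size=7):
        super().__init__()
        self.conv_cls = nn.Conv2d(in_channels, num_anchors * num_classes, 1)
        self.conv_reg = nn.Conv2d(in_channels, num_anchors * box_code_size, 1)
        self.conv_dir_cls = nn.Conv2d(in_channels, num_anchors * 2, 1)

    def forward_single(self, x):
        # One (cls_score, bbox_pred, dir_cls_preds) triple per feature map.
        return self.conv_cls(x), self.conv_reg(x), self.conv_dir_cls(x)

    def forward(self, feats):
        # Returns (list_of_cls_scores, list_of_bbox_preds, list_of_dir_preds).
        return multi_apply(self.forward_single, feats)


feats = [torch.rand(2, 64, s, s) for s in (32, 16)]  # two BEV feature levels
cls_scores, bbox_preds, dir_preds = ToyAnchorHead()(feats)
print(len(cls_scores), cls_scores[0].shape)  # 2 levels, torch.Size([2, 6, 32, 32])
```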
@@ -39,9 +39,9 @@ class PartA2RPNHead(Anchor3DHead):
            difference for box regression loss.
        dir_offset (float | int): The offset of BEV rotation angles
            (TODO: may be moved into box coder)
-        dirlimit_offset (float | int): The limited range of BEV rotation angles
-            (TODO: may be moved into box coder)
+        dir_limit_offset (float | int): The limited range of BEV
+            rotation angles. (TODO: may be moved into box coder)
-        box_coder (dict): Config dict of box coders.
+        bbox_coder (dict): Config dict of box coders.
        loss_cls (dict): Config of classification loss.
        loss_bbox (dict): Config of localization loss.
        loss_dir (dict): Config of direction classifier loss.
@@ -89,6 +89,25 @@ class PartA2RPNHead(Anchor3DHead):
                          input_meta,
                          cfg,
                          rescale=False):
"""Get bboxes of single branch.
Args:
cls_scores (Tensor): Class score in single batch.
bbox_preds (Tensor): Bbox prediction in single batch.
dir_cls_preds (Tensor): Predictions of direction class
in single batch.
mlvl_anchors (List[Tensor]): Multi-level anchors in single batch.
input_meta (list[dict]): Contain pcd and img's meta info.
cfg (None | ConfigDict): Training or testing config.
rescale (list[Tensor]): whether th rescale bbox.
Returns:
dict: Predictions of single batch. Contain the keys:
- boxes_3d (:obj:BaseInstance3DBoxes): Predicted 3d bboxes.
- scores_3d (Tensor): Score of each bbox.
- labels_3d (Tensor): Label of each bbox.
- cls_preds (Tensor): Class score of each bbox.
"""
assert len(cls_scores) == len(bbox_preds) == len(mlvl_anchors) assert len(cls_scores) == len(bbox_preds) == len(mlvl_anchors)
mlvl_bboxes = [] mlvl_bboxes = []
mlvl_max_scores = [] mlvl_max_scores = []
...@@ -159,6 +178,28 @@ class PartA2RPNHead(Anchor3DHead): ...@@ -159,6 +178,28 @@ class PartA2RPNHead(Anchor3DHead):
mlvl_max_scores, mlvl_label_pred, mlvl_cls_score, mlvl_max_scores, mlvl_label_pred, mlvl_cls_score,
mlvl_dir_scores, score_thr, max_num, cfg, mlvl_dir_scores, score_thr, max_num, cfg,
input_meta): input_meta):
"""Class agnostic nms for single batch.
Args:
mlvl_bboxes (Tensor): Bboxes from Multi-level.
mlvl_bboxes_for_nms (Tensor): Bboxes for nms (bev or minmax boxes)
from Multi-level.
mlvl_max_scores (Tensor): Max scores of Multi-level bbox.
mlvl_label_pred (Tensor): Class predictions of Multi-level bbox.
mlvl_cls_score (Tensor): Class scores of Multi-level bbox.
mlvl_dir_scores (Tensor): Direction scores of Multi-level bbox.
score_thr (int): Score threshold.
max_num (int): Max number of bboxes after nms.
cfg (None | ConfigDict): Training or testing config.
input_meta (dict): Contain pcd and img's meta info.
Returns:
dict: Predictions of single batch. Contain the keys:
- boxes_3d (:obj:BaseInstance3DBoxes): Predicted 3d bboxes.
- scores_3d (Tensor): Score of each bbox.
- labels_3d (Tensor): Label of each bbox.
- cls_preds (Tensor): Class score of each bbox.
"""
bboxes = [] bboxes = []
scores = [] scores = []
labels = [] labels = []
......
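As documented above, `class_agnostic_nms` filters candidates by `score_thr`, runs a single NMS pass over boxes of all classes, caps the result at `max_num` and returns a `boxes_3d`/`scores_3d`/`labels_3d`/`cls_preds` dict. The rough sketch below follows that flow; it substitutes torchvision's axis-aligned `nms` for the rotated BEV NMS used by the real head, and the tensor shapes are made up for illustration.

```python
import torch
from torchvision.ops import nms  # axis-aligned NMS as a stand-in for rotated BEV NMS


def class_agnostic_nms_sketch(bboxes_3d, bboxes_for_nms, max_scores, labels,
                              cls_scores, score_thr=0.1, iou_thr=0.5, max_num=50):
    """Filter by score, run one class-agnostic NMS pass, cap at max_num."""
    keep = max_scores > score_thr
    bboxes_3d, bboxes_for_nms = bboxes_3d[keep], bboxes_for_nms[keep]
    max_scores, labels, cls_scores = max_scores[keep], labels[keep], cls_scores[keep]

    # One NMS call over all classes, then keep at most max_num boxes.
    selected = nms(bboxes_for_nms, max_scores, iou_thr)[:max_num]
    return dict(
        boxes_3d=bboxes_3d[selected],
        scores_3d=max_scores[selected],
        labels_3d=labels[selected],
        cls_preds=cls_scores[selected],
    )


# toy inputs: 100 candidates with 7-dim 3D boxes and valid (x1, y1, x2, y2) NMS boxes
xy = torch.rand(100, 2) * 10
nms_boxes = torch.cat([xy, xy + torch.rand(100, 2) * 2], dim=1)
out = class_agnostic_nms_sketch(torch.rand(100, 7), nms_boxes, torch.rand(100),
                                torch.randint(0, 3, (100,)), torch.rand(100, 3))
print(out['boxes_3d'].shape, out['labels_3d'].shape)
```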
@@ -20,12 +20,17 @@ class AnchorTrainMixin(object):
        Args:
            anchor_list (list[list]): Multi level anchors of each image.
-            gt_bboxes_list (list[BaseInstance3DBoxes]): Ground truth
+            gt_bboxes_list (list[:obj:`BaseInstance3DBoxes`]): Ground truth
                bboxes of each image.
-            img_metas (list[dict]): Meta info of each image.
+            input_metas (list[dict]): Meta info of each image.
+            gt_bboxes_ignore_list (None | list): Ignore list of gt bboxes.
+            gt_labels_list (list[Tensor]): Gt labels of batches.
+            label_channels (int): The channel of labels.
+            num_classes (int): The number of classes.
+            sampling (bool): Whether to sample anchors.
        Returns:
-            tuple
+            tuple: Anchor targets.
        """
        num_imgs = len(input_metas)
        assert len(anchor_list) == num_imgs
@@ -87,6 +92,21 @@ class AnchorTrainMixin(object):
                                label_channels=1,
                                num_classes=1,
                                sampling=True):
"""Compute targets of anchors in single batch.
Args:
anchors (Tensor): Concatenated multi-level anchor.
gt_bboxes (:obj:BaseInstance3DBoxes): Gt bboxes.
gt_bboxes_ignore (Tensor): Ignored gt bboxes.
gt_labels (Tensor): Gt class labels.
input_meta (dict): Meta info of each image.
label_channels (int): The channel of labels.
num_classes (int): The number of classes.
sampling (bool): Whether to sample anchors.
Returns:
tuple: Anchor targets.
"""
if isinstance(self.bbox_assigner, list): if isinstance(self.bbox_assigner, list):
feat_size = anchors.size(0) * anchors.size(1) * anchors.size(2) feat_size = anchors.size(0) * anchors.size(1) * anchors.size(2)
rot_angles = anchors.size(-2) rot_angles = anchors.size(-2)
...@@ -160,6 +180,22 @@ class AnchorTrainMixin(object): ...@@ -160,6 +180,22 @@ class AnchorTrainMixin(object):
label_channels=1, label_channels=1,
num_classes=1, num_classes=1,
sampling=True): sampling=True):
"""Assign anchors and encode positive anchors.
Args:
bbox_assigner (BaseAssigner): assign positive and negative boxes.
anchors (Tensor): Concatenated multi-level anchor.
gt_bboxes (:obj:BaseInstance3DBoxes): Gt bboxes.
gt_bboxes_ignore (Tensor): Ignored gt bboxes.
gt_labels (Tensor): Gt class labels.
input_meta (dict): Meta info of each image.
label_channels (int): The channel of labels.
num_classes (int): The number of classes.
sampling (bool): Whether to sample anchors.
Returns:
tuple: Anchor targets.
"""
anchors = anchors.reshape(-1, anchors.size(-1)) anchors = anchors.reshape(-1, anchors.size(-1))
num_valid_anchors = anchors.shape[0] num_valid_anchors = anchors.shape[0]
bbox_targets = torch.zeros_like(anchors) bbox_targets = torch.zeros_like(anchors)
...@@ -221,6 +257,18 @@ def get_direction_target(anchors, ...@@ -221,6 +257,18 @@ def get_direction_target(anchors,
dir_offset=0, dir_offset=0,
num_bins=2, num_bins=2,
one_hot=True): one_hot=True):
"""Encode direction to 0 ~ num_bins-1.
Args:
anchors (Tensor): Concatenated multi-level anchor.
reg_targets (Tensor): Bbox regression targets.
dir_offset (int): Direction offset.
num_bins (int): Number of bins to divide 2*PI.
one_hot (bool): Whether to encode as one hot.
Returns:
Tensor: Encoded direction targets.
"""
rot_gt = reg_targets[..., 6] + anchors[..., 6] rot_gt = reg_targets[..., 6] + anchors[..., 6]
offset_rot = box_torch_ops.limit_period(rot_gt - dir_offset, 0, 2 * np.pi) offset_rot = box_torch_ops.limit_period(rot_gt - dir_offset, 0, 2 * np.pi)
dir_cls_targets = torch.floor(offset_rot / (2 * np.pi / num_bins)).long() dir_cls_targets = torch.floor(offset_rot / (2 * np.pi / num_bins)).long()
......
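The hunk above shows how direction targets are computed: the ground-truth yaw is recovered as anchor yaw plus the regression residual, wrapped into [0, 2*pi) and binned into `num_bins` direction classes. Below is a small sketch of that encoding, with `limit_period` re-implemented locally under the assumption that it wraps values into [0, period); it is an illustration, not the library function.

```python
import numpy as np
import torch


def limit_period(val, offset=0.0, period=2 * np.pi):
    # Wrap values into [offset * period, (offset + 1) * period).
    return val - torch.floor(val / period + offset) * period


def get_direction_target_sketch(anchors, reg_targets, dir_offset=0.0,
                                num_bins=2, one_hot=True):
    """Bin the ground-truth yaw (anchor yaw + residual) into direction classes."""
    rot_gt = reg_targets[..., 6] + anchors[..., 6]           # recover absolute yaw
    offset_rot = limit_period(rot_gt - dir_offset, 0, 2 * np.pi)
    dir_cls_targets = torch.floor(offset_rot / (2 * np.pi / num_bins)).long()
    dir_cls_targets = torch.clamp(dir_cls_targets, min=0, max=num_bins - 1)
    if one_hot:
        one_hot_targets = torch.zeros(*dir_cls_targets.shape, num_bins)
        one_hot_targets.scatter_(-1, dir_cls_targets.unsqueeze(-1), 1.0)
        return one_hot_targets
    return dir_cls_targets


anchors = torch.zeros(4, 7)
anchors[:, 6] = torch.tensor([0.0, np.pi / 2, np.pi, -np.pi / 2])
reg_targets = torch.zeros(4, 7)  # zero yaw residuals for simplicity
print(get_direction_target_sketch(anchors, reg_targets, one_hot=False))  # tensor([0, 0, 1, 1])
```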
@@ -122,6 +122,9 @@ class VoteHead(nn.Module):
            feat_dict (dict): feature dict from backbone.
            sample_mod (str): sample mode for vote aggregation layer.
                valid modes are "vote", "seed" and "random".
+        Returns:
+            dict: Predictions of vote head.
        """
        assert sample_mod in ['vote', 'seed', 'random']
@@ -178,8 +181,24 @@ class VoteHead(nn.Module):
             gt_labels_3d,
             pts_semantic_mask=None,
             pts_instance_mask=None,
-             img_meta=None,
+             input_meta=None,
             gt_bboxes_ignore=None):
"""Compute loss.
Args:
bbox_preds (dict): Predictions from forward of vote head.
points (list[Tensor]): Input points.
gt_bboxes_3d (list[:obj:BaseInstance3DBoxes]): Gt bboxes
of each sample.
gt_labels_3d (list[Tensor]): Gt labels of each sample.
pts_semantic_mask (None | list[Tensor]): Point-wise semantic mask.
pts_instance_mask (None | list[Tensor]): Point-wise instance mask.
input_metas (list[dict]): Contain pcd and img's meta info.
gt_bboxes_ignore (None | list[Tensor]): Specify which bounding.
Returns:
dict: Losses of Votenet.
"""
targets = self.get_targets(points, gt_bboxes_3d, gt_labels_3d, targets = self.get_targets(points, gt_bboxes_3d, gt_labels_3d,
pts_semantic_mask, pts_instance_mask, pts_semantic_mask, pts_instance_mask,
bbox_preds) bbox_preds)
@@ -269,11 +288,12 @@ class VoteHead(nn.Module):
                    pts_semantic_mask=None,
                    pts_instance_mask=None,
                    bbox_preds=None):
-        """Get targets of vote head.
+        """Generate targets of vote head.
        Args:
            points (list[Tensor]): Points of each batch.
-            gt_bboxes_3d (BaseInstance3DBoxes): gt bboxes of each batch.
+            gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): gt bboxes of
+                each batch.
            gt_labels_3d (list[Tensor]): gt class labels of each batch.
            pts_semantic_mask (None | list[Tensor]): point-wise semantic
                label of each batch.
@@ -284,7 +304,6 @@ class VoteHead(nn.Module):
        Returns:
            tuple: Targets of vote head.
        """
        # find empty example
        valid_gt_masks = list()
        gt_num = list()
@@ -355,6 +374,22 @@ class VoteHead(nn.Module):
                           pts_semantic_mask=None,
                           pts_instance_mask=None,
                           aggregated_points=None):
"""Generate targets of vote head for single batch.
Args:
points (Tensor): Points of each batch.
gt_bboxes_3d (:obj:BaseInstance3DBoxes): gt bboxes of each batch.
gt_labels_3d (Tensor): gt class labels of each batch.
pts_semantic_mask (None | Tensor): point-wise semantic
label of each batch.
pts_instance_mask (None | Tensor): point-wise instance
label of each batch.
aggregated_points (Tensor): Aggregated points from
vote aggregation layer.
Returns:
tuple: Targets of vote head.
"""
assert self.bbox_coder.with_rot or pts_semantic_mask is not None assert self.bbox_coder.with_rot or pts_semantic_mask is not None
gt_bboxes_3d = gt_bboxes_3d.to(points.device) gt_bboxes_3d = gt_bboxes_3d.to(points.device)
...@@ -451,19 +486,30 @@ class VoteHead(nn.Module): ...@@ -451,19 +486,30 @@ class VoteHead(nn.Module):
dir_class_targets, dir_res_targets, center_targets, dir_class_targets, dir_res_targets, center_targets,
mask_targets.long(), objectness_targets, objectness_masks) mask_targets.long(), objectness_targets, objectness_masks)
-    def get_bboxes(self, points, bbox_preds, input_meta, rescale=False):
+    def get_bboxes(self, points, bbox_preds, input_metas, rescale=False):
+        """Generate bboxes from vote head predictions.
+        Args:
+            points (Tensor): Input points.
+            bbox_preds (dict): Predictions from vote head.
+            input_metas (list[dict]): Contain pcd and img's meta info.
+            rescale (bool): Whether to rescale bboxes.
+        Returns:
+            list[tuple[Tensor]]: Bboxes, scores and labels of each sample.
+        """
        # decode boxes
        obj_scores = F.softmax(bbox_preds['obj_scores'], dim=-1)[..., -1]
        sem_scores = F.softmax(bbox_preds['sem_scores'], dim=-1)
-        bbox_depth = self.bbox_coder.decode(bbox_preds)
-        batch_size = bbox_depth.shape[0]
+        bbox3d = self.bbox_coder.decode(bbox_preds)
+        batch_size = bbox3d.shape[0]
        results = list()
        for b in range(batch_size):
-            bbox_selected, score_selected, labels = self.multiclass_nms_single(
-                obj_scores[b], sem_scores[b], bbox_depth[b],
-                points[b, ..., :3], input_meta[b])
-            bbox = input_meta[b]['box_type_3d'](
+            bbox_selected, score_selected, labels = self.multiclass_nms_single(
+                obj_scores[b], sem_scores[b], bbox3d[b], points[b, ..., :3],
+                input_metas[b])
+            bbox = input_metas[b]['box_type_3d'](
                bbox_selected,
                box_dim=bbox_selected.shape[-1],
                with_yaw=self.bbox_coder.with_rot)
@@ -473,6 +519,18 @@ class VoteHead(nn.Module):
    def multiclass_nms_single(self, obj_scores, sem_scores, bbox, points,
                              input_meta):
+        """Multi-class nms in single batch.
+        Args:
+            obj_scores (Tensor): Objectness score of bboxes.
+            sem_scores (Tensor): Semantic class score of bboxes.
+            bbox (Tensor): Predicted bbox.
+            points (Tensor): Input points.
+            input_meta (dict): Contain pcd and img's meta info.
+        Returns:
+            tuple[Tensor]: Bboxes, scores and labels.
+        """
        bbox = input_meta['box_type_3d'](
            bbox,
            box_dim=bbox.shape[-1],
......
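`get_bboxes` in the VoteHead hunk above softmaxes the objectness and semantic scores, decodes the predicted boxes, and then selects boxes sample by sample via `multiclass_nms_single` before wrapping them with `box_type_3d`. The sketch below mirrors that loop with a plain objectness threshold standing in for the NMS step and the box wrapper; the shapes and helper name are assumptions for illustration only.

```python
import torch
import torch.nn.functional as F


def get_bboxes_sketch(obj_score_logits, sem_score_logits, bbox3d, score_thr=0.5):
    """Per-sample selection mirroring the loop above; a score threshold
    stands in for ``multiclass_nms_single`` and the ``box_type_3d`` wrapper."""
    obj_scores = F.softmax(obj_score_logits, dim=-1)[..., -1]  # P(object)
    sem_scores = F.softmax(sem_score_logits, dim=-1)           # per-class scores
    results = []
    for b in range(bbox3d.shape[0]):                           # one sample at a time
        keep = obj_scores[b] > score_thr
        bbox_selected = bbox3d[b][keep]
        score_selected = obj_scores[b][keep]
        labels = sem_scores[b][keep].argmax(dim=-1)
        results.append((bbox_selected, score_selected, labels))
    return results


# toy predictions: batch of 2 samples, 256 proposals, 10 classes, 7-dim boxes
out = get_bboxes_sketch(torch.randn(2, 256, 2), torch.randn(2, 256, 10),
                        torch.randn(2, 256, 7))
print(len(out), out[0][0].shape[-1])  # 2 samples, 7-dim boxes
```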
@@ -43,7 +43,7 @@ class VoteNet(SingleStageDetector):
        Args:
            points (list[Tensor]): Points of each batch.
            img_meta (list): Image metas.
-            gt_bboxes_3d (BaseInstance3DBoxes): gt bboxes of each batch.
+            gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): gt bboxes of each batch.
            gt_labels_3d (list[Tensor]): gt class labels of each batch.
            pts_semantic_mask (None | list[Tensor]): point-wise semantic
                label of each batch.
@@ -86,7 +86,7 @@ class VoteNet(SingleStageDetector):
        Args:
            points (list[Tensor]): Points of each sample.
            img_meta (list): Image metas.
-            gt_bboxes_3d (BaseInstance3DBoxes): gt bboxes of each sample.
+            gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): gt bboxes of each sample.
            gt_labels_3d (list[Tensor]): gt class labels of each sample.
            pts_semantic_mask (None | list[Tensor]): point-wise semantic
                label of each sample.
......