Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
mmdetection3d
Commits
6d71b439
"vscode:/vscode.git/clone" did not exist on "6425d46cfc15b5a7073a30cee4024434eeeac7a0"
Commit
6d71b439
authored
Apr 18, 2020
by
zhangwenwei
Browse files
Refactor optimizer and samplers
parent
ba492be7
Changes
38
Show whitespace changes
Inline
Side-by-side
Showing
18 changed files
with
347 additions
and
762 deletions
+347
-762
mmdet3d/core/bbox/samplers/combined_sampler.py
mmdet3d/core/bbox/samplers/combined_sampler.py
+0
-16
mmdet3d/core/bbox/samplers/instance_balanced_pos_sampler.py
mmdet3d/core/bbox/samplers/instance_balanced_pos_sampler.py
+0
-41
mmdet3d/core/bbox/samplers/iou_balanced_neg_sampler.py
mmdet3d/core/bbox/samplers/iou_balanced_neg_sampler.py
+0
-133
mmdet3d/core/bbox/samplers/ohem_sampler.py
mmdet3d/core/bbox/samplers/ohem_sampler.py
+0
-73
mmdet3d/core/bbox/samplers/pseudo_sampler.py
mmdet3d/core/bbox/samplers/pseudo_sampler.py
+0
-26
mmdet3d/core/bbox/samplers/random_sampler.py
mmdet3d/core/bbox/samplers/random_sampler.py
+0
-53
mmdet3d/core/bbox/samplers/sampling_result.py
mmdet3d/core/bbox/samplers/sampling_result.py
+0
-24
mmdet3d/core/bbox/transforms.py
mmdet3d/core/bbox/transforms.py
+0
-220
mmdet3d/core/optimizer/__init__.py
mmdet3d/core/optimizer/__init__.py
+3
-4
mmdet3d/core/optimizer/builder.py
mmdet3d/core/optimizer/builder.py
+0
-135
mmdet3d/core/optimizer/cocktail_constructor.py
mmdet3d/core/optimizer/cocktail_constructor.py
+110
-0
mmdet3d/core/optimizer/cocktail_optimizer.py
mmdet3d/core/optimizer/cocktail_optimizer.py
+6
-5
mmdet3d/core/optimizer/registry.py
mmdet3d/core/optimizer/registry.py
+0
-23
mmdet3d/models/anchor_heads/boxvelo_head.py
mmdet3d/models/anchor_heads/boxvelo_head.py
+1
-1
mmdet3d/models/anchor_heads/second_head.py
mmdet3d/models/anchor_heads/second_head.py
+1
-1
tests/test_forward.py
tests/test_forward.py
+220
-0
tools/test.py
tools/test.py
+3
-3
tools/train.py
tools/train.py
+3
-4
No files found.
mmdet3d/core/bbox/samplers/combined_sampler.py
deleted
100644 → 0
View file @
ba492be7
from
..assign_sampling
import
build_sampler
from
.base_sampler
import
BaseSampler
class CombinedSampler(BaseSampler):
    """Sampler that pairs two independently-built sub-samplers.

    One sub-sampler handles positives and one handles negatives;
    presumably the base class dispatches to them — confirm against
    ``BaseSampler.sample``.
    """

    def __init__(self, pos_sampler, neg_sampler, **kwargs):
        super(CombinedSampler, self).__init__(**kwargs)
        # Shared kwargs are forwarded to both sub-sampler builders.
        self.pos_sampler = build_sampler(pos_sampler, **kwargs)
        self.neg_sampler = build_sampler(neg_sampler, **kwargs)

    def _sample_pos(self, **kwargs):
        """Intentionally unimplemented; positive sampling is delegated."""
        raise NotImplementedError

    def _sample_neg(self, **kwargs):
        """Intentionally unimplemented; negative sampling is delegated."""
        raise NotImplementedError
mmdet3d/core/bbox/samplers/instance_balanced_pos_sampler.py
deleted
100644 → 0
View file @
ba492be7
import
numpy
as
np
import
torch
from
.random_sampler
import
RandomSampler
class InstanceBalancedPosSampler(RandomSampler):
    """Random positive sampler that balances samples across GT instances.

    Instead of drawing positives uniformly, an equal per-instance quota is
    drawn for every ground-truth box so that no single instance dominates
    the positive set.
    """

    def _sample_pos(self, assign_result, num_expected, **kwargs):
        """Sample up to ``num_expected`` positive indices, balanced per GT.

        Args:
            assign_result: assignment result; only ``gt_inds`` is read here
                (values > 0 mark positives, 1-based GT index).
            num_expected (int): maximum number of positives to return.

        Returns:
            Tensor: sampled positive indices (long tensor).
        """
        pos_inds = torch.nonzero(assign_result.gt_inds > 0)
        if pos_inds.numel() != 0:
            pos_inds = pos_inds.squeeze(1)
        if pos_inds.numel() <= num_expected:
            # Fewer candidates than requested: keep them all.
            return pos_inds
        else:
            unique_gt_inds = assign_result.gt_inds[pos_inds].unique()
            num_gts = len(unique_gt_inds)
            # Per-instance quota; +1 rounds up so quotas cover num_expected.
            num_per_gt = int(round(num_expected / float(num_gts)) + 1)
            sampled_inds = []
            for i in unique_gt_inds:
                inds = torch.nonzero(assign_result.gt_inds == i.item())
                if inds.numel() != 0:
                    inds = inds.squeeze(1)
                else:
                    continue
                if len(inds) > num_per_gt:
                    # Trim this instance's candidates down to its quota.
                    inds = self.random_choice(inds, num_per_gt)
                sampled_inds.append(inds)
            sampled_inds = torch.cat(sampled_inds)
            if len(sampled_inds) < num_expected:
                # Backfill the shortfall from the not-yet-sampled positives.
                num_extra = num_expected - len(sampled_inds)
                extra_inds = np.array(
                    list(set(pos_inds.cpu()) - set(sampled_inds.cpu())))
                if len(extra_inds) > num_extra:
                    extra_inds = self.random_choice(extra_inds, num_extra)
                extra_inds = torch.from_numpy(extra_inds).to(
                    assign_result.gt_inds.device).long()
                sampled_inds = torch.cat([sampled_inds, extra_inds])
            elif len(sampled_inds) > num_expected:
                # Quotas over-shot: randomly trim back to num_expected.
                sampled_inds = self.random_choice(sampled_inds, num_expected)
            return sampled_inds
mmdet3d/core/bbox/samplers/iou_balanced_neg_sampler.py
deleted
100644 → 0
View file @
ba492be7
import
numpy
as
np
import
torch
from
.random_sampler
import
RandomSampler
class IoUBalancedNegSampler(RandomSampler):
    """IoU Balanced Sampling

    arXiv: https://arxiv.org/pdf/1904.02701.pdf (CVPR 2019)

    Sampling proposals according to their IoU. `floor_fraction` of needed RoIs
    are sampled from proposals whose IoU are lower than `floor_thr` randomly.
    The others are sampled from proposals whose IoU are higher than
    `floor_thr`. These proposals are sampled from some bins evenly, which are
    split by `num_bins` via IoU evenly.

    Args:
        num (int): number of proposals.
        pos_fraction (float): fraction of positive proposals.
        floor_thr (float): threshold (minimum) IoU for IoU balanced sampling,
            set to -1 if all using IoU balanced sampling.
        floor_fraction (float): sampling fraction of proposals under floor_thr.
        num_bins (int): number of bins in IoU balanced sampling.
    """

    def __init__(self,
                 num,
                 pos_fraction,
                 floor_thr=-1,
                 floor_fraction=0,
                 num_bins=3,
                 **kwargs):
        super(IoUBalancedNegSampler, self).__init__(num, pos_fraction,
                                                    **kwargs)
        assert floor_thr >= 0 or floor_thr == -1
        assert 0 <= floor_fraction <= 1
        assert num_bins >= 1

        self.floor_thr = floor_thr
        self.floor_fraction = floor_fraction
        self.num_bins = num_bins

    def sample_via_interval(self, max_overlaps, full_set, num_expected):
        """Sample evenly from ``num_bins`` equal-width IoU intervals.

        Args:
            max_overlaps (ndarray): per-proposal max IoU with any GT.
            full_set (set[int]): candidate indices eligible for sampling.
            num_expected (int): total number of indices to sample.

        Returns:
            ndarray: sampled indices.
        """
        max_iou = max_overlaps.max()
        iou_interval = (max_iou - self.floor_thr) / self.num_bins
        per_num_expected = int(num_expected / self.num_bins)

        sampled_inds = []
        for i in range(self.num_bins):
            start_iou = self.floor_thr + i * iou_interval
            end_iou = self.floor_thr + (i + 1) * iou_interval
            tmp_set = set(
                np.where(
                    np.logical_and(max_overlaps >= start_iou,
                                   max_overlaps < end_iou))[0])
            tmp_inds = list(tmp_set & full_set)
            if len(tmp_inds) > per_num_expected:
                tmp_sampled_set = self.random_choice(tmp_inds,
                                                     per_num_expected)
            else:
                # FIX: `np.int` was a deprecated alias of the builtin `int`
                # and was removed in NumPy >= 1.24; use `int` directly.
                tmp_sampled_set = np.array(tmp_inds, dtype=int)
            sampled_inds.append(tmp_sampled_set)
        sampled_inds = np.concatenate(sampled_inds)
        if len(sampled_inds) < num_expected:
            # Bins under-filled: backfill randomly from remaining candidates.
            num_extra = num_expected - len(sampled_inds)
            extra_inds = np.array(list(full_set - set(sampled_inds)))
            if len(extra_inds) > num_extra:
                extra_inds = self.random_choice(extra_inds, num_extra)
            sampled_inds = np.concatenate([sampled_inds, extra_inds])

        return sampled_inds

    def _sample_neg(self, assign_result, num_expected, **kwargs):
        """Sample negatives, splitting the budget between a "floor" pool
        (IoU below ``floor_thr``) and an IoU-balanced pool above it.

        Returns:
            Tensor: sampled negative indices (long tensor on the same
            device as ``assign_result.gt_inds``).
        """
        neg_inds = torch.nonzero(assign_result.gt_inds == 0)
        if neg_inds.numel() != 0:
            neg_inds = neg_inds.squeeze(1)
        if len(neg_inds) <= num_expected:
            return neg_inds
        else:
            max_overlaps = assign_result.max_overlaps.cpu().numpy()
            # balance sampling for negative samples
            neg_set = set(neg_inds.cpu().numpy())

            if self.floor_thr > 0:
                floor_set = set(
                    np.where(
                        np.logical_and(max_overlaps >= 0,
                                       max_overlaps < self.floor_thr))[0])
                iou_sampling_set = set(
                    np.where(max_overlaps >= self.floor_thr)[0])
            elif self.floor_thr == 0:
                floor_set = set(np.where(max_overlaps == 0)[0])
                iou_sampling_set = set(
                    np.where(max_overlaps > self.floor_thr)[0])
            else:
                # floor_thr == -1: no floor pool, everything is IoU-balanced.
                floor_set = set()
                iou_sampling_set = set(
                    np.where(max_overlaps > self.floor_thr)[0])

            floor_neg_inds = list(floor_set & neg_set)
            iou_sampling_neg_inds = list(iou_sampling_set & neg_set)

            num_expected_iou_sampling = int(num_expected *
                                            (1 - self.floor_fraction))
            if len(iou_sampling_neg_inds) > num_expected_iou_sampling:
                if self.num_bins >= 2:
                    iou_sampled_inds = self.sample_via_interval(
                        max_overlaps, set(iou_sampling_neg_inds),
                        num_expected_iou_sampling)
                else:
                    iou_sampled_inds = self.random_choice(
                        iou_sampling_neg_inds, num_expected_iou_sampling)
            else:
                # FIX: np.int removed in NumPy >= 1.24 (alias of builtin int).
                iou_sampled_inds = np.array(iou_sampling_neg_inds, dtype=int)
            num_expected_floor = num_expected - len(iou_sampled_inds)
            if len(floor_neg_inds) > num_expected_floor:
                sampled_floor_inds = self.random_choice(
                    floor_neg_inds, num_expected_floor)
            else:
                # FIX: np.int removed in NumPy >= 1.24 (alias of builtin int).
                sampled_floor_inds = np.array(floor_neg_inds, dtype=int)
            sampled_inds = np.concatenate(
                (sampled_floor_inds, iou_sampled_inds))
            if len(sampled_inds) < num_expected:
                # Still short: backfill randomly from unused negatives.
                num_extra = num_expected - len(sampled_inds)
                extra_inds = np.array(list(neg_set - set(sampled_inds)))
                if len(extra_inds) > num_extra:
                    extra_inds = self.random_choice(extra_inds, num_extra)
                sampled_inds = np.concatenate((sampled_inds, extra_inds))
            sampled_inds = torch.from_numpy(sampled_inds).long().to(
                assign_result.gt_inds.device)
            return sampled_inds
mmdet3d/core/bbox/samplers/ohem_sampler.py
deleted
100644 → 0
View file @
ba492be7
import
torch
from
..transforms
import
bbox2roi
from
.base_sampler
import
BaseSampler
class OHEMSampler(BaseSampler):
    """Online Hard Example Mining sampler.

    Candidates are ranked by the classification loss produced by the
    detector's own bbox head, and the ``num_expected`` hardest ones are
    kept (see :meth:`hard_mining`).
    """

    def __init__(self,
                 num,
                 pos_fraction,
                 context,
                 neg_pos_ub=-1,
                 add_gt_as_proposals=True,
                 **kwargs):
        super(OHEMSampler, self).__init__(num, pos_fraction, neg_pos_ub,
                                          add_gt_as_proposals)
        # `context` is the owning detector; a `num_stages` attribute marks a
        # cascade model, in which case the current stage's head/extractor
        # are picked out of the per-stage lists.
        if not hasattr(context, 'num_stages'):
            self.bbox_roi_extractor = context.bbox_roi_extractor
            self.bbox_head = context.bbox_head
        else:
            self.bbox_roi_extractor = context.bbox_roi_extractor[
                context.current_stage]
            self.bbox_head = context.bbox_head[context.current_stage]

    def hard_mining(self, inds, num_expected, bboxes, labels, feats):
        """Return the ``num_expected`` indices with the highest cls loss.

        Runs the bbox head forward under ``torch.no_grad`` (loss is used
        only for ranking, not for backprop).
        """
        with torch.no_grad():
            rois = bbox2roi([bboxes])
            bbox_feats = self.bbox_roi_extractor(
                feats[:self.bbox_roi_extractor.num_inputs], rois)
            cls_score, _ = self.bbox_head(bbox_feats)
            # Per-sample (unreduced) classification loss used as "hardness".
            loss = self.bbox_head.loss(
                cls_score=cls_score,
                bbox_pred=None,
                labels=labels,
                label_weights=cls_score.new_ones(cls_score.size(0)),
                bbox_targets=None,
                bbox_weights=None,
                reduction_override='none')['loss_cls']
            _, topk_loss_inds = loss.topk(num_expected)
        return inds[topk_loss_inds]

    def _sample_pos(self,
                    assign_result,
                    num_expected,
                    bboxes=None,
                    feats=None,
                    **kwargs):
        # Sample some hard positive samples
        pos_inds = torch.nonzero(assign_result.gt_inds > 0)
        if pos_inds.numel() != 0:
            pos_inds = pos_inds.squeeze(1)
        if pos_inds.numel() <= num_expected:
            # Not enough candidates to rank: keep them all.
            return pos_inds
        else:
            return self.hard_mining(pos_inds, num_expected, bboxes[pos_inds],
                                    assign_result.labels[pos_inds], feats)

    def _sample_neg(self,
                    assign_result,
                    num_expected,
                    bboxes=None,
                    feats=None,
                    **kwargs):
        # Sample some hard negative samples
        neg_inds = torch.nonzero(assign_result.gt_inds == 0)
        if neg_inds.numel() != 0:
            neg_inds = neg_inds.squeeze(1)
        if len(neg_inds) <= num_expected:
            return neg_inds
        else:
            return self.hard_mining(neg_inds, num_expected, bboxes[neg_inds],
                                    assign_result.labels[neg_inds], feats)
mmdet3d/core/bbox/samplers/pseudo_sampler.py
deleted
100644 → 0
View file @
ba492be7
import
torch
from
.base_sampler
import
BaseSampler
from
.sampling_result
import
SamplingResult
class PseudoSampler(BaseSampler):
    """A no-op sampler: every assigned positive and negative is kept.

    Used when the assigner's output should be consumed directly, without
    any subsampling.
    """

    def __init__(self, **kwargs):
        # Deliberately skips BaseSampler.__init__: no quota state is needed.
        pass

    def _sample_pos(self, **kwargs):
        """Never called; this sampler overrides ``sample`` wholesale."""
        raise NotImplementedError

    def _sample_neg(self, **kwargs):
        """Never called; this sampler overrides ``sample`` wholesale."""
        raise NotImplementedError

    def sample(self, assign_result, bboxes, gt_bboxes, **kwargs):
        """Wrap all assigned boxes into a :class:`SamplingResult`.

        gt_inds > 0 marks positives, == 0 marks negatives.
        """
        gt_inds = assign_result.gt_inds
        pos_inds = torch.nonzero(gt_inds > 0).squeeze(-1).unique()
        neg_inds = torch.nonzero(gt_inds == 0).squeeze(-1).unique()
        # No GT boxes are injected, so the gt flags are all zero.
        gt_flags = bboxes.new_zeros(bboxes.shape[0], dtype=torch.uint8)
        return SamplingResult(pos_inds, neg_inds, bboxes, gt_bboxes,
                              assign_result, gt_flags)
mmdet3d/core/bbox/samplers/random_sampler.py
deleted
100644 → 0
View file @
ba492be7
import
numpy
as
np
import
torch
from
.base_sampler
import
BaseSampler
class RandomSampler(BaseSampler):
    """Sampler that draws positives and negatives uniformly at random."""

    def __init__(self,
                 num,
                 pos_fraction,
                 neg_pos_ub=-1,
                 add_gt_as_proposals=True,
                 **kwargs):
        super(RandomSampler, self).__init__(num, pos_fraction, neg_pos_ub,
                                            add_gt_as_proposals)

    @staticmethod
    def random_choice(gallery, num):
        """Random select some elements from the gallery.

        It seems that Pytorch's implementation is slower than numpy so we use
        numpy to randperm the indices.
        """
        assert len(gallery) >= num
        if isinstance(gallery, list):
            gallery = np.array(gallery)
        perm = np.arange(len(gallery))
        np.random.shuffle(perm)
        chosen = perm[:num]
        if not isinstance(gallery, np.ndarray):
            # gallery is a torch tensor: move numpy indices to its device.
            chosen = torch.from_numpy(chosen).long().to(gallery.device)
        return gallery[chosen]

    def _sample_pos(self, assign_result, num_expected, **kwargs):
        """Randomly sample some positive samples."""
        candidates = torch.nonzero(assign_result.gt_inds > 0)
        if candidates.numel() != 0:
            candidates = candidates.squeeze(1)
        if candidates.numel() <= num_expected:
            return candidates
        return self.random_choice(candidates, num_expected)

    def _sample_neg(self, assign_result, num_expected, **kwargs):
        """Randomly sample some negative samples."""
        candidates = torch.nonzero(assign_result.gt_inds == 0)
        if candidates.numel() != 0:
            candidates = candidates.squeeze(1)
        if len(candidates) <= num_expected:
            return candidates
        return self.random_choice(candidates, num_expected)
mmdet3d/core/bbox/samplers/sampling_result.py
deleted
100644 → 0
View file @
ba492be7
import
torch
class SamplingResult(object):
    """Bookkeeping container for the outcome of bbox sampling.

    Splits the sampled boxes into positive/negative views and records, for
    each positive, which ground-truth box it was assigned to.
    """

    def __init__(self, pos_inds, neg_inds, bboxes, gt_bboxes, assign_result,
                 gt_flags):
        self.pos_inds = pos_inds
        self.neg_inds = neg_inds
        self.pos_bboxes = bboxes[pos_inds]
        self.neg_bboxes = bboxes[neg_inds]
        # Which of the sampled positives were injected GT boxes.
        self.pos_is_gt = gt_flags[pos_inds]

        self.num_gts = gt_bboxes.shape[0]
        # assign_result.gt_inds is 1-based (0 means negative), hence the -1.
        self.pos_assigned_gt_inds = assign_result.gt_inds[pos_inds] - 1
        self.pos_gt_bboxes = gt_bboxes[self.pos_assigned_gt_inds, :]
        self.pos_gt_labels = (assign_result.labels[pos_inds]
                              if assign_result.labels is not None else None)

    @property
    def bboxes(self):
        """All sampled boxes, positives first."""
        return torch.cat([self.pos_bboxes, self.neg_bboxes])
mmdet3d/core/bbox/transforms.py
View file @
6d71b439
import
mmcv
import
numpy
as
np
import
torch
import
torch
def bbox2delta(proposals, gt, means=(0., 0., 0., 0.), stds=(1., 1., 1., 1.)):
    """Encode ground-truth boxes as normalized deltas w.r.t. proposals.

    Args:
        proposals (Tensor): source boxes, shape (..., 4) as x1, y1, x2, y2.
        gt (Tensor): target boxes, same shape as ``proposals``.
        means (sequence[float]): denormalizing means for (dx, dy, dw, dh).
            FIX: was a mutable list default; tuples are safe and equivalent.
        stds (sequence[float]): denormalizing stds for (dx, dy, dw, dh).

    Returns:
        Tensor: deltas (dx, dy, dw, dh), same leading shape as the inputs.
    """
    assert proposals.size() == gt.size()

    proposals = proposals.float()
    gt = gt.float()
    # Proposal centers and sizes.
    px = (proposals[..., 0] + proposals[..., 2]) * 0.5
    py = (proposals[..., 1] + proposals[..., 3]) * 0.5
    pw = proposals[..., 2] - proposals[..., 0]
    ph = proposals[..., 3] - proposals[..., 1]
    # Ground-truth centers and sizes.
    gx = (gt[..., 0] + gt[..., 2]) * 0.5
    gy = (gt[..., 1] + gt[..., 3]) * 0.5
    gw = gt[..., 2] - gt[..., 0]
    gh = gt[..., 3] - gt[..., 1]

    # Center offsets normalized by proposal size; log-space size ratios.
    dx = (gx - px) / pw
    dy = (gy - py) / ph
    dw = torch.log(gw / pw)
    dh = torch.log(gh / ph)
    deltas = torch.stack([dx, dy, dw, dh], dim=-1)

    means = deltas.new_tensor(means).unsqueeze(0)
    stds = deltas.new_tensor(stds).unsqueeze(0)
    deltas = deltas.sub_(means).div_(stds)

    return deltas
def
delta2bbox
(
rois
,
deltas
,
means
=
[
0
,
0
,
0
,
0
],
stds
=
[
1
,
1
,
1
,
1
],
max_shape
=
None
,
wh_ratio_clip
=
16
/
1000
):
"""
Apply deltas to shift/scale base boxes.
Typically the rois are anchor or proposed bounding boxes and the deltas are
network outputs used to shift/scale those boxes.
Args:
rois (Tensor): boxes to be transformed. Has shape (N, 4)
deltas (Tensor): encoded offsets with respect to each roi.
Has shape (N, 4). Note N = num_anchors * W * H when rois is a grid
of anchors. Offset encoding follows [1]_.
means (list): denormalizing means for delta coordinates
stds (list): denormalizing standard deviation for delta coordinates
max_shape (tuple[int, int]): maximum bounds for boxes. specifies (H, W)
wh_ratio_clip (float): maximum aspect ratio for boxes.
Returns:
Tensor: boxes with shape (N, 4), where columns represent
tl_x, tl_y, br_x, br_y.
References:
.. [1] https://arxiv.org/abs/1311.2524
Example:
>>> rois = torch.Tensor([[ 0., 0., 1., 1.],
>>> [ 0., 0., 1., 1.],
>>> [ 0., 0., 1., 1.],
>>> [ 5., 5., 5., 5.]])
>>> deltas = torch.Tensor([[ 0., 0., 0., 0.],
>>> [ 1., 1., 1., 1.],
>>> [ 0., 0., 2., -1.],
>>> [ 0.7, -1.9, -0.5, 0.3]])
>>> delta2bbox(rois, deltas, max_shape=(32, 32))
tensor([[0.0000, 0.0000, 1.0000, 1.0000],
[0.2817, 0.2817, 4.7183, 4.7183],
[0.0000, 0.6321, 7.3891, 0.3679],
[5.8967, 2.9251, 5.5033, 3.2749]])
"""
means
=
deltas
.
new_tensor
(
means
).
repeat
(
1
,
deltas
.
size
(
1
)
//
4
)
stds
=
deltas
.
new_tensor
(
stds
).
repeat
(
1
,
deltas
.
size
(
1
)
//
4
)
denorm_deltas
=
deltas
*
stds
+
means
dx
=
denorm_deltas
[:,
0
::
4
]
dy
=
denorm_deltas
[:,
1
::
4
]
dw
=
denorm_deltas
[:,
2
::
4
]
dh
=
denorm_deltas
[:,
3
::
4
]
max_ratio
=
np
.
abs
(
np
.
log
(
wh_ratio_clip
))
dw
=
dw
.
clamp
(
min
=-
max_ratio
,
max
=
max_ratio
)
dh
=
dh
.
clamp
(
min
=-
max_ratio
,
max
=
max_ratio
)
# Compute center of each roi
px
=
((
rois
[:,
0
]
+
rois
[:,
2
])
*
0.5
).
unsqueeze
(
1
).
expand_as
(
dx
)
py
=
((
rois
[:,
1
]
+
rois
[:,
3
])
*
0.5
).
unsqueeze
(
1
).
expand_as
(
dy
)
# Compute width/height of each roi
pw
=
(
rois
[:,
2
]
-
rois
[:,
0
]).
unsqueeze
(
1
).
expand_as
(
dw
)
ph
=
(
rois
[:,
3
]
-
rois
[:,
1
]).
unsqueeze
(
1
).
expand_as
(
dh
)
# Use exp(network energy) to enlarge/shrink each roi
gw
=
pw
*
dw
.
exp
()
gh
=
ph
*
dh
.
exp
()
# Use network energy to shift the center of each roi
gx
=
torch
.
addcmul
(
px
,
1
,
pw
,
dx
)
# gx = px + pw * dx
gy
=
torch
.
addcmul
(
py
,
1
,
ph
,
dy
)
# gy = py + ph * dy
# Convert center-xy/width/height to top-left, bottom-right
x1
=
gx
-
gw
*
0.5
y1
=
gy
-
gh
*
0.5
x2
=
gx
+
gw
*
0.5
y2
=
gy
+
gh
*
0.5
if
max_shape
is
not
None
:
x1
=
x1
.
clamp
(
min
=
0
,
max
=
max_shape
[
1
])
y1
=
y1
.
clamp
(
min
=
0
,
max
=
max_shape
[
0
])
x2
=
x2
.
clamp
(
min
=
0
,
max
=
max_shape
[
1
])
y2
=
y2
.
clamp
(
min
=
0
,
max
=
max_shape
[
0
])
bboxes
=
torch
.
stack
([
x1
,
y1
,
x2
,
y2
],
dim
=-
1
).
view_as
(
deltas
)
return
bboxes
def bbox_flip(bboxes, img_shape):
    """Flip bboxes horizontally.

    Args:
        bboxes(Tensor or ndarray): Shape (..., 4*k)
        img_shape(tuple): Image shape.

    Returns:
        Same type as `bboxes`: Flipped bboxes.
    """
    if isinstance(bboxes, torch.Tensor):
        assert bboxes.shape[-1] % 4 == 0
        width = img_shape[1]
        mirrored = bboxes.clone()
        # Mirror x-coordinates: new x1 = W - old x2, new x2 = W - old x1.
        mirrored[:, 0::4] = width - bboxes[:, 2::4]
        mirrored[:, 2::4] = width - bboxes[:, 0::4]
        return mirrored
    elif isinstance(bboxes, np.ndarray):
        return mmcv.bbox_flip(bboxes, img_shape)
def
bbox_mapping
(
bboxes
,
img_shape
,
scale_factor
,
flip
):
"""Map bboxes from the original image scale to testing scale"""
new_bboxes
=
bboxes
*
scale_factor
if
flip
:
new_bboxes
=
bbox_flip
(
new_bboxes
,
img_shape
)
return
new_bboxes
def
bbox_mapping_back
(
bboxes
,
img_shape
,
scale_factor
,
flip
):
"""Map bboxes from testing scale to original image scale"""
new_bboxes
=
bbox_flip
(
bboxes
,
img_shape
)
if
flip
else
bboxes
new_bboxes
=
new_bboxes
/
scale_factor
return
new_bboxes
def bbox2roi(bbox_list):
    """Convert a list of bboxes to roi format.

    Args:
        bbox_list (list[Tensor]): a list of bboxes corresponding to a batch
            of images.

    Returns:
        Tensor: shape (n, 5), [batch_ind, x1, y1, x2, y2]
    """
    rois_list = []
    for img_id, bboxes in enumerate(bbox_list):
        count = bboxes.size(0)
        if count > 0:
            # Prepend the image index as the batch column.
            batch_col = bboxes.new_full((count, 1), img_id)
            rois = torch.cat([batch_col, bboxes[:, :4]], dim=-1)
        else:
            rois = bboxes.new_zeros((0, 5))
        rois_list.append(rois)
    return torch.cat(rois_list, 0)
def roi2bbox(rois):
    """Split rois back into per-image bbox tensors (inverse of bbox2roi)."""
    bbox_list = []
    # Column 0 is the batch index; iterate images in sorted order.
    img_ids = torch.unique(rois[:, 0].cpu(), sorted=True)
    for img_id in img_ids:
        mask = rois[:, 0] == img_id.item()
        bbox_list.append(rois[mask, 1:])
    return bbox_list
def bbox2result_coco(bboxes, labels, num_classes):
    """Convert detection results to a list of numpy arrays.

    Args:
        bboxes (Tensor): shape (n, 5)
        labels (Tensor): shape (n, )
        num_classes (int): class number, including background class

    Returns:
        list(ndarray): bbox results of each class
    """
    if bboxes.shape[0] == 0:
        # No detections: one empty (0, 5) array per class.
        return [
            np.zeros((0, 5), dtype=np.float32) for i in range(num_classes)
        ]
    bboxes = bboxes.cpu().numpy()
    labels = labels.cpu().numpy()
    return [bboxes[labels == i, :] for i in range(num_classes)]
def distance2bbox(points, distance, max_shape=None):
    """Decode distance prediction to bounding box.

    Args:
        points (Tensor): Shape (n, 2), [x, y].
        distance (Tensor): Distance from the given point to 4
            boundaries (left, top, right, bottom).
        max_shape (tuple): Shape of the image.

    Returns:
        Tensor: Decoded bboxes.
    """
    left, top, right, bottom = distance.unbind(dim=-1)
    px, py = points[:, 0], points[:, 1]
    x1 = px - left
    y1 = py - top
    x2 = px + right
    y2 = py + bottom
    if max_shape is not None:
        # Clip to image bounds: max_shape is (H, W).
        x1 = x1.clamp(min=0, max=max_shape[1])
        y1 = y1.clamp(min=0, max=max_shape[0])
        x2 = x2.clamp(min=0, max=max_shape[1])
        y2 = y2.clamp(min=0, max=max_shape[0])
    return torch.stack([x1, y1, x2, y2], -1)
def
transform_lidar_to_cam
(
boxes_lidar
):
def
transform_lidar_to_cam
(
boxes_lidar
):
"""
"""
Only transform format, not exactly in camera coords
Only transform format, not exactly in camera coords
...
...
mmdet3d/core/optimizer/__init__.py
View file @
6d71b439
from
.builder
import
build_optimizer
from
.cocktail_constructor
import
CocktailOptimizerConstructor
from
.mix_optimizer
import
MixedOptimizer
from
.cocktail_optimizer
import
CocktailOptimizer
from
.registry
import
OPTIMIZERS
__all__
=
[
'
OPTIMIZERS'
,
'build_optimizer'
,
'Mixed
Optimizer'
]
__all__
=
[
'
CocktailOptimizerConstructor'
,
'Cocktail
Optimizer'
]
mmdet3d/core/optimizer/builder.py
deleted
100644 → 0
View file @
ba492be7
import
re
import
torch
from
mmdet.utils
import
build_from_cfg
,
get_root_logger
from
.registry
import
OPTIMIZERS
def build_optimizer(model, optimizer_cfg):
    """Build optimizer from configs.

    Args:
        model (:obj:`nn.Module`): The model with parameters to be optimized.
        optimizer_cfg (dict): The config dict of the optimizer.
            Positional fields are:
                - type: class name of the optimizer.
                - lr: base learning rate.
            Optional fields are:
                - any arguments of the corresponding optimizer type, e.g.,
                  weight_decay, momentum, etc.
                - paramwise_options: a dict with 4 accepted fileds
                  (bias_lr_mult, bias_decay_mult, norm_decay_mult,
                  dwconv_decay_mult).
                  `bias_lr_mult` and `bias_decay_mult` will be multiplied to
                  the lr and weight decay respectively for all bias parameters
                  (except for the normalization layers), and
                  `norm_decay_mult` will be multiplied to the weight decay
                  for all weight and bias parameters of normalization layers.
                  `dwconv_decay_mult` will be multiplied to the weight decay
                  for all weight and bias parameters of depthwise conv layers.

    Returns:
        torch.optim.Optimizer: The initialized optimizer.

    Example:
        >>> import torch
        >>> model = torch.nn.modules.Conv1d(1, 1, 1)
        >>> optimizer_cfg = dict(type='SGD', lr=0.01, momentum=0.9,
        >>>                      weight_decay=0.0001)
        >>> optimizer = build_optimizer(model, optimizer_cfg)
    """
    # Unwrap (Distributed)DataParallel-style wrappers.
    if hasattr(model, 'module'):
        model = model.module

    optimizer_cfg = optimizer_cfg.copy()
    if isinstance(optimizer_cfg, list):
        # A list of configs builds one optimizer per 'key'; each parameter is
        # routed to the first optimizer whose key is a substring of its name.
        # Assume paramwise_options is None if optimizer_cfg is list
        from .mix_optimizer import MixedOptimizer
        logger = get_root_logger()
        keys = [optimizer.pop('key') for optimizer in optimizer_cfg]
        keys_params = {key: [] for key in keys}
        keys_params_name = {key: [] for key in keys}
        keys_optimizer = []
        for name, param in model.named_parameters():
            param_group = {'params': [param]}
            find_flag = False
            for key in keys:
                if key in name:
                    keys_params[key].append(param_group)
                    keys_params_name[key].append(name)
                    find_flag = True
                    break
            # Every parameter must be claimed by exactly one optimizer key.
            assert find_flag, 'key {} is not matched to any optimizer'.format(
                name)
        step_intervals = []
        for key, single_cfg in zip(keys, optimizer_cfg):
            optimizer_cls = getattr(torch.optim, single_cfg.pop('type'))
            step_intervals.append(single_cfg.pop('step_interval', 1))
            single_optim = optimizer_cls(keys_params[key], **single_cfg)
            keys_optimizer.append(single_optim)
            logger.info('{} optimizes key:\n{}\n'.format(
                optimizer_cls.__name__, keys_params_name[key]))
        mix_optimizer = MixedOptimizer(keys_optimizer, step_intervals)
        return mix_optimizer
    else:
        paramwise_options = optimizer_cfg.pop('paramwise_options', None)
        # if no paramwise option is specified, just use the global setting
        if paramwise_options is None:
            params = model.parameters()
        else:
            assert isinstance(paramwise_options, dict)
            # get base lr and weight decay
            base_lr = optimizer_cfg['lr']
            base_wd = optimizer_cfg.get('weight_decay', None)
            # weight_decay must be explicitly specified if mult is specified
            if ('bias_decay_mult' in paramwise_options
                    or 'norm_decay_mult' in paramwise_options
                    or 'dwconv_decay_mult' in paramwise_options):
                assert base_wd is not None
            # get param-wise options
            bias_lr_mult = paramwise_options.get('bias_lr_mult', 1.)
            bias_decay_mult = paramwise_options.get('bias_decay_mult', 1.)
            norm_decay_mult = paramwise_options.get('norm_decay_mult', 1.)
            dwconv_decay_mult = paramwise_options.get('dwconv_decay_mult', 1.)
            named_modules = dict(model.named_modules())
            # set param-wise lr and weight decay
            params = []
            for name, param in model.named_parameters():
                param_group = {'params': [param]}
                if not param.requires_grad:
                    # FP16 training needs to copy gradient/weight between
                    # master weight copy and model weight, it is convenient
                    # to keep all parameters here to align with
                    # model.parameters()
                    params.append(param_group)
                    continue

                # for norm layers, overwrite the weight decay of weight and
                # bias
                # TODO: obtain the norm layer prefixes dynamically
                if re.search(r'(bn|gn)(\d+)?.(weight|bias)', name):
                    if base_wd is not None:
                        param_group['weight_decay'] = base_wd * norm_decay_mult
                # for other layers, overwrite both lr and weight decay of bias
                elif name.endswith('.bias'):
                    param_group['lr'] = base_lr * bias_lr_mult
                    if base_wd is not None:
                        param_group['weight_decay'] = base_wd * bias_decay_mult
                module_name = name.replace('.weight', '').replace('.bias', '')
                if module_name in named_modules and base_wd is not None:
                    module = named_modules[module_name]
                    # if this Conv2d is depthwise Conv2d
                    if isinstance(module, torch.nn.Conv2d) and \
                            module.in_channels == module.groups:
                        param_group['weight_decay'] = base_wd * \
                            dwconv_decay_mult
                # otherwise use the global settings
                params.append(param_group)

        optimizer_cfg['params'] = params
        return build_from_cfg(optimizer_cfg, OPTIMIZERS)
mmdet3d/core/optimizer/cocktail_constructor.py
0 → 100644
View file @
6d71b439
from
mmcv.utils
import
build_from_cfg
from
mmdet.core.optimizer
import
OPTIMIZER_BUILDERS
,
OPTIMIZERS
from
mmdet.utils
import
get_root_logger
from
.cocktail_optimizer
import
CocktailOptimizer
@OPTIMIZER_BUILDERS.register_module
class CocktailOptimizerConstructor(object):
    """Default constructor for optimizers.

    Attributes:
        model (:obj:`nn.Module`): The model with parameters to be optimized.
        optimizer_cfg (dict): The config dict of the optimizer.
            Positional fields are:
                - type: class name of the optimizer.
                - lr: base learning rate.
            Optional fields are:
                - any arguments of the corresponding optimizer type, e.g.,
                  weight_decay, momentum, etc.
        paramwise_cfg (dict, optional): Parameter-wise options. Accepted fields
            are:
                - bias_lr_mult: It will be multiplied to the learning rate for
                  all bias parameters (except for those in normalization
                  layers).
                - bias_decay_mult: It will be multiplied to the weight decay
                  for all bias parameters (except for those in normalization
                  layers and depthwise conv layers).
                - norm_decay_mult: will be multiplied to the weight decay
                  for all weight and bias parameters of normalization layers.
                - dwconv_decay_mult: will be multiplied to the weight decay
                  for all weight and bias parameters of depthwise conv layers.

    Example:
        >>> import torch
        >>> import torch.nn as nn
        >>> model = nn.ModuleDict({
        >>>     'pts': nn.modules.Conv1d(1, 1, 1, bias=False),
        >>>     'img': nn.modules.Conv1d(1, 1, 1, bias=False)
        >>> })
        >>> optimizer_cfg = dict(
        >>>     pts=dict(type='AdamW', lr=0.001,
        >>>              weight_decay=0.01, step_interval=1),
        >>>     img=dict(type='SGD', lr=0.02, momentum=0.9,
        >>>              weight_decay=0.0001, step_interval=2))
        >>> optim_builder = CocktailOptimizerConstructor(optimizer_cfg)
        >>> optimizer = optim_builder(model)
        >>> print(optimizer)
        CocktailOptimizer (
        Update interval: 1
        AdamW (
        Parameter Group 0
            amsgrad: False
            betas: (0.9, 0.999)
            eps: 1e-08
            lr: 0.001
            weight_decay: 0.01
        ),
        Update interval: 2
        SGD (
        Parameter Group 0
            dampening: 0
            lr: 0.02
            momentum: 0.9
            nesterov: False
            weight_decay: 0.0001
        ),
        )
    """

    def __init__(self, optimizer_cfg, paramwise_cfg=None):
        if not isinstance(optimizer_cfg, dict):
            raise TypeError('optimizer_cfg should be a dict',
                            'but got {}'.format(type(optimizer_cfg)))
        assert paramwise_cfg is None, \
            'Parameter wise config is not supported in Cocktail Optimizer'
        self.optimizer_cfg = optimizer_cfg

    def __call__(self, model):
        # Unwrap (Distributed)DataParallel-style wrappers.
        if hasattr(model, 'module'):
            model = model.module
        optimizer_cfg = self.optimizer_cfg.copy()
        logger = get_root_logger()
        # Each top-level config key is treated as a parameter-name prefix
        # that selects which optimizer owns a given parameter.
        keys_prefix = [key_prefix for key_prefix in optimizer_cfg.keys()]
        keys_params = {key: [] for key in keys_prefix}
        keys_params_name = {key: [] for key in keys_prefix}
        keys_optimizer = []
        for name, param in model.named_parameters():
            param_group = {'params': [param]}
            find_flag = False
            for key in keys_prefix:
                if key in name:
                    keys_params[key].append(param_group)
                    keys_params_name[key].append(name)
                    find_flag = True
                    break
            # Every parameter must be claimed by exactly one optimizer key.
            assert find_flag, 'key {} is not matched to any optimizer'.format(
                name)
        step_intervals = []
        for key, single_cfg in optimizer_cfg.items():
            # step_interval controls how often this sub-optimizer steps
            # (default: every iteration).
            step_intervals.append(single_cfg.pop('step_interval', 1))
            single_cfg['params'] = keys_params[key]
            single_optim = build_from_cfg(single_cfg, OPTIMIZERS)
            keys_optimizer.append(single_optim)
            logger.info('{} optimizes key:\n{}\n'.format(
                single_cfg['type'], keys_params_name[key]))
        cocktail_optimizer = CocktailOptimizer(keys_optimizer, step_intervals)
        return cocktail_optimizer
mmdet3d/core/optimizer/
mix
_optimizer.py
→
mmdet3d/core/optimizer/
cocktail
_optimizer.py
View file @
6d71b439
from
torch.optim
import
Optimizer
from
torch.optim
import
Optimizer
from
.registry
import
OPTIMIZERS
from
mmdet.core.optimizer
import
OPTIMIZERS
@
OPTIMIZERS
.
register_module
@
OPTIMIZERS
.
register_module
class
Mixed
Optimizer
(
Optimizer
):
class
Cocktail
Optimizer
(
Optimizer
):
"""
Mixed
Optimizer that contains multiple optimizers
"""
Cocktail
Optimizer that contains multiple optimizers
This optimizer applies the cocktail optimization for multi-modality models.
This optimizer applies the cocktail optimization for multi-modality models.
...
@@ -36,8 +36,9 @@ class MixedOptimizer(Optimizer):
...
@@ -36,8 +36,9 @@ class MixedOptimizer(Optimizer):
def
__repr__
(
self
):
def
__repr__
(
self
):
format_string
=
self
.
__class__
.
__name__
+
' (
\n
'
format_string
=
self
.
__class__
.
__name__
+
' (
\n
'
for
optimizer
in
self
.
optimizers
:
for
optimizer
,
interval
in
zip
(
self
.
optimizers
,
self
.
step_intervals
):
format_string
+=
'
\t
'
+
optimizer
.
__repr__
+
',
\n
'
format_string
+=
'Update interval: {}
\n
'
.
format
(
interval
)
format_string
+=
optimizer
.
__repr__
().
replace
(
'
\n
'
,
'
\n
'
)
+
',
\n
'
format_string
+=
')'
format_string
+=
')'
return
format_string
return
format_string
...
...
mmdet3d/core/optimizer/registry.py
deleted
100644 → 0
View file @
ba492be7
import
inspect
import
torch
from
mmdet.utils
import
Registry
# Registry mapping optimizer type names (e.g. 'SGD', 'Adam') to classes.
OPTIMIZERS = Registry('optimizer')


def register_torch_optimizers():
    """Register every concrete optimizer shipped with ``torch.optim``.

    Scans ``torch.optim`` and registers each concrete subclass of
    ``torch.optim.Optimizer`` into the ``OPTIMIZERS`` registry so they can
    be built from config dicts such as ``dict(type='SGD', lr=0.1)``.

    Returns:
        list[str]: Names of the registered optimizer classes.
    """
    torch_optimizers = []
    for module_name in dir(torch.optim):
        if module_name.startswith('__'):
            continue
        _optim = getattr(torch.optim, module_name)
        # Skip the abstract base class itself: ``issubclass(Optimizer,
        # Optimizer)`` is True, but the base cannot be instantiated from a
        # config, so registering it would only pollute the registry.
        if (inspect.isclass(_optim)
                and issubclass(_optim, torch.optim.Optimizer)
                and _optim is not torch.optim.Optimizer):
            OPTIMIZERS.register_module(_optim)
            torch_optimizers.append(module_name)
    return torch_optimizers


TORCH_OPTIMIZERS = register_torch_optimizers()
mmdet3d/models/anchor_heads/boxvelo_head.py
View file @
6d71b439
...
@@ -49,7 +49,7 @@ class Anchor3DVeloHead(SECONDHead):
...
@@ -49,7 +49,7 @@ class Anchor3DVeloHead(SECONDHead):
dir_limit_offset
=
1
,
dir_limit_offset
=
1
,
target_means
=
(.
0
,
.
0
,
.
0
,
.
0
),
target_means
=
(.
0
,
.
0
,
.
0
,
.
0
),
target_stds
=
(
1.0
,
1.0
,
1.0
,
1.0
),
target_stds
=
(
1.0
,
1.0
,
1.0
,
1.0
),
bbox_coder
=
dict
(
type
=
'ResidualCoder'
,
),
bbox_coder
=
dict
(
type
=
'Residual
3DBox
Coder'
,
),
loss_cls
=
dict
(
loss_cls
=
dict
(
type
=
'CrossEntropyLoss'
,
type
=
'CrossEntropyLoss'
,
use_sigmoid
=
True
,
use_sigmoid
=
True
,
...
...
mmdet3d/models/anchor_heads/second_head.py
View file @
6d71b439
...
@@ -55,7 +55,7 @@ class SECONDHead(nn.Module, AnchorTrainMixin):
...
@@ -55,7 +55,7 @@ class SECONDHead(nn.Module, AnchorTrainMixin):
dir_limit_offset
=
1
,
dir_limit_offset
=
1
,
target_means
=
(.
0
,
.
0
,
.
0
,
.
0
),
target_means
=
(.
0
,
.
0
,
.
0
,
.
0
),
target_stds
=
(
1.0
,
1.0
,
1.0
,
1.0
),
target_stds
=
(
1.0
,
1.0
,
1.0
,
1.0
),
bbox_coder
=
dict
(
type
=
'ResidualCoder'
),
bbox_coder
=
dict
(
type
=
'Residual
3DBox
Coder'
),
loss_cls
=
dict
(
loss_cls
=
dict
(
type
=
'CrossEntropyLoss'
,
type
=
'CrossEntropyLoss'
,
use_sigmoid
=
True
,
use_sigmoid
=
True
,
...
...
tests/test_forward.py
0 → 100644
View file @
6d71b439
"""
Test model forward process
CommandLine:
pytest tests/test_forward.py
xdoctest tests/test_forward.py zero
"""
import
copy
from
os.path
import
dirname
,
exists
,
join
import
numpy
as
np
import
torch
def _get_config_directory():
    """Locate the predefined detector ``configs`` directory of the repo."""
    try:
        # Normal case: this file lives inside the source checkout, so the
        # repo root is two levels up.
        repo_dpath = dirname(dirname(__file__))
    except NameError:
        # Interactive (IPython) sessions have no ``__file__``; fall back
        # to the installed mmdet package location.
        import mmdet
        repo_dpath = dirname(dirname(mmdet.__file__))
    config_dpath = join(repo_dpath, 'configs')
    if exists(config_dpath):
        return config_dpath
    raise Exception('Cannot find config path')
def _get_config_module(fname):
    """Load config file *fname* (relative to the configs dir) via mmcv."""
    from mmcv import Config
    config_fpath = join(_get_config_directory(), fname)
    return Config.fromfile(config_fpath)
def _get_detector_cfg(fname):
    """Return ``(model, train_cfg, test_cfg)`` needed to build a detector.

    Everything is deep copied so tests can freely mutate the returned
    configs without influencing other tests that load the same file.
    """
    import mmcv
    cfg = _get_config_module(fname)
    model = copy.deepcopy(cfg.model)
    train_cfg = mmcv.Config(copy.deepcopy(cfg.train_cfg))
    test_cfg = mmcv.Config(copy.deepcopy(cfg.test_cfg))
    return model, train_cfg, test_cfg
def test_faster_rcnn_forward():
    """Smoke-test train/test forward of the NuScenes Faster R-CNN config."""
    _test_two_stage_forward('nus/faster_rcnn_r50_fpn_caffe_2x8_1x_nus.py')
def _test_two_stage_forward(cfg_file):
    """Run train and test forward passes of a two-stage detector config.

    Trains once with a non-empty ground-truth batch and once with an empty
    one (both must produce a positive, backprop-able loss), then runs a
    no-grad test forward per image.

    Args:
        cfg_file (str): Config filename relative to the configs directory.
    """
    model, train_cfg, test_cfg = _get_detector_cfg(cfg_file)
    model['pretrained'] = None

    from mmdet.models import build_detector
    detector = build_detector(model, train_cfg=train_cfg, test_cfg=test_cfg)

    input_shape = (1, 3, 256, 256)

    # Test forward train with a non-empty truth batch, then an empty one.
    # (The two checks were previously copy-pasted; factored into a helper.)
    imgs, img_metas = _check_two_stage_train_forward(
        detector, input_shape, num_items=[10])
    imgs, img_metas = _check_two_stage_train_forward(
        detector, input_shape, num_items=[0])

    # Test forward test (inference) on each image of the last batch.
    with torch.no_grad():
        img_list = [g[None, :] for g in imgs]
        batch_results = []
        for one_img, one_meta in zip(img_list, img_metas):
            result = detector.forward([one_img], [[one_meta]],
                                      return_loss=False)
            batch_results.append(result)


def _check_two_stage_train_forward(detector, input_shape, num_items):
    """Run one train forward/backward and assert a positive total loss.

    Returns:
        tuple: ``(imgs, img_metas)`` from the generated batch, so the
            caller can reuse them for a test-mode forward.
    """
    mm_inputs = _demo_mm_inputs(input_shape, num_items=num_items)
    imgs = mm_inputs.pop('imgs')
    img_metas = mm_inputs.pop('img_metas')
    losses = detector.forward(
        imgs,
        img_metas,
        gt_bboxes=mm_inputs['gt_bboxes'],
        gt_labels=mm_inputs['gt_labels'],
        gt_masks=mm_inputs['gt_masks'],
        return_loss=True)
    assert isinstance(losses, dict)
    from mmdet.apis.train import parse_losses
    total_loss = parse_losses(losses)[0].requires_grad_(True)
    assert float(total_loss.item()) > 0
    total_loss.backward()
    return imgs, img_metas
def _test_single_stage_forward(cfg_file):
    """Run train and test forward passes of a single-stage detector config.

    Args:
        cfg_file (str): Config filename relative to the configs directory.
    """
    model, train_cfg, test_cfg = _get_detector_cfg(cfg_file)
    model['pretrained'] = None

    from mmdet.models import build_detector
    detector = build_detector(model, train_cfg=train_cfg, test_cfg=test_cfg)

    input_shape = (1, 3, 300, 300)
    mm_inputs = _demo_mm_inputs(input_shape)
    imgs = mm_inputs.pop('imgs')
    img_metas = mm_inputs.pop('img_metas')

    # Test forward train
    losses = detector.forward(
        imgs,
        img_metas,
        gt_bboxes=mm_inputs['gt_bboxes'],
        gt_labels=mm_inputs['gt_labels'],
        return_loss=True)
    assert isinstance(losses, dict)

    # Test forward test: one inference call per image, no gradients.
    with torch.no_grad():
        img_list = [g[None, :] for g in imgs]
        batch_results = [
            detector.forward([one_img], [[one_meta]], return_loss=False)
            for one_img, one_meta in zip(img_list, img_metas)
        ]
def _demo_mm_inputs(input_shape=(1, 3, 300, 300),
                    num_items=None, num_classes=10):  # yapf: disable
    """
    Create a superset of inputs needed to run test or train batches.

    Args:
        input_shape (tuple):
            input batch dimensions (N, C, H, W)
        num_items (None | List[int]):
            specifies the number of boxes in each batch item; random 1-9
            boxes per item when None
        num_classes (int):
            number of different labels a box might have

    Returns:
        dict: imgs (requires grad), img_metas, gt_bboxes, gt_labels,
            gt_bboxes_ignore (always None) and gt_masks.
    """
    from mmdet.core import BitmapMasks

    (N, C, H, W) = input_shape
    # Seeded generator so every call produces the same fixture.
    rng = np.random.RandomState(0)
    imgs = rng.rand(*input_shape)

    img_metas = [{
        'img_shape': (H, W, C),
        'ori_shape': (H, W, C),
        'pad_shape': (H, W, C),
        'filename': '<demo>.png',
        'scale_factor': 1.0,
        'flip': False,
    } for _ in range(N)]

    gt_bboxes = []
    gt_labels = []
    gt_masks = []
    for batch_idx in range(N):
        if num_items is None:
            num_boxes = rng.randint(1, 10)
        else:
            num_boxes = num_items[batch_idx]

        # Random boxes in normalized (cx, cy, w, h) form, converted to
        # corner (x1, y1, x2, y2) form and clipped to the image bounds.
        cx, cy, bw, bh = rng.rand(num_boxes, 4).T
        tl_x = ((cx * W) - (W * bw / 2)).clip(0, W)
        tl_y = ((cy * H) - (H * bh / 2)).clip(0, H)
        br_x = ((cx * W) + (W * bw / 2)).clip(0, W)
        br_y = ((cy * H) + (H * bh / 2)).clip(0, H)
        boxes = np.vstack([tl_x, tl_y, br_x, br_y]).T
        class_idxs = rng.randint(1, num_classes, size=num_boxes)
        gt_bboxes.append(torch.FloatTensor(boxes))
        gt_labels.append(torch.LongTensor(class_idxs))

        # BUGFIX: draw masks from the seeded local ``rng`` instead of the
        # global ``np.random`` so the whole fixture is deterministic.
        mask = rng.randint(0, 2, (len(boxes), H, W), dtype=np.uint8)
        gt_masks.append(BitmapMasks(mask, H, W))

    mm_inputs = {
        'imgs': torch.FloatTensor(imgs).requires_grad_(True),
        'img_metas': img_metas,
        'gt_bboxes': gt_bboxes,
        'gt_labels': gt_labels,
        'gt_bboxes_ignore': None,
        'gt_masks': gt_masks,
    }
    return mm_inputs
tools/test.py
View file @
6d71b439
...
@@ -51,12 +51,12 @@ def parse_args():
...
@@ -51,12 +51,12 @@ def parse_args():
parser
.
add_argument
(
'checkpoint'
,
help
=
'checkpoint file'
)
parser
.
add_argument
(
'checkpoint'
,
help
=
'checkpoint file'
)
parser
.
add_argument
(
'--out'
,
help
=
'output result file in pickle format'
)
parser
.
add_argument
(
'--out'
,
help
=
'output result file in pickle format'
)
parser
.
add_argument
(
parser
.
add_argument
(
'--fuse
_
conv
_
bn'
,
'--fuse
-
conv
-
bn'
,
action
=
'store_true'
,
action
=
'store_true'
,
help
=
'Whether to fuse conv and bn, this will slightly increase'
help
=
'Whether to fuse conv and bn, this will slightly increase'
'the inference speed'
)
'the inference speed'
)
parser
.
add_argument
(
parser
.
add_argument
(
'--format
_
only'
,
'--format
-
only'
,
action
=
'store_true'
,
action
=
'store_true'
,
help
=
'Format the output results without perform evaluation. It is'
help
=
'Format the output results without perform evaluation. It is'
'useful when you want to format the result to a specific format and '
'useful when you want to format the result to a specific format and '
...
@@ -69,7 +69,7 @@ def parse_args():
...
@@ -69,7 +69,7 @@ def parse_args():
' "segm", "proposal" for COCO, and "mAP", "recall" for PASCAL VOC'
)
' "segm", "proposal" for COCO, and "mAP", "recall" for PASCAL VOC'
)
parser
.
add_argument
(
'--show'
,
action
=
'store_true'
,
help
=
'show results'
)
parser
.
add_argument
(
'--show'
,
action
=
'store_true'
,
help
=
'show results'
)
parser
.
add_argument
(
parser
.
add_argument
(
'--gpu
_
collect'
,
'--gpu
-
collect'
,
action
=
'store_true'
,
action
=
'store_true'
,
help
=
'whether to use gpu to collect results.'
)
help
=
'whether to use gpu to collect results.'
)
parser
.
add_argument
(
parser
.
add_argument
(
...
...
tools/train.py
View file @
6d71b439
...
@@ -11,19 +11,18 @@ from mmcv import Config
...
@@ -11,19 +11,18 @@ from mmcv import Config
from
mmcv.runner
import
init_dist
from
mmcv.runner
import
init_dist
from
mmdet3d
import
__version__
from
mmdet3d
import
__version__
from
mmdet3d.apis
import
train_detector
from
mmdet3d.datasets
import
build_dataset
from
mmdet3d.datasets
import
build_dataset
from
mmdet3d.models
import
build_detector
from
mmdet3d.models
import
build_detector
from
mmdet3d.utils
import
collect_env
from
mmdet3d.utils
import
collect_env
from
mmdet.apis
import
get_root_logger
,
set_random_seed
from
mmdet.apis
import
get_root_logger
,
set_random_seed
,
train_detector
def
parse_args
():
def
parse_args
():
parser
=
argparse
.
ArgumentParser
(
description
=
'Train a detector'
)
parser
=
argparse
.
ArgumentParser
(
description
=
'Train a detector'
)
parser
.
add_argument
(
'config'
,
help
=
'train config file path'
)
parser
.
add_argument
(
'config'
,
help
=
'train config file path'
)
parser
.
add_argument
(
'--work
_
dir'
,
help
=
'the dir to save logs and models'
)
parser
.
add_argument
(
'--work
-
dir'
,
help
=
'the dir to save logs and models'
)
parser
.
add_argument
(
parser
.
add_argument
(
'--resume
_
from'
,
help
=
'the checkpoint file to resume from'
)
'--resume
-
from'
,
help
=
'the checkpoint file to resume from'
)
parser
.
add_argument
(
parser
.
add_argument
(
'--validate'
,
'--validate'
,
action
=
'store_true'
,
action
=
'store_true'
,
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment