Unverified Commit 7f977ea3 authored by Au1ong's avatar Au1ong Committed by GitHub
Browse files

Add pyramid data augmentation, which is mentioned in SE-SSD (#751)



* add pyramid augmentation

* add pyramid augmentation

* format the modified file
Co-authored-by: default avatarShaoshuai Shi <shaoshuaics@gmail.com>
parent a991105c
......@@ -2,6 +2,7 @@ import numpy as np
import math
import copy
from ...utils import common_utils
from ...utils import box_utils
def random_flip_along_x(gt_boxes, points):
......@@ -16,12 +17,13 @@ def random_flip_along_x(gt_boxes, points):
gt_boxes[:, 1] = -gt_boxes[:, 1]
gt_boxes[:, 6] = -gt_boxes[:, 6]
points[:, 1] = -points[:, 1]
if gt_boxes.shape[1] > 7:
gt_boxes[:, 8] = -gt_boxes[:, 8]
return gt_boxes, points
def random_flip_along_y(gt_boxes, points):
"""
Args:
......@@ -40,6 +42,7 @@ def random_flip_along_y(gt_boxes, points):
return gt_boxes, points
def global_rotation(gt_boxes, points, rot_range):
"""
Args:
......@@ -60,6 +63,7 @@ def global_rotation(gt_boxes, points, rot_range):
return gt_boxes, points
def global_scaling(gt_boxes, points, scale_range):
"""
Args:
......@@ -76,6 +80,7 @@ def global_scaling(gt_boxes, points, scale_range):
return gt_boxes, points
def random_image_flip_horizontal(image, depth_map, gt_boxes, calib):
"""
Performs random horizontal flip augmentation
......@@ -96,7 +101,7 @@ def random_image_flip_horizontal(image, depth_map, gt_boxes, calib):
# Flip images
aug_image = np.fliplr(image)
aug_depth_map = np.fliplr(depth_map)
# Flip 3D gt_boxes by flipping the centroids in image space
aug_gt_boxes = copy.copy(gt_boxes)
locations = aug_gt_boxes[:, :3]
......@@ -115,6 +120,7 @@ def random_image_flip_horizontal(image, depth_map, gt_boxes, calib):
return aug_image, aug_depth_map, aug_gt_boxes
def random_translation_along_x(gt_boxes, points, offset_range):
"""
Args:
......@@ -127,12 +133,13 @@ def random_translation_along_x(gt_boxes, points, offset_range):
points[:, 0] += offset
gt_boxes[:, 0] += offset
# if gt_boxes.shape[1] > 7:
# gt_boxes[:, 7] += offset
return gt_boxes, points
def random_translation_along_y(gt_boxes, points, offset_range):
"""
Args:
......@@ -145,12 +152,13 @@ def random_translation_along_y(gt_boxes, points, offset_range):
points[:, 1] += offset
gt_boxes[:, 1] += offset
# if gt_boxes.shape[1] > 8:
# gt_boxes[:, 8] += offset
return gt_boxes, points
def random_translation_along_z(gt_boxes, points, offset_range):
"""
Args:
......@@ -160,12 +168,13 @@ def random_translation_along_z(gt_boxes, points, offset_range):
Returns:
"""
offset = np.random.uniform(offset_range[0], offset_range[1])
points[:, 2] += offset
gt_boxes[:, 2] += offset
return gt_boxes, points
def random_local_translation_along_x(gt_boxes, points, offset_range):
"""
Args:
......@@ -180,14 +189,15 @@ def random_local_translation_along_x(gt_boxes, points, offset_range):
# augs[f'object_{idx}'] = offset
points_in_box, mask = get_points_in_box(points, box)
points[mask, 0] += offset
gt_boxes[idx, 0] += offset
# if gt_boxes.shape[1] > 7:
# gt_boxes[idx, 7] += offset
return gt_boxes, points
def random_local_translation_along_y(gt_boxes, points, offset_range):
"""
Args:
......@@ -202,14 +212,15 @@ def random_local_translation_along_y(gt_boxes, points, offset_range):
# augs[f'object_{idx}'] = offset
points_in_box, mask = get_points_in_box(points, box)
points[mask, 1] += offset
gt_boxes[idx, 1] += offset
# if gt_boxes.shape[1] > 8:
# gt_boxes[idx, 8] += offset
return gt_boxes, points
def random_local_translation_along_z(gt_boxes, points, offset_range):
"""
Args:
......@@ -224,11 +235,12 @@ def random_local_translation_along_z(gt_boxes, points, offset_range):
# augs[f'object_{idx}'] = offset
points_in_box, mask = get_points_in_box(points, box)
points[mask, 2] += offset
gt_boxes[idx, 2] += offset
return gt_boxes, points
def global_frustum_dropout_top(gt_boxes, points, intensity_range):
"""
Args:
......@@ -238,13 +250,14 @@ def global_frustum_dropout_top(gt_boxes, points, intensity_range):
Returns:
"""
intensity = np.random.uniform(intensity_range[0], intensity_range[1])
# threshold = max - length * uniform(0 ~ 0.2)
threshold = np.max(points[:, 2]) - intensity * (np.max(points[:, 2]) - np.min(points[:, 2]))
points = points[points[:,2] < threshold]
gt_boxes = gt_boxes[gt_boxes[:,2] < threshold]
points = points[points[:, 2] < threshold]
gt_boxes = gt_boxes[gt_boxes[:, 2] < threshold]
return gt_boxes, points
def global_frustum_dropout_bottom(gt_boxes, points, intensity_range):
"""
Args:
......@@ -254,13 +267,14 @@ def global_frustum_dropout_bottom(gt_boxes, points, intensity_range):
Returns:
"""
intensity = np.random.uniform(intensity_range[0], intensity_range[1])
threshold = np.min(points[:, 2]) + intensity * (np.max(points[:, 2]) - np.min(points[:, 2]))
points = points[points[:,2] > threshold]
gt_boxes = gt_boxes[gt_boxes[:,2] > threshold]
points = points[points[:, 2] > threshold]
gt_boxes = gt_boxes[gt_boxes[:, 2] > threshold]
return gt_boxes, points
def global_frustum_dropout_left(gt_boxes, points, intensity_range):
"""
Args:
......@@ -270,13 +284,14 @@ def global_frustum_dropout_left(gt_boxes, points, intensity_range):
Returns:
"""
intensity = np.random.uniform(intensity_range[0], intensity_range[1])
threshold = np.max(points[:, 1]) - intensity * (np.max(points[:, 1]) - np.min(points[:, 1]))
points = points[points[:,1] < threshold]
gt_boxes = gt_boxes[gt_boxes[:,1] < threshold]
points = points[points[:, 1] < threshold]
gt_boxes = gt_boxes[gt_boxes[:, 1] < threshold]
return gt_boxes, points
def global_frustum_dropout_right(gt_boxes, points, intensity_range):
"""
Args:
......@@ -286,13 +301,14 @@ def global_frustum_dropout_right(gt_boxes, points, intensity_range):
Returns:
"""
intensity = np.random.uniform(intensity_range[0], intensity_range[1])
threshold = np.min(points[:, 1]) + intensity * (np.max(points[:, 1]) - np.min(points[:, 1]))
points = points[points[:,1] > threshold]
gt_boxes = gt_boxes[gt_boxes[:,1] > threshold]
points = points[points[:, 1] > threshold]
gt_boxes = gt_boxes[gt_boxes[:, 1] > threshold]
return gt_boxes, points
def local_scaling(gt_boxes, points, scale_range):
"""
Args:
......@@ -303,7 +319,7 @@ def local_scaling(gt_boxes, points, scale_range):
"""
if scale_range[1] - scale_range[0] < 1e-3:
return gt_boxes, points
# augs = {}
for idx, box in enumerate(gt_boxes):
noise_scale = np.random.uniform(scale_range[0], scale_range[1])
......@@ -314,15 +330,15 @@ def local_scaling(gt_boxes, points, scale_range):
points[mask, 0] -= box[0]
points[mask, 1] -= box[1]
points[mask, 2] -= box[2]
# apply scaling
points[mask, :3] *= noise_scale
# translation back to original position
points[mask, 0] += box[0]
points[mask, 1] += box[1]
points[mask, 2] += box[2]
gt_boxes[idx, 3:6] *= noise_scale
return gt_boxes, points
......@@ -344,7 +360,7 @@ def local_rotation(gt_boxes, points, rot_range):
centroid_x = box[0]
centroid_y = box[1]
centroid_z = box[2]
# translation to axis center
points[mask, 0] -= centroid_x
points[mask, 1] -= centroid_y
......@@ -352,11 +368,11 @@ def local_rotation(gt_boxes, points, rot_range):
box[0] -= centroid_x
box[1] -= centroid_y
box[2] -= centroid_z
# apply rotation
points[mask, :] = common_utils.rotate_points_along_z(points[np.newaxis, mask, :], np.array([noise_rotation]))[0]
box[0:3] = common_utils.rotate_points_along_z(box[np.newaxis, np.newaxis, 0:3], np.array([noise_rotation]))[0][0]
# translation back to original position
points[mask, 0] += centroid_x
points[mask, 1] += centroid_y
......@@ -364,16 +380,17 @@ def local_rotation(gt_boxes, points, rot_range):
box[0] += centroid_x
box[1] += centroid_y
box[2] += centroid_z
gt_boxes[idx, 6] += noise_rotation
if gt_boxes.shape[1] > 8:
gt_boxes[idx, 7:9] = common_utils.rotate_points_along_z(
np.hstack((gt_boxes[idx, 7:9], np.zeros((gt_boxes.shape[0], 1))))[np.newaxis, :, :],
np.array([noise_rotation])
)[0][:, 0:2]
return gt_boxes, points
def local_frustum_dropout_top(gt_boxes, points, intensity_range):
"""
Args:
......@@ -384,15 +401,16 @@ def local_frustum_dropout_top(gt_boxes, points, intensity_range):
"""
for idx, box in enumerate(gt_boxes):
x, y, z, dx, dy, dz = box[0], box[1], box[2], box[3], box[4], box[5]
intensity = np.random.uniform(intensity_range[0], intensity_range[1])
points_in_box, mask = get_points_in_box(points, box)
threshold = (z + dz/2) - intensity * dz
points = points[np.logical_not(np.logical_and(mask, points[:,2] >= threshold))]
threshold = (z + dz / 2) - intensity * dz
points = points[np.logical_not(np.logical_and(mask, points[:, 2] >= threshold))]
return gt_boxes, points
def local_frustum_dropout_bottom(gt_boxes, points, intensity_range):
"""
Args:
......@@ -403,15 +421,16 @@ def local_frustum_dropout_bottom(gt_boxes, points, intensity_range):
"""
for idx, box in enumerate(gt_boxes):
x, y, z, dx, dy, dz = box[0], box[1], box[2], box[3], box[4], box[5]
intensity = np.random.uniform(intensity_range[0], intensity_range[1])
points_in_box, mask = get_points_in_box(points, box)
threshold = (z - dz/2) + intensity * dz
points = points[np.logical_not(np.logical_and(mask, points[:,2] <= threshold))]
threshold = (z - dz / 2) + intensity * dz
points = points[np.logical_not(np.logical_and(mask, points[:, 2] <= threshold))]
return gt_boxes, points
def local_frustum_dropout_left(gt_boxes, points, intensity_range):
"""
Args:
......@@ -422,15 +441,16 @@ def local_frustum_dropout_left(gt_boxes, points, intensity_range):
"""
for idx, box in enumerate(gt_boxes):
x, y, z, dx, dy, dz = box[0], box[1], box[2], box[3], box[4], box[5]
intensity = np.random.uniform(intensity_range[0], intensity_range[1])
points_in_box, mask = get_points_in_box(points, box)
threshold = (y + dy/2) - intensity * dy
points = points[np.logical_not(np.logical_and(mask, points[:,1] >= threshold))]
threshold = (y + dy / 2) - intensity * dy
points = points[np.logical_not(np.logical_and(mask, points[:, 1] >= threshold))]
return gt_boxes, points
def local_frustum_dropout_right(gt_boxes, points, intensity_range):
"""
Args:
......@@ -441,30 +461,223 @@ def local_frustum_dropout_right(gt_boxes, points, intensity_range):
"""
for idx, box in enumerate(gt_boxes):
x, y, z, dx, dy, dz = box[0], box[1], box[2], box[3], box[4], box[5]
intensity = np.random.uniform(intensity_range[0], intensity_range[1])
points_in_box, mask = get_points_in_box(points, box)
threshold = (y - dy/2) + intensity * dy
points = points[np.logical_not(np.logical_and(mask, points[:,1] <= threshold))]
threshold = (y - dy / 2) + intensity * dy
points = points[np.logical_not(np.logical_and(mask, points[:, 1] <= threshold))]
return gt_boxes, points
def get_points_in_box(points, gt_box):
    """Select the points that lie inside a single (yaw-rotated) 3D box.

    Args:
        points: (P, 3+) array; columns 0..2 are x, y, z.
        gt_box: (7+,) array [cx, cy, cz, dx, dy, dz, rz, ...] — center,
            dimensions, and heading (rotation around the z axis).

    Returns:
        points: (M, C) subset of the input points that fall inside the box.
        mask: (P,) boolean mask over the original points.

    Note: the diff artifact that left both the pre- and post-format versions
    of the coordinate unpacking and the mask computation in the body has been
    collapsed to a single copy; behavior is unchanged.
    """
    x, y, z = points[:, 0], points[:, 1], points[:, 2]
    cx, cy, cz = gt_box[0], gt_box[1], gt_box[2]
    dx, dy, dz, rz = gt_box[3], gt_box[4], gt_box[5], gt_box[6]
    shift_x, shift_y, shift_z = x - cx, y - cy, z - cz

    # Small tolerance so points sitting exactly on a side face still count.
    MARGIN = 1e-1
    # Rotate the shifted coordinates by -rz into the box's local frame.
    cosa, sina = math.cos(-rz), math.sin(-rz)
    local_x = shift_x * cosa + shift_y * (-sina)
    local_y = shift_x * sina + shift_y * cosa

    mask = np.logical_and(abs(shift_z) <= dz / 2.0,
                          np.logical_and(abs(local_x) <= dx / 2.0 + MARGIN,
                                         abs(local_y) <= dy / 2.0 + MARGIN))

    points = points[mask]
    return points, mask
def get_pyramids(boxes):
    """Decompose each 3D box into six face pyramids.

    Each pyramid has its apex at the box center and its base on one of the
    six box faces (4 base corners), i.e. 5 corners * 3 coords = 15 values.

    Args:
        boxes: (N, 7+) [x, y, z, dx, dy, dz, heading, ...]

    Returns:
        (N, 6, 15) array — per box, the six face pyramids.
    """
    # Indices (into boxes_to_corners_3d's 8-corner layout) of the four base
    # corners for each of the six faces.
    face_corner_orders = np.array([
        [0, 1, 5, 4],
        [4, 5, 6, 7],
        [7, 6, 2, 3],
        [3, 2, 1, 0],
        [1, 2, 6, 5],
        [0, 4, 7, 3],
    ])
    corners_flat = box_utils.boxes_to_corners_3d(boxes).reshape(-1, 24)
    centers = boxes[:, 0:3]

    per_face = []
    for order in face_corner_orders:
        # apex (box center) followed by this face's four base corners
        base_corners = [corners_flat[:, 3 * c: 3 * c + 3] for c in order]
        face_pyramid = np.concatenate([centers] + base_corners, axis=1)  # (N, 15)
        per_face.append(face_pyramid[:, None, :])
    return np.concatenate(per_face, axis=1)  # (N, 6, 15)
def one_hot(x, num_class=1):
    """Encode integer labels as a (len(x), num_class) one-hot matrix.

    Args:
        x: sequence of integer class indices in [0, num_class).
        num_class: number of classes; None is treated as 1.

    Returns:
        (len(x), num_class) float array with a single 1 per row.
    """
    if num_class is None:
        num_class = 1
    encoded = np.zeros((len(x), num_class))
    encoded[np.arange(len(x)), x] = 1
    return encoded
def points_in_pyramids_mask(points, pyramids):
    """Compute which points fall inside which pyramids.

    Args:
        points: (P, 3+) array; only the xyz columns are used.
        pyramids: array reshapeable to (K, 5, 3) — K pyramids of 5 corners.

    Returns:
        flags: (P, K) boolean matrix; flags[i, j] is True when point i lies
            inside the convex hull of pyramid j.
    """
    pyramids = pyramids.reshape(-1, 5, 3)
    # Use the builtin `bool`: the deprecated `np.bool` alias was removed in
    # NumPy >= 1.24 and raises AttributeError there.
    flags = np.zeros((points.shape[0], pyramids.shape[0]), dtype=bool)
    for i, pyramid in enumerate(pyramids):
        flags[:, i] = np.logical_or(flags[:, i], box_utils.in_hull(points[:, 0:3], pyramid))
    return flags
def local_pyramid_dropout(gt_boxes, points, dropout_prob, pyramids=None):
    """Randomly remove all points inside one face pyramid of some boxes.

    Args:
        gt_boxes: (N, 7+) boxes; returned unchanged.
        points: (P, C) point cloud.
        dropout_prob: per-box probability of dropping one of its pyramids.
        pyramids: optional precomputed (N, 6, 5, 3) pyramids.

    Returns:
        gt_boxes, filtered points, and the pyramids of the boxes that were
        NOT dropped (for chaining with the other pyramid augmentations).
    """
    if pyramids is None:
        # six face pyramids per box: (num_boxes, 6, 5, 3)
        pyramids = get_pyramids(gt_boxes).reshape([-1, 6, 5, 3])
    num_boxes = pyramids.shape[0]
    # Pick one candidate face per box first, then decide per box whether to
    # drop it (RNG call order matters for reproducibility).
    chosen_face = np.random.randint(0, 6, (num_boxes))
    chosen_face_one_hot = one_hot(chosen_face, num_class=6)
    drop_box_mask = np.random.uniform(0, 1, (num_boxes)) <= dropout_prob
    if np.sum(drop_box_mask) != 0:
        face_drop_mask = (np.tile(drop_box_mask[:, None], [1, 6]) * chosen_face_one_hot) > 0
        dropped_pyramids = pyramids[face_drop_mask]
        inside = points_in_pyramids_mask(points, dropped_pyramids)
        points = points[np.logical_not(inside.any(-1))]
    # Boxes that lost a pyramid are excluded from subsequent pyramid augs.
    pyramids = pyramids[np.logical_not(drop_box_mask)]
    return gt_boxes, points, pyramids
def local_pyramid_sparsify(gt_boxes, points, prob, max_num_pts, pyramids=None):
    """Randomly downsample the points inside one face pyramid of some boxes.

    For each selected box, one of its six face pyramids is chosen; if that
    pyramid contains more than `max_num_pts` points, its points are randomly
    subsampled down to exactly `max_num_pts`.

    Args:
        gt_boxes: (N, 7+) boxes; returned unchanged.
        points: (P, C) point cloud.
        prob: per-box probability of sparsifying one of its pyramids.
        max_num_pts: target point count kept inside a sparsified pyramid.
        pyramids: optional precomputed (N, 6, 5, 3) pyramids (e.g. the
            survivors returned by local_pyramid_dropout).

    Returns:
        gt_boxes, updated points, and the pyramids of boxes that were NOT
        sparsified (for chaining with local_pyramid_swap).
    """
    if pyramids is None:
        pyramids = get_pyramids(gt_boxes).reshape([-1, 6, 5, 3])  # six face pyramids per box: (num_boxes, 6, 5, 3)
    if pyramids.shape[0] > 0:
        sparsity_prob, sparsity_num = prob, max_num_pts
        # One candidate face per box, then a per-box Bernoulli selection.
        sparsify_pyramid_indices = np.random.randint(0, 6, (pyramids.shape[0]))
        sparsify_pyramid_one_hot = one_hot(sparsify_pyramid_indices, num_class=6)
        sparsify_box_mask = np.random.uniform(0, 1, (pyramids.shape[0])) <= sparsity_prob
        sparsify_pyramid_mask = (np.tile(sparsify_box_mask[:, None], [1, 6]) * sparsify_pyramid_one_hot) > 0

        pyramid_sampled = pyramids[sparsify_pyramid_mask]  # (-1,6,5,3)[(num_sample,6)]
        # Column j of this mask matrix corresponds to pyramid_sampled[j];
        # the column/row alignment below relies on that ordering.
        pyramid_sampled_point_masks = points_in_pyramids_mask(points, pyramid_sampled)
        pyramid_sampled_points_num = pyramid_sampled_point_masks.sum(0)  # number of points in each sampled pyramid
        valid_pyramid_sampled_mask = pyramid_sampled_points_num > sparsity_num  # only pyramids with more than sparsity_num points are sparsified
        sparsify_pyramids = pyramid_sampled[valid_pyramid_sampled_mask]
        if sparsify_pyramids.shape[0] > 0:
            point_masks = pyramid_sampled_point_masks[:, valid_pyramid_sampled_mask]
            remain_points = points[np.logical_not(point_masks.any(-1))]  # points outside every sparsified pyramid
            to_sparsify_points = [points[point_masks[:, i]] for i in range(point_masks.shape[1])]

            sparsified_points = []
            for sample in to_sparsify_points:
                # Keep exactly sparsity_num points (valid mask guarantees
                # sample.shape[0] > sparsity_num, so replace=False is safe).
                sampled_indices = np.random.choice(sample.shape[0], size=sparsity_num, replace=False)
                sparsified_points.append(sample[sampled_indices])
            sparsified_points = np.concatenate(sparsified_points, axis=0)
            points = np.concatenate([remain_points, sparsified_points], axis=0)
        pyramids = pyramids[np.logical_not(sparsify_box_mask)]
    return gt_boxes, points, pyramids
def local_pyramid_swap(gt_boxes, points, prob, max_num_pts, pyramids=None):
    """Swap the points between face pyramids of two different boxes (SE-SSD).

    For each selected box, one sufficiently populated face pyramid is paired
    with a same-face pyramid from another box. Points are re-expressed in
    pyramid-local (alpha, beta, gamma) coordinates, recovered inside the
    partner pyramid, and their intensities (assumed to be the last point
    column) are min-max renormalized into the partner's intensity range.

    Args:
        gt_boxes: (N, 7+) boxes; returned unchanged.
        points: (P, C) point cloud; column -1 is treated as intensity.
        prob: per-box probability of swapping one of its pyramids.
        max_num_pts: minimum point count for a pyramid to take part in a swap.
        pyramids: optional precomputed (N, 6, 5, 3) pyramids.

    Returns:
        gt_boxes and the updated points.
    """
    def get_points_ratio(points, pyramid):
        # Express points in the pyramid's local frame: alpha/beta span the
        # base face, gamma runs from the base-face center to the apex.
        surface_center = (pyramid[3:6] + pyramid[6:9] + pyramid[9:12] + pyramid[12:]) / 4.0
        vector_0, vector_1, vector_2 = pyramid[6:9] - pyramid[3:6], pyramid[12:] - pyramid[3:6], pyramid[0:3] - surface_center
        alphas = ((points[:, 0:3] - pyramid[3:6]) * vector_0).sum(-1) / np.power(vector_0, 2).sum()
        betas = ((points[:, 0:3] - pyramid[3:6]) * vector_1).sum(-1) / np.power(vector_1, 2).sum()
        gammas = ((points[:, 0:3] - surface_center) * vector_2).sum(-1) / np.power(vector_2, 2).sum()
        return [alphas, betas, gammas]

    def recover_points_by_ratio(points_ratio, pyramid):
        # Inverse of get_points_ratio: local ratios -> world coordinates.
        alphas, betas, gammas = points_ratio
        surface_center = (pyramid[3:6] + pyramid[6:9] + pyramid[9:12] + pyramid[12:]) / 4.0
        vector_0, vector_1, vector_2 = pyramid[6:9] - pyramid[3:6], pyramid[12:] - pyramid[3:6], pyramid[0:3] - surface_center
        points = (alphas[:, None] * vector_0 + betas[:, None] * vector_1) + pyramid[3:6] + gammas[:, None] * vector_2
        return points

    def recover_points_intensity_by_ratio(points_intensity_ratio, max_intensity, min_intensity):
        # Map a [0, 1] ratio back into the target [min, max] intensity range.
        return points_intensity_ratio * (max_intensity - min_intensity) + min_intensity

    # swap partition
    if pyramids is None:
        pyramids = get_pyramids(gt_boxes).reshape([-1, 6, 5, 3])  # six face pyramids per box: (num_boxes, 6, 5, 3)
    swap_prob, num_thres = prob, max_num_pts
    swap_pyramid_mask = np.random.uniform(0, 1, (pyramids.shape[0])) <= swap_prob

    if swap_pyramid_mask.sum() > 0:
        point_masks = points_in_pyramids_mask(points, pyramids)
        point_nums = point_masks.sum(0).reshape(pyramids.shape[0], -1)  # [N, 6]
        non_zero_pyramids_mask = point_nums > num_thres  # ignore dropped-out or highly occluded pyramids
        selected_pyramids = non_zero_pyramids_mask * swap_pyramid_mask[:, None]  # selected boxes and all their valid pyramids
        if selected_pyramids.sum() > 0:
            # get to_swap pyramids: pick one valid face per selected box
            index_i, index_j = np.nonzero(selected_pyramids)
            selected_pyramid_indices = [np.random.choice(index_j[index_i == i])
                                        if e and (index_i == i).any() else 0
                                        for i, e in enumerate(swap_pyramid_mask)]
            selected_pyramids_mask = selected_pyramids * one_hot(selected_pyramid_indices, num_class=6) == 1
            to_swap_pyramids = pyramids[selected_pyramids_mask]

            # get swapped pyramids: for each chosen face j, pick a partner box
            # with a valid same-face pyramid (falling back to the box itself).
            index_i, index_j = np.nonzero(selected_pyramids_mask)
            non_zero_pyramids_mask[selected_pyramids_mask] = False  # a pyramid cannot partner with itself
            swapped_index_i = np.array([np.random.choice(np.where(non_zero_pyramids_mask[:, j])[0]) if
                                        np.where(non_zero_pyramids_mask[:, j])[0].shape[0] > 0 else
                                        index_i[i] for i, j in enumerate(index_j.tolist())])
            swapped_indicies = np.concatenate([swapped_index_i[:, None], index_j[:, None]], axis=1)
            swapped_pyramids = pyramids[
                swapped_indicies[:, 0].astype(np.int32), swapped_indicies[:, 1].astype(np.int32)]

            # concat to_swap & swapped pyramids; columns of swap_point_masks
            # line up as [to_swap_0..k-1, swapped_0..k-1]
            swap_pyramids = np.concatenate([to_swap_pyramids, swapped_pyramids], axis=0)
            swap_point_masks = points_in_pyramids_mask(points, swap_pyramids)
            remain_points = points[np.logical_not(swap_point_masks.any(-1))]

            # swap pyramids
            points_res = []
            num_swapped_pyramids = swapped_pyramids.shape[0]
            for i in range(num_swapped_pyramids):
                to_swap_pyramid = to_swap_pyramids[i]
                swapped_pyramid = swapped_pyramids[i]

                to_swap_points = points[swap_point_masks[:, i]]
                swapped_points = points[swap_point_masks[:, i + num_swapped_pyramids]]
                # normalize each side's intensity to [0, 1] (clip avoids /0)
                to_swap_points_intensity_ratio = (to_swap_points[:, -1:] - to_swap_points[:, -1:].min()) / \
                    np.clip((to_swap_points[:, -1:].max() - to_swap_points[:, -1:].min()), 1e-6, 1)
                swapped_points_intensity_ratio = (swapped_points[:, -1:] - swapped_points[:, -1:].min()) / \
                    np.clip((swapped_points[:, -1:].max() - swapped_points[:, -1:].min()), 1e-6, 1)

                to_swap_points_ratio = get_points_ratio(to_swap_points, to_swap_pyramid.reshape(15))
                swapped_points_ratio = get_points_ratio(swapped_points, swapped_pyramid.reshape(15))
                new_to_swap_points = recover_points_by_ratio(swapped_points_ratio, to_swap_pyramid.reshape(15))
                new_swapped_points = recover_points_by_ratio(to_swap_points_ratio, swapped_pyramid.reshape(15))
                # recover intensities into the destination pyramid's range
                new_to_swap_points_intensity = recover_points_intensity_by_ratio(
                    swapped_points_intensity_ratio, to_swap_points[:, -1:].max(),
                    to_swap_points[:, -1:].min())
                new_swapped_points_intensity = recover_points_intensity_by_ratio(
                    to_swap_points_intensity_ratio, swapped_points[:, -1:].max(),
                    swapped_points[:, -1:].min())

                new_to_swap_points = np.concatenate([new_to_swap_points, new_to_swap_points_intensity], axis=1)
                new_swapped_points = np.concatenate([new_swapped_points, new_swapped_points_intensity], axis=1)

                points_res.append(new_to_swap_points)
                points_res.append(new_swapped_points)

            points_res = np.concatenate(points_res, axis=0)
            points = np.concatenate([remain_points, points_res], axis=0)
    return gt_boxes, points
......@@ -11,18 +11,18 @@ class DataAugmentor(object):
self.root_path = root_path
self.class_names = class_names
self.logger = logger
self.data_augmentor_queue = []
aug_config_list = augmentor_configs if isinstance(augmentor_configs, list) \
else augmentor_configs.AUG_CONFIG_LIST
for cur_cfg in aug_config_list:
if not isinstance(augmentor_configs, list):
if cur_cfg.NAME in augmentor_configs.DISABLE_AUG_LIST:
continue
cur_augmentor = getattr(self, cur_cfg.NAME)(config=cur_cfg)
self.data_augmentor_queue.append(cur_augmentor)
def gt_sampling(self, config=None):
db_sampler = database_sampler.DataBaseSampler(
root_path=self.root_path,
......@@ -31,15 +31,15 @@ class DataAugmentor(object):
logger=self.logger
)
return db_sampler
def __getstate__(self):
d = dict(self.__dict__)
del d['logger']
return d
    def __setstate__(self, d):
        """Restore attributes from a pickled state dict (see __getstate__)."""
        self.__dict__.update(d)
def random_world_flip(self, data_dict=None, config=None):
if data_dict is None:
return partial(self.random_world_flip, config=config)
......@@ -49,11 +49,11 @@ class DataAugmentor(object):
gt_boxes, points = getattr(augmentor_utils, 'random_flip_along_%s' % cur_axis)(
gt_boxes, points,
)
data_dict['gt_boxes'] = gt_boxes
data_dict['points'] = points
return data_dict
def random_world_rotation(self, data_dict=None, config=None):
if data_dict is None:
return partial(self.random_world_rotation, config=config)
......@@ -63,22 +63,22 @@ class DataAugmentor(object):
gt_boxes, points = augmentor_utils.global_rotation(
data_dict['gt_boxes'], data_dict['points'], rot_range=rot_range
)
data_dict['gt_boxes'] = gt_boxes
data_dict['points'] = points
return data_dict
def random_world_scaling(self, data_dict=None, config=None):
if data_dict is None:
return partial(self.random_world_scaling, config=config)
gt_boxes, points = augmentor_utils.global_scaling(
data_dict['gt_boxes'], data_dict['points'], config['WORLD_SCALE_RANGE']
)
data_dict['gt_boxes'] = gt_boxes
data_dict['points'] = points
return data_dict
def random_image_flip(self, data_dict=None, config=None):
if data_dict is None:
return partial(self.random_image_flip, config=config)
......@@ -92,12 +92,12 @@ class DataAugmentor(object):
images, depth_maps, gt_boxes = getattr(augmentor_utils, 'random_image_flip_%s' % cur_axis)(
images, depth_maps, gt_boxes, calib,
)
data_dict['images'] = images
data_dict['depth_maps'] = depth_maps
data_dict['gt_boxes'] = gt_boxes
return data_dict
def random_world_translation(self, data_dict=None, config=None):
"""
Please check the correctness of it before using.
......@@ -111,11 +111,11 @@ class DataAugmentor(object):
gt_boxes, points = getattr(augmentor_utils, 'random_translation_along_%s' % cur_axis)(
gt_boxes, points, offset_range,
)
data_dict['gt_boxes'] = gt_boxes
data_dict['points'] = points
return data_dict
def random_local_translation(self, data_dict=None, config=None):
"""
Please check the correctness of it before using.
......@@ -129,11 +129,11 @@ class DataAugmentor(object):
gt_boxes, points = getattr(augmentor_utils, 'random_local_translation_along_%s' % cur_axis)(
gt_boxes, points, offset_range,
)
data_dict['gt_boxes'] = gt_boxes
data_dict['points'] = points
return data_dict
def random_local_rotation(self, data_dict=None, config=None):
"""
Please check the correctness of it before using.
......@@ -146,11 +146,11 @@ class DataAugmentor(object):
gt_boxes, points = augmentor_utils.local_rotation(
data_dict['gt_boxes'], data_dict['points'], rot_range=rot_range
)
data_dict['gt_boxes'] = gt_boxes
data_dict['points'] = points
return data_dict
def random_local_scaling(self, data_dict=None, config=None):
"""
Please check the correctness of it before using.
......@@ -160,18 +160,18 @@ class DataAugmentor(object):
gt_boxes, points = augmentor_utils.local_scaling(
data_dict['gt_boxes'], data_dict['points'], config['LOCAL_SCALE_RANGE']
)
data_dict['gt_boxes'] = gt_boxes
data_dict['points'] = points
return data_dict
def random_world_frustum_dropout(self, data_dict=None, config=None):
"""
Please check the correctness of it before using.
"""
if data_dict is None:
return partial(self.random_world_frustum_dropout, config=config)
intensity_range = config['INTENSITY_RANGE']
gt_boxes, points = data_dict['gt_boxes'], data_dict['points']
for direction in config['DIRECTION']:
......@@ -179,18 +179,18 @@ class DataAugmentor(object):
gt_boxes, points = getattr(augmentor_utils, 'global_frustum_dropout_%s' % direction)(
gt_boxes, points, intensity_range,
)
data_dict['gt_boxes'] = gt_boxes
data_dict['points'] = points
return data_dict
def random_local_frustum_dropout(self, data_dict=None, config=None):
"""
Please check the correctness of it before using.
"""
if data_dict is None:
return partial(self.random_local_frustum_dropout, config=config)
intensity_range = config['INTENSITY_RANGE']
gt_boxes, points = data_dict['gt_boxes'], data_dict['points']
for direction in config['DIRECTION']:
......@@ -198,11 +198,34 @@ class DataAugmentor(object):
gt_boxes, points = getattr(augmentor_utils, 'local_frustum_dropout_%s' % direction)(
gt_boxes, points, intensity_range,
)
data_dict['gt_boxes'] = gt_boxes
data_dict['points'] = points
return data_dict
def random_local_pyramid_aug(self, data_dict=None, config=None):
"""
Refer to the paper:
SE-SSD: Self-Ensembling Single-Stage Object Detector From Point Cloud
"""
if data_dict is None:
return partial(self.random_local_pyramid_aug, config=config)
gt_boxes, points = data_dict['gt_boxes'], data_dict['points']
gt_boxes, points, pyramids = augmentor_utils.local_pyramid_dropout(gt_boxes, points, config['DROP_PROB'])
gt_boxes, points, pyramids = augmentor_utils.local_pyramid_sparsify(gt_boxes, points,
config['SPARSIFY_PROB'],
config['SPARSIFY_MAX_NUM'],
pyramids)
gt_boxes, points = augmentor_utils.local_pyramid_swap(gt_boxes, points,
config['SWAP_PROB'],
config['SWAP_MAX_NUM'],
pyramids)
data_dict['gt_boxes'] = gt_boxes
data_dict['points'] = points
return data_dict
def forward(self, data_dict):
"""
Args:
......@@ -216,7 +239,7 @@ class DataAugmentor(object):
"""
for cur_augmentor in self.data_augmentor_queue:
data_dict = cur_augmentor(data_dict=data_dict)
data_dict['gt_boxes'][:, 6] = common_utils.limit_period(
data_dict['gt_boxes'][:, 6], offset=0.5, period=2 * np.pi
)
......@@ -230,6 +253,6 @@ class DataAugmentor(object):
data_dict['gt_names'] = data_dict['gt_names'][gt_boxes_mask]
if 'gt_boxes2d' in data_dict:
data_dict['gt_boxes2d'] = data_dict['gt_boxes2d'][gt_boxes_mask]
data_dict.pop('gt_boxes_mask')
return data_dict
\ No newline at end of file
return data_dict
# PointPillars on KITTI (Car / Pedestrian / Cyclist) with the SE-SSD-style
# local pyramid augmentation enabled in the data augmentor.
CLASS_NAMES: ['Car', 'Pedestrian', 'Cyclist']

DATA_CONFIG:
    _BASE_CONFIG_: cfgs/dataset_configs/kitti_dataset.yaml
    POINT_CLOUD_RANGE: [0, -39.68, -3, 69.12, 39.68, 1]
    DATA_PROCESSOR:
        - NAME: mask_points_and_boxes_outside_range
          REMOVE_OUTSIDE_BOXES: True

        - NAME: shuffle_points
          SHUFFLE_ENABLED: {
            'train': True,
            'test': False
          }

        - NAME: transform_points_to_voxels
          VOXEL_SIZE: [0.16, 0.16, 4]
          MAX_POINTS_PER_VOXEL: 32
          MAX_NUMBER_OF_VOXELS: {
            'train': 16000,
            'test': 40000
          }

    DATA_AUGMENTOR:
        DISABLE_AUG_LIST: ['placeholder']
        AUG_CONFIG_LIST:
            - NAME: gt_sampling
              USE_ROAD_PLANE: True
              DB_INFO_PATH:
                  - kitti_dbinfos_train.pkl
              PREPARE: {
                 filter_by_min_points: ['Car:5', 'Pedestrian:5', 'Cyclist:5'],
                 filter_by_difficulty: [-1],
              }

              SAMPLE_GROUPS: ['Car:15','Pedestrian:15', 'Cyclist:15']
              NUM_POINT_FEATURES: 4
              DATABASE_WITH_FAKELIDAR: False
              REMOVE_EXTRA_WIDTH: [0.0, 0.0, 0.0]
              LIMIT_WHOLE_SCENE: False

            - NAME: random_world_flip
              ALONG_AXIS_LIST: ['x']

            - NAME: random_world_rotation
              WORLD_ROT_ANGLE: [-0.78539816, 0.78539816]

            - NAME: random_world_scaling
              WORLD_SCALE_RANGE: [0.95, 1.05]

            # SE-SSD pyramid augmentation: dropout / sparsify / swap stages
            - NAME: random_local_pyramid_aug
              DROP_PROB: 0.25
              SPARSIFY_PROB: 0.05
              SPARSIFY_MAX_NUM: 50
              SWAP_PROB: 0.1
              SWAP_MAX_NUM: 50

MODEL:
    NAME: PointPillar

    VFE:
        NAME: PillarVFE
        WITH_DISTANCE: False
        # NOTE: key spelling below matches the name the code reads — do not "fix" it
        USE_ABSLOTE_XYZ: True
        USE_NORM: True
        NUM_FILTERS: [64]

    MAP_TO_BEV:
        NAME: PointPillarScatter
        NUM_BEV_FEATURES: 64

    BACKBONE_2D:
        NAME: BaseBEVBackbone
        LAYER_NUMS: [3, 5, 5]
        LAYER_STRIDES: [2, 2, 2]
        NUM_FILTERS: [64, 128, 256]
        UPSAMPLE_STRIDES: [1, 2, 4]
        NUM_UPSAMPLE_FILTERS: [128, 128, 128]

    DENSE_HEAD:
        NAME: AnchorHeadSingle
        CLASS_AGNOSTIC: False

        USE_DIRECTION_CLASSIFIER: True
        DIR_OFFSET: 0.78539
        DIR_LIMIT_OFFSET: 0.0
        NUM_DIR_BINS: 2

        ANCHOR_GENERATOR_CONFIG: [
            {
                'class_name': 'Car',
                'anchor_sizes': [[3.9, 1.6, 1.56]],
                'anchor_rotations': [0, 1.57],
                'anchor_bottom_heights': [-1.78],
                'align_center': False,
                'feature_map_stride': 2,
                'matched_threshold': 0.6,
                'unmatched_threshold': 0.45
            },
            {
                'class_name': 'Pedestrian',
                'anchor_sizes': [[0.8, 0.6, 1.73]],
                'anchor_rotations': [0, 1.57],
                'anchor_bottom_heights': [-0.6],
                'align_center': False,
                'feature_map_stride': 2,
                'matched_threshold': 0.5,
                'unmatched_threshold': 0.35
            },
            {
                'class_name': 'Cyclist',
                'anchor_sizes': [[1.76, 0.6, 1.73]],
                'anchor_rotations': [0, 1.57],
                'anchor_bottom_heights': [-0.6],
                'align_center': False,
                'feature_map_stride': 2,
                'matched_threshold': 0.5,
                'unmatched_threshold': 0.35
            }
        ]

        TARGET_ASSIGNER_CONFIG:
            NAME: AxisAlignedTargetAssigner
            POS_FRACTION: -1.0
            SAMPLE_SIZE: 512
            NORM_BY_NUM_EXAMPLES: False
            MATCH_HEIGHT: False
            BOX_CODER: ResidualCoder

        LOSS_CONFIG:
            LOSS_WEIGHTS: {
                'cls_weight': 1.0,
                'loc_weight': 2.0,
                'dir_weight': 0.2,
                'code_weights': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
            }

    POST_PROCESSING:
        RECALL_THRESH_LIST: [0.3, 0.5, 0.7]
        SCORE_THRESH: 0.1
        OUTPUT_RAW_SCORE: False

        EVAL_METRIC: kitti

        NMS_CONFIG:
            MULTI_CLASSES_NMS: False
            NMS_TYPE: nms_gpu
            NMS_THRESH: 0.01
            NMS_PRE_MAXSIZE: 4096
            NMS_POST_MAXSIZE: 500

OPTIMIZATION:
    BATCH_SIZE_PER_GPU: 4
    NUM_EPOCHS: 80

    OPTIMIZER: adam_onecycle
    LR: 0.003
    WEIGHT_DECAY: 0.01
    MOMENTUM: 0.9

    MOMS: [0.95, 0.85]
    PCT_START: 0.4
    DIV_FACTOR: 10
    DECAY_STEP_LIST: [35, 45]
    LR_DECAY: 0.1
    LR_CLIP: 0.0000001

    LR_WARMUP: False
    WARMUP_EPOCH: 1

    GRAD_NORM_CLIP: 10
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment