"segmentation/vscode:/vscode.git/clone" did not exist on "c4552f794aab15e56a00ccb06747e3fa6b8bec38"
Commit c5dfdd71 authored by chenshi3's avatar chenshi3
Browse files

Add multi-modal support for Nuscenes dataset

parent 4dc18496
from functools import partial
import numpy as np
from PIL import Image
from ...utils import common_utils
from . import augmentor_utils, database_sampler
......@@ -23,6 +24,18 @@ class DataAugmentor(object):
cur_augmentor = getattr(self, cur_cfg.NAME)(config=cur_cfg)
self.data_augmentor_queue.append(cur_augmentor)
def disableAugmentation(self, augmentor_configs):
self.data_augmentor_queue = []
aug_config_list = augmentor_configs if isinstance(augmentor_configs, list) \
else augmentor_configs.AUG_CONFIG_LIST
for cur_cfg in aug_config_list:
if not isinstance(augmentor_configs, list):
if cur_cfg.NAME in augmentor_configs.DISABLE_AUG_LIST:
continue
cur_augmentor = getattr(self, cur_cfg.NAME)(config=cur_cfg)
self.data_augmentor_queue.append(cur_augmentor)
def gt_sampling(self, config=None):
db_sampler = database_sampler.DataBaseSampler(
root_path=self.root_path,
......@@ -139,6 +152,7 @@ class DataAugmentor(object):
data_dict['gt_boxes'] = gt_boxes
data_dict['points'] = points
data_dict['noise_translate'] = noise_translate
return data_dict
def random_local_translation(self, data_dict=None, config=None):
......@@ -251,6 +265,28 @@ class DataAugmentor(object):
data_dict['points'] = points
return data_dict
def imgaug(self, data_dict=None, config=None):
if data_dict is None:
return partial(self.imgaug, config=config)
imgs = data_dict["camera_imgs"]
img_process_infos = data_dict['img_process_infos']
new_imgs = []
for img, img_process_info in zip(imgs, img_process_infos):
flip = False
if config.RAND_FLIP and np.random.choice([0, 1]):
flip = True
rotate = np.random.uniform(*config.ROT_LIM)
# aug images
if flip:
img = img.transpose(method=Image.FLIP_LEFT_RIGHT)
img = img.rotate(rotate)
img_process_info[2] = flip
img_process_info[3] = rotate
new_imgs.append(img)
data_dict["camera_imgs"] = new_imgs
return data_dict
def forward(self, data_dict):
"""
Args:
......
......@@ -2,6 +2,7 @@ from collections import defaultdict
from pathlib import Path
import numpy as np
import torch
import torch.utils.data as torch_data
from ..utils import common_utils
......@@ -130,6 +131,30 @@ class DatasetTemplate(torch_data.Dataset):
"""
raise NotImplementedError
def set_lidar_aug_matrix(self, data_dict):
"""
Get lidar augment matrix (4 x 4), which are used to recover orig point coordinates.
"""
lidar_aug_matrix = np.eye(4)
if 'flip_y' in data_dict.keys():
flip_x = data_dict['flip_x']
flip_y = data_dict['flip_y']
if flip_x:
lidar_aug_matrix[:3,:3] = np.array([[1, 0, 0], [0, -1, 0], [0, 0, 1]]) @ lidar_aug_matrix[:3,:3]
if flip_y:
lidar_aug_matrix[:3,:3] = np.array([[-1, 0, 0], [0, 1, 0], [0, 0, 1]]) @ lidar_aug_matrix[:3,:3]
if 'noise_rot' in data_dict.keys():
noise_rot = data_dict['noise_rot']
lidar_aug_matrix[:3,:3] = common_utils.angle2matrix(torch.tensor(noise_rot)) @ lidar_aug_matrix[:3,:3]
if 'noise_scale' in data_dict.keys():
noise_scale = data_dict['noise_scale']
lidar_aug_matrix[:3,:3] *= noise_scale
if 'noise_translate' in data_dict.keys():
noise_translate = data_dict['noise_translate']
lidar_aug_matrix[:3,3:4] = noise_translate.T
data_dict['lidar_aug_matrix'] = lidar_aug_matrix
return data_dict
def prepare_data(self, data_dict):
"""
Args:
......@@ -165,6 +190,7 @@ class DatasetTemplate(torch_data.Dataset):
)
if 'calib' in data_dict:
data_dict['calib'] = calib
data_dict = self.set_lidar_aug_matrix(data_dict)
if data_dict.get('gt_boxes', None) is not None:
selected = common_utils.keep_arrays_by_name(data_dict['gt_names'], self.class_names)
data_dict['gt_boxes'] = data_dict['gt_boxes'][selected]
......@@ -287,6 +313,8 @@ class DatasetTemplate(torch_data.Dataset):
constant_values=pad_value)
points.append(points_pad)
ret[key] = np.stack(points, axis=0)
elif key in ['camera_imgs']:
ret[key] = torch.stack([torch.stack(imgs,dim=0) for imgs in val],dim=0)
else:
ret[key] = np.stack(val, axis=0)
except:
......
......@@ -8,6 +8,8 @@ from tqdm import tqdm
from ...ops.roiaware_pool3d import roiaware_pool3d_utils
from ...utils import common_utils
from ..dataset import DatasetTemplate
from pyquaternion import Quaternion
from PIL import Image
class NuScenesDataset(DatasetTemplate):
......@@ -17,6 +19,13 @@ class NuScenesDataset(DatasetTemplate):
dataset_cfg=dataset_cfg, class_names=class_names, training=training, root_path=root_path, logger=logger
)
self.infos = []
self.camera_config = self.dataset_cfg.get('CAMERA_CONFIG', None)
if self.camera_config is not None:
self.use_camera = self.camera_config.get('USE_CAMERA', True)
self.camera_image_config = self.camera_config.IMAGE
else:
self.use_camera = False
self.include_nuscenes_data(self.mode)
if self.training and self.dataset_cfg.get('BALANCED_RESAMPLING', False):
self.infos = self.balanced_infos_resampling(self.infos)
......@@ -108,6 +117,41 @@ class NuScenesDataset(DatasetTemplate):
points = np.concatenate((points, times), axis=1)
return points
def crop_image(self, input_dict):
W, H = input_dict["ori_shape"]
imgs = input_dict["camera_imgs"]
img_process_infos = []
crop_images = []
for img in imgs:
if self.training == True:
fH, fW = self.camera_image_config.FINAL_DIM
resize_lim = self.camera_image_config.RESIZE_LIM_TRAIN
resize = np.random.uniform(*resize_lim)
resize_dims = (int(W * resize), int(H * resize))
newW, newH = resize_dims
crop_h = newH - fH
crop_w = int(np.random.uniform(0, max(0, newW - fW)))
crop = (crop_w, crop_h, crop_w + fW, crop_h + fH)
else:
fH, fW = self.camera_image_config.FINAL_DIM
resize_lim = self.camera_image_config.RESIZE_LIM_TEST
resize = np.mean(resize_lim)
resize_dims = (int(W * resize), int(H * resize))
newW, newH = resize_dims
crop_h = newH - fH
crop_w = int(max(0, newW - fW) / 2)
crop = (crop_w, crop_h, crop_w + fW, crop_h + fH)
# reisze and crop image
img = img.resize(resize_dims)
img = img.crop(crop)
crop_images.append(img)
img_process_infos.append([resize, crop, False, 0])
input_dict['img_process_infos'] = img_process_infos
input_dict['camera_imgs'] = crop_images
return input_dict
def __len__(self):
if self._merge_all_iters_to_one_epoch:
return len(self.infos) * self.total_epochs
......@@ -137,6 +181,60 @@ class NuScenesDataset(DatasetTemplate):
'gt_names': info['gt_names'] if mask is None else info['gt_names'][mask],
'gt_boxes': info['gt_boxes'] if mask is None else info['gt_boxes'][mask]
})
if self.use_camera:
input_dict["image_paths"] = []
input_dict["lidar2camera"] = []
input_dict["lidar2image"] = []
input_dict["camera2ego"] = []
input_dict["camera_intrinsics"] = []
input_dict["camera2lidar"] = []
for _, camera_info in info["cams"].items():
input_dict["image_paths"].append(camera_info["data_path"])
# lidar to camera transform
lidar2camera_r = np.linalg.inv(camera_info["sensor2lidar_rotation"])
lidar2camera_t = (
camera_info["sensor2lidar_translation"] @ lidar2camera_r.T
)
lidar2camera_rt = np.eye(4).astype(np.float32)
lidar2camera_rt[:3, :3] = lidar2camera_r.T
lidar2camera_rt[3, :3] = -lidar2camera_t
input_dict["lidar2camera"].append(lidar2camera_rt.T)
# camera intrinsics
camera_intrinsics = np.eye(4).astype(np.float32)
camera_intrinsics[:3, :3] = camera_info["camera_intrinsics"]
input_dict["camera_intrinsics"].append(camera_intrinsics)
# lidar to image transform
lidar2image = camera_intrinsics @ lidar2camera_rt.T
input_dict["lidar2image"].append(lidar2image)
# camera to ego transform
camera2ego = np.eye(4).astype(np.float32)
camera2ego[:3, :3] = Quaternion(
camera_info["sensor2ego_rotation"]
).rotation_matrix
camera2ego[:3, 3] = camera_info["sensor2ego_translation"]
input_dict["camera2ego"].append(camera2ego)
# camera to lidar transform
camera2lidar = np.eye(4).astype(np.float32)
camera2lidar[:3, :3] = camera_info["sensor2lidar_rotation"]
camera2lidar[:3, 3] = camera_info["sensor2lidar_translation"]
input_dict["camera2lidar"].append(camera2lidar)
# read image
filename = input_dict["image_paths"]
images = []
for name in filename:
images.append(Image.open(str(self.root_path / name)))
input_dict["camera_imgs"] = images
input_dict["ori_shape"] = images[0].size
# resize and crop image
input_dict = self.crop_image(input_dict)
data_dict = self.prepare_data(data_dict=input_dict)
......@@ -251,7 +349,7 @@ class NuScenesDataset(DatasetTemplate):
pickle.dump(all_db_infos, f)
def create_nuscenes_info(version, data_path, save_path, max_sweeps=10):
def create_nuscenes_info(version, data_path, save_path, max_sweeps=10, with_cam=False):
from nuscenes.nuscenes import NuScenes
from nuscenes.utils import splits
from . import nuscenes_utils
......@@ -308,6 +406,7 @@ if __name__ == '__main__':
parser.add_argument('--cfg_file', type=str, default=None, help='specify the config of dataset')
parser.add_argument('--func', type=str, default='create_nuscenes_infos', help='')
parser.add_argument('--version', type=str, default='v1.0-trainval', help='')
parser.add_argument('--with_cam', action='store_true', default=False, help='use camera or not')
args = parser.parse_args()
if args.func == 'create_nuscenes_infos':
......@@ -319,6 +418,7 @@ if __name__ == '__main__':
data_path=ROOT_DIR / 'data' / 'nuscenes',
save_path=ROOT_DIR / 'data' / 'nuscenes',
max_sweeps=dataset_cfg.MAX_SWEEPS,
with_cam=args.with_cam
)
nuscenes_dataset = NuScenesDataset(
......
......@@ -249,7 +249,67 @@ def quaternion_yaw(q: Quaternion) -> float:
return yaw
def fill_trainval_infos(data_path, nusc, train_scenes, val_scenes, test=False, max_sweeps=10):
def obtain_sensor2top(
nusc, sensor_token, l2e_t, l2e_r_mat, e2g_t, e2g_r_mat, sensor_type="lidar"
):
"""Obtain the info with RT matric from general sensor to Top LiDAR.
Args:
nusc (class): Dataset class in the nuScenes dataset.
sensor_token (str): Sample data token corresponding to the
specific sensor type.
l2e_t (np.ndarray): Translation from lidar to ego in shape (1, 3).
l2e_r_mat (np.ndarray): Rotation matrix from lidar to ego
in shape (3, 3).
e2g_t (np.ndarray): Translation from ego to global in shape (1, 3).
e2g_r_mat (np.ndarray): Rotation matrix from ego to global
in shape (3, 3).
sensor_type (str): Sensor to calibrate. Default: 'lidar'.
Returns:
sweep (dict): Sweep information after transformation.
"""
sd_rec = nusc.get("sample_data", sensor_token)
cs_record = nusc.get("calibrated_sensor", sd_rec["calibrated_sensor_token"])
pose_record = nusc.get("ego_pose", sd_rec["ego_pose_token"])
data_path = str(nusc.get_sample_data_path(sd_rec["token"]))
# if os.getcwd() in data_path: # path from lyftdataset is absolute path
# data_path = data_path.split(f"{os.getcwd()}/")[-1] # relative path
sweep = {
"data_path": data_path,
"type": sensor_type,
"sample_data_token": sd_rec["token"],
"sensor2ego_translation": cs_record["translation"],
"sensor2ego_rotation": cs_record["rotation"],
"ego2global_translation": pose_record["translation"],
"ego2global_rotation": pose_record["rotation"],
"timestamp": sd_rec["timestamp"],
}
l2e_r_s = sweep["sensor2ego_rotation"]
l2e_t_s = sweep["sensor2ego_translation"]
e2g_r_s = sweep["ego2global_rotation"]
e2g_t_s = sweep["ego2global_translation"]
# obtain the RT from sensor to Top LiDAR
# sweep->ego->global->ego'->lidar
l2e_r_s_mat = Quaternion(l2e_r_s).rotation_matrix
e2g_r_s_mat = Quaternion(e2g_r_s).rotation_matrix
R = (l2e_r_s_mat.T @ e2g_r_s_mat.T) @ (
np.linalg.inv(e2g_r_mat).T @ np.linalg.inv(l2e_r_mat).T
)
T = (l2e_t_s @ e2g_r_s_mat.T + e2g_t_s) @ (
np.linalg.inv(e2g_r_mat).T @ np.linalg.inv(l2e_r_mat).T
)
T -= (
e2g_t @ (np.linalg.inv(e2g_r_mat).T @ np.linalg.inv(l2e_r_mat).T)
+ l2e_t @ np.linalg.inv(l2e_r_mat).T
).squeeze(0)
sweep["sensor2lidar_rotation"] = R.T # points @ R.T + T
sweep["sensor2lidar_translation"] = T
return sweep
def fill_trainval_infos(data_path, nusc, train_scenes, val_scenes, test=False, max_sweeps=10, with_cam=False):
train_nusc_infos = []
val_nusc_infos = []
progress_bar = tqdm.tqdm(total=len(nusc.sample), desc='create_info', dynamic_ncols=True)
......@@ -291,6 +351,34 @@ def fill_trainval_infos(data_path, nusc, train_scenes, val_scenes, test=False, m
'car_from_global': car_from_global,
'timestamp': ref_time,
}
if with_cam:
info['cams'] = dict()
l2e_r = ref_cs_rec["rotation"]
l2e_t = ref_cs_rec["translation"],
e2g_r = ref_pose_rec["rotation"]
e2g_t = ref_pose_rec["translation"]
l2e_r_mat = Quaternion(l2e_r).rotation_matrix
e2g_r_mat = Quaternion(e2g_r).rotation_matrix
# obtain 6 image's information per frame
camera_types = [
"CAM_FRONT",
"CAM_FRONT_RIGHT",
"CAM_FRONT_LEFT",
"CAM_BACK",
"CAM_BACK_LEFT",
"CAM_BACK_RIGHT",
]
for cam in camera_types:
cam_token = sample["data"][cam]
cam_path, _, camera_intrinsics = nusc.get_sample_data(cam_token)
cam_info = obtain_sensor2top(
nusc, cam_token, l2e_t, l2e_r_mat, e2g_t, e2g_r_mat, cam
)
cam_info['data_path'] = Path(cam_info['data_path']).relative_to(data_path).__str__()
cam_info.update(camera_intrinsics=camera_intrinsics)
info["cams"].update({cam: cam_info})
sample_data_token = sample['data'][chan]
curr_sd_rec = nusc.get('sample_data', sample_data_token)
......
......@@ -2,7 +2,8 @@ from functools import partial
import numpy as np
from skimage import transform
import torch
import torchvision
from ...utils import box_utils, common_utils
tv = None
......@@ -229,6 +230,56 @@ class DataProcessor(object):
)
return data_dict
def image_normalize(self, data_dict=None, config=None):
if data_dict is None:
return partial(self.image_normalize, config=config)
mean = config.mean
std = config.std
compose = torchvision.transforms.Compose(
[
torchvision.transforms.ToTensor(),
torchvision.transforms.Normalize(mean=mean, std=std),
]
)
data_dict["camera_imgs"] = [compose(img) for img in data_dict["camera_imgs"]]
return data_dict
def image_calibrate(self,data_dict=None, config=None):
if data_dict is None:
return partial(self.image_calibrate, config=config)
img_process_infos = data_dict['img_process_infos']
transforms = []
for img_process_info in img_process_infos:
resize, crop, flip, rotate = img_process_info
rotation = torch.eye(2)
translation = torch.zeros(2)
# post-homography transformation
rotation *= resize
translation -= torch.Tensor(crop[:2])
if flip:
A = torch.Tensor([[-1, 0], [0, 1]])
b = torch.Tensor([crop[2] - crop[0], 0])
rotation = A.matmul(rotation)
translation = A.matmul(translation) + b
theta = rotate / 180 * np.pi
A = torch.Tensor(
[
[np.cos(theta), np.sin(theta)],
[-np.sin(theta), np.cos(theta)],
]
)
b = torch.Tensor([crop[2] - crop[0], crop[3] - crop[1]]) / 2
b = A.matmul(-b) + b
rotation = A.matmul(rotation)
translation = A.matmul(translation) + b
transform = torch.eye(4)
transform[:2, :2] = rotation
transform[:2, 3] = translation
transforms.append(transform.numpy())
data_dict["img_aug_matrix"] = transforms
return data_dict
def forward(self, data_dict):
"""
Args:
......
......@@ -22,9 +22,11 @@ def build_network(model_cfg, num_class, dataset):
def load_data_to_gpu(batch_dict):
for key, val in batch_dict.items():
if not isinstance(val, np.ndarray):
if key == 'camera_imgs':
batch_dict[key] = val.cuda()
elif not isinstance(val, np.ndarray):
continue
elif key in ['frame_id', 'metadata', 'calib']:
elif key in ['frame_id', 'metadata', 'calib', 'image_paths','ori_shape','img_process_infos']:
continue
elif key in ['images']:
batch_dict[key] = kornia.image_to_tensor(val).float().cuda().contiguous()
......
......@@ -6,6 +6,7 @@ from .point_head_simple import PointHeadSimple
from .point_intra_part_head import PointIntraPartOffsetHead
from .center_head import CenterHead
from .voxelnext_head import VoxelNeXtHead
from .transfusion_head import TransFusionHead
__all__ = {
'AnchorHeadTemplate': AnchorHeadTemplate,
......@@ -16,4 +17,5 @@ __all__ = {
'AnchorHeadMulti': AnchorHeadMulti,
'CenterHead': CenterHead,
'VoxelNeXtHead': VoxelNeXtHead,
'TransFusionHead': TransFusionHead,
}
......@@ -57,6 +57,24 @@ def rotate_points_along_z(points, angle):
return points_rot.numpy() if is_numpy else points_rot
def angle2matrix(angle):
"""
Args:
angle: angle along z-axis, angle increases x ==> y
Returns:
rot_matrix: (3x3 Tensor) rotation matrix
"""
cosa = torch.cos(angle)
sina = torch.sin(angle)
rot_matrix = torch.tensor([
[cosa, -sina, 0],
[sina, cosa, 0],
[ 0, 0, 1]
])
return rot_matrix
def mask_points_by_range(points, limit_range):
mask = (points[:, 0] >= limit_range[0]) & (points[:, 0] <= limit_range[3]) \
& (points[:, 1] >= limit_range[1]) & (points[:, 1] <= limit_range[4])
......
......@@ -5,12 +5,6 @@ DATA_CONFIG:
_BASE_CONFIG_: cfgs/dataset_configs/nuscenes_dataset.yaml
POINT_CLOUD_RANGE: [-54.0, -54.0, -5.0, 54.0, 54.0, 3.0]
# sc TODO: just for debug
INFO_PATH: {
'train': [nuscenes_infos_10sweeps_train_with_cam_2d.pkl],
'test': [nuscenes_infos_10sweeps_val_with_cam_2d.pkl],
}
DATA_AUGMENTOR:
DISABLE_AUG_LIST: ['placeholder']
AUG_CONFIG_LIST:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment