Commit d2b71343 authored by 雍大凯's avatar 雍大凯
Browse files

add code

parent 69e57885
# Copyright (c) OpenMMLab. All rights reserved.
import os
from concurrent import futures as futures
from os import path as osp
import mmcv
import numpy as np
class S3DISData(object):
    """S3DIS data.

    Generate s3dis infos for s3dis_converter.

    Args:
        root_path (str): Root path of the raw data.
        split (str, optional): Set split type of the data. Default: 'Area_1'.
    """

    def __init__(self, root_path, split='Area_1'):
        self.root_dir = root_path
        self.split = split
        self.data_dir = osp.join(root_path,
                                 'Stanford3dDataset_v1.2_Aligned_Version')

        # Following `GSDN <https://arxiv.org/abs/2006.12356>`_, use 5 furniture
        # classes for detection: table, chair, sofa, bookcase, board.
        self.cat_ids = np.array([7, 8, 9, 10, 11])
        self.cat_ids2class = {
            cat_id: i
            for i, cat_id in enumerate(list(self.cat_ids))
        }

        assert split in [
            'Area_1', 'Area_2', 'Area_3', 'Area_4', 'Area_5', 'Area_6'
        ]
        area_dir = osp.join(self.data_dir, split)
        # Keep every entry that is not a regular file (e.g. the
        # `conferenceRoom_1` folders). A new list is built on purpose:
        # removing items from a list while iterating over it skips the entry
        # that follows each removed one, so stray files could survive.
        self.sample_id_list = [
            sample_id for sample_id in os.listdir(area_dir)
            if not os.path.isfile(osp.join(area_dir, sample_id))
        ]

    def __len__(self):
        """Return the number of samples found for the selected split."""
        return len(self.sample_id_list)

    def get_infos(self, num_workers=4, has_label=True, sample_id_list=None):
        """Get data infos.

        This method gets information from the raw data. For every sample it
        reads the `.npy` point / instance / semantic files from
        `<root>/s3dis_data` and dumps them as raw `.bin` files into
        `<root>/points`, `<root>/instance_mask` and `<root>/semantic_mask`.

        Args:
            num_workers (int, optional): Number of threads to be used.
                Default: 4.
            has_label (bool, optional): Whether the data has label.
                Currently not read by this method. Default: True.
            sample_id_list (list, optional): Ids of the samples to process.
                When None, all samples of the split are used. Default: None.

        Returns:
            infos (list[dict]): Information of the raw data.
        """

        def process_single_scene(sample_idx):
            print(f'{self.split} sample_idx: {sample_idx}')
            info = dict()
            pc_info = {
                'num_features': 6,
                'lidar_idx': f'{self.split}_{sample_idx}'
            }
            info['point_cloud'] = pc_info
            pts_filename = osp.join(self.root_dir, 's3dis_data',
                                    f'{self.split}_{sample_idx}_point.npy')
            pts_instance_mask_path = osp.join(
                self.root_dir, 's3dis_data',
                f'{self.split}_{sample_idx}_ins_label.npy')
            pts_semantic_mask_path = osp.join(
                self.root_dir, 's3dis_data',
                f'{self.split}_{sample_idx}_sem_label.npy')

            points = np.load(pts_filename).astype(np.float32)
            # `np.int` was removed in NumPy 1.24. An explicit int64 also
            # matches the dtype used when the `.bin` masks are read back
            # with `np.fromfile(..., dtype=np.int64)`.
            pts_instance_mask = np.load(pts_instance_mask_path).astype(
                np.int64)
            pts_semantic_mask = np.load(pts_semantic_mask_path).astype(
                np.int64)

            mmcv.mkdir_or_exist(osp.join(self.root_dir, 'points'))
            mmcv.mkdir_or_exist(osp.join(self.root_dir, 'instance_mask'))
            mmcv.mkdir_or_exist(osp.join(self.root_dir, 'semantic_mask'))

            points.tofile(
                osp.join(self.root_dir, 'points',
                         f'{self.split}_{sample_idx}.bin'))
            pts_instance_mask.tofile(
                osp.join(self.root_dir, 'instance_mask',
                         f'{self.split}_{sample_idx}.bin'))
            pts_semantic_mask.tofile(
                osp.join(self.root_dir, 'semantic_mask',
                         f'{self.split}_{sample_idx}.bin'))

            # paths stored in the info are relative to the data root
            info['pts_path'] = osp.join('points',
                                        f'{self.split}_{sample_idx}.bin')
            info['pts_instance_mask_path'] = osp.join(
                'instance_mask', f'{self.split}_{sample_idx}.bin')
            info['pts_semantic_mask_path'] = osp.join(
                'semantic_mask', f'{self.split}_{sample_idx}.bin')
            info['annos'] = self.get_bboxes(points, pts_instance_mask,
                                            pts_semantic_mask)

            return info

        sample_id_list = sample_id_list if sample_id_list is not None \
            else self.sample_id_list
        with futures.ThreadPoolExecutor(num_workers) as executor:
            infos = executor.map(process_single_scene, sample_id_list)
        return list(infos)

    def get_bboxes(self, points, pts_instance_mask, pts_semantic_mask):
        """Convert instance masks to axis-aligned bounding boxes.

        Instance ids are scanned from 1 up to the largest id in the mask.
        Ids that own no point are skipped.

        Args:
            points (np.array): Scene points of shape (n, 6).
            pts_instance_mask (np.ndarray): Instance labels of shape (n,).
            pts_semantic_mask (np.ndarray): Semantic labels of shape (n,).

        Returns:
            dict: A dict containing detection infos with following keys:

                - gt_boxes_upright_depth (np.ndarray): Bounding boxes
                    of shape (n, 6)
                - class (np.ndarray): Box labels of shape (n,)
                - gt_num (int): Number of boxes.
        """
        bboxes, labels = [], []
        for i in range(1, pts_instance_mask.max() + 1):
            ids = pts_instance_mask == i
            # an unused instance id has no points; reducing over an empty
            # selection would raise, so skip it
            if not ids.any():
                continue
            instance_labels = pts_semantic_mask[ids]
            # check if all instance points have same semantic label
            assert instance_labels.min() == instance_labels.max()
            label = instance_labels[0]
            # keep only furniture objects
            if label in self.cat_ids2class:
                labels.append(self.cat_ids2class[label])
                pts = points[:, :3][ids]
                min_pts = pts.min(axis=0)
                max_pts = pts.max(axis=0)
                locations = (min_pts + max_pts) / 2
                dimensions = max_pts - min_pts
                bboxes.append(np.concatenate((locations, dimensions)))
        annotation = dict()
        # follow ScanNet and SUN RGB-D keys
        annotation['gt_boxes_upright_depth'] = np.array(bboxes)
        annotation['class'] = np.array(labels)
        annotation['gt_num'] = len(labels)
        return annotation
class S3DISSegData(object):
    """S3DIS dataset used to generate infos for semantic segmentation task.

    Args:
        data_root (str): Root path of the raw data.
        ann_file (str): Path of the info file, loaded with `mmcv.load`.
            Every entry must provide a 'pts_semantic_mask_path' key.
        split (str, optional): Set split type of the data. Only used to name
            the saved files. Default: 'Area_1'.
        num_points (int, optional): Number of points in each data input.
            Default: 4096.
        label_weight_func (function, optional): Function to compute the
            label weight. Default: None.
    """

    def __init__(self,
                 data_root,
                 ann_file,
                 split='Area_1',
                 num_points=4096,
                 label_weight_func=None):
        self.data_root = data_root
        self.data_infos = mmcv.load(ann_file)
        self.split = split
        self.num_points = num_points

        self.all_ids = np.arange(13)  # all possible ids
        self.cat_ids = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
                                 12])  # used for seg task
        self.ignore_index = len(self.cat_ids)

        # lookup table raw id -> train label; ids that are not used map to
        # ignore_index. `np.int` was removed in NumPy 1.24, use int64.
        self.cat_id2class = np.ones(
            (self.all_ids.shape[0], ), dtype=np.int64) * self.ignore_index
        for i, cat_id in enumerate(self.cat_ids):
            self.cat_id2class[cat_id] = i

        # label weighting function is taken from
        # https://github.com/charlesq34/pointnet2/blob/master/scannet/scannet_dataset.py#L24
        self.label_weight_func = (lambda x: 1.0 / np.log(1.2 + x)) if \
            label_weight_func is None else label_weight_func

    def get_seg_infos(self):
        """Compute and save scene indices and label weights.

        Both arrays are written as `.npy` files into `<data_root>/seg_info`.
        """
        scene_idxs, label_weight = self.get_scene_idxs_and_label_weight()
        save_folder = osp.join(self.data_root, 'seg_info')
        mmcv.mkdir_or_exist(save_folder)
        np.save(
            osp.join(save_folder, f'{self.split}_resampled_scene_idxs.npy'),
            scene_idxs)
        np.save(
            osp.join(save_folder, f'{self.split}_label_weight.npy'),
            label_weight)
        print(f'{self.split} resampled scene index and label weight saved')

    def _convert_to_label(self, mask):
        """Convert class_id in loaded segmentation mask to label.

        Args:
            mask (str | np.ndarray): Array of class ids, or the path of a
                file holding it. Paths ending with 'npy' are read with
                `np.load`, any other path as a raw int64 binary file.

        Returns:
            np.ndarray: Labels looked up in `self.cat_id2class`.
        """
        if isinstance(mask, str):
            if mask.endswith('npy'):
                mask = np.load(mask)
            else:
                mask = np.fromfile(mask, dtype=np.int64)
        label = self.cat_id2class[mask]
        return label

    def get_scene_idxs_and_label_weight(self):
        """Compute scene_idxs for data sampling and label weight for loss
        calculation.

        We sample more times for scenes with more points. Label_weight is
        inversely proportional to number of class points.

        Returns:
            tuple[np.ndarray, np.ndarray]: int32 scene indices and float32
                label weights (one weight per class, ignore class dropped).
        """
        num_classes = len(self.cat_ids)
        num_point_all = []
        label_weight = np.zeros((num_classes + 1, ))  # ignore_index
        for data_info in self.data_infos:
            label = self._convert_to_label(
                osp.join(self.data_root, data_info['pts_semantic_mask_path']))
            num_point_all.append(label.shape[0])
            class_count, _ = np.histogram(label, range(num_classes + 2))
            label_weight += class_count

        # repeat scene_idx for num_scene_point // num_sample_point times
        sample_prob = np.array(num_point_all) / float(np.sum(num_point_all))
        num_iter = int(np.sum(num_point_all) / float(self.num_points))
        scene_idxs = []
        for idx in range(len(self.data_infos)):
            scene_idxs.extend([idx] * int(round(sample_prob[idx] * num_iter)))
        scene_idxs = np.array(scene_idxs).astype(np.int32)

        # calculate label weight, adopted from PointNet++
        label_weight = label_weight[:-1].astype(np.float32)
        label_weight = label_weight / label_weight.sum()
        label_weight = self.label_weight_func(label_weight).astype(np.float32)

        return scene_idxs, label_weight
# Copyright (c) OpenMMLab. All rights reserved.
import os
from concurrent import futures as futures
from os import path as osp
import mmcv
import numpy as np
class ScanNetData(object):
    """ScanNet data.

    Generate scannet infos for scannet_converter.

    Args:
        root_path (str): Root path of the raw data.
        split (str, optional): Set split type of the data. Default: 'train'.
    """

    def __init__(self, root_path, split='train'):
        self.root_dir = root_path
        self.split = split
        self.split_dir = osp.join(root_path)
        self.classes = [
            'cabinet', 'bed', 'chair', 'sofa', 'table', 'door', 'window',
            'bookshelf', 'picture', 'counter', 'desk', 'curtain',
            'refrigerator', 'showercurtrain', 'toilet', 'sink', 'bathtub',
            'garbagebin'
        ]
        # class name <-> contiguous label index
        self.cat2label = {
            name: label
            for label, name in enumerate(self.classes)
        }
        self.label2cat = {
            label: name
            for name, label in self.cat2label.items()
        }
        # raw category ids and their contiguous label index
        self.cat_ids = np.array(
            [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33, 34, 36, 39])
        self.cat_ids2class = {
            nyu40id: label
            for label, nyu40id in enumerate(list(self.cat_ids))
        }
        assert split in ['train', 'val', 'test']
        split_file = osp.join(self.root_dir, 'meta_data',
                              f'scannetv2_{split}.txt')
        mmcv.check_file_exist(split_file)
        self.sample_id_list = mmcv.list_from_file(split_file)
        self.test_mode = (split == 'test')

    def __len__(self):
        """Return the number of scenes listed in the split file."""
        return len(self.sample_id_list)

    def _load_instance_npy(self, idx, suffix):
        """Load `<root>/scannet_instance_data/<idx>_<suffix>.npy`."""
        npy_file = osp.join(self.root_dir, 'scannet_instance_data',
                            f'{idx}_{suffix}.npy')
        mmcv.check_file_exist(npy_file)
        return np.load(npy_file)

    def get_aligned_box_label(self, idx):
        """Load the axis-aligned boxes (with class column) of scene idx."""
        return self._load_instance_npy(idx, 'aligned_bbox')

    def get_unaligned_box_label(self, idx):
        """Load the unaligned boxes (with class column) of scene idx."""
        return self._load_instance_npy(idx, 'unaligned_bbox')

    def get_axis_align_matrix(self, idx):
        """Load the axis alignment matrix of scene idx."""
        return self._load_instance_npy(idx, 'axis_align_matrix')

    def get_images(self, idx):
        """List the `.jpg` files of scene idx, sorted by file name.

        Returned paths are relative to the data root.
        """
        scene_dir = osp.join(self.root_dir, 'posed_images', idx)
        return [
            osp.join('posed_images', idx, name)
            for name in sorted(os.listdir(scene_dir))
            if name.endswith('.jpg')
        ]

    def get_extrinsics(self, idx):
        """Load every pose `.txt` of scene idx, sorted by file name.

        `intrinsic.txt` lives in the same folder and is left out.
        """
        scene_dir = osp.join(self.root_dir, 'posed_images', idx)
        return [
            np.loadtxt(osp.join(scene_dir, name))
            for name in sorted(os.listdir(scene_dir))
            if name != 'intrinsic.txt' and name.endswith('.txt')
        ]

    def get_intrinsics(self, idx):
        """Load `intrinsic.txt` of scene idx."""
        intrinsic_file = osp.join(self.root_dir, 'posed_images', idx,
                                  'intrinsic.txt')
        mmcv.check_file_exist(intrinsic_file)
        return np.loadtxt(intrinsic_file)

    def get_infos(self, num_workers=4, has_label=True, sample_id_list=None):
        """Get data infos.

        This method gets information from the raw data.

        Args:
            num_workers (int, optional): Number of threads to be used.
                Default: 4.
            has_label (bool, optional): Whether the data has label.
                Default: True.
            sample_id_list (list[int], optional): Index list of the sample.
                Default: None.

        Returns:
            infos (list[dict]): Information of the raw data.
        """

        def process_single_scene(sample_idx):
            print(f'{self.split} sample_idx: {sample_idx}')
            bin_name = f'{sample_idx}.bin'

            info = dict()
            info['point_cloud'] = {'num_features': 6, 'lidar_idx': sample_idx}

            # dump the scene vertices as a raw binary file
            vertices = np.load(
                osp.join(self.root_dir, 'scannet_instance_data',
                         f'{sample_idx}_vert.npy'))
            mmcv.mkdir_or_exist(osp.join(self.root_dir, 'points'))
            vertices.tofile(osp.join(self.root_dir, 'points', bin_name))
            info['pts_path'] = osp.join('points', bin_name)

            # update with RGB image paths if exist
            if os.path.exists(osp.join(self.root_dir, 'posed_images')):
                info['intrinsics'] = self.get_intrinsics(sample_idx)
                all_extrinsics = self.get_extrinsics(sample_idx)
                all_img_paths = self.get_images(sample_idx)
                # some poses in ScanNet are invalid
                valid_pairs = [
                    (pose, img_path)
                    for pose, img_path in zip(all_extrinsics, all_img_paths)
                    if np.all(np.isfinite(pose))
                ]
                info['extrinsics'] = [pose for pose, _ in valid_pairs]
                info['img_paths'] = [img_path for _, img_path in valid_pairs]

            if not self.test_mode:
                instance_mask = np.load(
                    osp.join(self.root_dir, 'scannet_instance_data',
                             f'{sample_idx}_ins_label.npy')).astype(np.int64)
                semantic_mask = np.load(
                    osp.join(self.root_dir, 'scannet_instance_data',
                             f'{sample_idx}_sem_label.npy')).astype(np.int64)

                mmcv.mkdir_or_exist(osp.join(self.root_dir, 'instance_mask'))
                mmcv.mkdir_or_exist(osp.join(self.root_dir, 'semantic_mask'))

                instance_mask.tofile(
                    osp.join(self.root_dir, 'instance_mask', bin_name))
                semantic_mask.tofile(
                    osp.join(self.root_dir, 'semantic_mask', bin_name))

                info['pts_instance_mask_path'] = osp.join(
                    'instance_mask', bin_name)
                info['pts_semantic_mask_path'] = osp.join(
                    'semantic_mask', bin_name)

            if not has_label:
                return info

            annotations = {}
            # box is of shape [k, 6 + class]
            aligned_box_label = self.get_aligned_box_label(sample_idx)
            unaligned_box_label = self.get_unaligned_box_label(sample_idx)
            num_boxes = aligned_box_label.shape[0]
            annotations['gt_num'] = num_boxes
            if num_boxes != 0:
                aligned_box = aligned_box_label[:, :-1]  # k, 6
                unaligned_box = unaligned_box_label[:, :-1]
                class_ids = aligned_box_label[:, -1]  # k
                annotations['name'] = np.array([
                    self.label2cat[self.cat_ids2class[class_id]]
                    for class_id in class_ids
                ])
                # default names are given to aligned bbox for compatibility
                # we also save unaligned bbox info with marked names
                annotations['location'] = aligned_box[:, :3]
                annotations['dimensions'] = aligned_box[:, 3:6]
                annotations['gt_boxes_upright_depth'] = aligned_box
                annotations['unaligned_location'] = unaligned_box[:, :3]
                annotations['unaligned_dimensions'] = unaligned_box[:, 3:6]
                annotations[
                    'unaligned_gt_boxes_upright_depth'] = unaligned_box
                annotations['index'] = np.arange(num_boxes, dtype=np.int32)
                annotations['class'] = np.array([
                    self.cat_ids2class[class_id] for class_id in class_ids
                ])
            annotations['axis_align_matrix'] = \
                self.get_axis_align_matrix(sample_idx)  # 4x4
            info['annos'] = annotations
            return info

        if sample_id_list is None:
            sample_id_list = self.sample_id_list
        with futures.ThreadPoolExecutor(num_workers) as executor:
            infos = executor.map(process_single_scene, sample_id_list)
        return list(infos)
class ScanNetSegData(object):
    """ScanNet dataset used to generate infos for semantic segmentation task.

    Args:
        data_root (str): Root path of the raw data.
        ann_file (str): The generated scannet infos.
        split (str, optional): Set split type of the data. Default: 'train'.
        num_points (int, optional): Number of points in each data input.
            Default: 8192.
        label_weight_func (function, optional): Function to compute the
            label weight. Default: None.
    """

    def __init__(self,
                 data_root,
                 ann_file,
                 split='train',
                 num_points=8192,
                 label_weight_func=None):
        self.data_root = data_root
        self.data_infos = mmcv.load(ann_file)
        self.split = split
        assert split in ['train', 'val', 'test']
        self.num_points = num_points

        self.all_ids = np.arange(41)  # all possible ids
        self.cat_ids = np.array([
            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33, 34, 36,
            39
        ])  # used for seg task
        self.ignore_index = len(self.cat_ids)

        # lookup table raw id -> train label; ids that are not used map to
        # ignore_index. `np.int` was removed in NumPy 1.24, use int64.
        self.cat_id2class = np.ones(
            (self.all_ids.shape[0], ), dtype=np.int64) * self.ignore_index
        for i, cat_id in enumerate(self.cat_ids):
            self.cat_id2class[cat_id] = i

        # label weighting function is taken from
        # https://github.com/charlesq34/pointnet2/blob/master/scannet/scannet_dataset.py#L24
        self.label_weight_func = (lambda x: 1.0 / np.log(1.2 + x)) if \
            label_weight_func is None else label_weight_func

    def get_seg_infos(self):
        """Compute and save scene indices and label weights.

        Nothing is computed for the 'test' split. Otherwise both arrays are
        written as `.npy` files into `<data_root>/seg_info`.
        """
        if self.split == 'test':
            return
        scene_idxs, label_weight = self.get_scene_idxs_and_label_weight()
        save_folder = osp.join(self.data_root, 'seg_info')
        mmcv.mkdir_or_exist(save_folder)
        np.save(
            osp.join(save_folder, f'{self.split}_resampled_scene_idxs.npy'),
            scene_idxs)
        np.save(
            osp.join(save_folder, f'{self.split}_label_weight.npy'),
            label_weight)
        print(f'{self.split} resampled scene index and label weight saved')

    def _convert_to_label(self, mask):
        """Convert class_id in loaded segmentation mask to label.

        Args:
            mask (str | np.ndarray): Array of class ids, or the path of a
                file holding it. Paths ending with 'npy' are read with
                `np.load`, any other path as a raw int64 binary file.

        Returns:
            np.ndarray: Labels looked up in `self.cat_id2class`.
        """
        if isinstance(mask, str):
            if mask.endswith('npy'):
                mask = np.load(mask)
            else:
                mask = np.fromfile(mask, dtype=np.int64)
        label = self.cat_id2class[mask]
        return label

    def get_scene_idxs_and_label_weight(self):
        """Compute scene_idxs for data sampling and label weight for loss
        calculation.

        We sample more times for scenes with more points. Label_weight is
        inversely proportional to number of class points.

        Returns:
            tuple[np.ndarray, np.ndarray]: int32 scene indices and float32
                label weights (one weight per class, ignore class dropped).
        """
        num_classes = len(self.cat_ids)
        num_point_all = []
        label_weight = np.zeros((num_classes + 1, ))  # ignore_index
        for data_info in self.data_infos:
            label = self._convert_to_label(
                osp.join(self.data_root, data_info['pts_semantic_mask_path']))
            num_point_all.append(label.shape[0])
            class_count, _ = np.histogram(label, range(num_classes + 2))
            label_weight += class_count

        # repeat scene_idx for num_scene_point // num_sample_point times
        sample_prob = np.array(num_point_all) / float(np.sum(num_point_all))
        num_iter = int(np.sum(num_point_all) / float(self.num_points))
        scene_idxs = []
        for idx in range(len(self.data_infos)):
            scene_idxs.extend([idx] * int(round(sample_prob[idx] * num_iter)))
        scene_idxs = np.array(scene_idxs).astype(np.int32)

        # calculate label weight, adopted from PointNet++
        label_weight = label_weight[:-1].astype(np.float32)
        label_weight = label_weight / label_weight.sum()
        label_weight = self.label_weight_func(label_weight).astype(np.float32)

        return scene_idxs, label_weight
# Copyright (c) OpenMMLab. All rights reserved.
from concurrent import futures as futures
from os import path as osp
import mmcv
import numpy as np
from scipy import io as sio
def random_sampling(points, num_points, replace=None):
    """Randomly pick a fixed number of points from a point cloud.

    Args:
        points (ndarray): Point cloud, one point per row.
        num_points (int): The number of samples. A negative value disables
            sampling and the input is returned untouched.
        replace (bool, optional): Whether the sample is with or without
            replacement. When None, replacement is used only if the cloud
            holds fewer points than requested. Default: None.

    Returns:
        points (ndarray): Point cloud after sampling.
    """
    if num_points >= 0:
        total = points.shape[0]
        with_replacement = (total < num_points) if replace is None \
            else replace
        picked = np.random.choice(
            total, num_points, replace=with_replacement)
        return points[picked]
    return points
class SUNRGBDInstance(object):
    """One object parsed from a space-separated label line.

    The first field is the class name, all following fields are numbers:
    2D box (x, y, w, h), 3D centroid, three half sizes and the two
    orientation components.

    Args:
        line (str): A single label line.
    """

    def __init__(self, line):
        fields = line.split(' ')
        self.classname = fields[0]
        # every field after the class name is numeric
        nums = [float(token) for token in fields[1:]]

        # 2D box is stored as corner + extent, convert to two corners
        self.xmin = nums[0]
        self.ymin = nums[1]
        self.xmax = nums[0] + nums[2]
        self.ymax = nums[1] + nums[3]
        self.box2d = np.array([self.xmin, self.ymin, self.xmax, self.ymax])

        self.centroid = np.array([nums[4], nums[5], nums[6]])
        self.width = nums[7]
        self.length = nums[8]
        self.height = nums[9]
        # length is x_size, width is y_size and height is z_size in our
        # depth coordinate system; l corresponds to the size along the
        # x axis. Stored values are doubled to get the full box size.
        self.size = np.array([self.length, self.width, self.height]) * 2

        self.orientation = np.zeros((3, ))
        self.orientation[:2] = nums[10], nums[11]
        self.heading_angle = np.arctan2(self.orientation[1],
                                        self.orientation[0])
        # 7 values: centroid (3), size (3), heading angle (1)
        self.box3d = np.concatenate(
            [self.centroid, self.size,
             np.array([self.heading_angle])])
class SUNRGBDData(object):
    """SUNRGBD data.

    Generate sunrgbd infos for sunrgbd_converter.

    Args:
        root_path (str): Root path of the raw data.
        split (str, optional): Set split type of the data. Default: 'train'.
        use_v1 (bool, optional): Whether to use v1. Default: False.
        num_points (int, optional): Number of points to sample. Set to -1
            to utilize all points. Defaults to -1.
    """

    def __init__(self, root_path, split='train', use_v1=False, num_points=-1):
        self.root_dir = root_path
        self.split = split
        self.split_dir = osp.join(root_path, 'sunrgbd_trainval')
        self.num_points = num_points
        self.classes = [
            'bed', 'table', 'sofa', 'chair', 'toilet', 'desk', 'dresser',
            'night_stand', 'bookshelf', 'bathtub'
        ]
        self.cat2label = {cat: self.classes.index(cat) for cat in self.classes}
        self.label2cat = {
            label: self.classes[label]
            for label in range(len(self.classes))
        }
        assert split in ['train', 'val', 'test']
        split_file = osp.join(self.split_dir, f'{split}_data_idx.txt')
        mmcv.check_file_exist(split_file)
        # Materialize the ids: a bare `map` object has no len() and can be
        # iterated only once, which breaks `__len__` and any second call of
        # `get_infos`.
        self.sample_id_list = list(map(int, mmcv.list_from_file(split_file)))
        self.image_dir = osp.join(self.split_dir, 'image')
        self.calib_dir = osp.join(self.split_dir, 'calib')
        self.depth_dir = osp.join(self.split_dir, 'depth')
        if use_v1:
            self.label_dir = osp.join(self.split_dir, 'label_v1')
        else:
            self.label_dir = osp.join(self.split_dir, 'label')

    def __len__(self):
        """Return the number of samples in the split."""
        return len(self.sample_id_list)

    def get_image(self, idx):
        """Read the image `<image_dir>/<idx:06d>.jpg`."""
        img_filename = osp.join(self.image_dir, f'{idx:06d}.jpg')
        return mmcv.imread(img_filename)

    def get_image_shape(self, idx):
        """Return the first two dims of the image shape as int32 array."""
        image = self.get_image(idx)
        return np.array(image.shape[:2], dtype=np.int32)

    def get_depth(self, idx):
        """Load the 'instance' entry of `<depth_dir>/<idx:06d>.mat`."""
        depth_filename = osp.join(self.depth_dir, f'{idx:06d}.mat')
        depth = sio.loadmat(depth_filename)['instance']
        return depth

    def get_calibration(self, idx):
        """Read the calibration file `<calib_dir>/<idx:06d>.txt`.

        The first line holds Rt and the second line holds K, each as 9
        space-separated numbers in column-major order.

        Returns:
            tuple[np.ndarray, np.ndarray]: K and Rt, both float32 (3, 3).
        """
        calib_filepath = osp.join(self.calib_dir, f'{idx:06d}.txt')
        with open(calib_filepath) as calib_file:
            lines = [line.rstrip() for line in calib_file]
        Rt = np.array([float(x) for x in lines[0].split(' ')])
        Rt = np.reshape(Rt, (3, 3), order='F').astype(np.float32)
        K = np.array([float(x) for x in lines[1].split(' ')])
        K = np.reshape(K, (3, 3), order='F').astype(np.float32)
        return K, Rt

    def get_label_objects(self, idx):
        """Parse every line of `<label_dir>/<idx:06d>.txt` into an object.

        Returns:
            list[SUNRGBDInstance]: One instance per label line.
        """
        label_filename = osp.join(self.label_dir, f'{idx:06d}.txt')
        with open(label_filename) as label_file:
            lines = [line.rstrip() for line in label_file]
        objects = [SUNRGBDInstance(line) for line in lines]
        return objects

    def get_infos(self, num_workers=4, has_label=True, sample_id_list=None):
        """Get data infos.

        This method gets information from the raw data.

        Args:
            num_workers (int, optional): Number of threads to be used.
                Default: 4.
            has_label (bool, optional): Whether the data has label.
                Default: True.
            sample_id_list (list[int], optional): Index list of the sample.
                Default: None.

        Returns:
            infos (list[dict]): Information of the raw data.
        """

        def process_single_scene(sample_idx):
            print(f'{self.split} sample_idx: {sample_idx}')
            # convert depth to points
            pc_upright_depth = self.get_depth(sample_idx)
            pc_upright_depth_subsampled = random_sampling(
                pc_upright_depth, self.num_points)

            info = dict()
            pc_info = {'num_features': 6, 'lidar_idx': sample_idx}
            info['point_cloud'] = pc_info

            mmcv.mkdir_or_exist(osp.join(self.root_dir, 'points'))
            pc_upright_depth_subsampled.tofile(
                osp.join(self.root_dir, 'points', f'{sample_idx:06d}.bin'))

            info['pts_path'] = osp.join('points', f'{sample_idx:06d}.bin')
            img_path = osp.join('image', f'{sample_idx:06d}.jpg')
            image_info = {
                'image_idx': sample_idx,
                'image_shape': self.get_image_shape(sample_idx),
                'image_path': img_path
            }
            info['image'] = image_info

            K, Rt = self.get_calibration(sample_idx)
            calib_info = {'K': K, 'Rt': Rt}
            info['calib'] = calib_info

            if has_label:
                obj_list = self.get_label_objects(sample_idx)
                # keep only objects of the known classes; filter once and
                # reuse the result for every annotation field
                kept = [
                    obj for obj in obj_list if obj.classname in self.cat2label
                ]
                annotations = {}
                annotations['gt_num'] = len(kept)
                if annotations['gt_num'] != 0:
                    annotations['name'] = np.array(
                        [obj.classname for obj in kept])
                    annotations['bbox'] = np.concatenate(
                        [obj.box2d.reshape(1, 4) for obj in kept], axis=0)
                    annotations['location'] = np.concatenate(
                        [obj.centroid.reshape(1, 3) for obj in kept], axis=0)
                    annotations['dimensions'] = 2 * np.array(
                        [[obj.length, obj.width, obj.height]
                         for obj in kept])  # lwh (depth) format
                    annotations['rotation_y'] = np.array(
                        [obj.heading_angle for obj in kept])
                    # NOTE(review): the index covers ALL parsed objects, not
                    # only the kept ones, so its length can differ from
                    # gt_num. Kept as is to not change the generated infos;
                    # confirm whether len(kept) was intended.
                    annotations['index'] = np.arange(
                        len(obj_list), dtype=np.int32)
                    annotations['class'] = np.array(
                        [self.cat2label[obj.classname] for obj in kept])
                    annotations['gt_boxes_upright_depth'] = np.stack(
                        [obj.box3d for obj in kept], axis=0)  # (K, 7)
                info['annos'] = annotations
            return info

        sample_id_list = sample_id_list if \
            sample_id_list is not None else self.sample_id_list
        with futures.ThreadPoolExecutor(num_workers) as executor:
            infos = executor.map(process_single_scene, sample_id_list)
        return list(infos)
# Copyright (c) OpenMMLab. All rights reserved.
r"""Adapted from `Waymo to KITTI converter
<https://github.com/caizhongang/waymo_kitti_converter>`_.
"""
try:
from waymo_open_dataset import dataset_pb2
except ImportError:
raise ImportError(
'Please run "pip install waymo-open-dataset-tf-2-1-0==1.2.0" '
'to install the official devkit first.')
from glob import glob
from os.path import join
import mmcv
import numpy as np
import tensorflow as tf
from waymo_open_dataset.utils import range_image_utils, transform_utils
from waymo_open_dataset.utils.frame_utils import \
parse_range_image_and_camera_projection
class Waymo2KITTI(object):
"""Waymo to KITTI converter.
This class serves as the converter to change the waymo raw data to KITTI
format.
Args:
load_dir (str): Directory to load waymo raw data.
save_dir (str): Directory to save data in KITTI format.
prefix (str): Prefix of filename. In general, 0 for training, 1 for
validation and 2 for testing.
workers (int, optional): Number of workers for the parallel process.
test_mode (bool, optional): Whether in the test_mode. Default: False.
"""
def __init__(self,
             load_dir,
             save_dir,
             prefix,
             workers=64,
             test_mode=False):
    """Store the conversion settings and prepare the output folders.

    Args:
        load_dir (str): Directory to load waymo raw data.
        save_dir (str): Directory to save data in KITTI format.
        prefix (str): Prefix of filename.
        workers (int, optional): Number of workers for the parallel
            process. Default: 64.
        test_mode (bool, optional): Whether in the test_mode.
            Default: False.
    """
    # filtering switches
    self.filter_empty_3dboxes = True
    self.filter_no_label_zone_points = True
    self.selected_waymo_classes = ['VEHICLE', 'PEDESTRIAN', 'CYCLIST']

    # Only data collected in specific locations will be converted
    # If set None, this filter is disabled
    # Available options: location_sf (main dataset)
    self.selected_waymo_locations = None
    self.save_track_id = False

    # turn on eager execution for older tensorflow versions
    tf_major_version = int(tf.__version__.split('.')[0])
    if tf_major_version < 2:
        tf.enable_eager_execution()

    # name suffixes, type names and the waymo -> kitti class renaming
    self.lidar_list = [
        '_FRONT', '_FRONT_RIGHT', '_FRONT_LEFT', '_SIDE_RIGHT',
        '_SIDE_LEFT'
    ]
    self.type_list = [
        'UNKNOWN', 'VEHICLE', 'PEDESTRIAN', 'SIGN', 'CYCLIST'
    ]
    self.waymo_to_kitti_class_map = {
        'UNKNOWN': 'DontCare',
        'PEDESTRIAN': 'Pedestrian',
        'VEHICLE': 'Car',
        'CYCLIST': 'Cyclist',
        'SIGN': 'Sign'  # not in kitti
    }

    self.load_dir = load_dir
    self.save_dir = save_dir
    self.prefix = prefix
    self.workers = int(workers)
    self.test_mode = test_mode

    # every *.tfrecord file directly inside load_dir, in sorted order
    record_pattern = join(self.load_dir, '*.tfrecord')
    self.tfrecord_pathnames = sorted(glob(record_pattern))

    # output locations; the two paths ending with '_' are prefixes that
    # get an index appended later
    out_root = self.save_dir
    self.label_save_dir = f'{out_root}/label_'
    self.label_all_save_dir = f'{out_root}/label_all'
    self.image_save_dir = f'{out_root}/image_'
    self.calib_save_dir = f'{out_root}/calib'
    self.point_cloud_save_dir = f'{out_root}/velodyne'
    self.pose_save_dir = f'{out_root}/pose'
    self.timestamp_save_dir = f'{out_root}/timestamp'
    self.create_folder()
def convert(self):
    """Convert every tfrecord file, spread over `self.workers` workers."""
    print('Start converting ...')
    file_indices = range(len(self))
    mmcv.track_parallel_progress(self.convert_one, file_indices,
                                 self.workers)
    print('\nFinished ...')
def convert_one(self, file_idx):
    """Convert action for single file.

    Every frame of the file is parsed and its image, calibration, lidar,
    pose and timestamp are saved; labels are saved unless in test mode.
    Frames from locations outside `selected_waymo_locations` are skipped
    when that filter is set.

    Args:
        file_idx (int): Index of the file to be converted.
    """
    record_path = self.tfrecord_pathnames[file_idx]
    records = tf.data.TFRecordDataset(record_path, compression_type='')
    allowed_locations = self.selected_waymo_locations

    for frame_idx, record in enumerate(records):
        frame = dataset_pb2.Frame()
        frame.ParseFromString(bytearray(record.numpy()))

        # location filter is active only when a selection is given
        if allowed_locations is None or \
                frame.context.stats.location in allowed_locations:
            self.save_image(frame, file_idx, frame_idx)
            self.save_calib(frame, file_idx, frame_idx)
            self.save_lidar(frame, file_idx, frame_idx)
            self.save_pose(frame, file_idx, frame_idx)
            self.save_timestamp(frame, file_idx, frame_idx)

            if not self.test_mode:
                self.save_label(frame, file_idx, frame_idx)
def __len__(self):
    """Return how many tfrecord files were found."""
    num_files = len(self.tfrecord_pathnames)
    return num_files
def save_image(self, frame, file_idx, frame_idx):
    """Parse and save the images in png format.

    Each image goes to the folder `<image_save_dir><camera name - 1>`.

    Args:
        frame (:obj:`Frame`): Open dataset frame proto.
        file_idx (int): Current file index.
        frame_idx (int): Current frame index.
    """
    # file stem shared by all cameras of this frame
    stem = f'{self.prefix}{str(file_idx).zfill(3)}' \
        f'{str(frame_idx).zfill(3)}'
    for camera_image in frame.images:
        out_path = \
            f'{self.image_save_dir}{str(camera_image.name - 1)}/{stem}.png'
        decoded = mmcv.imfrombytes(camera_image.image)
        mmcv.imwrite(decoded, out_path)
def save_calib(self, frame, file_idx, frame_idx):
    """Parse and save the calibration data.

    Writes one text file with the lines `P0..P4`, `R0_rect` and
    `Tr_velo_to_cam_0..4`. As a side effect the lidar-to-front-camera
    transform is stored in `self.T_velo_to_front_cam`.

    Args:
        frame (:obj:`Frame`): Open dataset frame proto.
        file_idx (int): Current file index.
        frame_idx (int): Current frame index.
    """
    # waymo front camera to kitti reference camera
    T_front_cam_to_ref = np.array([[0.0, -1.0, 0.0], [0.0, 0.0, -1.0],
                                   [1.0, 0.0, 0.0]])
    camera_calibs = []
    R0_rect = [f'{i:e}' for i in np.eye(3).flatten()]
    Tr_velo_to_cams = []

    for camera in frame.context.camera_calibrations:
        # extrinsic parameters
        T_cam_to_vehicle = np.array(camera.extrinsic.transform).reshape(
            4, 4)
        T_vehicle_to_cam = np.linalg.inv(T_cam_to_vehicle)
        Tr_velo_to_cam = \
            self.cart_to_homo(T_front_cam_to_ref) @ T_vehicle_to_cam
        if camera.name == 1:  # FRONT = 1, see dataset.proto for details
            self.T_velo_to_front_cam = Tr_velo_to_cam.copy()
        Tr_velo_to_cam = Tr_velo_to_cam[:3, :].reshape((12, ))
        Tr_velo_to_cams.append([f'{i:e}' for i in Tr_velo_to_cam])

        # intrinsic parameters
        camera_calib = np.zeros((3, 4))
        camera_calib[0, 0] = camera.intrinsic[0]
        camera_calib[1, 1] = camera.intrinsic[1]
        camera_calib[0, 2] = camera.intrinsic[2]
        camera_calib[1, 2] = camera.intrinsic[3]
        camera_calib[2, 2] = 1
        camera_calibs.append([f'{i:e}' for i in camera_calib.reshape(12)])

    # all camera ids are saved as id-1 in the result because
    # camera 0 is unknown in the proto
    calib_lines = [
        'P' + str(i) + ': ' + ' '.join(camera_calibs[i]) for i in range(5)
    ]
    calib_lines.append('R0_rect' + ': ' + ' '.join(R0_rect))
    calib_lines.extend(
        'Tr_velo_to_cam_' + str(i) + ': ' + ' '.join(Tr_velo_to_cams[i])
        for i in range(5))
    # every line, including the last one, ends with a newline
    calib_context = ''.join(line + '\n' for line in calib_lines)

    calib_path = f'{self.calib_save_dir}/{self.prefix}' + \
        f'{str(file_idx).zfill(3)}{str(frame_idx).zfill(3)}.txt'
    # the with-block closes the file; no explicit close() needed
    with open(calib_path, 'w') as fp_calib:
        fp_calib.write(calib_context)
def save_lidar(self, frame, file_idx, frame_idx):
    """Parse and save the lidar data as a raw float32 binary file.

    The points of the first and the second return are stacked into one
    cloud and written to `<point_cloud_save_dir>/<name>.bin`.

    Args:
        frame (:obj:`Frame`): Open dataset frame proto.
        file_idx (int): Current file index.
        frame_idx (int): Current frame index.
    """
    range_images, camera_projections, range_image_top_pose = \
        parse_range_image_and_camera_projection(frame)

    # ri_index 0 is the first return, ri_index 1 the second return
    per_return = []
    for ri_index in (0, 1):
        pts, _cp_pts, inten, elong, mask_idx = \
            self.convert_range_image_to_point_cloud(
                frame,
                range_images,
                camera_projections,
                range_image_top_pose,
                ri_index=ri_index
            )
        # each output is a sequence of arrays; merge it into one array
        per_return.append(
            tuple(
                np.concatenate(chunks, axis=0)
                for chunks in (pts, inten, elong, mask_idx)))

    # join both returns, field by field
    points, intensity, elongation, mask_indices = (
        np.concatenate(both_returns, axis=0)
        for both_returns in zip(*per_return))

    # columns: points, intensity, elongation, mask indices
    point_cloud = np.column_stack(
        (points, intensity, elongation, mask_indices))

    pc_path = f'{self.point_cloud_save_dir}/{self.prefix}' + \
        f'{str(file_idx).zfill(3)}{str(frame_idx).zfill(3)}.bin'
    point_cloud.astype(np.float32).tofile(pc_path)
def save_label(self, frame, file_idx, frame_idx):
    """Parse and save the label data in txt format.

    The relation between waymo and kitti coordinates is noteworthy:
    1. x, y, z correspond to l, w, h (waymo) -> l, h, w (kitti)
    2. x-y-z: front-left-up (waymo) -> right-down-front(kitti)
    3. bbox origin at volumetric center (waymo) -> bottom center (kitti)
    4. rotation: +x around y-axis (kitti) -> +x around z-axis (waymo)

    One line per kept object is appended to the per-camera label file
    (``{label_save_dir}{name}/...``) and written to the combined
    ``label_all`` file. Objects whose class is not selected, or that are
    empty while ``filter_empty_3dboxes`` is set, are skipped.

    Args:
        frame (:obj:`Frame`): Open dataset frame proto.
        file_idx (int): Current file index.
        frame_idx (int): Current frame index.
    """
    file_name = f'{self.prefix}' + \
        f'{str(file_idx).zfill(3)}{str(frame_idx).zfill(3)}.txt'

    # Index the projected 2D boxes by label id so that every 3D object can
    # look up its 2D box and the (zero-based) camera it was projected into.
    id_to_bbox = dict()
    id_to_name = dict()
    for labels in frame.projected_lidar_labels:
        name = labels.name
        for label in labels.labels:
            # TODO: need a workaround as bbox may not belong to front cam
            bbox = [
                label.box.center_x - label.box.length / 2,
                label.box.center_y - label.box.width / 2,
                label.box.center_x + label.box.length / 2,
                label.box.center_y + label.box.width / 2
            ]
            id_to_bbox[label.id] = bbox
            id_to_name[label.id] = name - 1

    # `with` guarantees the combined label file is closed even if parsing
    # one of the objects raises.
    with open(f'{self.label_all_save_dir}/{file_name}',
              'w+') as fp_label_all:
        for obj in frame.laser_labels:
            my_type = self.type_list[obj.type]
            if my_type not in self.selected_waymo_classes:
                continue
            if self.filter_empty_3dboxes and \
                    obj.num_lidar_points_in_box < 1:
                continue
            my_type = self.waymo_to_kitti_class_map[my_type]

            # Use the first suffix in `lidar_list` that has a projected
            # box; fall back to camera '0' with an all-zero 2D box.
            bounding_box = (0, 0, 0, 0)
            name = '0'
            for lidar in self.lidar_list:
                key = obj.id + lidar
                if key in id_to_bbox:
                    bounding_box = id_to_bbox[key]
                    name = str(id_to_name[key])
                    break

            height = obj.box.height
            width = obj.box.width
            length = obj.box.length

            x = obj.box.center_x
            y = obj.box.center_y
            # move the origin from the volumetric to the bottom center
            z = obj.box.center_z - height / 2

            # project bounding box to the virtual reference frame
            pt_ref = self.T_velo_to_front_cam @ \
                np.array([x, y, z, 1]).reshape((4, 1))
            x, y, z, _ = pt_ref.flatten().tolist()

            rotation_y = -obj.box.heading - np.pi / 2
            track_id = obj.id

            # not available
            truncated = 0
            occluded = 0
            alpha = -10

            line = my_type + \
                ' {} {} {} {} {} {} {} {} {} {} {} {} {} {}\n'.format(
                    round(truncated, 2), occluded, round(alpha, 2),
                    round(bounding_box[0], 2), round(bounding_box[1], 2),
                    round(bounding_box[2], 2), round(bounding_box[3], 2),
                    round(height, 2), round(width, 2), round(length, 2),
                    round(x, 2), round(y, 2), round(z, 2),
                    round(rotation_y, 2))

            if self.save_track_id:
                line_all = line[:-1] + ' ' + name + ' ' + track_id + '\n'
            else:
                line_all = line[:-1] + ' ' + name + '\n'

            # Append, because several objects of one frame may share the
            # same per-camera label file.
            with open(f'{self.label_save_dir}{name}/{file_name}',
                      'a') as fp_label:
                fp_label.write(line)

            fp_label_all.write(line_all)
def save_pose(self, frame, file_idx, frame_idx):
    """Write the pose of a frame to a text file as a 4x4 matrix.

    KITTI's regular training data carries no ego pose, but methods that
    exploit temporal information need it, so it is stored separately.

    Args:
        frame (:obj:`Frame`): Open dataset frame proto.
        file_idx (int): Current file index.
        frame_idx (int): Current frame index.
    """
    frame_stem = str(file_idx).zfill(3) + str(frame_idx).zfill(3)
    pose_path = join(f'{self.pose_save_dir}/{self.prefix}{frame_stem}.txt')
    # `frame.pose.transform` is flat; reshape it to a 4x4 matrix.
    pose_matrix = np.array(frame.pose.transform).reshape(4, 4)
    np.savetxt(pose_path, pose_matrix)
def save_timestamp(self, frame, file_idx, frame_idx):
    """Write the frame timestamp to its own text file.

    The value of ``frame.timestamp_micros`` is stored as plain text
    instead of being attached to the point cloud.

    Args:
        frame (:obj:`Frame`): Open dataset frame proto.
        file_idx (int): Current file index.
        frame_idx (int): Current frame index.
    """
    frame_stem = str(file_idx).zfill(3) + str(frame_idx).zfill(3)
    stamp_path = join(
        f'{self.timestamp_save_dir}/{self.prefix}{frame_stem}.txt')
    with open(stamp_path, 'w') as stamp_file:
        stamp_file.write(str(frame.timestamp_micros))
def create_folder(self):
    """Create the output directories used during conversion.

    Calibration, point cloud, pose and timestamp directories are always
    created. Label directories are only created outside test mode. The
    image directory (and, outside test mode, the label directory) is a
    prefix that gets one sub-directory per suffix ``0`` .. ``4``.
    """
    flat_dirs = [
        self.calib_save_dir, self.point_cloud_save_dir,
        self.pose_save_dir, self.timestamp_save_dir
    ]
    indexed_prefixes = [self.image_save_dir]
    if not self.test_mode:
        flat_dirs.insert(0, self.label_all_save_dir)
        indexed_prefixes.insert(0, self.label_save_dir)

    for directory in flat_dirs:
        mmcv.mkdir_or_exist(directory)
    for dir_prefix in indexed_prefixes:
        for suffix in range(5):
            mmcv.mkdir_or_exist(f'{dir_prefix}{str(suffix)}')
def convert_range_image_to_point_cloud(self,
                                       frame,
                                       range_images,
                                       camera_projections,
                                       range_image_top_pose,
                                       ri_index=0):
    """Convert range images to point cloud.

    Args:
        frame (:obj:`Frame`): Open dataset frame.
        range_images (dict): Mapping from laser_name to list of two
            range images corresponding with two returns.
        camera_projections (dict): Mapping from laser_name to list of two
            camera projections corresponding with two returns.
        range_image_top_pose (:obj:`Transform`): Range image pixel pose for
            top lidar.
        ri_index (int, optional): 0 for the first return,
            1 for the second return. Default: 0.

    Returns:
        tuple[list[np.ndarray]]: (List of points with shape [N, 3],
            camera projections of points with shape [N, 6], intensity
            with shape [N, 1], elongation with shape [N, 1], points'
            position in the depth map (element offset if points come from
            the main lidar otherwise -1) with shape[N, 1]). All the
            lists have the length of lidar numbers (5).
    """
    # Process the lidars in a deterministic order (sorted by laser name).
    calibrations = sorted(
        frame.context.laser_calibrations, key=lambda c: c.name)
    points = []
    cp_points = []
    intensity = []
    elongation = []
    mask_indices = []

    frame_pose = tf.convert_to_tensor(
        value=np.reshape(np.array(frame.pose.transform), [4, 4]))
    # [H, W, 6]
    range_image_top_pose_tensor = tf.reshape(
        tf.convert_to_tensor(value=range_image_top_pose.data),
        range_image_top_pose.shape.dims)
    # [H, W, 3, 3]
    range_image_top_pose_tensor_rotation = \
        transform_utils.get_rotation_matrix(
            range_image_top_pose_tensor[..., 0],
            range_image_top_pose_tensor[..., 1],
            range_image_top_pose_tensor[..., 2])
    range_image_top_pose_tensor_translation = \
        range_image_top_pose_tensor[..., 3:]
    range_image_top_pose_tensor = transform_utils.get_transform(
        range_image_top_pose_tensor_rotation,
        range_image_top_pose_tensor_translation)

    for c in calibrations:
        range_image = range_images[c.name][ri_index]
        if len(c.beam_inclinations) == 0:
            # No per-beam inclinations stored: derive them from the
            # min/max inclination and the range image height.
            beam_inclinations = range_image_utils.compute_inclination(
                tf.constant(
                    [c.beam_inclination_min, c.beam_inclination_max]),
                height=range_image.shape.dims[0])
        else:
            beam_inclinations = tf.constant(c.beam_inclinations)

        beam_inclinations = tf.reverse(beam_inclinations, axis=[-1])
        extrinsic = np.reshape(np.array(c.extrinsic.transform), [4, 4])

        range_image_tensor = tf.reshape(
            tf.convert_to_tensor(value=range_image.data),
            range_image.shape.dims)

        # Only the top lidar is given a per-pixel pose and frame pose.
        is_top_lidar = c.name == dataset_pb2.LaserName.TOP
        pixel_pose_local = None
        frame_pose_local = None
        if is_top_lidar:
            pixel_pose_local = range_image_top_pose_tensor
            pixel_pose_local = tf.expand_dims(pixel_pose_local, axis=0)
            frame_pose_local = tf.expand_dims(frame_pose, axis=0)

        # Channel 0 is used as the range; keep pixels with positive range.
        range_image_mask = range_image_tensor[..., 0] > 0

        if self.filter_no_label_zone_points:
            nlz_mask = range_image_tensor[..., 3] != 1.0  # 1.0: in NLZ
            range_image_mask = range_image_mask & nlz_mask

        range_image_cartesian = \
            range_image_utils.extract_point_cloud_from_range_image(
                tf.expand_dims(range_image_tensor[..., 0], axis=0),
                tf.expand_dims(extrinsic, axis=0),
                tf.expand_dims(tf.convert_to_tensor(
                    value=beam_inclinations), axis=0),
                pixel_pose=pixel_pose_local,
                frame_pose=frame_pose_local)

        mask_index = tf.where(range_image_mask)

        range_image_cartesian = tf.squeeze(range_image_cartesian, axis=0)
        points_tensor = tf.gather_nd(range_image_cartesian, mask_index)

        cp = camera_projections[c.name][ri_index]
        cp_tensor = tf.reshape(
            tf.convert_to_tensor(value=cp.data), cp.shape.dims)
        cp_points_tensor = tf.gather_nd(cp_tensor, mask_index)
        points.append(points_tensor.numpy())
        cp_points.append(cp_points_tensor.numpy())

        intensity_tensor = tf.gather_nd(range_image_tensor[..., 1],
                                        mask_index)
        intensity.append(intensity_tensor.numpy())

        elongation_tensor = tf.gather_nd(range_image_tensor[..., 2],
                                         mask_index)
        elongation.append(elongation_tensor.numpy())

        # Use the named enum instead of the bare value 1 so that this
        # check visibly matches the top-lidar check above.
        if is_top_lidar:
            # Flattened element offset of every kept pixel; the second
            # return is offset by one full image (ri_index * H rows).
            mask_index = (ri_index * range_image_mask.shape[0] +
                          mask_index[:, 0]
                          ) * range_image_mask.shape[1] + mask_index[:, 1]
            mask_index = mask_index.numpy().astype(elongation[-1].dtype)
        else:
            mask_index = np.full_like(elongation[-1], -1)

        mask_indices.append(mask_index)

    return points, cp_points, intensity, elongation, mask_indices
def cart_to_homo(self, mat):
    """Embed a 3x3 or 3x4 Cartesian transform into a 4x4 homogeneous one.

    Args:
        mat (np.ndarray): Transformation matrix in Cartesian.
            The input matrix shape is 3x3 or 3x4.

    Returns:
        np.ndarray: Transformation matrix in homogeneous format.
            The matrix shape is 4x4.

    Raises:
        ValueError: If ``mat`` is neither 3x3 nor 3x4.
    """
    if mat.shape not in ((3, 3), (3, 4)):
        raise ValueError(mat.shape)
    homo = np.eye(4)
    n_rows, n_cols = mat.shape
    # Copy into the top-left corner; the rest stays identity.
    homo[:n_rows, :n_cols] = mat
    return homo
# Copyright (c) OpenMMLab. All rights reserved.
from argparse import ArgumentParser, Namespace
from pathlib import Path
from tempfile import TemporaryDirectory
import mmcv
try:
from model_archiver.model_packaging import package_model
from model_archiver.model_packaging_utils import ModelExportUtils
except ImportError:
package_model = None
def mmdet3d2torchserve(
    config_file: str,
    checkpoint_file: str,
    output_folder: str,
    model_name: str,
    model_version: str = '1.0',
    force: bool = False,
):
    """Converts MMDetection3D model (config + checkpoint) to TorchServe `.mar`.

    Args:
        config_file (str):
            In MMDetection3D config format.
            The contents vary for each task repository.
        checkpoint_file (str):
            In MMDetection3D checkpoint format.
            The contents vary for each task repository.
        output_folder (str):
            Folder where `{model_name}.mar` will be created.
            The file created will be in TorchServe archive format.
        model_name (str):
            If not None, used for naming the `{model_name}.mar` file
            that will be created under `output_folder`.
            If None, `{Path(checkpoint_file).stem}` will be used.
        model_version (str, optional):
            Model's version. Default: '1.0'.
        force (bool, optional):
            If True, if there is an existing `{model_name}.mar`
            file under `output_folder` it will be overwritten.
            Default: False.

    Raises:
        ImportError: If `torch-model-archiver` is not installed.
    """
    # The module-level import falls back to `package_model = None` and
    # leaves `ModelExportUtils` undefined; fail early with a clear message
    # instead of a NameError/TypeError further down.
    if package_model is None:
        raise ImportError('`torch-model-archiver` is required. '
                          'Try: pip install torch-model-archiver')

    mmcv.mkdir_or_exist(output_folder)

    config = mmcv.Config.fromfile(config_file)

    with TemporaryDirectory() as tmpdir:
        # Dump the resolved config into the temporary directory and
        # package that file as the archive's model file.
        config.dump(f'{tmpdir}/config.py')

        args = Namespace(
            **{
                'model_file': f'{tmpdir}/config.py',
                'serialized_file': checkpoint_file,
                'handler': f'{Path(__file__).parent}/mmdet3d_handler.py',
                'model_name': model_name or Path(checkpoint_file).stem,
                'version': model_version,
                'export_path': output_folder,
                'force': force,
                'requirements_file': None,
                'extra_files': None,
                'runtime': 'python',
                'archive_format': 'default'
            })
        manifest = ModelExportUtils.generate_manifest_json(args)
        package_model(args, manifest)
def parse_args():
    """Parse the command line of the TorchServe conversion script.

    Returns:
        argparse.Namespace: Parsed arguments with the attributes
            ``config``, ``checkpoint``, ``output_folder``, ``model_name``,
            ``model_version`` and ``force``.
    """
    parser = ArgumentParser(
        description='Convert MMDetection3D models to TorchServe `.mar` '
        'format.')
    parser.add_argument('config', type=str, help='config file path')
    parser.add_argument('checkpoint', type=str, help='checkpoint file path')
    parser.add_argument(
        '--output-folder',
        type=str,
        required=True,
        help='Folder where `{model_name}.mar` will be created.')
    # Adjacent string literals are joined verbatim, so each piece ends with
    # an explicit space.
    parser.add_argument(
        '--model-name',
        type=str,
        default=None,
        help='If not None, used for naming the `{model_name}.mar` '
        'file that will be created under `output_folder`. '
        'If None, `{Path(checkpoint_file).stem}` will be used.')
    parser.add_argument(
        '--model-version',
        type=str,
        default='1.0',
        help='Number used for versioning.')
    parser.add_argument(
        '-f',
        '--force',
        action='store_true',
        help='overwrite the existing `{model_name}.mar`')
    args = parser.parse_args()

    return args
if __name__ == '__main__':
    args = parse_args()

    # `package_model` is None when importing `model_archiver` failed.
    if package_model is None:
        raise ImportError('`torch-model-archiver` is required. '
                          'Try: pip install torch-model-archiver')

    mmdet3d2torchserve(args.config, args.checkpoint, args.output_folder,
                       args.model_name, args.model_version, args.force)
# Copyright (c) OpenMMLab. All rights reserved.
import base64
import os
import numpy as np
import torch
from ts.torch_handler.base_handler import BaseHandler
from mmdet3d.apis import inference_detector, init_model
from mmdet3d.core.points import get_points_type
class MMdet3dHandler(BaseHandler):
    """TorchServe handler that serves an MMDetection3D model.

    The handler loads a model, turns the raw request bytes into a points
    object, runs inference and returns the boxes whose score exceeds
    ``threshold``. For now, it only supports SECOND.
    """
    # Minimum score a predicted box needs to be returned.
    threshold = 0.5
    # Number of float32 values per point in the request payload.
    load_dim = 4
    # Columns of every loaded point that are kept.
    use_dim = [0, 1, 2, 3]
    # Coordinate type handed to `get_points_type`.
    coord_type = 'LIDAR'
    attribute_dims = None

    def initialize(self, context):
        """Load the MMDetection3D model described by the TorchServe context.

        Args:
            context (context): It is a JSON Object containing information
                pertaining to the model artifacts parameters.
        """
        properties = context.system_properties
        has_cuda = torch.cuda.is_available()
        self.map_location = 'cuda' if has_cuda else 'cpu'
        if has_cuda:
            # Pin the model to the GPU assigned by TorchServe.
            device_name = self.map_location + ':' + str(
                properties.get('gpu_id'))
        else:
            device_name = self.map_location
        self.device = torch.device(device_name)
        self.manifest = context.manifest

        model_dir = properties.get('model_dir')
        checkpoint = os.path.join(
            model_dir, self.manifest['model']['serializedFile'])
        self.config_file = os.path.join(model_dir, 'config.py')
        self.model = init_model(self.config_file, checkpoint, self.device)
        self.initialized = True

    def preprocess(self, data):
        """Convert the request payload into a points object.

        Every row is decoded, but only the points built from the last row
        are returned.

        Args:
            data (List): Input data from the request.

        Returns:
            `LiDARPoints` : The preprocess function returns the input
                point cloud data as LiDARPoints class.
        """
        for request in data:
            # Compat layer: normally the envelope should just return the data
            # directly, but older versions of Torchserve didn't have envelope.
            raw = request.get('data') or request.get('body')
            if isinstance(raw, str):
                raw = base64.b64decode(raw)

            flat = np.frombuffer(raw, dtype=np.float32)
            selected = flat.reshape(-1, self.load_dim)[:, self.use_dim]
            points_cls = get_points_type(self.coord_type)
            points = points_cls(
                selected,
                points_dim=selected.shape[-1],
                attribute_dims=self.attribute_dims)

        return points

    def inference(self, data):
        """Run the detector on the preprocessed points.

        Args:
            data (`LiDARPoints`): LiDARPoints class passed to make
                the inference request.

        Returns:
            List(dict) : The predicted result is returned in this function.
        """
        predictions, _ = inference_detector(self.model, data)
        return predictions

    def postprocess(self, data):
        """Turn raw detections into a TorchServe response.

        Args:
            data (List[dict]): The data received from the prediction
                output of the model.

        Returns:
            List: One single-element list per input result. The element is
                a dict with the keys ``3dbbox`` and ``score`` holding the
                boxes and scores above ``threshold``.
        """
        output = []
        for result in data:
            # Results are either nested under 'pts_bbox' or flat.
            det = result['pts_bbox'] if 'pts_bbox' in result.keys() \
                else result
            pred_bboxes = det['boxes_3d'].tensor.numpy()
            pred_scores = det['scores_3d'].numpy()

            keep = pred_scores > self.threshold
            output.append([{
                '3dbbox': pred_bboxes[keep].tolist(),
                'score': pred_scores[keep].tolist()
            }])

        return output
from argparse import ArgumentParser
import numpy as np
import requests
from mmdet3d.apis import inference_detector, init_model
def parse_args():
    """Parse the command line of the TorchServe comparison script.

    Returns:
        argparse.Namespace: Parsed arguments with the attributes ``pcd``,
            ``config``, ``checkpoint``, ``model_name``, ``inference_addr``,
            ``device`` and ``score_thr``.
    """
    parser = ArgumentParser()

    # Required positional arguments, in command line order.
    positionals = (
        ('pcd', 'Point cloud file'),
        ('config', 'Config file'),
        ('checkpoint', 'Checkpoint file'),
        ('model_name', 'The model name in the server'),
    )
    for arg_name, arg_help in positionals:
        parser.add_argument(arg_name, help=arg_help)

    parser.add_argument(
        '--inference-addr',
        default='127.0.0.1:8080',
        help='Address and port of the inference server')
    parser.add_argument(
        '--device', default='cuda:0', help='Device used for inference')
    parser.add_argument(
        '--score-thr', type=float, default=0.5, help='3d bbox score threshold')
    return parser.parse_args()
def parse_result(input):
    """Extract the 3D boxes of the first entry of a server response.

    Args:
        input (list[dict]): Decoded response; the first element must
            contain the key ``3dbbox``.

    Returns:
        np.ndarray: The boxes stored under ``3dbbox`` as an array.
    """
    return np.array(input[0]['3dbbox'])
def main(args):
    """Check that the TorchServe deployment reproduces local inference.

    The point cloud is run through a locally built model and posted to the
    inference server; the boxes kept by both sides must agree.

    Args:
        args (argparse.Namespace): Needs ``config``, ``checkpoint``,
            ``device``, ``pcd``, ``inference_addr`` and ``model_name``.
            ``score_thr`` is used as the local score threshold and falls
            back to 0.5 when absent. The server filters with its own
            threshold, so the two values have to match for the comparison
            to be meaningful.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        AssertionError: If local and served boxes differ.
    """
    # build the model from a config file and a checkpoint file
    model = init_model(args.config, args.checkpoint, device=args.device)
    # test a single point cloud file
    model_result, _ = inference_detector(model, args.pcd)

    # Results are either nested under 'pts_bbox' or flat.
    first = model_result[0]
    det = first['pts_bbox'] if 'pts_bbox' in first.keys() else first
    pred_bboxes = det['boxes_3d'].tensor.numpy()
    pred_scores = det['scores_3d'].numpy()

    # Honour --score-thr instead of a hard-coded 0.5.
    score_thr = getattr(args, 'score_thr', 0.5)
    model_result = pred_bboxes[pred_scores > score_thr]

    url = 'http://' + args.inference_addr + '/predictions/' + args.model_name
    with open(args.pcd, 'rb') as points:
        response = requests.post(url, points)
    # Surface HTTP errors directly rather than failing while decoding an
    # error page as JSON.
    response.raise_for_status()
    server_result = parse_result(response.json())
    assert np.allclose(model_result, server_result)
# Script entry point: parse the command line and run the comparison.
if __name__ == '__main__':
    main(parse_args())
#!/usr/bin/env bash
# Launch distributed testing with torch.distributed.launch.
#
# Usage: <script> CONFIG CHECKPOINT GPUS [extra test.py arguments...]
# Environment overrides: NNODES, NODE_RANK, PORT, MASTER_ADDR.

CONFIG=$1
CHECKPOINT=$2
GPUS=$3
NNODES=${NNODES:-1}
NODE_RANK=${NODE_RANK:-0}
PORT=${PORT:-29501}
MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"}

# Expansions are quoted so that paths containing spaces and forwarded
# arguments are passed through unchanged.
PYTHONPATH="$(dirname "$0")/..":$PYTHONPATH \
python -m torch.distributed.launch \
    --nnodes="$NNODES" \
    --node_rank="$NODE_RANK" \
    --master_addr="$MASTER_ADDR" \
    --nproc_per_node="$GPUS" \
    --master_port="$PORT" \
    "$(dirname "$0")/test.py" \
    "$CONFIG" \
    "$CHECKPOINT" \
    --launcher pytorch \
    "${@:4}"
#!/usr/bin/env bash
# Launch distributed training with torch.distributed.launch.
#
# Usage: <script> CONFIG GPUS [extra train.py arguments...]
# Environment overrides: NNODES, NODE_RANK, PORT, MASTER_ADDR.

CONFIG=$1
GPUS=$2
NNODES=${NNODES:-1}
NODE_RANK=${NODE_RANK:-0}
PORT=${PORT:-29500}
MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"}

# Expansions are quoted so that paths containing spaces and forwarded
# arguments are passed through unchanged.
PYTHONPATH="$(dirname "$0")/..":$PYTHONPATH \
python -m torch.distributed.launch \
    --nnodes="$NNODES" \
    --node_rank="$NODE_RANK" \
    --master_addr="$MASTER_ADDR" \
    --nproc_per_node="$GPUS" \
    --master_port="$PORT" \
    "$(dirname "$0")/train.py" \
    "$CONFIG" \
    --seed 0 \
    --launcher pytorch "${@:3}"
#!/usr/bin/env bash
# Launch distributed training through torchrun, binding every local rank to
# the NUMA node reported for it by `hy-smi --showtopo`.
#
# Usage: <script> [MASTER_ADDR] [NNODES] [NODE_RANK] CONFIG [extra train.py arguments...]

MASTER_ADDR=${1:-localhost}
MASTER_PORT=6000
NNODES=${2:-1}
NODE_RANK=${3:-0}
GPUS_PER_NODE=8

# Left unquoted where it is used below: it must split into separate options.
DISTRIBUTED_ARGS="--nproc_per_node $GPUS_PER_NODE --nnodes $NNODES --node_rank $NODE_RANK --master_addr $MASTER_ADDR --master_port $MASTER_PORT"

CONFIG=$4

# add numa affinity config
# Inside the inline script "$0" is the placeholder "_", so tools/train.py is
# resolved relative to the current working directory.
PYTHONPATH="$(dirname "$0")/..":$PYTHONPATH \
torchrun $DISTRIBUTED_ARGS --no-python \
bash -c '
numa_map=( $(hy-smi --showtopo | grep "Numa Node" | awk "{print \$6}") );
LOCAL_RANK=${LOCAL_RANK:-0}
NUMA_ID=${numa_map[$LOCAL_RANK]}
numactl --cpunodebind=${NUMA_ID} --membind=${NUMA_ID} python $(dirname "$0")/tools/train.py "$@"
' _ "$CONFIG" --launcher pytorch "${@:5}"
import argparse
import sys
import os
sys.path.insert(0, os.getcwd())
import torch.onnx
from mmcv import Config
from mmdeploy.backend.tensorrt.utils import save, search_cuda_version
try:
# If mmdet version > 2.23.0, compat_cfg would be imported and
# used from mmdet instead of mmdet3d.
from mmdet.utils import compat_cfg
except ImportError:
from mmdet3d.utils import compat_cfg
import os
from typing import Dict, Optional, Sequence, Union
import h5py
import mmcv
import numpy as np
import onnx
import pycuda.driver as cuda
import tensorrt as trt
import torch
import tqdm
from mmcv.runner import load_checkpoint
from mmdeploy.apis.core import no_mp
from mmdeploy.backend.tensorrt.calib_utils import HDF5Calibrator
from mmdeploy.backend.tensorrt.init_plugins import load_tensorrt_plugin
from mmdeploy.utils import load_config
from packaging import version
from torch.utils.data import DataLoader
from mmdet3d.datasets import build_dataloader, build_dataset
from mmdet3d.models import build_model
from mmdet.datasets import replace_ImageToTensor
from tools.misc.fuse_conv_bn import fuse_module
class HDF5CalibratorBEVDet(HDF5Calibrator):
    """Calibrator that reads its batches from the calibration data file.

    The ``img`` input is loaded as float32, every other input as int32.
    """

    def get_batch(self, names: Sequence[str], **kwargs) -> list:
        """Get batch data.

        Args:
            names (Sequence[str]): Names of the inputs to load.

        Returns:
            list | None: Device buffers in the order of ``names``, or None
                once all samples have been consumed.
        """
        if self.count >= self.dataset_length:
            return None

        if self.count % 100 == 0:
            print('%d/%d' % (self.count, self.dataset_length))

        sample_key = str(self.count)
        device_buffers = []
        for name in names:
            target_dtype = np.float32 if name == 'img' else np.int32
            host_array = self.calib_data[name][sample_key][...].astype(
                target_dtype)

            # Tile the sample up to at least `opt_shape` and crop it back
            # to exactly `opt_shape`.
            opt_shape = self.input_shapes[name]['opt_shape']
            repeats = [
                int(np.ceil(opt_dim / data_dim))
                for opt_dim, data_dim in zip(opt_shape, host_array.shape)
            ]
            host_array = np.tile(host_array, repeats)
            crop = tuple(slice(0, end) for end in opt_shape)
            host_array = host_array[crop]

            # Copy the sample to the device and remember its buffer.
            device_ptr = cuda.mem_alloc(host_array.nbytes)
            cuda.memcpy_htod(device_ptr, np.ascontiguousarray(host_array))
            self.buffers[name] = device_ptr

            device_buffers.append(self.buffers[name])

        self.count += 1
        return device_buffers
def parse_args():
    """Parse the command line of the TensorRT deployment script.

    Returns:
        argparse.Namespace: Parsed arguments with the attributes
            ``config``, ``checkpoint``, ``work_dir``, ``prefix``, ``fp16``,
            ``int8``, ``fuse_conv_bn`` and ``calib_num``.
    """
    parser = argparse.ArgumentParser(description='Deploy BEVDet with Tensorrt')
    parser.add_argument('config', help='deploy config file path')
    parser.add_argument('checkpoint', help='checkpoint file')
    parser.add_argument('work_dir', help='work dir to save file')
    parser.add_argument(
        '--prefix', default='bevdet', help='prefix of the save file name')
    parser.add_argument(
        '--fp16', action='store_true', help='Whether to use tensorrt fp16')
    parser.add_argument(
        '--int8', action='store_true', help='Whether to use tensorrt int8')
    # Adjacent string literals are joined verbatim, hence the trailing
    # space after "increase".
    parser.add_argument(
        '--fuse-conv-bn',
        action='store_true',
        help='Whether to fuse conv and bn, this will slightly increase '
        'the inference speed')
    parser.add_argument('--calib_num', type=int, help='num to calib')
    args = parser.parse_args()
    return args
def get_plugin_names():
    """Return the names of all plugin creators in the TensorRT registry."""
    registry = trt.get_plugin_registry()
    return [creator.name for creator in registry.plugin_creator_list]
def create_calib_input_data_impl(calib_file: str,
                                 dataloader: DataLoader,
                                 model_partition: bool = False,
                                 metas: Optional[list] = None,
                                 calib_num=None) -> None:
    """Write the calibration samples of a dataloader into an HDF5 file.

    The file gets a ``calib_data/end2end`` group with one sub-group for
    ``img`` and one for each bev-pool input. The image of every sample is
    stored under the sample index; the same ``metas`` arrays are stored for
    every sample.

    Args:
        calib_file (str): The output calibration data file.
        dataloader (DataLoader): Loader yielding dicts with the key
            ``img_inputs``.
        model_partition (bool): Must be False; partitioned models are not
            supported. Defaults to False.
        metas (list, optional): Tensors for ``ranks_bev``, ``ranks_depth``,
            ``ranks_feat``, ``interval_starts`` and ``interval_lengths``,
            in that order. Defaults to None (treated as an empty list).
        calib_num (int, optional): Number of samples to store. If None,
            the whole dataloader is used. Defaults to None.
    """
    # Avoid a shared mutable default argument.
    if metas is None:
        metas = []

    with h5py.File(calib_file, mode='w') as file:
        calib_data_group = file.create_group('calib_data')
        assert not model_partition
        # create end2end group
        input_data_group = calib_data_group.create_group('end2end')
        input_group_img = input_data_group.create_group('img')
        input_keys = [
            'ranks_bev', 'ranks_depth', 'ranks_feat', 'interval_starts',
            'interval_lengths'
        ]
        input_groups = [
            input_data_group.create_group(input_key)
            for input_key in input_keys
        ]
        metas = [meta.int().detach().cpu().numpy() for meta in metas]
        for data_id, input_data in enumerate(tqdm.tqdm(dataloader)):
            # Stop after exactly `calib_num` samples (ids 0..calib_num-1);
            # comparing with `>` would store one sample too many.
            if (calib_num is not None) and (data_id >= calib_num):
                break
            # save end2end data
            input_tensor = input_data['img_inputs'][0][0]
            input_ndarray = input_tensor.squeeze(0).detach().cpu().numpy()
            input_group_img.create_dataset(
                str(data_id),
                shape=input_ndarray.shape,
                compression='gzip',
                compression_opts=4,
                data=input_ndarray)
            for kid, _input_key in enumerate(input_keys):
                input_groups[kid].create_dataset(
                    str(data_id),
                    shape=metas[kid].shape,
                    compression='gzip',
                    compression_opts=4,
                    data=metas[kid])
            file.flush()
def create_calib_input_data(calib_file: str,
                            deploy_cfg: Union[str, mmcv.Config],
                            model_cfg: Union[str, mmcv.Config],
                            model_checkpoint: Optional[str] = None,
                            dataset_cfg: Optional[Union[str,
                                                        mmcv.Config]] = None,
                            dataset_type: str = 'val',
                            device: str = 'cpu',
                            metas: Optional[list] = None,
                            calib_num=None) -> None:
    """Create dataset for post-training quantization.

    Args:
        calib_file (str): The output calibration data file.
        deploy_cfg (str | mmcv.Config): Deployment config file or
            Config object.
        model_cfg (str | mmcv.Config): Model config file or Config object.
        model_checkpoint (str): A checkpoint path of PyTorch model,
            defaults to `None`. It is not used by this function.
        dataset_cfg (Optional[Union[str, mmcv.Config]], optional): Model
            config to provide calibration dataset. If none, use `model_cfg`
            as the dataset config. Defaults to None.
        dataset_type (str, optional): The dataset type. Defaults to 'val'.
        device (str, optional): Device to create dataset. Defaults to 'cpu'.
        metas (list, optional): Passed through to
            `create_calib_input_data_impl`. Defaults to None, which is
            forwarded as ``[None]``.
        calib_num (int, optional): Passed through to
            `create_calib_input_data_impl`. Defaults to None.
    """
    # Avoid a shared mutable default while keeping the forwarded value.
    if metas is None:
        metas = [None]

    with no_mp():
        # Fall back to the (not yet loaded) model config for the dataset.
        if dataset_cfg is None:
            dataset_cfg = model_cfg

        # load cfg if necessary
        deploy_cfg, model_cfg = load_config(deploy_cfg, model_cfg)

        # load dataset_cfg if necessary
        dataset_cfg = load_config(dataset_cfg)[0]

        from mmdeploy.apis.utils import build_task_processor
        task_processor = build_task_processor(model_cfg, deploy_cfg, device)

        dataset = task_processor.build_dataset(dataset_cfg, dataset_type)

        dataloader = task_processor.build_dataloader(
            dataset, 1, 1, dist=False, shuffle=False)

        create_calib_input_data_impl(
            calib_file,
            dataloader,
            model_partition=False,
            metas=metas,
            calib_num=calib_num)
def from_onnx(onnx_model: Union[str, onnx.ModelProto],
              output_file_prefix: str,
              input_shapes: Dict[str, Sequence[int]],
              max_workspace_size: int = 0,
              fp16_mode: bool = False,
              int8_mode: bool = False,
              int8_param: Optional[dict] = None,
              device_id: int = 0,
              log_level: trt.Logger.Severity = trt.Logger.ERROR,
              **kwargs) -> trt.ICudaEngine:
    """Build a TensorRT engine from an ONNX model and save it to disk.

    Modified from mmdeploy.backend.tensorrt.utils.from_onnx

    Args:
        onnx_model (str | onnx.ModelProto): Path of an ONNX file or a
            loaded ONNX model.
        output_file_prefix (str): The engine is saved to
            ``output_file_prefix + '.engine'``.
        input_shapes (dict): For every input name a mapping with the keys
            ``min_shape``, ``opt_shape`` and ``max_shape``.
        max_workspace_size (int): Workspace size given to the builder
            config. Defaults to 0.
        fp16_mode (bool): Whether to set the FP16 builder flag.
        int8_mode (bool): Whether to set the INT8 builder flag and attach
            a calibrator. Requires ``int8_param``.
        int8_param (dict, optional): Needs the keys ``calib_file`` and
            ``model_type``; ``algorithm`` is optional.
        device_id (int): Exported as ``CUDA_DEVICE`` while pycuda is
            initialised and passed to the calibrator. Defaults to 0.
        log_level (trt.Logger.Severity): Severity of the TensorRT logger.

    Returns:
        trt.ICudaEngine: The built engine.

    Raises:
        RuntimeError: If the ONNX model cannot be parsed.
    """
    import os

    # Expose the requested device only while pycuda initialises, then put
    # the environment back the way it was.
    previous_device = os.environ.get('CUDA_DEVICE', None)
    os.environ['CUDA_DEVICE'] = str(device_id)
    import pycuda.autoinit  # noqa:F401
    if previous_device is None:
        os.environ.pop('CUDA_DEVICE')
    else:
        os.environ['CUDA_DEVICE'] = previous_device

    load_tensorrt_plugin()

    # TensorRT < 8 additionally expects the settings on the builder itself.
    legacy_trt = version.parse(trt.__version__) < version.parse('8')

    # create builder and network
    logger = trt.Logger(log_level)
    builder = trt.Builder(logger)
    explicit_batch_flag = 1 << int(
        trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
    network = builder.create_network(explicit_batch_flag)

    # parse onnx
    parser = trt.OnnxParser(network, logger)
    if isinstance(onnx_model, str):
        onnx_model = onnx.load(onnx_model)

    parsed_ok = parser.parse(onnx_model.SerializeToString())
    if not parsed_ok:
        error_msgs = ''.join(f'{parser.get_error(idx)}\n'
                             for idx in range(parser.num_errors))
        raise RuntimeError(f'Failed to parse onnx, {error_msgs}')

    # config builder
    if legacy_trt:
        builder.max_workspace_size = max_workspace_size

    config = builder.create_builder_config()
    config.max_workspace_size = max_workspace_size

    cuda_version = search_cuda_version()
    if cuda_version is not None and int(cuda_version.split('.')[0]) < 11:
        # cu11 support cublasLt, so cudnn heuristic tactic should disable CUBLAS_LT # noqa E501
        tactic_source = config.get_tactic_sources() - (
            1 << int(trt.TacticSource.CUBLAS_LT))
        config.set_tactic_sources(tactic_source)

    # One optimization profile covering every input.
    profile = builder.create_optimization_profile()
    for input_name, shape_cfg in input_shapes.items():
        profile.set_shape(input_name, shape_cfg['min_shape'],
                          shape_cfg['opt_shape'], shape_cfg['max_shape'])
    config.add_optimization_profile(profile)

    if fp16_mode:
        if legacy_trt:
            builder.fp16_mode = fp16_mode
        config.set_flag(trt.BuilderFlag.FP16)

    if int8_mode:
        config.set_flag(trt.BuilderFlag.INT8)
        assert int8_param is not None
        calib_algorithm = int8_param.get(
            'algorithm', trt.CalibrationAlgoType.ENTROPY_CALIBRATION_2)
        config.int8_calibrator = HDF5CalibratorBEVDet(
            int8_param['calib_file'],
            input_shapes,
            model_type=int8_param['model_type'],
            device_id=device_id,
            algorithm=calib_algorithm)
        if legacy_trt:
            builder.int8_mode = int8_mode
            builder.int8_calibrator = config.int8_calibrator

    # create engine
    engine = builder.build_engine(network, config)

    assert engine is not None, 'Failed to create TensorRT engine'

    engine_path = output_file_prefix + '.engine'
    save(engine, engine_path)
    print('Save engine at ', output_file_prefix + '.engine')
    return engine
def main():
args = parse_args()
max_workspace_size = 200*200*256*(2**8)
if not os.path.exists(args.work_dir):
os.makedirs(args.work_dir)
load_tensorrt_plugin()
# assert 'bev_pool_v2' in get_plugin_names(), \
# 'bev_pool_v2 is not in the plugin list of tensorrt, ' \
# 'please install mmdeploy from ' \
# 'https://github.com/HuangJunJie2017/mmdeploy.git'
# if args.int8:
# assert args.fp16
model_prefix = args.prefix
if args.int8:
model_prefix = model_prefix + '_int8'
elif args.fp16:
model_prefix = model_prefix + '_fp16'
cfg = Config.fromfile(args.config)
cfg.model.pretrained = None
cfg.model.type = cfg.model.type + 'TRT'
cfg = compat_cfg(cfg)
cfg.gpu_ids = [0]
# import modules from plguin/xx, registry will be updated
if hasattr(cfg, 'plugin'):
if cfg.plugin:
import importlib
if hasattr(cfg, 'plugin_dir'):
plugin_dir = cfg.plugin_dir
_module_dir = os.path.dirname(plugin_dir)
_module_dir = _module_dir.split('/')
_module_path = _module_dir[0]
for m in _module_dir[1:]:
_module_path = _module_path + '.' + m
print(_module_path)
plg_lib = importlib.import_module(_module_path)
else:
# import dir is the dirpath for the config file
_module_dir = os.path.dirname(args.config)
_module_dir = _module_dir.split('/')
_module_path = _module_dir[0]
for m in _module_dir[1:]:
_module_path = _module_path + '.' + m
plg_lib = importlib.import_module(_module_path)
# build the dataloader
test_dataloader_default_args = dict(
samples_per_gpu=1, workers_per_gpu=0, dist=False, shuffle=False)
if isinstance(cfg.data.test, dict):
cfg.data.test.test_mode = True
if cfg.data.test_dataloader.get('samples_per_gpu', 1) > 1:
# Replace 'ImageToTensor' to 'DefaultFormatBundle'
cfg.data.test.pipeline = replace_ImageToTensor(
cfg.data.test.pipeline)
elif isinstance(cfg.data.test, list):
for ds_cfg in cfg.data.test:
ds_cfg.test_mode = True
if cfg.data.test_dataloader.get('samples_per_gpu', 1) > 1:
for ds_cfg in cfg.data.test:
ds_cfg.pipeline = replace_ImageToTensor(ds_cfg.pipeline)
test_loader_cfg = {
**test_dataloader_default_args,
**cfg.data.get('test_dataloader', {})
}
dataset = build_dataset(cfg.data.test)
data_loader = build_dataloader(dataset, **test_loader_cfg)
# build the model and load checkpoint
cfg.model.train_cfg = None
model = build_model(cfg.model, test_cfg=cfg.get('test_cfg'))
# assert model.img_view_transformer.grid_size[0] == 128
# assert model.img_view_transformer.grid_size[1] == 128
# assert model.img_view_transformer.grid_size[2] == 1
if os.path.exists(args.checkpoint):
load_checkpoint(model, args.checkpoint, map_location='cpu')
else:
print(args.checkpoint, " does not exists!")
if args.fuse_conv_bn:
model_prefix = model_prefix + '_fuse'
model = fuse_module(model)
model.cuda()
model.eval()
for i, data in enumerate(data_loader):
inputs = [t.cuda() for t in data['img_inputs'][0]]
img = inputs[0].squeeze(0)
if img.shape[0] > 6:
img = img[:6]
if model.__class__.__name__ in ['FBOCCTRT', 'FBOCC2DTRT']:
metas = model.get_bev_pool_input(inputs, img_metas=data['img_metas'])
else:
if model.__class__.__name__ in ['BEVDetOCCTRT']:
metas = model.get_bev_pool_input(inputs)
elif model.__class__.__name__ in ['BEVDepthOCCTRT', 'BEVDepthPanoTRT']:
metas, mlp_input = model.get_bev_pool_input(inputs)
if model.__class__.__name__ in ['FBOCCTRT', 'FBOCC2DTRT', 'BEVDetOCCTRT']:
onnx_input = (img.float().contiguous(), metas[1].int().contiguous(),
metas[2].int().contiguous(), metas[0].int().contiguous(),
metas[3].int().contiguous(), metas[4].int().contiguous())
dynamic_axes={
"ranks_depth" : {0: 'M'},
"ranks_feat" : {0: 'M'},
"ranks_bev" : {0: 'M'},
"interval_starts" : {0: 'N'},
"interval_lengths" : {0: 'N'},
}
input_names=[
'img', 'ranks_depth', 'ranks_feat', 'ranks_bev',
'interval_starts', 'interval_lengths'
]
elif model.__class__.__name__ in ['BEVDepthOCCTRT', 'BEVDepthPanoTRT']:
onnx_input = (img.float().contiguous(), metas[1].int().contiguous(),
metas[2].int().contiguous(), metas[0].int().contiguous(),
metas[3].int().contiguous(), metas[4].int().contiguous(), mlp_input)
dynamic_axes={
"ranks_depth" : {0: 'M'},
"ranks_feat" : {0: 'M'},
"ranks_bev" : {0: 'M'},
"interval_starts" : {0: 'N'},
"interval_lengths" : {0: 'N'},
# "mlp_input" : {0: 'K'},
}
input_names=[
'img', 'ranks_depth', 'ranks_feat', 'ranks_bev',
'interval_starts', 'interval_lengths', 'mlp_input',
]
with torch.no_grad():
if (model.wdet3d == True) and (model.wocc == False) :
output_names=[f'output_{j}' for j in range(6 * len(model.pts_bbox_head.task_heads))]
elif (model.wdet3d == True) and (model.wocc == True) :
output_names=[f'output_{j}' for j in range(1 + 6 * len(model.pts_bbox_head.task_heads))]
elif (model.wdet3d == False) and (model.wocc == True) :
output_names=[f'output_{j}' for j in range(1)]
else:
raise(" At least one of wdet3d and wocc is set as True!! ")
# part1
from functools import partial
model.forward = partial(model.forward_part1,
mlp_input = mlp_input
)
onnx_path = args.work_dir + 'part1.onnx'
torch.onnx.export(
model,
(img.float().contiguous(),),
onnx_path,
export_params=True,
opset_version=11,
input_names=['img'],
output_names=['tran_feat','depth'])
# check onnx model
onnx_model = onnx.load(onnx_path)
try:
onnx.checker.check_model(onnx_model)
except Exception:
print('ONNX Model Incorrect')
else:
print('ONNX Model Correct')
model_file = 'model.onnx'
onnx.save(onnx.shape_inference.infer_shapes(onnx_model), onnx_path)
print('====== onnx is saved at : ', onnx_path)
tran_feat, depth = model.forward(img)
# # from onnxsim import simplify
# # model_simp, check = simplify(onnx_model)
# # assert check, "Simplified ONNX model could not be validated"
# # onnx.save(model_simp, onnx_path)
# # print('====== onnx is saved at : ', onnx_path)
# part2
from functools import partial
model.forward = partial(model.forward_part2,
ranks_depth = onnx_input[1],
ranks_feat = onnx_input[2],
ranks_bev = onnx_input[3],
interval_starts = onnx_input[4],
interval_lengths = onnx_input[5]
)
onnx_path = args.work_dir + 'part2.onnx'
torch.onnx.export(
model,
(tran_feat.float().contiguous(), depth.float().contiguous()),
onnx_path,
export_params=True,
opset_version=11,
input_names=['tran_feat','depth'],
output_names=['bev_feat'])
# check onnx model
onnx_model = onnx.load(onnx_path)
try:
onnx.checker.check_model(onnx_model)
except Exception:
print('ONNX Model Incorrect')
else:
print('ONNX Model Correct')
model_file = 'model.onnx'
onnx.save(onnx.shape_inference.infer_shapes(onnx_model), onnx_path)
print('====== onnx is saved at : ', onnx_path)
bev_pool_feat = model.forward(tran_feat, depth)
# # from onnxsim import simplify
# # model_simp, check = simplify(onnx_model)
# # assert check, "Simplified ONNX model could not be validated"
# # onnx.save(model_simp, onnx_path)
# # print('====== onnx is saved at : ', onnx_path)
# part3
model.forward = model.forward_part3
onnx_path = args.work_dir + 'part3.onnx'
torch.onnx.export(
model,
(bev_pool_feat,),
onnx_path,
export_params=True,
opset_version=11,
input_names=['bev_feat'],
output_names=['occ_pred', 'inst_center_reg', 'inst_center_height', 'inst_center_heatmap'])
# check onnx model
onnx_model = onnx.load(onnx_path)
try:
onnx.checker.check_model(onnx_model)
except Exception:
print('ONNX Model Incorrect')
else:
print('ONNX Model Correct')
model_file = 'model.onnx'
onnx.save(onnx.shape_inference.infer_shapes(onnx_model), onnx_path)
print('====== onnx is saved at : ', onnx_path)
# # from onnxsim import simplify
# # model_simp, check = simplify(onnx_model)
# # assert check, "Simplified ONNX model could not be validated"
# # onnx.save(model_simp, onnx_path)
# # print('====== onnx is saved at : ', onnx_path)
from functools import partial
model.forward = partial(model.forward_ori,
ranks_depth = onnx_input[1],
ranks_feat = onnx_input[2],
ranks_bev = onnx_input[3],
interval_starts = onnx_input[4],
interval_lengths = onnx_input[5],
mlp_input = mlp_input
)
torch.onnx.export(
model,
(onnx_input[0],),
args.work_dir + model_prefix + '.onnx',
opset_version=11,
dynamic_axes=dynamic_axes,
input_names=['img'],
output_names=['occ_pred', 'inst_center_reg', 'inst_center_height', 'inst_center_heatmap']
)
print('output_names:', output_names)
print('====== onnx is saved at : ', args.work_dir + model_prefix + '.onnx')
# check onnx model
onnx_model = onnx.load(args.work_dir + model_prefix + '.onnx')
try:
onnx.checker.check_model(onnx_model)
except Exception:
print('ONNX Model Incorrect')
else:
print('ONNX Model Correct')
outs = model.forward(onnx_input[0])
model.forward = model.forward_with_argmax
out = model(*onnx_input)
output_names = [f'cls_occ_label']
torch.onnx.export(
model,
onnx_input,
args.work_dir + model_prefix + '_with_argmax.onnx',
opset_version=11,
dynamic_axes=dynamic_axes,
input_names=input_names,
output_names=output_names)
print('output_names:', output_names)
print('====== onnx is saved at : ', args.work_dir + model_prefix + '_with_argmax.onnx')
# check onnx model
onnx_model = onnx.load(args.work_dir + model_prefix + '_with_argmax.onnx')
try:
onnx.checker.check_model(onnx_model)
except Exception:
print('ONNX Model Incorrect')
else:
print('ONNX Model Correct')
break
return
# Script entry point: call main() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import warnings
from os import path as osp
from pathlib import Path
import mmcv
import numpy as np
from mmcv import Config, DictAction, mkdir_or_exist
from mmdet3d.core.bbox import (Box3DMode, CameraInstance3DBoxes, Coord3DMode,
DepthInstance3DBoxes, LiDARInstance3DBoxes)
from mmdet3d.core.visualizer import (show_multi_modality_result, show_result,
show_seg_result)
from mmdet3d.datasets import build_dataset
def parse_args():
    """Parse the command-line options of the dataset browsing tool.

    Returns:
        argparse.Namespace: Parsed options with the attributes ``config``,
            ``skip_type``, ``output_dir``, ``task``, ``aug``, ``online`` and
            ``cfg_options``.
    """
    cli = argparse.ArgumentParser(description='Browse a dataset')
    cli.add_argument('config', help='train config file path')
    # Pipeline step types that are dropped before visualization.
    cli.add_argument(
        '--skip-type',
        nargs='+',
        type=str,
        default=['Normalize'],
        help='skip some useless pipeline')
    cli.add_argument(
        '--output-dir',
        type=str,
        default=None,
        help='If there is no display interface, you can save it')
    cli.add_argument(
        '--task',
        choices=['det', 'seg', 'multi_modality-det', 'mono-det'],
        type=str,
        help='Determine the visualization method depending on the task.')
    cli.add_argument(
        '--aug',
        action='store_true',
        help='Whether to visualize augmented datasets or original dataset.')
    cli.add_argument(
        '--online',
        action='store_true',
        help='Whether to perform online visualization. Note that you often '
        'need a monitor to do so.')
    cli.add_argument(
        '--cfg-options',
        action=DictAction,
        nargs='+',
        help='override some settings in the used config, the key-value pair '
        'in xxx=yyy format will be merged into config file. If the value to '
        'be overwritten is a list, it should be like key="[a,b]" or key=a,b '
        'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" '
        'Note that the quotation marks are necessary and that no white space '
        'is allowed.')
    return cli.parse_args()
def build_data_cfg(config_path, skip_type, aug, cfg_options):
    """Load a config and set ``cfg.data.train.pipeline`` for visualization.

    Args:
        config_path (str): Path handed to ``Config.fromfile``.
        skip_type (Container[str]): Pipeline step types to leave out of the
            final pipeline.
        aug (bool): If true, show data produced by ``cfg.train_pipeline``;
            otherwise start from ``cfg.eval_pipeline`` and add the annotation
            loading / collecting steps of the train pipeline.
        cfg_options (dict | None): Overrides merged into the config when not
            ``None``.

    Returns:
        Config: The loaded config with the train dataset pipeline replaced.
    """
    cfg = Config.fromfile(config_path)
    if cfg_options is not None:
        cfg.merge_from_dict(cfg_options)

    # Unwrap dataset wrappers so that `cfg.data.train` is the inner dataset:
    # `RepeatDataset` -> its wrapped dataset, `ConcatDataset` -> first member.
    if cfg.data.train['type'] == 'RepeatDataset':
        cfg.data.train = cfg.data.train.dataset
    if cfg.data.train['type'] == 'ConcatDataset':
        cfg.data.train = cfg.data.train.datasets[0]
    train_data_cfg = cfg.data.train

    if aug:
        show_pipeline = cfg.train_pipeline
    else:
        show_pipeline = cfg.eval_pipeline
        train_steps = cfg.train_pipeline
        for idx in range(len(train_steps)):
            step = train_steps[idx]
            step_type = step['type']
            if step_type == 'LoadAnnotations3D':
                # Annotations are needed to draw the ground truth.
                show_pipeline.insert(idx, step)
            elif step_type == 'Collect3D':
                # Collect points as well as labels: replace a trailing
                # collect step of the eval pipeline, or add one.
                if show_pipeline[-1]['type'] == 'Collect3D':
                    show_pipeline[-1] = step
                else:
                    show_pipeline.append(step)

    kept_steps = []
    for step in show_pipeline:
        if step['type'] not in skip_type:
            kept_steps.append(step)
    train_data_cfg['pipeline'] = kept_steps
    return cfg
def to_depth_mode(points, bboxes):
    """Convert points and boxes from LiDAR to Depth coordinates.

    Either argument may be ``None``, in which case it is returned unchanged.
    The inputs themselves are not modified: a copy / clone is converted.

    Args:
        points: Point data supporting ``.copy()``, or ``None``.
        bboxes: Box data supporting ``.clone()``, or ``None``.

    Returns:
        tuple: ``(points, bboxes)`` after conversion.
    """
    depth_points = points
    depth_bboxes = bboxes
    if depth_points is not None:
        depth_points = Coord3DMode.convert_point(
            depth_points.copy(), Coord3DMode.LIDAR, Coord3DMode.DEPTH)
    if depth_bboxes is not None:
        depth_bboxes = Box3DMode.convert(
            depth_bboxes.clone(), Box3DMode.LIDAR, Box3DMode.DEPTH)
    return depth_points, depth_bboxes
def show_det_data(input, out_dir, show=False):
    """Visualize a 3D point cloud together with its ground-truth 3D boxes.

    Args:
        input (dict): One dataset sample holding ``img_metas``, ``points``
            and ``gt_bboxes_3d`` entries, each exposing its payload through
            ``._data``.
        out_dir (str | None): Output directory forwarded to ``show_result``.
        show (bool): Forwarded to ``show_result``. Default: False.
    """
    metas = input['img_metas']._data
    cloud = input['points']._data.numpy()
    boxes = input['gt_bboxes_3d']._data.tensor

    # The visualizer is fed Depth-mode data; convert everything else.
    already_depth = not (metas['box_mode_3d'] != Box3DMode.DEPTH)
    if not already_depth:
        cloud, boxes = to_depth_mode(cloud, boxes)

    # Name the output after the point cloud file, without its extension.
    base_name = osp.basename(metas['pts_filename'])
    stem = osp.splitext(base_name)[0]
    show_result(
        cloud, boxes.clone(), None, out_dir, stem, show=show, snapshot=True)
def show_seg_data(input, out_dir, show=False):
    """Visualize a 3D point cloud together with its segmentation mask.

    Args:
        input (dict): One dataset sample holding ``img_metas``, ``points``
            and ``pts_semantic_mask`` entries, each exposing its payload
            through ``._data``.
        out_dir (str | None): Output directory forwarded to
            ``show_seg_result``.
        show (bool): Forwarded to ``show_seg_result``. Default: False.
    """
    metas = input['img_metas']._data
    cloud = input['points']._data.numpy()
    seg_mask = input['pts_semantic_mask']._data.numpy()

    # Name the output after the point cloud file, without its extension.
    base_name = osp.basename(metas['pts_filename'])
    stem = osp.splitext(base_name)[0]

    palette = np.array(metas['PALETTE'])
    show_seg_result(
        cloud,
        seg_mask.copy(),
        None,
        out_dir,
        stem,
        palette,
        metas['ignore_index'],
        show=show,
        snapshot=True)
def show_proj_bbox_img(input, out_dir, show=False, is_nus_mono=False):
    """Visualize ground-truth 3D boxes projected onto the 2D image.

    Args:
        input (dict): One dataset sample holding ``gt_bboxes_3d``,
            ``img_metas`` and ``img`` entries, each exposing its payload
            through ``._data``.
        out_dir (str | None): Output directory forwarded to
            ``show_multi_modality_result``.
        show (bool): Forwarded to ``show_multi_modality_result``.
            Default: False.
        is_nus_mono (bool): Accepted for interface compatibility; not used
            by this function. Default: False.
    """
    boxes = input['gt_bboxes_3d']._data
    metas = input['img_metas']._data
    # Move axis 0 to the end (presumably CHW -> HWC) for drawing.
    image = input['img']._data.numpy().transpose(1, 2, 0)

    # Without any 3D ground-truth box only the image is shown.
    if boxes.tensor.shape[0] == 0:
        boxes = None
    file_name = Path(metas['filename']).name

    # (box class, key of the projection matrix in the metas, box mode)
    projections = (
        (DepthInstance3DBoxes, None, 'depth'),
        (LiDARInstance3DBoxes, 'lidar2img', 'lidar'),
        (CameraInstance3DBoxes, 'cam2img', 'camera'),
    )
    for box_cls, matrix_key, mode in projections:
        if not isinstance(boxes, box_cls):
            continue
        # Depth boxes are projected without an explicit matrix.
        proj_mat = None if matrix_key is None else metas[matrix_key]
        show_multi_modality_result(
            image,
            boxes,
            None,
            proj_mat,
            out_dir,
            file_name,
            box_mode=mode,
            img_metas=metas,
            show=show)
        return

    # Unknown box type (including the "no boxes" case): cannot project.
    warnings.warn(
        f'unrecognized gt box type {type(boxes)}, only show image')
    show_multi_modality_result(
        image, None, None, None, out_dir, file_name, show=show)
def main():
    """Browse the training dataset of a config and visualize every sample.

    Depending on ``--task`` each sample is drawn as 3D boxes on the point
    cloud, as boxes projected onto the image, as a segmentation mask, or as
    a combination of the first two.
    """
    args = parse_args()

    out_dir = args.output_dir
    if out_dir is not None:
        mkdir_or_exist(out_dir)

    cfg = build_data_cfg(args.config, args.skip_type, args.aug,
                         args.cfg_options)
    try:
        dataset = build_dataset(
            cfg.data.train, default_args=dict(filter_empty_gt=False))
    except TypeError:
        # Segmentation datasets do not accept the `filter_empty_gt` key.
        dataset = build_dataset(cfg.data.train)

    is_nus_mono = cfg.dataset_type == 'NuScenesMonoDataset'

    # Resolve the visualization mode once; `--task` is one of
    # 'det', 'seg', 'multi_modality-det', 'mono-det'.
    task = args.task
    draw_on_points = task in ['det', 'multi_modality-det']
    draw_on_image = task in ['multi_modality-det', 'mono-det']
    draw_seg_mask = task in ['seg']
    online = args.online

    progress_bar = mmcv.ProgressBar(len(dataset))
    for sample in dataset:
        if draw_on_points:
            # 3D boxes on the 3D point cloud
            show_det_data(sample, out_dir, show=online)
        if draw_on_image:
            # 3D boxes projected onto the 2D image
            show_proj_bbox_img(
                sample, out_dir, show=online, is_nus_mono=is_nus_mono)
        elif draw_seg_mask:
            # 3D segmentation mask on the 3D point cloud
            show_seg_data(sample, out_dir, show=online)
        progress_bar.update()


if __name__ == '__main__':
    main()
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import torch
from mmcv.runner import save_checkpoint
from torch import nn as nn
from mmdet3d.apis import init_model
def fuse_conv_bn(conv, bn):
    """Fold a batch norm layer into the convolution in front of it.

    At inference time batch norm is a per-channel affine transform built
    from its running statistics, so it can be merged into the weights and
    bias of the preceding convolution.

    Args:
        conv (nn.Module): Convolution whose ``weight`` / ``bias`` are
            replaced in place.
        bn (nn.Module): Batch norm layer providing ``weight``, ``bias``,
            ``running_mean``, ``running_var`` and ``eps``.

    Returns:
        nn.Module: The same ``conv`` object, now carrying fused parameters.
    """
    if conv.bias is None:
        # A bias-free conv behaves like one with an all-zero bias.
        bias = torch.zeros_like(bn.running_mean)
    else:
        bias = conv.bias
    kernel = conv.weight

    # Per-output-channel scale applied by the batch norm.
    scale = bn.weight / torch.sqrt(bn.running_var + bn.eps)
    per_channel = scale.reshape([conv.out_channels, 1, 1, 1])

    conv.weight = nn.Parameter(kernel * per_channel)
    conv.bias = nn.Parameter((bias - bn.running_mean) * scale + bn.bias)
    return conv
def fuse_module(m):
    """Recursively fuse Conv2d + batch norm pairs among a module's children.

    Children are visited in ``named_children()`` order. A batch norm is
    fused into the most recently seen, not yet fused ``nn.Conv2d`` of the
    same parent; the batch norm slot is then replaced by ``nn.Identity``.
    Children that are neither a conv nor a batch norm are processed
    recursively.

    Args:
        m (nn.Module): Module modified in place.

    Returns:
        nn.Module: The same module ``m``.
    """
    pending = None  # (name, module) of the latest conv awaiting a BN
    for child_name, child in m.named_children():
        if isinstance(child, (nn.BatchNorm2d, nn.SyncBatchNorm)):
            # Only a BN that follows a conv can be fused.
            if pending is not None:
                conv_name, conv = pending
                m._modules[conv_name] = fuse_conv_bn(conv, child)
                # Keep the slot (as a no-op) instead of deleting the BN so
                # the module structure changes as little as possible.
                m._modules[child_name] = nn.Identity()
                pending = None
        elif isinstance(child, nn.Conv2d):
            pending = (child_name, child)
        else:
            fuse_module(child)
    return m
def parse_args():
    """Parse the command-line options of the Conv/BN fusing tool.

    Returns:
        argparse.Namespace: Parsed options with the positional attributes
            ``config``, ``checkpoint`` and ``out``.
    """
    cli = argparse.ArgumentParser(
        description='fuse Conv and BN layers in a model')
    positionals = (
        ('config', 'config file path'),
        ('checkpoint', 'checkpoint file path'),
        ('out', 'output path of the converted model'),
    )
    for arg_name, arg_help in positionals:
        cli.add_argument(arg_name, help=arg_help)
    return cli.parse_args()
def main():
    """Build a model, fuse its Conv/BN layers and save the result."""
    cli_args = parse_args()
    # Build the model from a config file and a checkpoint file.
    detector = init_model(cli_args.config, cli_args.checkpoint)
    # Fuse in place, then write the fused model to the requested path.
    save_checkpoint(fuse_module(detector), cli_args.out)


if __name__ == '__main__':
    main()
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
from mmcv import Config, DictAction
def parse_args():
    """Parse the command-line options of the config printing tool.

    Returns:
        argparse.Namespace: Parsed options with the attributes ``config``
            and ``options``.
    """
    cli = argparse.ArgumentParser(description='Print the whole config')
    cli.add_argument('config', help='config file path')
    cli.add_argument(
        '--options', action=DictAction, nargs='+', help='arguments in dict')
    return cli.parse_args()
def main():
    """Load a config, apply the ``--options`` overrides and print it."""
    cli_args = parse_args()
    cfg = Config.fromfile(cli_args.config)
    overrides = cli_args.options
    if overrides is not None:
        cfg.merge_from_dict(overrides)
    print(f'Config:\n{cfg.pretty_text}')


if __name__ == '__main__':
    main()
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import mmcv
from mmcv import Config
from mmdet3d.datasets import build_dataset
def parse_args():
    """Parse the command-line options of the result visualization tool.

    Returns:
        argparse.Namespace: Parsed options with the attributes ``config``,
            ``result`` and ``show_dir``.
    """
    cli = argparse.ArgumentParser(
        description='MMDet3D visualize the results')
    cli.add_argument('config', help='test config file path')
    cli.add_argument('--result', help='results file in pickle format')
    cli.add_argument(
        '--show-dir', help='directory where visualize results will be saved')
    return cli.parse_args()
def main():
    """Load saved results and visualize them with the dataset's ``show``.

    Raises:
        ValueError: If ``--result`` is given but does not end with ``.pkl``
            or ``.pickle``.
        NotImplementedError: If the dataset does not provide ``show``.
    """
    cli_args = parse_args()

    result_path = cli_args.result
    if result_path is not None:
        if not result_path.endswith(('.pkl', '.pickle')):
            raise ValueError('The results file must be a pkl file.')

    cfg = Config.fromfile(cli_args.config)
    cfg.data.test.test_mode = True

    # Build the test dataset and load the stored results.
    dataset = build_dataset(cfg.data.test)
    results = mmcv.load(result_path)

    if getattr(dataset, 'show', None) is None:
        raise NotImplementedError(
            'Show is not implemented for dataset {}!'.format(
                type(dataset).__name__))

    # Prefer the config's data loading pipeline for showing; otherwise let
    # the dataset fall back to its default pipeline.
    eval_pipeline = cfg.get('eval_pipeline', {})
    if eval_pipeline:
        dataset.show(results, cli_args.show_dir, pipeline=eval_pipeline)
    else:
        dataset.show(results, cli_args.show_dir)


if __name__ == '__main__':
    main()
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import tempfile
import torch
from mmcv import Config
from mmcv.runner import load_state_dict
from mmdet3d.models import build_detector
def parse_args():
    """Parse the command-line options of the H3DNet checkpoint upgrader.

    Returns:
        argparse.Namespace: Parsed options with the attributes
            ``checkpoint`` and ``out``.
    """
    cli = argparse.ArgumentParser(
        description='MMDet3D upgrade model version(before v0.6.0) of H3DNet')
    cli.add_argument('checkpoint', help='checkpoint file')
    cli.add_argument('--out', help='path of the output checkpoint file')
    return cli.parse_args()
def parse_config(config_strings):
    """Parse an old H3DNet config from its text and upgrade it.

    The text is written to a temporary ``.py`` file so that
    ``Config.fromfile`` can load it, then the model config is updated in
    place to the newer layout (renamed / removed / added keys).

    Args:
        config_strings (string): strings of model config.

    Returns:
        Config: model config
    """
    # `Config.fromfile` needs a file on disk. Keep it inside a temporary
    # directory so it is always removed again (previously the `.py` file
    # written next to a NamedTemporaryFile was left behind on every run).
    with tempfile.TemporaryDirectory() as temp_dir:
        config_path = f'{temp_dir}/checkpoint_config.py'
        with open(config_path, 'w') as f:
            f.write(config_strings)
        config = Config.fromfile(config_path)

    # Update backbone config
    if 'pool_mod' in config.model.backbone.backbones:
        config.model.backbone.backbones.pop('pool_mod')

    if 'sa_cfg' not in config.model.backbone:
        config.model.backbone['sa_cfg'] = dict(
            type='PointSAModule',
            pool_mod='max',
            use_xyz=True,
            normalize_xyz=True)

    if 'type' not in config.model.rpn_head.vote_aggregation_cfg:
        config.model.rpn_head.vote_aggregation_cfg['type'] = 'PointSAModule'

    # Update rpn_head config
    if 'pred_layer_cfg' not in config.model.rpn_head:
        config.model.rpn_head['pred_layer_cfg'] = dict(
            in_channels=128, shared_conv_channels=(128, 128), bias=True)

    if 'feat_channels' in config.model.rpn_head:
        config.model.rpn_head.pop('feat_channels')

    # Old configs carry the misspelled key `vote_moudule_cfg`.
    if 'vote_moudule_cfg' in config.model.rpn_head:
        config.model.rpn_head['vote_module_cfg'] = config.model.rpn_head.pop(
            'vote_moudule_cfg')

    # With `use_xyz` the first MLP channel count is lowered by 3.
    if config.model.rpn_head.vote_aggregation_cfg.use_xyz:
        config.model.rpn_head.vote_aggregation_cfg.mlp_channels[0] -= 3

    # Update every primitive head of the roi_head
    for cfg in config.model.roi_head.primitive_list:
        cfg['vote_module_cfg'] = cfg.pop('vote_moudule_cfg')
        cfg.vote_aggregation_cfg.mlp_channels[0] -= 3
        if 'type' not in cfg.vote_aggregation_cfg:
            cfg.vote_aggregation_cfg['type'] = 'PointSAModule'

    # Update the matching modules of the roi_head bbox_head
    # (`suface_matching_cfg` is the key's actual spelling in the config).
    if 'type' not in config.model.roi_head.bbox_head.suface_matching_cfg:
        config.model.roi_head.bbox_head.suface_matching_cfg[
            'type'] = 'PointSAModule'

    if config.model.roi_head.bbox_head.suface_matching_cfg.use_xyz:
        config.model.roi_head.bbox_head.suface_matching_cfg.mlp_channels[
            0] -= 3

    if 'type' not in config.model.roi_head.bbox_head.line_matching_cfg:
        config.model.roi_head.bbox_head.line_matching_cfg[
            'type'] = 'PointSAModule'

    if config.model.roi_head.bbox_head.line_matching_cfg.use_xyz:
        config.model.roi_head.bbox_head.line_matching_cfg.mlp_channels[0] -= 3

    if 'proposal_module_cfg' in config.model.roi_head.bbox_head:
        config.model.roi_head.bbox_head.pop('proposal_module_cfg')

    return config
def main():
    """Convert keys in checkpoints for H3DNet.

    There can be some breaking changes during the development of
    mmdetection3d, and this tool is used for upgrading checkpoints trained
    with old versions (before v0.6.0) to the latest one.

    The converted state dict is validated by loading it strictly into a
    model built from the upgraded config, then saved to ``--out``.
    """
    args = parse_args()
    checkpoint = torch.load(args.checkpoint)
    cfg = parse_config(checkpoint['meta']['config'])

    # Build the model the converted weights are checked against.
    model = build_detector(
        cfg.model,
        train_cfg=cfg.get('train_cfg'),
        test_cfg=cfg.get('test_cfg'))

    orig_ckpt = checkpoint['state_dict']
    converted_ckpt = orig_ckpt.copy()

    if cfg['dataset_type'] == 'ScanNetDataset':
        num_classes = 18
    elif cfg['dataset_type'] == 'SUNRGBDDataset':
        num_classes = 10
    else:
        raise NotImplementedError

    # old key prefix -> new key prefix
    rename_prefix = {
        'rpn_head.conv_pred.0': 'rpn_head.conv_pred.shared_convs.layer0',
        'rpn_head.conv_pred.1': 'rpn_head.conv_pred.shared_convs.layer1'
    }
    # keys that are dropped
    del_keys = [
        'rpn_head.conv_pred.0.bn.num_batches_tracked',
        'rpn_head.conv_pred.1.bn.num_batches_tracked'
    ]
    # new key -> (old key, [(start, end), ...]) row ranges of the old tensor
    # that are concatenated; an `end` of -1 means "up to the last row".
    cls_spans = [(0, 2), (-num_classes, -1)]
    reg_spans = [(2, -num_classes)]
    extract_keys = {
        'rpn_head.conv_pred.conv_cls.weight':
        ('rpn_head.conv_pred.conv_out.weight', cls_spans),
        'rpn_head.conv_pred.conv_cls.bias':
        ('rpn_head.conv_pred.conv_out.bias', cls_spans),
        'rpn_head.conv_pred.conv_reg.weight':
        ('rpn_head.conv_pred.conv_out.weight', reg_spans),
        'rpn_head.conv_pred.conv_reg.bias':
        ('rpn_head.conv_pred.conv_out.bias', reg_spans)
    }

    # Delete some useless keys
    for useless_key in del_keys:
        converted_ckpt.pop(useless_key)

    # Rename keys with specific prefix: collect first, then apply, so the
    # dict is not modified while it is being iterated.
    renames = {
        old_key.replace(prefix, rename_prefix[prefix]): old_key
        for old_key in converted_ckpt
        for prefix in rename_prefix
        if prefix in old_key
    }
    for new_key, old_key in renames.items():
        converted_ckpt[new_key] = converted_ckpt.pop(old_key)

    # Extract weights and rename the keys
    for new_key, (old_key, spans) in extract_keys.items():
        source = orig_ckpt[old_key]
        pieces = [
            source[start:] if end == -1 else source[start:end]
            for (start, end) in spans
        ]
        converted_ckpt[new_key] = torch.cat(pieces, 0)
        converted_ckpt.pop(old_key, None)

    # Check the converted checkpoint by loading to the model
    load_state_dict(model, converted_ckpt, strict=True)
    checkpoint['state_dict'] = converted_ckpt
    torch.save(checkpoint, args.out)


if __name__ == '__main__':
    main()
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import tempfile
import torch
from mmcv import Config
from mmcv.runner import load_state_dict
from mmdet3d.models import build_detector
def parse_args():
    """Parse the command-line options of the VoteNet checkpoint upgrader.

    Returns:
        argparse.Namespace: Parsed options with the attributes
            ``checkpoint`` and ``out``.
    """
    cli = argparse.ArgumentParser(
        description='MMDet3D upgrade model version(before v0.6.0) of VoteNet')
    cli.add_argument('checkpoint', help='checkpoint file')
    cli.add_argument('--out', help='path of the output checkpoint file')
    return cli.parse_args()
def parse_config(config_strings):
    """Parse an old VoteNet config from its text and upgrade it.

    The text is written to a temporary ``.py`` file so that
    ``Config.fromfile`` can load it, then the model config is updated in
    place to the newer layout (renamed / removed / added keys).

    Args:
        config_strings (string): strings of model config.

    Returns:
        Config: model config
    """
    # `Config.fromfile` needs a file on disk. Keep it inside a temporary
    # directory so it is always removed again (previously the `.py` file
    # written next to a NamedTemporaryFile was left behind on every run).
    with tempfile.TemporaryDirectory() as temp_dir:
        config_path = f'{temp_dir}/checkpoint_config.py'
        with open(config_path, 'w') as f:
            f.write(config_strings)
        config = Config.fromfile(config_path)

    # Update backbone config
    if 'pool_mod' in config.model.backbone:
        config.model.backbone.pop('pool_mod')

    if 'sa_cfg' not in config.model.backbone:
        config.model.backbone['sa_cfg'] = dict(
            type='PointSAModule',
            pool_mod='max',
            use_xyz=True,
            normalize_xyz=True)

    if 'type' not in config.model.bbox_head.vote_aggregation_cfg:
        config.model.bbox_head.vote_aggregation_cfg['type'] = 'PointSAModule'

    # Update bbox_head config
    if 'pred_layer_cfg' not in config.model.bbox_head:
        config.model.bbox_head['pred_layer_cfg'] = dict(
            in_channels=128, shared_conv_channels=(128, 128), bias=True)

    if 'feat_channels' in config.model.bbox_head:
        config.model.bbox_head.pop('feat_channels')

    # Old configs carry the misspelled key `vote_moudule_cfg`.
    if 'vote_moudule_cfg' in config.model.bbox_head:
        config.model.bbox_head['vote_module_cfg'] = config.model.bbox_head.pop(
            'vote_moudule_cfg')

    # With `use_xyz` the first MLP channel count is lowered by 3.
    if config.model.bbox_head.vote_aggregation_cfg.use_xyz:
        config.model.bbox_head.vote_aggregation_cfg.mlp_channels[0] -= 3

    return config
def main():
    """Convert keys in checkpoints for VoteNet.

    There can be some breaking changes during the development of
    mmdetection3d, and this tool is used for upgrading checkpoints trained
    with old versions (before v0.6.0) to the latest one.

    The converted state dict is validated by loading it strictly into a
    model built from the upgraded config, then saved to ``--out``.
    """
    args = parse_args()
    checkpoint = torch.load(args.checkpoint)
    cfg = parse_config(checkpoint['meta']['config'])

    # Build the model the converted weights are checked against.
    model = build_detector(
        cfg.model,
        train_cfg=cfg.get('train_cfg'),
        test_cfg=cfg.get('test_cfg'))

    orig_ckpt = checkpoint['state_dict']
    converted_ckpt = orig_ckpt.copy()

    if cfg['dataset_type'] == 'ScanNetDataset':
        num_classes = 18
    elif cfg['dataset_type'] == 'SUNRGBDDataset':
        num_classes = 10
    else:
        raise NotImplementedError

    # old key prefix -> new key prefix
    rename_prefix = {
        'bbox_head.conv_pred.0': 'bbox_head.conv_pred.shared_convs.layer0',
        'bbox_head.conv_pred.1': 'bbox_head.conv_pred.shared_convs.layer1'
    }
    # keys that are dropped
    del_keys = [
        'bbox_head.conv_pred.0.bn.num_batches_tracked',
        'bbox_head.conv_pred.1.bn.num_batches_tracked'
    ]
    # new key -> (old key, [(start, end), ...]) row ranges of the old tensor
    # that are concatenated; an `end` of -1 means "up to the last row".
    cls_spans = [(0, 2), (-num_classes, -1)]
    reg_spans = [(2, -num_classes)]
    extract_keys = {
        'bbox_head.conv_pred.conv_cls.weight':
        ('bbox_head.conv_pred.conv_out.weight', cls_spans),
        'bbox_head.conv_pred.conv_cls.bias':
        ('bbox_head.conv_pred.conv_out.bias', cls_spans),
        'bbox_head.conv_pred.conv_reg.weight':
        ('bbox_head.conv_pred.conv_out.weight', reg_spans),
        'bbox_head.conv_pred.conv_reg.bias':
        ('bbox_head.conv_pred.conv_out.bias', reg_spans)
    }

    # Delete some useless keys
    for useless_key in del_keys:
        converted_ckpt.pop(useless_key)

    # Rename keys with specific prefix: collect first, then apply, so the
    # dict is not modified while it is being iterated.
    renames = {
        old_key.replace(prefix, rename_prefix[prefix]): old_key
        for old_key in converted_ckpt
        for prefix in rename_prefix
        if prefix in old_key
    }
    for new_key, old_key in renames.items():
        converted_ckpt[new_key] = converted_ckpt.pop(old_key)

    # Extract weights and rename the keys
    for new_key, (old_key, spans) in extract_keys.items():
        source = orig_ckpt[old_key]
        pieces = [
            source[start:] if end == -1 else source[start:end]
            for (start, end) in spans
        ]
        converted_ckpt[new_key] = torch.cat(pieces, 0)
        converted_ckpt.pop(old_key, None)

    # Check the converted checkpoint by loading to the model
    load_state_dict(model, converted_ckpt, strict=True)
    checkpoint['state_dict'] = converted_ckpt
    torch.save(checkpoint, args.out)


if __name__ == '__main__':
    main()
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import subprocess
import torch
def parse_args():
    """Parse the command-line options of the checkpoint publishing tool.

    Returns:
        argparse.Namespace: Parsed options with the positional attributes
            ``in_file`` and ``out_file``.
    """
    cli = argparse.ArgumentParser(
        description='Process a checkpoint to be published')
    for arg_name, arg_help in (
            ('in_file', 'input checkpoint filename'),
            ('out_file', 'output checkpoint filename'),
    ):
        cli.add_argument(arg_name, help=arg_help)
    return cli.parse_args()
def process_checkpoint(in_file, out_file):
    """Strip a checkpoint for publishing and tag it with its hash.

    The checkpoint is loaded on CPU, its ``optimizer`` entry (if any) is
    removed, and the result is saved to ``out_file``. The saved file is then
    renamed to ``<out_file without .pth>-<first 8 sha256 hex chars>.pth``.

    Args:
        in_file (str): Path of the input checkpoint.
        out_file (str): Path of the output checkpoint before the hash suffix
            is added.

    Raises:
        subprocess.CalledProcessError: If ``sha256sum`` or ``mv`` fails.
    """
    checkpoint = torch.load(in_file, map_location='cpu')
    # remove optimizer for smaller file size
    if 'optimizer' in checkpoint:
        del checkpoint['optimizer']
    # if it is necessary to remove some sensitive data in checkpoint['meta'],
    # add the code here.
    torch.save(checkpoint, out_file)
    sha = subprocess.check_output(['sha256sum', out_file]).decode()
    # Drop the extension as a suffix. `str.rstrip('.pth')` would instead
    # strip every trailing '.', 'p', 't' or 'h' character and mangle names
    # such as 'epoch.pth' (-> 'epoc').
    if out_file.endswith('.pth'):
        out_stem = out_file[:-len('.pth')]
    else:
        out_stem = out_file
    final_file = out_stem + '-{}.pth'.format(sha[:8])
    # Wait for the rename to finish and fail loudly if it does not succeed.
    subprocess.check_call(['mv', out_file, final_file])
def main():
    """Publish the checkpoint named on the command line."""
    cli_args = parse_args()
    source_path, target_path = cli_args.in_file, cli_args.out_file
    process_checkpoint(source_path, target_path)


if __name__ == '__main__':
    main()
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
from collections import OrderedDict
import torch
def convert_stem(model_key, model_weight, state_dict, converted_names):
    """Store a stem weight under its converted key.

    ``stem.conv`` is renamed to ``conv1`` and ``stem.bn`` to ``bn1``.

    Args:
        model_key (str): Key of the weight in the source model.
        model_weight: Weight stored under the converted key.
        state_dict (dict): Target mapping, updated in place.
        converted_names (set): Set of handled source keys, updated in place.
    """
    renamed = model_key
    for old_part, new_part in (('stem.conv', 'conv1'), ('stem.bn', 'bn1')):
        renamed = renamed.replace(old_part, new_part)
    state_dict[renamed] = model_weight
    converted_names.add(model_key)
    print(f'Convert {model_key} to {renamed}')
def convert_head(model_key, model_weight, state_dict, converted_names):
    """Store a head weight under its converted key.

    ``head.fc`` is renamed to ``fc``.

    Args:
        model_key (str): Key of the weight in the source model.
        model_weight: Weight stored under the converted key.
        state_dict (dict): Target mapping, updated in place.
        converted_names (set): Set of handled source keys, updated in place.
    """
    renamed = 'fc'.join(model_key.split('head.fc'))
    state_dict[renamed] = model_weight
    converted_names.add(model_key)
    print(f'Convert {model_key} to {renamed}')
def convert_reslayer(model_key, model_weight, state_dict, converted_names):
    """Store a residual-stage weight under its converted key.

    The source key is split on ``.`` into ``<layer>.<block>.<module>...``;
    the numbers following the first character of ``<layer>`` and ``<block>``
    give the stage and the 1-based block index. The result is
    ``layer<stage>.<block - 1>.<target module>.<last key part>``.

    Args:
        model_key (str): Key of the weight in the source model.
        model_weight: Weight stored under the converted key.
        state_dict (dict): Target mapping, updated in place.
        converted_names (set): Set of handled source keys, updated in place.

    Raises:
        ValueError: If the key cannot be converted. This now also covers an
            unknown sub-module below ``f``, which used to escape as an
            ``UnboundLocalError``.
    """
    # Sub-module name below `f` in the source -> target module name.
    f_module_names = {
        'a_bn': 'bn1',
        'b_bn': 'bn2',
        'c_bn': 'bn3',
        'a': 'conv1',
        'b': 'conv2',
        'c': 'conv3',
    }

    split_keys = model_key.split('.')
    layer, block, module = split_keys[:3]
    block_id = int(block[1:])
    layer_name = f'layer{int(layer[1:])}'
    block_name = f'{block_id - 1}'
    param_name = split_keys[-1]

    if block_id == 1 and module == 'bn':
        # Batch norm of the first block's projection shortcut.
        new_key = f'{layer_name}.{block_name}.downsample.1.{param_name}'
    elif block_id == 1 and module == 'proj':
        # Projection of the first block's shortcut.
        new_key = f'{layer_name}.{block_name}.downsample.0.{param_name}'
    elif (module == 'f' and len(split_keys) > 3
          and split_keys[3] in f_module_names):
        module_name = f_module_names[split_keys[3]]
        new_key = f'{layer_name}.{block_name}.{module_name}.{param_name}'
    else:
        raise ValueError(f'Unsupported conversion of key {model_key}')
    print(f'Convert {model_key} to {new_key}')
    state_dict[new_key] = model_weight
    converted_names.add(model_key)
def convert(src, dst):
    """Convert keys in pycls pretrained RegNet models to mmdet style.

    The weights under ``model_state`` of the source checkpoint are renamed
    key by key and saved as ``{'state_dict': ...}``. Keys that match none of
    the converters are reported on stdout and left out.

    Args:
        src (str): Path of the source checkpoint.
        dst (str): Path the converted checkpoint is saved to.
    """
    # Load the source checkpoint and pick its weights.
    source_weights = torch.load(src)['model_state']

    # Convert to the target key layout.
    state_dict = OrderedDict()
    converted_names = set()
    for key, weight in source_weights.items():
        if 'stem' in key:
            converter = convert_stem
        elif 'head' in key:
            converter = convert_head
        elif key.startswith('s'):
            converter = convert_reslayer
        else:
            continue
        converter(key, weight, state_dict, converted_names)

    # Report every weight that was not converted.
    for key in source_weights:
        if key in converted_names:
            continue
        print(f'not converted: {key}')

    # Save the converted checkpoint.
    torch.save({'state_dict': state_dict}, dst)
def main():
    """Read the source / destination paths and run the key conversion."""
    cli = argparse.ArgumentParser(description='Convert model keys')
    for arg_name, arg_help in (
            ('src', 'src detectron model path'),
            ('dst', 'save path'),
    ):
        cli.add_argument(arg_name, help=arg_help)
    cli_args = cli.parse_args()
    convert(cli_args.src, cli_args.dst)


if __name__ == '__main__':
    main()
import matplotlib.pyplot as plt
import mpl_toolkits.axisartist as axisartist
# One figure holding three side-by-side panels.
# (Earlier stacked alternative: plt.subplots(3, 1, figsize=(5, 12)).)
fig, _ = plt.subplots(nrows=1, ncols=3, figsize=(15, 5))
fig.set_tight_layout(True)

# Global default colour cycle.
plt.rcParams['axes.prop_cycle'] = plt.cycler(color=['steelblue'])

# ---- Panel 3 of 3: RayPQ against FPS ----
# (Stacked alternative: plt.subplot(3, 1, 3).)
plt.subplot(1, 3, 3)

# Optional axisartist styling, kept disabled:
# ax = axisartist.Subplot(fig, 111)
# # add the axes object to the canvas
# fig.add_axes(ax)
# # draw the bottom and left axes as arrows
# # ("-|>" is a filled arrow, "->" an open arrow)
# ax.axis["bottom"].set_axisline_style("->", size = 1.5)
# ax.axis["left"].set_axisline_style("->", size = 1.5)
# # hide the top and right axes
# ax.axis["top"].set_visible(False)
# ax.axis["right"].set_visible(False)

# Font size of the point labels; the later panels read this name too.
fontsize_ = 10

# Each series: point coordinates, label text, marker colour, per-label
# placement as (x offset, y offset, ha, va), and whether the points are
# joined by a line.
pq_series = [
    {
        # SparseOcc
        'xs': [17.3],
        'ys': [14.1],
        'names': ['SparseOcc(8f)'],
        'color': 'dodgerblue',
        'placement': [(1.5, -0.1, 'center', 'top')],
        'connect': False,
    },
    {
        # Panoptic-FlashOcc
        # FPS values are the ones tagged "a100-80g"; the disabled set
        # tagged "3090" was [29.0, 22.6, 22.0, 20.3].
        'xs': [39.8, 35.2, 30.4, 30.2],
        # Disabled earlier values: [12.6, 12.9, 14.2, 15.8].
        'ys': [12.89, 13.18, 14.52, 15.96],
        'names': [
            'Panoptic-\nFlashOcc-Tiny(1f)',
            'Panoptic-\nFlashOcc(1f)',
            'Panoptic-\nFlashOcc(2f)',
            'Panoptic-\nFlashOcc(8f)',
        ],
        'color': 'orange',
        'placement': [
            (-3.5, 0.0, 'center', 'top'),
            (-3.0, 0.3, 'center', 'top'),
            (0.4, 0, 'left', 'bottom'),
            (2., -0.1, 'center', 'bottom'),
        ],
        'connect': True,
    },
]

for series in pq_series:
    plt.scatter(series['xs'], series['ys'], color=series['color'])
    # Annotate every point with its label.
    for px, py, name, (dx, dy, h_align, v_align) in zip(
            series['xs'], series['ys'], series['names'],
            series['placement']):
        plt.text(px + dx, py + dy, name,
                 fontsize=fontsize_, ha=h_align, va=v_align)
    if series['connect']:
        # Join the scattered points with a solid line.
        plt.plot(series['xs'], series['ys'],
                 color=series['color'], linestyle='-')

plt.grid(True)
plt.grid(color='gray', linestyle='--', linewidth=1, alpha=0.3)

# Font used for the axis labels.
axis_label_font = dict(
    family='times new roman',
    color='black',
    weight='normal',
    size=16,
)
plt.xlabel('FPS (Hz)', fontdict=axis_label_font)
plt.ylabel('Occ3D-nuScenes (RayPQ)', fontdict=axis_label_font)

# y-axis range and ticks.
plt.ylim(11.5, 16.5)
plt.yticks([12, 13, 14, 15, 16])
# x-axis range and ticks.
# (Disabled alternatives: xlim(16, 40), xticks([15, 20, 25, 30, 35, 40]).)
plt.xlim(0, 50)
plt.xticks([10, 20, 30, 40, 50])
# ---- Panel 2 of 3: mIoU against FPS ----
# (Stacked alternative: plt.subplot(3, 1, 2).)
plt.subplot(1, 3, 2)

# Each series: point coordinates, label text, marker colour, per-label
# placement as (x offset, y offset, ha, va), and whether the points are
# joined by a line.
miou_series = [
    {
        # BEVFormer
        'xs': [3.0],
        'ys': [23.7],
        'names': ['BEVFormer'],
        'color': 'dodgerblue',
        'placement': [(3.2, 0.2, 'center', 'top')],
        'connect': False,
    },
    {
        # FB-Occ
        'xs': [10.3],
        'ys': [27.9],
        'names': ['FB-Occ'],
        'color': 'dodgerblue',
        'placement': [(0, -0.2, 'center', 'top')],
        'connect': False,
    },
    {
        # SparseOcc
        'xs': [17.3, 12.5],
        'ys': [30.3, 30.9],
        'names': ['SparseOcc(8f)', 'SparseOcc(16f)'],
        'color': 'dodgerblue',
        'placement': [
            (0, -0.2, 'center', 'top'),
            (0, 0.2, 'center', 'bottom'),
        ],
        'connect': True,
    },
    {
        # Panoptic-FlashOcc
        # FPS values are the ones tagged "a100 80g"; the disabled set
        # tagged "3090" was [29.0, 22.6, 22.0, 20.3].
        'xs': [43.9, 38.7, 35.9, 35.6],
        'ys': [29.1, 29.4, 30.3, 31.6],
        'names': [
            'Panoptic-\nFlashOcc-Tiny(1f)',
            'Panoptic-\nFlashOcc(1f)',
            'Panoptic-\nFlashOcc(2f)',
            'Panoptic-\nFlashOcc(8f)',
        ],
        'color': 'orange',
        'placement': [
            (-2.0, -0.2, 'center', 'top'),
            (-3.5, 0.4, 'center', 'top'),
            (-7.0, -0.3, 'left', 'bottom'),
            (-4.0, -0.5, 'center', 'bottom'),
        ],
        'connect': True,
    },
]

for series in miou_series:
    plt.scatter(series['xs'], series['ys'], color=series['color'])
    # Annotate every point with its label.
    for px, py, name, (dx, dy, h_align, v_align) in zip(
            series['xs'], series['ys'], series['names'],
            series['placement']):
        plt.text(px + dx, py + dy, name,
                 fontsize=fontsize_, ha=h_align, va=v_align)
    if series['connect']:
        # Join the scattered points with a solid line.
        plt.plot(series['xs'], series['ys'],
                 color=series['color'], linestyle='-')

plt.grid(True)
plt.grid(color='gray', linestyle='--', linewidth=1, alpha=0.3)

# Font used for the axis labels.
axis_label_font = dict(
    family='times new roman',
    color='black',
    weight='normal',
    size=16,
)
plt.xlabel('FPS (Hz)', fontdict=axis_label_font)
plt.ylabel('Occ3D-nuScenes (mIoU)', fontdict=axis_label_font)

# y-axis range and ticks.
plt.ylim(23, 33)
plt.yticks([24, 26, 28, 30, 32])
# x-axis range and ticks.
# (Disabled alternatives: xticks([5, 10, 15, 20, 25, 30, 35]) and
# xticks([15, 20, 25, 30, 35, 40, 45]).)
# NOTE(review): the tick at 50 lies beyond the upper x limit of 45 set
# here; matplotlib may widen the axis to show it - confirm the intended
# range.
plt.xlim(2, 45)
plt.xticks([10, 20, 30, 40, 50])
# ---- Panel 1 of 3: RayIoU against FPS ----
# (Stacked alternative: plt.subplot(3, 1, 1).)
plt.subplot(1, 3, 1)

# Each series: point coordinates, label text, marker colour, per-label
# placement as (x offset, y offset, ha, va), and whether the points are
# joined by a line.
rayiou_series = [
    {
        # Single-point baselines, drawn as one scatter without a line.
        'xs': [2.1, 5.4, 3.2, 7.6],
        'ys': [32.4, 29.6, 32.6, 33.5],
        'names': [
            'BEVFormer',
            'BEVDet-Occ',
            'BEVDet-Occ-\nLongterm',
            'FB-Occ',
        ],
        'color': 'dodgerblue',
        # Label positions are nudged individually per method.
        'placement': [
            (2.3, -0.2, 'center', 'top'),
            (0.2, 0.5, 'center', 'top'),
            (5.3, -0.2, 'center', 'bottom'),
            (0.2, 0.4, 'center', 'top'),
        ],
        'connect': False,
    },
    {
        # SparseOcc
        'xs': [17.3, 12.5],
        'ys': [34.0, 35.1],
        'names': ['SparseOcc(8f)', 'SparseOcc(16f)'],
        'color': 'dodgerblue',
        'placement': [
            (0, -0.2, 'center', 'top'),
            (0, 0.2, 'center', 'bottom'),
        ],
        'connect': True,
    },
    {
        # Panoptic-FlashOcc
        # FPS values are the ones tagged "a100 80g"; the disabled
        # alternative set was [29.0, 22.6, 22.0, 20.3].
        'xs': [43.9, 38.7, 35.9, 35.6],
        'ys': [34.81, 35.22, 36.76, 38.50],
        'names': [
            'Panoptic-\nFlashOcc-Tiny(1f)',
            'Panoptic-\nFlashOcc(1f)',
            'Panoptic-\nFlashOcc(2f)',
            'Panoptic-\nFlashOcc(8f)',
        ],
        'color': 'orange',
        'placement': [
            (-4.0, 0.0, 'center', 'top'),
            (-4.2, 0.4, 'center', 'top'),
            (-8.5, -0.3, 'left', 'bottom'),
            (-4.0, -0.5, 'center', 'bottom'),
        ],
        'connect': True,
    },
]

for series in rayiou_series:
    plt.scatter(series['xs'], series['ys'], color=series['color'])
    # Annotate every point with its label.
    for px, py, name, (dx, dy, h_align, v_align) in zip(
            series['xs'], series['ys'], series['names'],
            series['placement']):
        plt.text(px + dx, py + dy, name,
                 fontsize=fontsize_, ha=h_align, va=v_align)
    if series['connect']:
        # Join the scattered points with a solid line.
        plt.plot(series['xs'], series['ys'],
                 color=series['color'], linestyle='-')

plt.grid(True)
plt.grid(color='gray', linestyle='--', linewidth=1, alpha=0.3)

# Font used for the axis labels.
axis_label_font = dict(
    family='times new roman',
    color='black',
    weight='normal',
    size=16,
)
plt.xlabel('FPS (Hz)', fontdict=axis_label_font)
plt.ylabel('Occ3D-nuScenes (RayIoU)', fontdict=axis_label_font)

# y-axis range and ticks.
plt.ylim(29, 39)
plt.yticks([30, 32, 34, 36, 38])
# x-axis range and ticks.
# (Disabled alternative: xticks([0, 5, 10, 15, 20, 25, 30, 35]).)
# NOTE(review): the tick at 50 lies beyond the upper x limit of 45 set
# here; matplotlib may widen the axis to show it - confirm the intended
# range.
plt.xlim(0, 45)
plt.xticks([10, 20, 30, 40, 50])

# Save the figure in both raster and vector form.
for output_name in ('scatter_plot.png', 'scatter_plot.pdf'):
    plt.savefig(output_name)

# Display the figure.
plt.show()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment