Commit d2b71343 authored by 雍大凯's avatar 雍大凯
Browse files

add code

parent 69e57885
# Copyright (c) OpenMMLab. All rights reserved.
import sys
import argparse
import time
import os
import torch
from mmcv import Config
from mmcv.parallel import MMDataParallel
from mmcv.runner import load_checkpoint, wrap_fp16_model
from mmdet3d.datasets import build_dataloader, build_dataset
from mmdet3d.models import build_detector
from tools.misc.fuse_conv_bn import fuse_module
sys.path.insert(0, os.getcwd())
print(sys.path)
def parse_args():
    """Parse the command-line arguments of the inference benchmark.

    Returns:
        argparse.Namespace: ``config`` and ``checkpoint`` paths, the integer
        options ``samples`` and ``log_interval``, and the boolean flags
        ``fuse_conv_bn``, ``w_pano``, ``w_panoproc`` and ``no_acceleration``.
    """
    parser = argparse.ArgumentParser(description='MMDet benchmark a model')
    parser.add_argument('config', help='test config file path')
    parser.add_argument('checkpoint', help='checkpoint file')
    # type=int is required: without it a value given on the command line
    # stays a string, so `(i + 1) == args.samples` never matches and
    # `(i + 1) % args.log_interval` raises a TypeError.
    parser.add_argument(
        '--samples', type=int, default=500, help='samples to benchmark')
    parser.add_argument(
        '--log-interval', type=int, default=50, help='interval of logging')
    parser.add_argument(
        '--fuse-conv-bn',
        action='store_true',
        help='Whether to fuse conv and bn, this will slightly increase '
        'the inference speed')
    parser.add_argument('--w_pano', action='store_true')
    parser.add_argument('--w_panoproc', action='store_true')
    parser.add_argument(
        '--no-acceleration',
        action='store_true',
        help='Omit the pre-computation acceleration')
    return parser.parse_args()
def _import_plugin_modules(cfg, config_path):
    """Import the plugin package named by the config.

    Nothing happens unless ``cfg.plugin`` exists and is truthy. The package
    is derived from ``cfg.plugin_dir`` when present, otherwise from the
    directory of the config file; '/' separators become '.'.
    """
    if not (hasattr(cfg, 'plugin') and cfg.plugin):
        return
    import importlib
    if hasattr(cfg, 'plugin_dir'):
        module_path = os.path.dirname(cfg.plugin_dir).replace('/', '.')
        print(module_path)
    else:
        # fall back to the directory that holds the config file
        module_path = os.path.dirname(config_path).replace('/', '.')
    importlib.import_module(module_path)


def main():
    """Measure the inference speed of a detector on the test split.

    Builds the test dataloader (one sample per batch) and the model from the
    config, loads the checkpoint, then times each forward pass between two
    ``torch.cuda.synchronize()`` calls. The first ``num_warmup`` iterations
    are excluded from the average. Progress is printed every
    ``--log-interval`` samples and an overall summary once ``--samples``
    samples were processed or the dataloader is exhausted.
    """
    args = parse_args()
    # Coerce explicitly so the loop arithmetic is safe even when the parser
    # hands the values over as strings.
    num_samples = int(args.samples)
    log_interval = int(args.log_interval)

    cfg = Config.fromfile(args.config)
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    cfg.model.pretrained = None
    cfg.data.test.test_mode = True

    # import modules from plugin/xx, registry will be updated
    _import_plugin_modules(cfg, args.config)

    # build the dataloader
    # TODO: support multiple images per gpu (only minor changes are needed)
    dataset = build_dataset(cfg.data.test)
    data_loader = build_dataloader(
        dataset,
        samples_per_gpu=1,
        workers_per_gpu=0,
        dist=False,
        shuffle=False)

    # build the model and load checkpoint
    if not args.no_acceleration:
        cfg.model.img_view_transformer.accelerate = True
    cfg.model.train_cfg = None
    model = build_detector(cfg.model, test_cfg=cfg.get('test_cfg'))
    if cfg.get('fp16', None) is not None:
        wrap_fp16_model(model)
    load_checkpoint(model, args.checkpoint, map_location='cpu')
    if args.fuse_conv_bn:
        model = fuse_module(model)

    model = MMDataParallel(model, device_ids=[0])
    model.eval()

    # the first several iterations may be very slow so skip them
    num_warmup = 5
    pure_inf_time = 0
    num_timed = 0  # iterations that contributed to pure_inf_time

    # benchmark with several samples and take the average
    for i, data in enumerate(data_loader):
        torch.cuda.synchronize()
        start_time = time.perf_counter()

        with torch.no_grad():
            model(
                return_loss=False,
                rescale=True,
                w_pano=args.w_pano,
                w_panoproc=args.w_panoproc,
                **data)

        torch.cuda.synchronize()
        elapsed = time.perf_counter() - start_time

        if i >= num_warmup:
            pure_inf_time += elapsed
            num_timed += 1
            if (i + 1) % log_interval == 0:
                fps = num_timed / pure_inf_time
                print(f'Done image [{i + 1:<3}/ {num_samples}], '
                      f'fps: {fps:.1f} img / s')

        if (i + 1) == num_samples:
            break

    # Each timed iteration is counted exactly once; adding the last elapsed
    # time a second time would bias the reported average.
    if num_timed == 0:
        print(f'No sample was timed: fewer than {num_warmup + 1} samples '
              'were processed')
        return
    fps = num_timed / pure_inf_time
    print(f'Overall \nfps: {fps:.2f} img / s '
          f'\ninference time: {1000 / fps:.2f} ms')


if __name__ == '__main__':
    main()
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import time
import os
import sys
import torch
from mmcv import Config
from mmcv.parallel import MMDataParallel
from mmcv.runner import load_checkpoint, wrap_fp16_model
from mmdet3d.datasets import build_dataloader, build_dataset
from mmdet3d.models import build_detector
from tools.misc.fuse_conv_bn import fuse_module
sys.path.insert(0, os.getcwd())
print(sys.path)
def parse_args():
    """Parse the command-line arguments of the sequential benchmark.

    Returns:
        argparse.Namespace: ``config`` and ``checkpoint`` paths, the integer
        options ``samples`` and ``log_interval``, and the boolean flags
        ``fuse_conv_bn``, ``w_pano``, ``w_panoproc`` and ``no_acceleration``.
    """
    parser = argparse.ArgumentParser(description='MMDet benchmark a model')
    parser.add_argument('config', help='test config file path')
    parser.add_argument('checkpoint', help='checkpoint file')
    # type=int is required: a command-line value would otherwise stay a
    # string and break the integer comparisons in the benchmark loop.
    parser.add_argument(
        '--samples', type=int, default=400, help='samples to benchmark')
    parser.add_argument(
        '--log-interval', type=int, default=50, help='interval of logging')
    parser.add_argument(
        '--fuse-conv-bn',
        action='store_true',
        help='Whether to fuse conv and bn, this will slightly increase '
        'the inference speed')
    parser.add_argument('--w_pano', action='store_true')
    parser.add_argument('--w_panoproc', action='store_true')
    parser.add_argument(
        '--no-acceleration',
        action='store_true',
        help='Omit the pre-computation acceleration')
    return parser.parse_args()
def _import_plugin_modules(cfg, config_path):
    """Import the plugin package named by the config.

    Nothing happens unless ``cfg.plugin`` exists and is truthy. The package
    is derived from ``cfg.plugin_dir`` when present, otherwise from the
    directory of the config file; '/' separators become '.'.
    """
    if not (hasattr(cfg, 'plugin') and cfg.plugin):
        return
    import importlib
    if hasattr(cfg, 'plugin_dir'):
        module_path = os.path.dirname(cfg.plugin_dir).replace('/', '.')
        print(module_path)
    else:
        # fall back to the directory that holds the config file
        module_path = os.path.dirname(config_path).replace('/', '.')
    importlib.import_module(module_path)


def main():
    """Measure the inference speed of a detector in sequential mode.

    For every sample, ``model.module.extract_img_feat(..., pred_prev=True)``
    is run first and is NOT timed; only the following
    ``model(..., sequential=True, feat_prev=feat_prev)`` call is measured
    between two ``torch.cuda.synchronize()`` calls. The first ``num_warmup``
    iterations are excluded from the average.
    """
    args = parse_args()
    # Coerce explicitly so the loop arithmetic is safe even when the parser
    # hands the values over as strings.
    num_samples = int(args.samples)
    log_interval = int(args.log_interval)

    cfg = Config.fromfile(args.config)
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    cfg.model.pretrained = None
    cfg.data.test.test_mode = True

    # import modules from plugin/xx, registry will be updated
    _import_plugin_modules(cfg, args.config)

    # build the dataloader
    # TODO: support multiple images per gpu (only minor changes are needed)
    dataset = build_dataset(cfg.data.test)
    data_loader = build_dataloader(
        dataset,
        samples_per_gpu=1,
        workers_per_gpu=0,
        dist=False,
        shuffle=False)

    # build the model and load checkpoint
    cfg.model.train_cfg = None
    cfg.model.align_after_view_transfromation = True
    if not args.no_acceleration:
        cfg.model.img_view_transformer.accelerate = True
    model = build_detector(cfg.model, test_cfg=cfg.get('test_cfg'))
    if cfg.get('fp16', None) is not None:
        wrap_fp16_model(model)
    load_checkpoint(model, args.checkpoint, map_location='cpu')
    if args.fuse_conv_bn:
        model = fuse_module(model)

    model = MMDataParallel(model, device_ids=[0])
    model.eval()

    # the first several iterations may be very slow so skip them
    num_warmup = 5
    pure_inf_time = 0
    num_timed = 0  # iterations that contributed to pure_inf_time

    # benchmark with several samples and take the average
    for i, data in enumerate(data_loader):
        # Untimed preparation: compute the previous-frame features and put
        # the returned inputs back into the batch.
        inputs = [d.cuda() for d in data['img_inputs'][0]]
        with torch.no_grad():
            feat_prev, inputs = model.module.extract_img_feat(
                inputs, pred_prev=True, img_metas=None)
        data['img_inputs'][0] = inputs

        torch.cuda.synchronize()
        start_time = time.perf_counter()

        with torch.no_grad():
            model(
                return_loss=False,
                rescale=True,
                sequential=True,
                feat_prev=feat_prev,
                w_pano=args.w_pano,
                w_panoproc=args.w_panoproc,
                **data)

        torch.cuda.synchronize()
        elapsed = time.perf_counter() - start_time

        if i >= num_warmup:
            pure_inf_time += elapsed
            num_timed += 1
            if (i + 1) % log_interval == 0:
                fps = num_timed / pure_inf_time
                print(f'Done image [{i + 1:<3}/ {num_samples}], '
                      f'fps: {fps:.1f} img / s')

        if (i + 1) == num_samples:
            break

    # Each timed iteration is counted exactly once; adding the last elapsed
    # time a second time would bias the reported average.
    if num_timed == 0:
        print(f'No sample was timed: fewer than {num_warmup + 1} samples '
              'were processed')
        return
    fps = num_timed / pure_inf_time
    print(f'Overall \nfps: {fps:.2f} img / s '
          f'\ninference time: {1000 / fps:.2f} ms')


if __name__ == '__main__':
    main()
\ No newline at end of file
import time
from typing import Dict, Optional, Sequence, Union
import os
from os import path as osp
import sys
sys.path.insert(0, os.getcwd())
import tensorrt as trt
import torch
import torch.onnx
from mmcv import Config
from mmdeploy.backend.tensorrt import load_tensorrt_plugin
try:
# If mmdet version > 2.23.0, compat_cfg would be imported and
# used from mmdet instead of mmdet3d.
from mmdet.utils import compat_cfg
except ImportError:
from mmdet3d.utils import compat_cfg
import argparse
from mmdet3d.core import bbox3d2result
from mmdet3d.core.bbox.structures.box_3d_mode import LiDARInstance3DBoxes
from mmdet3d.datasets import build_dataloader, build_dataset
from mmdet3d.models import build_model
def parse_args():
    """Parse the command-line arguments of the TensorRT benchmark.

    Returns:
        argparse.Namespace: ``config`` and ``engine`` paths, the integer
        ``samples`` and the boolean flags ``postprocessing``, ``eval`` and
        ``prefetch``.
    """
    parser = argparse.ArgumentParser(description='Deploy BEVDet with Tensorrt')
    parser.add_argument('config', help='deploy config file path')
    parser.add_argument('engine', help='checkpoint file')
    # type=int is required: a command-line value would otherwise stay a
    # string and `(i + 1) == args.samples` would never be true.
    parser.add_argument(
        '--samples', type=int, default=500, help='samples to benchmark')
    parser.add_argument('--postprocessing', action='store_true')
    parser.add_argument('--eval', action='store_true')
    parser.add_argument('--prefetch', action='store_true',
                        help='use prefetch to accelerate the data loading, '
                             'the inference speed is sightly degenerated due '
                             'to the computational occupancy of prefetch')
    return parser.parse_args()
def torch_dtype_from_trt(dtype: trt.DataType) -> torch.dtype:
    """Map a TensorRT data type to the matching torch data type.

    Args:
        dtype (trt.DataType): The data type in tensorrt.

    Returns:
        torch.dtype: The corresponding data type in torch.

    Raises:
        TypeError: If ``dtype`` is none of bool, int8, int32, float16 or
            float32.
    """
    # Checked in order with ``==``, like an if/elif chain.
    known_pairs = (
        (trt.bool, torch.bool),
        (trt.int8, torch.int8),
        (trt.int32, torch.int32),
        (trt.float16, torch.float16),
        (trt.float32, torch.float32),
    )
    for trt_type, torch_type in known_pairs:
        if dtype == trt_type:
            return torch_type
    raise TypeError(f'{dtype} is not supported by torch')
class TRTWrapper(torch.nn.Module):
    """Run a TensorRT engine on torch CUDA tensors.

    Args:
        engine (str | trt.ICudaEngine): Path of a serialized engine file, or
            an engine object that is used as is.
        output_names (Sequence[str], optional): Names of the output bindings
            to return. When omitted, every binding that is not an input is
            treated as an output.
    """

    def __init__(self,
                 engine: Union[str, trt.ICudaEngine],
                 output_names: Optional[Sequence[str]] = None) -> None:
        super().__init__()
        self.engine = engine
        if isinstance(self.engine, str):
            with trt.Logger() as logger, trt.Runtime(logger) as runtime:
                with open(self.engine, mode='rb') as f:
                    engine_bytes = f.read()
                self.engine = runtime.deserialize_cuda_engine(engine_bytes)
        self.context = self.engine.create_execution_context()
        names = list(self.engine)
        input_names = list(filter(self.engine.binding_is_input, names))
        self._input_names = input_names
        self._output_names = output_names

        if self._output_names is None:
            # Keep the engine's binding order instead of the arbitrary order
            # of a set difference.
            known_inputs = set(input_names)
            self._output_names = [
                name for name in names if name not in known_inputs
            ]

    def forward(self, inputs: Dict[str, torch.Tensor]):
        """Execute the engine.

        Args:
            inputs (dict[str, torch.Tensor]): Input tensors keyed by binding
                name. NOTE(review): they are presumably expected to live on
                the CUDA device already - confirm with callers.

        Returns:
            dict[str, torch.Tensor]: Freshly allocated CUDA tensors keyed by
            output binding name. Execution is enqueued asynchronously on the
            current CUDA stream.
        """
        bindings = [None] * (len(self._input_names) + len(self._output_names))
        # References to the tensors whose pointers are handed to TensorRT.
        # ``.contiguous()`` may return a new temporary tensor; if it were
        # dropped right away its memory could be handed to the output
        # tensors allocated below, before the engine has read it.
        live_inputs = []

        for input_name, input_tensor in inputs.items():
            idx = self.engine.get_binding_index(input_name)
            input_tensor = input_tensor.contiguous()
            live_inputs.append(input_tensor)
            self.context.set_binding_shape(idx, tuple(input_tensor.shape))
            bindings[idx] = input_tensor.data_ptr()

        # create output tensors
        outputs = {}
        device = torch.device('cuda')
        for output_name in self._output_names:
            idx = self.engine.get_binding_index(output_name)
            dtype = torch_dtype_from_trt(self.engine.get_binding_dtype(idx))
            shape = tuple(self.context.get_binding_shape(idx))
            output = torch.zeros(size=shape, dtype=dtype, device=device)
            outputs[output_name] = output
            bindings[idx] = output.data_ptr()

        self.context.execute_async_v2(bindings,
                                      torch.cuda.current_stream().cuda_stream)
        return outputs
def get_plugin_names():
    """Return the names of all plugin creators in the TensorRT registry."""
    creators = trt.get_plugin_registry().plugin_creator_list
    plugin_names = []
    for creator in creators:
        plugin_names.append(creator.name)
    return plugin_names
def _import_plugin_modules(cfg, config_path):
    """Import the plugin package named by the config.

    Nothing happens unless ``cfg.plugin`` exists and is truthy. The package
    is derived from ``cfg.plugin_dir`` when present, otherwise from the
    directory of the config file; '/' separators become '.'.
    """
    if not (hasattr(cfg, 'plugin') and cfg.plugin):
        return
    import importlib
    if hasattr(cfg, 'plugin_dir'):
        module_path = os.path.dirname(cfg.plugin_dir).replace('/', '.')
        print(module_path)
    else:
        # fall back to the directory that holds the config file
        module_path = os.path.dirname(config_path).replace('/', '.')
    importlib.import_module(module_path)


def _build_bev_pool_metas(model, data):
    """Compute the BEV-pooling inputs fed to the engine next to the image.

    Returns an empty dict for model classes other than the four handled
    below.
    """
    inputs = [t.cuda() for t in data['img_inputs'][0]]
    model_name = model.__class__.__name__
    mlp_input = None
    if model_name in ('FBOCCTRT', 'FBOCC2DTRT'):
        pool_inputs = model.get_bev_pool_input(
            inputs, img_metas=data['img_metas'])
    elif model_name == 'BEVDetOCCTRT':
        pool_inputs = model.get_bev_pool_input(inputs)
    elif model_name == 'BEVDepthOCCTRT':
        pool_inputs, mlp_input = model.get_bev_pool_input(inputs)
    else:
        return dict()

    metas = dict(
        ranks_bev=pool_inputs[0].int().contiguous(),
        ranks_depth=pool_inputs[1].int().contiguous(),
        ranks_feat=pool_inputs[2].int().contiguous(),
        interval_starts=pool_inputs[3].int().contiguous(),
        interval_lengths=pool_inputs[4].int().contiguous())
    if model_name == 'BEVDepthOCCTRT':
        metas['mlp_input'] = mlp_input
    return metas


def main():
    """Benchmark (and optionally evaluate) a TensorRT engine.

    The BEV-pooling inputs are computed once from the first batch and reused
    for every sample. The timed region covers the engine call and, when
    ``--postprocessing`` is set, the post-processing. ``--eval`` forces
    post-processing, runs over the whole dataloader and finally calls
    ``dataset.evaluate``.

    Raises:
        ValueError: If the config disables both ``wdet3d`` and ``wocc``.
    """
    load_tensorrt_plugin()

    args = parse_args()
    # Coerce explicitly so the comparison below works even when the parser
    # hands the value over as a string.
    num_samples = int(args.samples)
    if args.eval:
        args.postprocessing = True
        print('Warnings: evaluation requirement detected, set '
              'postprocessing=True for evaluation purpose')
    cfg = Config.fromfile(args.config)
    cfg.model.pretrained = None
    cfg.model.type = cfg.model.type + 'TRT'
    cfg = compat_cfg(cfg)
    cfg.gpu_ids = [0]

    if not args.prefetch:
        cfg.data.test_dataloader.workers_per_gpu = 0

    # import modules from plugin/xx, registry will be updated
    _import_plugin_modules(cfg, args.config)

    # build dataloader
    assert cfg.data.test.test_mode
    test_dataloader_default_args = dict(
        samples_per_gpu=1, workers_per_gpu=2, dist=False, shuffle=False)
    test_loader_cfg = {
        **test_dataloader_default_args,
        **cfg.data.get('test_dataloader', {})
    }
    dataset = build_dataset(cfg.data.test)
    data_loader = build_dataloader(dataset, **test_loader_cfg)

    # build the model
    cfg.model.train_cfg = None
    model = build_model(cfg.model, test_cfg=cfg.get('test_cfg'))

    with_det3d = bool(cfg.model.get('wdet3d', True))
    with_occ = bool(cfg.model.get('wocc', True))
    if not (with_det3d or with_occ):
        # The original ``raise("...")`` raised a plain string, which fails
        # with an unrelated TypeError instead of showing this message.
        raise ValueError(' At least one of wdet3d and wocc is set as True!! ')

    # build tensorrt model: six outputs per detection task head, plus one
    # output when occupancy prediction is enabled
    num_det_outputs = 0
    if with_det3d:
        num_det_outputs = 6 * len(model.pts_bbox_head.task_heads)
    num_outputs = num_det_outputs + (1 if with_occ else 0)
    trt_model = TRTWrapper(args.engine,
                           [f'output_{i}' for i in range(num_outputs)])

    num_warmup = 50
    pure_inf_time = 0
    num_timed = 0  # iterations that contributed to pure_inf_time

    init_ = True
    metas = dict()
    # benchmark with several samples and take the average
    results = list()
    for i, data in enumerate(data_loader):
        if init_:
            metas = _build_bev_pool_metas(model, data)
            init_ = False
        img = data['img_inputs'][0][0].cuda().squeeze(0).contiguous()
        if img.shape[0] > 6:
            img = img[:6]
        torch.cuda.synchronize()
        start_time = time.perf_counter()
        trt_output = trt_model.forward(dict(img=img, **metas))

        # postprocessing
        if args.postprocessing:
            if with_det3d:
                trt_output_det = [
                    trt_output[f'output_{k}'] for k in range(num_det_outputs)
                ]
                pred = model.result_deserialize(trt_output_det)
                img_metas = [dict(box_type_3d=LiDARInstance3DBoxes)]
                bbox_list = model.pts_bbox_head.get_bboxes(
                    pred, img_metas, rescale=True)
                bbox_results = [
                    bbox3d2result(bboxes, scores, labels)
                    for bboxes, scores, labels in bbox_list
                ]
            if with_occ:
                # occupancy: List[(Dx, Dy, Dz), (Dx, Dy, Dz), ...]
                # NOTE(review): 'output_6' is only the occupancy output when
                # there is a single detection task head - confirm for
                # multi-head configs.
                occ_key = 'output_6' if with_det3d else 'output_0'
                occ_preds = model.occ_head.get_occ(trt_output[occ_key])

            if args.eval:
                if with_det3d and with_occ:
                    results.append({
                        'pts_bbox': bbox_results[0],
                        'pred_occ': occ_preds[0]
                    })
                elif with_det3d:
                    results.append(bbox_results[0])
                else:
                    results.append(occ_preds[0])

        torch.cuda.synchronize()
        elapsed = time.perf_counter() - start_time

        if i >= num_warmup:
            pure_inf_time += elapsed
            num_timed += 1
            if (i + 1) % 50 == 0:
                fps = num_timed / pure_inf_time
                print(f'Done image [{i + 1:<3}/ {num_samples}], '
                      f'fps: {fps:.2f} img / s')

        if (i + 1) == num_samples:
            # Each timed iteration is counted exactly once; adding the last
            # elapsed time a second time would bias the reported average.
            if num_timed > 0:
                fps = num_timed / pure_inf_time
                print(f'Overall \nfps: {fps:.2f} img / s '
                      f'\ninference time: {1000/fps:.2f} ms')
            else:
                print(f'No sample was timed: --samples must exceed the '
                      f'{num_warmup} warm-up iterations')
            if not args.eval:
                return
            # with --eval keep going so the whole dataset gets evaluated

    assert args.eval
    eval_kwargs = cfg.get('evaluation', {}).copy()
    # hard-code way to remove EvalHook args
    for key in [
            'interval', 'tmpdir', 'start', 'gpu_collect', 'save_best',
            'rule'
    ]:
        eval_kwargs.pop(key, None)
    # NOTE(review): ``args.eval`` is a boolean flag, so ``metric`` is set to
    # True here - confirm that the dataset's ``evaluate`` accepts that.
    eval_kwargs.update(dict(metric=args.eval))
    print(dataset.evaluate(results, **eval_kwargs))


if __name__ == '__main__':
    main()
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import time
import numpy as np
import torch
from mmcv import Config
from mmcv.parallel import MMDataParallel
from mmcv.runner import load_checkpoint
from mmdet3d.datasets import build_dataloader, build_dataset
from mmdet3d.models import build_detector
def parse_args():
    """Parse the command-line arguments of the view-transformer benchmark.

    Returns:
        argparse.Namespace: ``config`` and ``checkpoint`` paths, the integer
        options ``samples`` and ``log_interval``, and the boolean flags
        ``mem_only`` and ``no_acceleration``.
    """
    parser = argparse.ArgumentParser(description='MMDet benchmark a model')
    parser.add_argument('config', help='test config file path')
    parser.add_argument('checkpoint', help='checkpoint file')
    # type=int is required: a command-line value would otherwise stay a
    # string and break the integer comparisons in the benchmark loop.
    parser.add_argument(
        '--samples', type=int, default=1000, help='samples to benchmark')
    parser.add_argument(
        '--log-interval', type=int, default=50, help='interval of logging')
    parser.add_argument(
        '--mem-only',
        action='store_true',
        help='Conduct the memory analysis only')
    parser.add_argument(
        '--no-acceleration',
        action='store_true',
        help='Omit the pre-computation acceleration')
    return parser.parse_args()
def main():
    """Benchmark the view transformer of a BEVDet model in isolation.

    For every sample the image encoder and depth net run untimed; only
    ``view_transformer.view_transform`` is measured between two
    ``torch.cuda.synchronize()`` calls. On the first sample a memory
    analysis based on ``torch.cuda.memory_allocated`` /
    ``max_memory_allocated`` is printed.

    Returns:
        float | None: Average frames per second over the timed iterations,
        or ``None`` when ``--mem-only`` is given or no iteration was timed.
    """
    args = parse_args()
    # Coerce explicitly so the loop arithmetic is safe even when the parser
    # hands the values over as strings.
    num_samples = int(args.samples)
    log_interval = int(args.log_interval)

    cfg = Config.fromfile(args.config)
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    cfg.model.pretrained = None
    cfg.data.test.test_mode = True

    # build the dataloader
    # TODO: support multiple images per gpu (only minor changes are needed)
    dataset = build_dataset(cfg.data.test)
    data_loader = build_dataloader(
        dataset,
        samples_per_gpu=1,
        workers_per_gpu=cfg.data.workers_per_gpu,
        dist=False,
        shuffle=False)

    # build the model and load checkpoint
    if not args.no_acceleration:
        cfg.model.img_view_transformer.accelerate = True
    cfg.model.train_cfg = None
    assert cfg.model.type == 'BEVDet', \
        'Please use class BEVDet for ' \
        'view transformation inference ' \
        'speed estimation instead of %s' % cfg.model.type
    model = build_detector(cfg.model, test_cfg=cfg.get('test_cfg'))
    load_checkpoint(model, args.checkpoint, map_location='cpu')

    model = MMDataParallel(model, device_ids=[0])
    model.eval()

    # the first several iterations may be very slow so skip them
    num_warmup = 100
    pure_inf_time = 0
    num_timed = 0  # iterations that contributed to pure_inf_time

    view_transformer = model.module.img_view_transformer
    D = view_transformer.D
    out_channels = view_transformer.out_channels
    depth_net = view_transformer.depth_net

    # benchmark with several samples and take the average
    for i, data in enumerate(data_loader):
        with torch.no_grad():
            img_feat, _ = \
                model.module.image_encoder(data['img_inputs'][0][0].cuda())
            B, N, C, H, W = img_feat.shape
            x = depth_net(img_feat.reshape(B * N, C, H, W))
            # the first D channels are depth logits, the next out_channels
            # channels are the features to be transformed
            depth_digit = x[:, :D, ...]
            tran_feat = x[:, D:D + out_channels, ...]
            depth = depth_digit.softmax(dim=1)
        vt_inputs = [img_feat] + \
            [d.cuda() for d in data['img_inputs'][0][1:]]

        if i == 0:
            precomputed_memory_allocated = 0.0
            if view_transformer.accelerate:
                start_mem_allocated = torch.cuda.memory_allocated()
                view_transformer.pre_compute(vt_inputs)
                end_mem_allocated = torch.cuda.memory_allocated()
                precomputed_memory_allocated = \
                    end_mem_allocated - start_mem_allocated
                ref_max_mem_allocated = torch.cuda.max_memory_allocated()
                # occupy the memory; the tensor must stay referenced so the
                # allocation is kept while the peak is measured below
                size = (ref_max_mem_allocated - end_mem_allocated) // 4
                occupy_tensor = torch.zeros(
                    size=(size, ), device='cuda', dtype=torch.float32)
            print('Memory analysis: \n'
                  'precomputed_memory_allocated : %d B / %.01f MB \n' %
                  (precomputed_memory_allocated,
                   precomputed_memory_allocated / 1024 / 1024))
            start_mem_allocated = torch.cuda.memory_allocated()
            bev_feat = view_transformer.view_transform_core(
                vt_inputs, depth, tran_feat)[0]
            end_max_mem_allocated = torch.cuda.max_memory_allocated()
            peak_memory_allocated = \
                end_max_mem_allocated - start_mem_allocated
            total_memory_requirement = \
                precomputed_memory_allocated + peak_memory_allocated
            print('Memory analysis: \n'
                  'Memory requirement : %d B / %.01f MB \n' %
                  (total_memory_requirement,
                   total_memory_requirement / 1024 / 1024))
            if args.mem_only:
                return None

        torch.cuda.synchronize()
        start_time = time.perf_counter()

        with torch.no_grad():
            view_transformer.view_transform(vt_inputs, depth, tran_feat)[0]

        torch.cuda.synchronize()
        elapsed = time.perf_counter() - start_time

        if i >= num_warmup:
            pure_inf_time += elapsed
            num_timed += 1
            if (i + 1) % log_interval == 0:
                fps = num_timed / pure_inf_time
                print(f'Done image [{i + 1:<3}/ {num_samples}], '
                      f'fps: {fps:.1f} img / s')

        if (i + 1) == num_samples:
            break

    # Each timed iteration is counted exactly once; adding the last elapsed
    # time a second time would bias the reported average.
    if num_timed == 0:
        print(f'No sample was timed: fewer than {num_warmup + 1} samples '
              'were processed')
        return None
    fps = num_timed / pure_inf_time
    print(f'Overall fps: {fps:.1f} img / s')
    return fps


if __name__ == '__main__':
    repeat_times = 1
    fps_list = []
    for _ in range(repeat_times):
        fps = main()
        time.sleep(5)
        # main() returns None for --mem-only runs or when nothing was timed;
        # such runs must not enter the statistics.
        if fps is not None:
            fps_list.append(fps)
    if fps_list:
        fps_list = np.array(fps_list, dtype=np.float32)
        print(f'Mean Overall fps: {fps_list.mean():.4f} +'
              f' {np.sqrt(fps_list.var()):.4f} img / s')
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import os
import sys
sys.path.insert(0, os.getcwd())
import torch
from mmcv import Config, DictAction
from mmdet3d.models import build_model
try:
from mmcv.cnn import get_model_complexity_info
except ImportError:
raise ImportError('Please upgrade mmcv to >0.6.2')
def parse_args():
    """Build the command-line interface of the FLOPs counter and parse it.

    Returns:
        argparse.Namespace: ``config`` path, ``shape`` (list of int),
        ``modality`` (one of 'point', 'image', 'multi') and ``cfg_options``
        (config overrides, ``None`` when not given).
    """
    cli = argparse.ArgumentParser(description='Train a detector')
    cli.add_argument('config', help='train config file path')

    shape_options = dict(
        type=int,
        nargs='+',
        default=[40000, 4],
        help='input point cloud size')
    modality_options = dict(
        type=str,
        default='point',
        choices=['point', 'image', 'multi'],
        help='input data modality')
    override_options = dict(
        nargs='+',
        action=DictAction,
        help='override some settings in the used config, the key-value pair '
        'in xxx=yyy format will be merged into config file. If the value to '
        'be overwritten is a list, it should be like key="[a,b]" or key=a,b '
        'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" '
        'Note that the quotation marks are necessary and that no white space '
        'is allowed.')

    # registered in this order so the --help listing keeps its layout
    cli.add_argument('--shape', **shape_options)
    cli.add_argument('--modality', **modality_options)
    cli.add_argument('--cfg-options', **override_options)
    return cli.parse_args()
def construct_input(input_shape):
    """Build a dummy six-camera batch for the FLOPs counter.

    Args:
        input_shape (tuple[int]): Per-image shape; the image tensor has shape
            ``(1, 6, *input_shape)``.

    Returns:
        dict: ``{'img_inputs': [...]}`` with seven tensors: the
        (uninitialised) image batch, two references to one ``(1, 6, 4, 4)``
        identity batch, two references to one ``(1, 6, 3, 3)`` identity
        batch, a ``(1, 6, 3)`` tensor of ones and a ``(1, 3, 3)`` identity.
        NOTE(review): the meaning of each slot is defined by the model's
        ``forward_dummy`` - confirm there.
    """
    # Use the GPU when there is one and fall back to the CPU otherwise,
    # instead of failing on an unconditional ``.cuda()``.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    rot = torch.eye(4).float().to(device).view(1, 1, 4, 4).expand(1, 6, 4, 4)
    intrins = torch.eye(3).float().to(device).view(1, 1, 3, 3).expand(
        1, 6, 3, 3)
    images = torch.ones(()).new_empty((1, 6, *input_shape)).to(device)
    dummy_batch = dict(img_inputs=[
        images, rot, rot, intrins, intrins,
        torch.ones((1, 6, 3)).to(device),
        torch.eye(3).float().to(device).view(1, 3, 3)
    ])
    return dummy_batch
def main():
    """Count FLOPs and parameters of the model described by a config.

    The input shape is derived from ``--modality`` and ``--shape``, the model
    is built from the (optionally overridden) config, its ``forward`` is
    replaced by ``forward_dummy`` and ``get_model_complexity_info`` is run
    with ``construct_input`` as input constructor.

    Raises:
        NotImplementedError: For the 'multi' modality, or when the model has
            no ``forward_dummy``.
        ValueError: For an 'image' shape with neither one nor two values.
    """
    args = parse_args()
    modality = args.modality
    shape = args.shape

    if modality == 'multi':
        raise NotImplementedError(
            'FLOPs counter is currently not supported for models with '
            'multi-modality input')
    if modality == 'point':
        assert len(shape) == 2, 'invalid input shape'
        input_shape = tuple(shape)
    elif modality == 'image':
        num_dims = len(shape)
        if num_dims not in (1, 2):
            raise ValueError('invalid input shape')
        if num_dims == 1:
            # a single value describes a square image
            input_shape = (3, shape[0], shape[0])
        else:
            input_shape = (3, ) + tuple(shape)

    cfg = Config.fromfile(args.config)
    if args.cfg_options is not None:
        cfg.merge_from_dict(args.cfg_options)

    # import modules from plugin/xx, registry will be updated
    if hasattr(cfg, 'plugin') and cfg.plugin:
        import importlib
        if hasattr(cfg, 'plugin_dir'):
            path_parts = os.path.dirname(cfg.plugin_dir).split('/')
            package = '.'.join(path_parts)
            print(package)
        else:
            # import dir is the dirpath for the config file
            path_parts = os.path.dirname(args.config).split('/')
            package = '.'.join(path_parts)
        importlib.import_module(package)

    model = build_model(
        cfg.model,
        train_cfg=cfg.get('train_cfg'),
        test_cfg=cfg.get('test_cfg'))
    if torch.cuda.is_available():
        model.cuda()
    model.eval()

    if not hasattr(model, 'forward_dummy'):
        raise NotImplementedError(
            'FLOPs counter is currently not supported for {}'.format(
                model.__class__.__name__))
    model.forward = model.forward_dummy

    flops, params = get_model_complexity_info(
        model, input_shape, input_constructor=construct_input)
    split_line = '=' * 30
    summary = (f'{split_line}\nInput shape: {input_shape}\n'
               f'Flops: {flops}\nParams: {params}\n{split_line}')
    print(summary)
    print('!!!Please be cautious if you use the results in papers. '
          'You may need to check if all ops are supported and verify that the '
          'flops computation is correct.')


if __name__ == '__main__':
    main()
# Copyright (c) Phigent Robotics. All rights reserved.
import argparse
import json
import os
import pickle
import cv2
import numpy as np
from pyquaternion.quaternion import Quaternion
from mmdet3d.core.bbox.structures.lidar_box3d import LiDARInstance3DBoxes as LB
def check_point_in_img(points, height, width):
    """Tell which 2D points fall inside an image of the given size.

    Args:
        points (np.ndarray): Array whose column 0 is x and column 1 is y.
        height (int): Exclusive upper bound for y.
        width (int): Exclusive upper bound for x.

    Returns:
        np.ndarray: Boolean mask, True where ``0 <= x < width`` and
        ``0 <= y < height``.
    """
    xs = points[:, 0]
    ys = points[:, 1]
    return (xs >= 0) & (ys >= 0) & (xs < width) & (ys < height)
def depth2color(depth):
    """Map a scalar value to a colour on a six-stop colour ramp.

    ``depth`` is normalised with ``(depth + 2.5) / 3.0`` and clamped to
    [0, 1]; the colour is interpolated linearly between the two neighbouring
    stops of the ramp.

    Args:
        depth (float): Value to colourise.

    Returns:
        tuple[float]: Three channel values in the range [0, 200].
    """
    gray = max(0, min((depth + 2.5) / 3.0, 1.0))
    max_lumi = 200
    colors = np.array(
        [[max_lumi, 0, max_lumi], [max_lumi, 0, 0], [max_lumi, max_lumi, 0],
         [0, max_lumi, 0], [0, max_lumi, max_lumi], [0, 0, max_lumi]],
        dtype=np.float32)
    if gray == 1:
        # the upper end has no following stop to interpolate towards
        return tuple(colors[-1].tolist())
    num_rank = len(colors) - 1
    # Built-in int instead of the ``np.int`` alias, which was removed from
    # NumPy.
    rank = int(np.floor(gray * num_rank))
    diff = (gray - rank / num_rank) * num_rank
    return tuple(
        (colors[rank] + (colors[rank + 1] - colors[rank]) * diff).tolist())
def lidar2img(points_lidar, camrera_info):
    """Project 3D points into the image plane of one camera.

    Args:
        points_lidar (np.ndarray): Points of shape (N, 3).
        camrera_info (dict): Must provide ``sensor2lidar_rotation`` (3x3),
            ``sensor2lidar_translation`` (3,) and ``cam_intrinsic`` (3x3).

    Returns:
        tuple[np.ndarray, np.ndarray]: Pixel coordinates of shape (N, 2) and
        a boolean mask that is True for points more than 0.5 in front of the
        camera (third camera coordinate).
    """
    num_points = points_lidar.shape[0]
    ones_column = np.ones((num_points, 1), dtype=points_lidar.dtype)
    homogeneous = np.hstack([points_lidar, ones_column])

    # assemble the camera-to-lidar transform, then invert it
    cam_to_lidar = np.eye(4, dtype=np.float32)
    cam_to_lidar[:3, :3] = camrera_info['sensor2lidar_rotation']
    cam_to_lidar[:3, 3] = camrera_info['sensor2lidar_translation']
    lidar_to_cam = np.linalg.inv(cam_to_lidar)

    in_camera = (homogeneous @ lidar_to_cam.T)[:, :3]
    valid = in_camera[:, -1] > 0.5
    # perspective division by the third coordinate
    normalized = in_camera / in_camera[:, 2:3]
    intrinsic = camrera_info['cam_intrinsic']
    pixels = (normalized @ intrinsic.T)[:, :2]
    return pixels, valid
def get_lidar2global(infos):
    """Compose the lidar-to-global transform of one frame.

    Args:
        infos (dict): Must provide ``lidar2ego_rotation`` and
            ``ego2global_rotation`` (quaternions accepted by ``Quaternion``)
            plus ``lidar2ego_translation`` and ``ego2global_translation``.

    Returns:
        np.ndarray: 4x4 float32 matrix ``ego2global @ lidar2ego``.
    """

    def rigid_transform(rotation_key, translation_key):
        # 4x4 homogeneous matrix from a quaternion and a translation
        matrix = np.eye(4, dtype=np.float32)
        matrix[:3, :3] = Quaternion(infos[rotation_key]).rotation_matrix
        matrix[:3, 3] = infos[translation_key]
        return matrix

    lidar2ego = rigid_transform('lidar2ego_rotation', 'lidar2ego_translation')
    ego2global = rigid_transform('ego2global_rotation',
                                 'ego2global_translation')
    return ego2global @ lidar2ego
def parse_args():
    """Build the command-line interface of the visualiser and parse it.

    Returns:
        argparse.Namespace: The positional ``res`` path plus the options
        ``show_range``, ``canva_size``, ``vis_frames``, ``scale_factor``,
        ``vis_thred``, ``draw_gt``, ``version``, ``root_path``,
        ``save_path``, ``format``, ``fps`` and ``video_prefix``.
    """
    cli = argparse.ArgumentParser(description='Visualize the predicted '
                                  'result of nuScenes')
    cli.add_argument(
        'res', help='Path to the predicted result in json format')

    # (flag, keyword arguments) pairs, registered in declaration order
    option_table = [
        ('--show-range',
         dict(type=int, default=50, help='Range of visualization in BEV')),
        ('--canva-size',
         dict(type=int, default=1000, help='Size of canva in pixel')),
        ('--vis-frames',
         dict(
             type=int,
             default=500,
             help='Number of frames for visualization')),
        ('--scale-factor',
         dict(
             type=int,
             default=4,
             help='Trade-off between image-view and bev in size of '
             'the visualized canvas')),
        ('--vis-thred',
         dict(
             type=float,
             default=0.3,
             help='Threshold the predicted results')),
        ('--draw-gt', dict(action='store_true')),
        ('--version',
         dict(type=str, default='val', help='Version of nuScenes dataset')),
        ('--root_path',
         dict(
             type=str,
             default='./data/nuscenes',
             help='Path to nuScenes dataset')),
        ('--save_path',
         dict(
             type=str,
             default='./vis',
             help='Path to save visualization results')),
        ('--format',
         dict(
             type=str,
             default='video',
             choices=['video', 'image'],
             help='The desired format of the visualization result')),
        ('--fps', dict(type=int, default=20, help='Frame rate of video')),
        ('--video-prefix',
         dict(type=str, default='vis', help='name of video')),
    ]
    for flag, options in option_table:
        cli.add_argument(flag, **options)
    return cli.parse_args()
# Line colour per box kind, indexed with int(pred_flag): 1 for predicted
# boxes, 0 for ground-truth boxes.
color_map = {0: (255, 255, 0), 1: (0, 255, 255)}


def main():
    """Render predicted (and optionally ground-truth) boxes of nuScenes.

    For every visualised frame the box corners are drawn into the six camera
    images and into a bird's-eye-view canvas that also shows the lidar
    points; both are fused into one picture that is written either as a
    video frame or as ``<token>.jpg`` inside ``--save_path``.
    """
    args = parse_args()
    # load predicted results
    with open(args.res, 'r') as f:
        res = json.load(f)
    # load dataset information
    info_path = \
        args.root_path + '/bevdetv2-nuscenes_infos_%s.pkl' % args.version
    # NOTE: pickle can execute arbitrary code - only load info files that
    # come from a trusted source.
    with open(info_path, 'rb') as f:
        dataset = pickle.load(f)
    # prepare save path and medium
    vis_dir = args.save_path
    os.makedirs(vis_dir, exist_ok=True)
    print('saving visualized result to %s' % vis_dir)
    scale_factor = args.scale_factor
    canva_size = args.canva_size
    show_range = args.show_range
    if args.format == 'video':
        fourcc = cv2.VideoWriter_fourcc(*'MP4V')
        vout = cv2.VideoWriter(
            os.path.join(vis_dir, '%s.mp4' % args.video_prefix), fourcc,
            args.fps, (int(1600 / scale_factor * 3),
                       int(900 / scale_factor * 2 + canva_size)))

    draw_boxes_indexes_bev = [(0, 1), (1, 2), (2, 3), (3, 0)]
    draw_boxes_indexes_img_view = [(0, 1), (1, 2), (2, 3), (3, 0), (4, 5),
                                   (5, 6), (6, 7), (7, 4), (0, 4), (1, 5),
                                   (2, 6), (3, 7)]
    views = [
        'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT', 'CAM_BACK_LEFT',
        'CAM_BACK', 'CAM_BACK_RIGHT'
    ]
    print('start visualizing results')
    num_frames = min(args.vis_frames, len(dataset['infos']))
    for cnt, infos in enumerate(dataset['infos'][:num_frames]):
        if cnt % 10 == 0:
            print('%d/%d' % (cnt, num_frames))
        # collect instances
        pred_res = res['results'][infos['token']]
        pred_boxes = [
            det['translation'] + det['size'] +
            [Quaternion(det['rotation']).yaw_pitch_roll[0] + np.pi / 2]
            for det in pred_res
        ]
        if len(pred_boxes) == 0:
            corners_lidar = np.zeros((0, 3), dtype=np.float32)
        else:
            pred_boxes = np.array(pred_boxes, dtype=np.float32)
            boxes = LB(pred_boxes, origin=(0.5, 0.5, 0.0))
            corners_global = boxes.corners.numpy().reshape(-1, 3)
            corners_global = np.concatenate(
                [corners_global,
                 np.ones([corners_global.shape[0], 1])],
                axis=1)
            # predictions are in global coordinates; bring them into the
            # lidar frame of this sample
            l2g = get_lidar2global(infos)
            corners_lidar = corners_global @ np.linalg.inv(l2g).T
            corners_lidar = corners_lidar[:, :3]
        # Built-in bool/int instead of the ``np.bool``/``np.int`` aliases,
        # which were removed from NumPy.
        pred_flag = np.ones((corners_lidar.shape[0] // 8, ), dtype=bool)
        scores = [det['detection_score'] for det in pred_res]
        if args.draw_gt:
            # NOTE: infos['gt_boxes'] is modified in place (yaw shifted by
            # pi/2, columns 3 and 4 swapped) before the corners are computed.
            gt_boxes = infos['gt_boxes']
            gt_boxes[:, -1] = gt_boxes[:, -1] + np.pi / 2
            width = gt_boxes[:, 4].copy()
            gt_boxes[:, 4] = gt_boxes[:, 3]
            gt_boxes[:, 3] = width
            corners_lidar_gt = \
                LB(gt_boxes,
                   origin=(0.5, 0.5, 0.5)).corners.numpy().reshape(-1, 3)
            corners_lidar = np.concatenate([corners_lidar, corners_lidar_gt],
                                           axis=0)
            gt_flag = np.ones((corners_lidar_gt.shape[0] // 8), dtype=bool)
            pred_flag = np.concatenate(
                [pred_flag, np.logical_not(gt_flag)], axis=0)
            scores = scores + [0 for _ in range(gt_boxes.shape[0])]
        scores = np.array(scores, dtype=np.float32)
        # ascending order, so higher-scoring boxes are drawn last (on top)
        sort_ids = np.argsort(scores)

        # image view
        imgs = []
        for view in views:
            img = cv2.imread(infos['cams'][view]['data_path'])
            # draw instances
            corners_img, valid = lidar2img(corners_lidar, infos['cams'][view])
            valid = np.logical_and(
                valid,
                check_point_in_img(corners_img, img.shape[0], img.shape[1]))
            valid = valid.reshape(-1, 8)
            corners_img = corners_img.reshape(-1, 8, 2).astype(int)
            for aid in range(valid.shape[0]):
                for index in draw_boxes_indexes_img_view:
                    # only draw an edge when both end points are visible
                    if valid[aid, index[0]] and valid[aid, index[1]]:
                        cv2.line(
                            img,
                            tuple(corners_img[aid, index[0]]),
                            tuple(corners_img[aid, index[1]]),
                            color=color_map[int(pred_flag[aid])],
                            thickness=scale_factor)
            imgs.append(img)

        # bird-eye-view
        canvas = np.zeros((int(canva_size), int(canva_size), 3),
                          dtype=np.uint8)
        # draw lidar points
        lidar_points = np.fromfile(infos['lidar_path'], dtype=np.float32)
        lidar_points = lidar_points.reshape(-1, 5)[:, :3]
        lidar_points[:, 1] = -lidar_points[:, 1]
        lidar_points[:, :2] = \
            (lidar_points[:, :2] + show_range) / show_range / 2.0 * canva_size
        for p in lidar_points:
            if check_point_in_img(
                    p.reshape(1, 3), canvas.shape[1], canvas.shape[0])[0]:
                color = depth2color(p[2])
                cv2.circle(
                    canvas, (int(p[0]), int(p[1])),
                    radius=0,
                    color=color,
                    thickness=1)

        # draw instances
        corners_lidar = corners_lidar.reshape(-1, 8, 3)
        corners_lidar[:, :, 1] = -corners_lidar[:, :, 1]
        bottom_corners_bev = corners_lidar[:, [0, 3, 7, 4], :2]
        bottom_corners_bev = \
            (bottom_corners_bev + show_range) / show_range / 2.0 * canva_size
        bottom_corners_bev = np.round(bottom_corners_bev).astype(np.int32)
        center_bev = corners_lidar[:, [0, 3, 7, 4], :2].mean(axis=1)
        head_bev = corners_lidar[:, [0, 4], :2].mean(axis=1)
        center_canvas = \
            (center_bev + show_range) / show_range / 2.0 * canva_size
        center_canvas = center_canvas.astype(np.int32)
        head_canvas = (head_bev + show_range) / show_range / 2.0 * canva_size
        head_canvas = head_canvas.astype(np.int32)
        for rid in sort_ids:
            score = scores[rid]
            # the score threshold only applies to predictions
            if score < args.vis_thred and pred_flag[rid]:
                continue
            score = min(score * 2.0, 1.0) if pred_flag[rid] else 1.0
            color = color_map[int(pred_flag[rid])]
            line_color = [color[0] * score, color[1] * score,
                          color[2] * score]
            for index in draw_boxes_indexes_bev:
                cv2.line(
                    canvas,
                    bottom_corners_bev[rid, index[0]],
                    bottom_corners_bev[rid, index[1]],
                    line_color,
                    thickness=1)
            # line from the box centre to the midpoint of corners 0 and 4
            cv2.line(
                canvas,
                center_canvas[rid],
                head_canvas[rid],
                line_color,
                1,
                lineType=8)

        # fuse image-view and bev
        img = np.zeros((900 * 2 + canva_size * scale_factor, 1600 * 3, 3),
                       dtype=np.uint8)
        img[:900, :, :] = np.concatenate(imgs[:3], axis=1)
        # the three back views are mirrored horizontally
        img_back = np.concatenate(
            [imgs[3][:, ::-1, :], imgs[4][:, ::-1, :], imgs[5][:, ::-1, :]],
            axis=1)
        img[900 + canva_size * scale_factor:, :, :] = img_back
        img = cv2.resize(img, (int(1600 / scale_factor * 3),
                               int(900 / scale_factor * 2 + canva_size)))
        w_begin = int((1600 * 3 / scale_factor - canva_size) // 2)
        img[int(900 / scale_factor):int(900 / scale_factor) + canva_size,
            w_begin:w_begin + canva_size, :] = canvas

        if args.format == 'image':
            cv2.imwrite(os.path.join(vis_dir, '%s.jpg' % infos['token']), img)
        elif args.format == 'video':
            vout.write(img)
    if args.format == 'video':
        vout.release()


if __name__ == '__main__':
    main()
import os
import mmcv
import open3d as o3d
import numpy as np
import torch
import pickle
import math
from typing import Tuple, List, Dict, Iterable
import argparse
import cv2
# Occupancy-state and observation-mask codes.
NOT_OBSERVED = -1
FREE = 0
OCCUPIED = 1
# Semantic class id that marks empty space; main() hides voxels whose label
# equals this value.
FREE_LABEL = 17
BINARY_OBSERVED = 1
BINARY_NOT_OBSERVED = 0
# Voxel edge length along (x, y, z) -- presumably metres; TODO confirm.
VOXEL_SIZE = [0.4, 0.4, 0.4]
# voxel2points() uses the first three entries as the grid origin; the layout
# is presumably [x_min, y_min, z_min, x_max, y_max, z_max].
POINT_CLOUD_RANGE = [-40, -40, -1, 40, 40, 5.4]
SPTIAL_SHAPE = [200, 200, 16]
TGT_VOXEL_SIZE = [0.4, 0.4, 0.4]
TGT_POINT_CLOUD_RANGE = [-40, -40, -1, 40, 40, 5.4]
# Per-class RGBA colours in the 0-255 range, indexed by semantic class id.
# show_occ() divides the table by 255 and only uses the first three channels.
colormap_to_colors = np.array(
[
[0, 0, 0, 255], # 0 undefined
[112, 128, 144, 255], # 1 barrier
[220, 20, 60, 255], # 2 bicycle
[255, 127, 80, 255], # 3 bus
[255, 158, 0, 255], # 4 car
[233, 150, 70, 255], # 5 construction vehicle
[255, 61, 99, 255], # 6 motorcycle
[0, 0, 230, 255], # 7 pedestrian
[47, 79, 79, 255], # 8 traffic cone
[255, 140, 0, 255],# 9 trailer
[255, 99, 71, 255],# 10 truck
[0, 207, 191, 255], # 11 driveable surface
[175, 0, 75, 255], # 12 other flat
[75, 0, 75, 255], # 13 sidewalk
[112, 180, 60, 255], # 14 terrain
[222, 184, 135, 255], # 15 manmade
[0, 175, 0, 255], # 16 vegetation
], dtype=np.float32)
def voxel2points(voxel, occ_show, voxelSize, point_cloud_range=None):
    """Convert the selected voxels of a dense grid into 3D points.

    Args:
        voxel: (Dx, Dy, Dz) tensor holding one class id per voxel.
        occ_show: (Dx, Dy, Dz) boolean tensor; True marks voxels to keep.
        voxelSize: (dx, dy, dz) edge lengths of one voxel.
        point_cloud_range: optional sequence whose first three entries are
            the coordinates of the grid origin. Defaults to the module-level
            ``POINT_CLOUD_RANGE`` so existing callers are unaffected.

    Returns:
        points: (N, 3) tensor with the (x, y, z) coordinate of every kept
            voxel, computed as ``index * voxelSize + origin`` per axis.
        voxel: (N, ) class ids of the kept voxels.
        occIdx: tuple (x_idx, y_idx, z_idx) of index tensors into the grid.
    """
    if point_cloud_range is None:
        point_cloud_range = POINT_CLOUD_RANGE
    occIdx = torch.where(occ_show)
    # One (N, 1) coordinate column per axis, concatenated into (N, 3).
    columns = [
        occIdx[axis][:, None] * voxelSize[axis] + point_cloud_range[axis]
        for axis in range(3)
    ]
    points = torch.cat(columns, dim=1)
    return points, voxel[occIdx], occIdx
def voxel_profile(voxel, voxel_size):
    """Describe every voxel as a 7-value box row.

    Args:
        voxel: (N, 3) tensor of (x, y, z) voxel coordinates.
        voxel_size: (vx, vy, vz) edge lengths shared by all voxels.

    Returns:
        (N, 7) tensor laid out as (x, y, z - vz/2, vx, vy, vz, 0); the last
        column is a yaw that is always zero.
    """
    num_voxels = voxel.shape[0]
    # Keep x/y and lower z by half a voxel height.
    lowered_z = voxel[:, 2:3] - voxel_size[2] / 2
    centers = torch.cat((voxel[:, :2], lowered_z), dim=1)
    # Broadcast each scalar edge length into its own (N, 1) column.
    size_columns = [
        torch.tensor(voxel_size[axis]).repeat(num_voxels)[:, None]
        for axis in range(3)
    ]
    wlh = torch.cat(size_columns, dim=1)
    yaw = torch.zeros_like(centers[:, 0:1])
    return torch.cat((centers, wlh, yaw), dim=1)
def rotz(t):
    """Return the 3x3 matrix of a rotation by angle ``t`` about the z-axis.

    ``t`` is passed to ``torch.cos`` / ``torch.sin``, so it must be a tensor.
    """
    cos_t, sin_t = torch.cos(t), torch.sin(t)
    rows = [
        [cos_t, -sin_t, 0],
        [sin_t, cos_t, 0],
        [0, 0, 1],
    ]
    return torch.tensor(rows)
def my_compute_box_3d(center, size, heading_angle):
    """Compute the eight corner points of axis-aligned boxes.

    Args:
        center: (N, 3) tensor of (x, y, z - dz/2); z is raised by half the
            box height before the corners are generated.
        size: (N, 3) tensor of (vx, vy, vz).
        heading_angle: (N, 1) tensor. Accepted for interface compatibility
            but not used: boxes are never rotated.

    Returns:
        corners_3d: (N, 8, 3) tensor of corner coordinates.

    Note:
        ``center`` is left untouched. The previous implementation wrote the
        shifted z back into ``center``, which silently modified the caller's
        tensor whenever a view (e.g. a slice of a box table) was passed in.
    """
    # Half extents as (N, 1) columns. The x offsets use size[:, 1] and the
    # y offsets use size[:, 0], exactly as before.
    half_l = (size[:, 1] / 2).unsqueeze(1)
    half_w = (size[:, 0] / 2).unsqueeze(1)
    half_h = (size[:, 2] / 2).unsqueeze(1)

    # Box centre with z moved up by half the height, built out of place.
    box_center = torch.cat((center[:, 0:2], center[:, 2:3] + half_h), dim=1)

    x_offsets = torch.cat(
        [-half_l, half_l, half_l, -half_l, -half_l, half_l, half_l, -half_l],
        dim=1)
    y_offsets = torch.cat(
        [half_w, half_w, -half_w, -half_w, half_w, half_w, -half_w, -half_w],
        dim=1)
    z_offsets = torch.cat(
        [half_h, half_h, half_h, half_h, -half_h, -half_h, -half_h, -half_h],
        dim=1)
    offsets = torch.stack((x_offsets, y_offsets, z_offsets), dim=2)  # (N, 8, 3)
    return offsets + box_center[:, None, :]
def show_point_cloud(points: np.ndarray, colors=True, points_colors=None, bbox3d=None, voxelize=False,
                     bbox_corners=None, linesets=None, vis=None, offset=(0, 0, 0), large_voxel=True, voxel_size=0.4):
    """Add a point cloud, or its voxelization, to an Open3D visualizer.

    :param points: (N, 3) array of (x, y, z) coordinates.
    :param colors: when False, the points are added without colours.
    :param points_colors: (N, 4) array; only the first three channels are
        used. Required when ``colors`` is True.
    :param bbox3d: (N, 7) voxel boxes (center, wlh, yaw=0). Not used by this
        function; kept for interface compatibility.
    :param voxelize: when True, also draw the voxel edges as a line set.
    :param bbox_corners: (N, 8, 3) corner coordinates of the voxel boxes,
        used to draw the voxel edges.
    :param linesets: array of corner-index pairs (reshaped to (-1, 2)) that
        define the edges to draw.
    :param vis: existing visualizer to draw into; a new window is created
        when None.
    :param offset: (x, y, z) translation applied to the points and corners.
    :param large_voxel: when True add a voxel grid built from the points,
        otherwise add the raw point cloud.
    :param voxel_size: edge length of the cubes of that voxel grid.
    :return: the visualizer that the geometries were added to.
    """
    if vis is None:
        vis = o3d.visualization.VisualizerWithKeyCallback()
        vis.create_window()
    if isinstance(offset, (list, tuple)):
        offset = np.array(offset)

    pcd = o3d.geometry.PointCloud()
    pcd.points = o3d.utility.Vector3dVector(points + offset)
    if colors:
        pcd.colors = o3d.utility.Vector3dVector(points_colors[:, :3])
    mesh_frame = o3d.geometry.TriangleMesh.create_coordinate_frame(
        size=1, origin=[0, 0, 0])

    if large_voxel:
        # Build the voxel grid only when it is actually displayed.
        voxel_grid = o3d.geometry.VoxelGrid.create_from_point_cloud(
            pcd, voxel_size=voxel_size)
        vis.add_geometry(voxel_grid)
    else:
        vis.add_geometry(pcd)

    if voxelize:
        # Use the same ``o3d.utility`` namespace as above; the nested
        # ``o3d.open3d`` module is not available in every Open3D release.
        line_sets = o3d.geometry.LineSet()
        line_sets.points = o3d.utility.Vector3dVector(
            bbox_corners.reshape((-1, 3)) + offset)
        line_sets.lines = o3d.utility.Vector2iVector(
            linesets.reshape((-1, 2)))
        line_sets.paint_uniform_color((0, 0, 0))
        vis.add_geometry(line_sets)

    vis.add_geometry(mesh_frame)
    # NOTE: drawing the ego vehicle (see generate_the_ego_car) is currently
    # disabled.
    return vis
def show_occ(occ_state, occ_show, voxel_size, vis=None, offset=(0, 0, 0)):
    """Render the selected voxels of an occupancy grid into a visualizer.

    Args:
        occ_state: (Dx, Dy, Dz) tensor of class ids.
        occ_show: (Dx, Dy, Dz) boolean tensor selecting the voxels to draw.
        voxel_size: (dx, dy, dz) edge lengths, e.g. [0.4, 0.4, 0.4].
        vis: visualizer to draw into; forwarded to show_point_cloud.
        offset: (x, y, z) translation applied to everything that is drawn.

    Returns:
        The visualizer returned by show_point_cloud.
    """
    colors = colormap_to_colors / 255
    pcd, labels, occIdx = voxel2points(occ_state, occ_show, voxel_size)
    # pcd: (N, 3) coordinates, labels: (N, ) class ids.
    # Wrap ids so that values beyond the colour table still map to a row.
    _labels = labels % len(colors)
    pcds_colors = colors[_labels]  # (N, 4)

    bboxes = voxel_profile(pcd, voxel_size)  # (N, 7)
    bboxes_corners = my_compute_box_3d(
        bboxes[:, 0:3], bboxes[:, 3:6], bboxes[:, 6:7])  # (N, 8, 3)

    # The 12 edges of one box expressed as pairs of corner indices (0..7).
    box_edges = torch.tensor([[0, 1], [1, 2], [2, 3], [3, 0],
                              [4, 5], [5, 6], [6, 7], [7, 4],
                              [0, 4], [1, 5], [2, 6], [3, 7]])
    num_boxes = bboxes_corners.shape[0]
    edges = box_edges.reshape((1, 12, 2)).repeat(num_boxes, 1, 1)  # (N, 12, 2)
    # Shift each box's edge indices by 8 * box_id so that they address the
    # flattened (N * 8) corner list.
    bases_ = torch.arange(0, num_boxes * 8, 8)
    edges = edges + bases_[:, None, None]

    vis = show_point_cloud(
        points=pcd.numpy(),
        colors=True,
        points_colors=pcds_colors,
        voxelize=True,
        bbox3d=bboxes.numpy(),
        bbox_corners=bboxes_corners.numpy(),
        linesets=edges.numpy(),
        vis=vis,
        offset=offset,
        large_voxel=True,
        # Draw cubes at the size the points were spaced with instead of a
        # fixed 0.4; the renderer takes a single edge length.
        voxel_size=voxel_size[0]
    )
    return vis
def generate_the_ego_car():
    """Generate a dense block of points standing in for the ego vehicle.

    The block is described by ``ego_range`` and sampled on a 0.1 grid at
    cell centres.

    Returns:
        (N, 3) float array of point coordinates.
    """
    ego_range = [-2, -1, 0, 2, 1, 1.5]
    ego_voxel_size = [0.1, 0.1, 0.1]
    ego_xdim = int((ego_range[3] - ego_range[0]) / ego_voxel_size[0])
    ego_ydim = int((ego_range[4] - ego_range[1]) / ego_voxel_size[1])
    ego_zdim = int((ego_range[5] - ego_range[2]) / ego_voxel_size[2])

    grid = np.meshgrid(np.arange(ego_ydim), np.arange(ego_xdim),
                       np.arange(ego_zdim))
    ego_xyz = np.stack(grid, axis=-1).reshape(-1, 3)
    # NOTE(review): column 0 of ego_xyz holds indices in [0, ego_ydim) yet
    # is scaled with the x extent below, and column 1 holds indices in
    # [0, ego_xdim) yet is scaled with the y extent. The resulting block is
    # therefore not the ego_range box -- confirm whether this is intended.
    # Behaviour is kept as-is.
    ego_point_x = (ego_xyz[:, 0:1] + 0.5) / ego_xdim * (ego_range[3] - ego_range[0]) + ego_range[0]
    ego_point_y = (ego_xyz[:, 1:2] + 0.5) / ego_ydim * (ego_range[4] - ego_range[1]) + ego_range[1]
    ego_point_z = (ego_xyz[:, 2:3] + 0.5) / ego_zdim * (ego_range[5] - ego_range[2]) + ego_range[2]
    # The label array and dict that used to be built here were never
    # returned or read, so they have been dropped.
    return np.concatenate((ego_point_y, ego_point_x, ego_point_z), axis=-1)
def parse_args():
    """Parse the command-line options of the occupancy visualizer.

    Returns:
        argparse.Namespace holding the result path plus the layout, dataset
        and output-format options.
    """
    parser = argparse.ArgumentParser(description='Visualize the predicted '
                                     'result of nuScenes')
    # (flags, keyword options) for every argument, in declaration order.
    option_table = [
        (('res', ), dict(help='Path to the predicted result')),
        (('--canva-size', ),
         dict(type=int, default=1000, help='Size of canva in pixel')),
        (('--vis-frames', ),
         dict(type=int, default=500,
              help='Number of frames for visualization')),
        (('--scale-factor', ),
         dict(type=int, default=4,
              help='Trade-off between image-view and bev in size of '
              'the visualized canvas')),
        (('--version', ),
         dict(type=str, default='val', help='Version of nuScenes dataset')),
        (('--draw-gt', ), dict(action='store_true')),
        (('--root_path', ),
         dict(type=str, default='./data/nuscenes',
              help='Path to nuScenes dataset')),
        (('--save_path', ),
         dict(type=str, default='./vis',
              help='Path to save visualization results')),
        (('--format', ),
         dict(type=str, default='image', choices=['video', 'image'],
              help='The desired format of the visualization result')),
        (('--fps', ), dict(type=int, default=10, help='Frame rate of video')),
        (('--video-prefix', ),
         dict(type=str, default='vis', help='name of video')),
    ]
    for flags, options in option_table:
        parser.add_argument(*flags, **options)
    return parser.parse_args()
def main():
    """Visualize predicted (and optionally ground-truth) occupancy per frame.

    For every selected frame the six camera images are read, the occupancy
    voxels are rendered with Open3D, and the render is pasted between the
    front and back camera rows of one composite image. Depending on
    ``--format`` the result is written as image files or appended to a video.
    """
    args = parse_args()
    # load predicted results
    results_dir = args.res

    # load dataset information
    info_path = \
        args.root_path + '/bevdetv2-nuscenes_infos_%s.pkl' % args.version
    # NOTE: pickle can execute arbitrary code while loading; only point this
    # at info files you generated yourself.
    with open(info_path, 'rb') as fid:
        dataset = pickle.load(fid)

    # prepare save path and medium
    vis_dir = args.save_path
    os.makedirs(vis_dir, exist_ok=True)
    print('saving visualized result to %s' % vis_dir)
    scale_factor = args.scale_factor
    canva_size = args.canva_size
    # (width, height) of the composite frame after down-scaling.
    out_size = (int(1600 / scale_factor * 3),
                int(900 / scale_factor * 2 + canva_size))
    vout = None
    if args.format == 'video':
        fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
        vout = cv2.VideoWriter(
            os.path.join(vis_dir, '%s.mp4' % args.video_prefix), fourcc,
            args.fps, out_size)

    views = [
        'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT', 'CAM_BACK_LEFT',
        'CAM_BACK', 'CAM_BACK_RIGHT'
    ]
    print('start visualizing results')

    vis = o3d.visualization.VisualizerWithKeyCallback()
    vis.create_window()

    num_frames = min(args.vis_frames, len(dataset['infos']))
    try:
        for cnt, info in enumerate(dataset['infos'][:num_frames]):
            if cnt % 10 == 0:
                print('%d/%d' % (cnt, num_frames))

            scene_name = info['scene_name']
            sample_token = info['token']

            pred_occ_path = os.path.join(results_dir, scene_name,
                                         sample_token, 'pred.npz')
            gt_occ_path = info['occ_path']

            pred_occ = np.load(pred_occ_path)['pred']
            gt_data = np.load(
                os.path.join(args.root_path, gt_occ_path, 'labels.npz'))
            voxel_label = gt_data['semantics']
            camera_mask = gt_data['mask_camera']

            # load imgs
            imgs = []
            for view in views:
                img = cv2.imread(info['cams'][view]['data_path'])
                imgs.append(img)

            # occ_canvas: draw non-free voxels inside the camera mask
            voxel_show = np.logical_and(pred_occ != FREE_LABEL, camera_mask)
            voxel_size = VOXEL_SIZE
            vis = show_occ(
                torch.from_numpy(pred_occ),
                torch.from_numpy(voxel_show),
                voxel_size=voxel_size,
                vis=vis,
                offset=[0, pred_occ.shape[0] * voxel_size[0] * 1.2 * 0, 0])

            if args.draw_gt:
                # Ground truth is drawn next to the prediction, shifted
                # along y by 1.2 grid widths.
                voxel_show = np.logical_and(voxel_label != FREE_LABEL,
                                            camera_mask)
                vis = show_occ(
                    torch.from_numpy(voxel_label),
                    torch.from_numpy(voxel_show),
                    voxel_size=voxel_size,
                    vis=vis,
                    offset=[
                        0, voxel_label.shape[0] * voxel_size[0] * 1.2 * 1, 0
                    ])

            view_control = vis.get_view_control()
            view_control.set_lookat(np.array([-0.185, 0.513, 3.485]))
            view_control.set_front(np.array([-0.974, -0.055, 0.221]))
            view_control.set_up(np.array([0.221, 0.014, 0.975]))
            # Pass a plain float rather than a one-element array.
            view_control.set_zoom(0.08)

            opt = vis.get_render_option()
            opt.background_color = np.asarray([1, 1, 1])
            opt.line_width = 5

            vis.poll_events()
            vis.update_renderer()
            # NOTE(review): Visualizer.run() is documented as blocking until
            # the window is closed -- confirm that this per-frame
            # interaction is intended.
            vis.run()

            occ_canvas = vis.capture_screen_float_buffer(do_render=True)
            occ_canvas = np.asarray(occ_canvas)
            occ_canvas = (occ_canvas * 255).astype(np.uint8)
            occ_canvas = occ_canvas[..., [2, 1, 0]]  # RGB -> BGR for OpenCV
            occ_canvas_resize = cv2.resize(
                occ_canvas, (canva_size, canva_size),
                interpolation=cv2.INTER_CUBIC)

            vis.clear_geometries()

            # Composite layout: front cameras on top, occupancy render in
            # the middle, horizontally flipped back cameras at the bottom.
            big_img = np.zeros(
                (900 * 2 + canva_size * scale_factor, 1600 * 3, 3),
                dtype=np.uint8)
            big_img[:900, :, :] = np.concatenate(imgs[:3], axis=1)
            img_back = np.concatenate(
                [imgs[3][:, ::-1, :], imgs[4][:, ::-1, :],
                 imgs[5][:, ::-1, :]],
                axis=1)
            big_img[900 + canva_size * scale_factor:, :, :] = img_back
            big_img = cv2.resize(big_img, out_size)
            w_begin = int((1600 * 3 / scale_factor - canva_size) // 2)
            row_begin = int(900 / scale_factor)
            big_img[row_begin:row_begin + canva_size,
                    w_begin:w_begin + canva_size, :] = occ_canvas_resize

            if args.format == 'image':
                out_dir = os.path.join(vis_dir, f'{scene_name}',
                                       f'{sample_token}')
                mmcv.mkdir_or_exist(out_dir)
                for i, img in enumerate(imgs):
                    cv2.imwrite(os.path.join(out_dir, f'img{i}.png'), img)
                cv2.imwrite(os.path.join(out_dir, 'occ.png'), occ_canvas)
                cv2.imwrite(os.path.join(out_dir, 'overall.png'), big_img)
            elif args.format == 'video':
                # The frame counter used to be formatted as f'{cnt:{cnt}}',
                # i.e. padded to a width equal to its own value, which
                # pushed the number further right on every frame.
                overlay_lines = (f'{cnt}', f'{scene_name}',
                                 f'{sample_token[:5]}')
                for text, y in zip(overlay_lines, (15, 35, 55)):
                    cv2.putText(
                        big_img,
                        text, (5, y),
                        fontFace=cv2.FONT_HERSHEY_COMPLEX,
                        color=(0, 0, 0),
                        fontScale=0.5)
                vout.write(big_img)
    finally:
        # Release the writer and the window even if a frame fails, so a
        # partially written video is still finalized.
        if vout is not None:
            vout.release()
        vis.destroy_window()
if __name__ == '__main__':
main()
\ No newline at end of file
import argparse
import sys
import os
sys.path.insert(0, os.getcwd())
import torch.onnx
from mmcv import Config
from mmdeploy.backend.tensorrt.utils import save, search_cuda_version
try:
# If mmdet version > 2.23.0, compat_cfg would be imported and
# used from mmdet instead of mmdet3d.
from mmdet.utils import compat_cfg
except ImportError:
from mmdet3d.utils import compat_cfg
import os
from typing import Dict, Optional, Sequence, Union
import h5py
import mmcv
import numpy as np
import onnx
import pycuda.driver as cuda
import tensorrt as trt
import torch
import tqdm
from mmcv.runner import load_checkpoint
from mmdeploy.apis.core import no_mp
from mmdeploy.backend.tensorrt.calib_utils import HDF5Calibrator
from mmdeploy.backend.tensorrt.init_plugins import load_tensorrt_plugin
from mmdeploy.utils import load_config
from packaging import version
from torch.utils.data import DataLoader
from mmdet3d.datasets import build_dataloader, build_dataset
from mmdet3d.models import build_model
from mmdet.datasets import replace_ImageToTensor
from tools.misc.fuse_conv_bn import fuse_module
class HDF5CalibratorBEVDet(HDF5Calibrator):
    """Calibrator that feeds BEVDet inputs stored in an HDF5 file.

    The 'img' input is read as float32; every other input is read as int32.
    """

    def get_batch(self, names: Sequence[str], **kwargs) -> list:
        """Return the device buffers of the next calibration sample.

        Args:
            names: input names for which data is requested.

        Returns:
            One device buffer per name, or None once every stored sample
            has been consumed.
        """
        if self.count >= self.dataset_length:
            return None

        if self.count % 100 == 0:
            print('%d/%d' % (self.count, self.dataset_length))

        sample_key = str(self.count)
        device_buffers = []
        for name in names:
            target_dtype = np.float32 if name == 'img' else np.int32
            host_data = self.calib_data[name][sample_key][...].astype(
                target_dtype)

            # Tile the sample up to the optimal shape so the value
            # distribution stays the same, then crop to exactly that shape.
            opt_shape = self.input_shapes[name]['opt_shape']
            repeats = [
                int(np.ceil(wanted / available))
                for wanted, available in zip(opt_shape, host_data.shape)
            ]
            host_data = np.tile(host_data, repeats)
            host_data = host_data[tuple(slice(0, end) for end in opt_shape)]

            # Copy the sample into freshly allocated device memory.
            device_ptr = cuda.mem_alloc(host_data.nbytes)
            cuda.memcpy_htod(device_ptr, np.ascontiguousarray(host_data))
            self.buffers[name] = device_ptr
            device_buffers.append(self.buffers[name])

        self.count += 1
        return device_buffers
def parse_args():
    """Parse the command-line options of the TensorRT conversion script.

    Returns:
        argparse.Namespace with the config, checkpoint and work dir plus the
        precision, fusion and calibration options.
    """
    parser = argparse.ArgumentParser(description='Deploy BEVDet with Tensorrt')

    positionals = (
        ('config', 'deploy config file path'),
        ('checkpoint', 'checkpoint file'),
        ('work_dir', 'work dir to save file'),
    )
    for arg_name, help_text in positionals:
        parser.add_argument(arg_name, help=help_text)

    parser.add_argument(
        '--prefix', default='bevdet', help='prefix of the save file name')

    switches = (
        ('--fp16', 'Whether to use tensorrt fp16'),
        ('--int8', 'Whether to use tensorrt int8'),
        ('--fuse-conv-bn',
         'Whether to fuse conv and bn, this will slightly increase'
         'the inference speed'),
    )
    for flag, help_text in switches:
        parser.add_argument(flag, action='store_true', help=help_text)

    parser.add_argument('--calib_num', type=int, help='num to calib')
    return parser.parse_args()
def get_plugin_names():
    """Return the names of all plugin creators in the TensorRT registry."""
    registry = trt.get_plugin_registry()
    names = []
    for creator in registry.plugin_creator_list:
        names.append(creator.name)
    return names
def create_calib_input_data_impl(calib_file: str,
                                 dataloader: DataLoader,
                                 model_partition: bool = False,
                                 metas: Optional[list] = None,
                                 calib_num=None) -> None:
    """Write calibration inputs to an HDF5 file.

    The file gets a ``calib_data/end2end`` group holding one sub-group for
    the images and one per BEV-pool index tensor. Every sample is stored
    under its running index.

    Args:
        calib_file: path of the HDF5 file to create (overwritten).
        dataloader: loader yielding dicts whose ``'img_inputs'`` entry
            carries the image tensor at ``[0][0]``.
        model_partition: must be False; partitioned models are not supported.
        metas: tensors for 'ranks_bev', 'ranks_depth', 'ranks_feat',
            'interval_starts' and 'interval_lengths', in that order. The
            same tensors are stored for every sample.
        calib_num: maximum number of samples to store; None stores all.
    """
    assert not model_partition
    input_keys = [
        'ranks_bev', 'ranks_depth', 'ranks_feat', 'interval_starts',
        'interval_lengths'
    ]
    # Convert once up front: the index tensors are identical for all samples.
    meta_arrays = [
        meta.int().detach().cpu().numpy()
        for meta in ([] if metas is None else metas)
    ]

    with h5py.File(calib_file, mode='w') as file:
        calib_data_group = file.create_group('calib_data')
        # create end2end group
        input_data_group = calib_data_group.create_group('end2end')
        input_group_img = input_data_group.create_group('img')
        input_groups = [
            input_data_group.create_group(input_key)
            for input_key in input_keys
        ]

        for data_id, input_data in enumerate(tqdm.tqdm(dataloader)):
            # data_id is zero-based, so stopping at ``>=`` stores exactly
            # calib_num samples (``>`` stored one extra).
            if (calib_num is not None) and (data_id >= calib_num):
                break
            # save end2end data
            input_tensor = input_data['img_inputs'][0][0]
            input_ndarray = input_tensor.squeeze(0).detach().cpu().numpy()
            input_group_img.create_dataset(
                str(data_id),
                shape=input_ndarray.shape,
                compression='gzip',
                compression_opts=4,
                data=input_ndarray)
            for kid in range(len(input_keys)):
                input_groups[kid].create_dataset(
                    str(data_id),
                    shape=meta_arrays[kid].shape,
                    compression='gzip',
                    compression_opts=4,
                    data=meta_arrays[kid])
            file.flush()
def create_calib_input_data(calib_file: str,
                            deploy_cfg: Union[str, mmcv.Config],
                            model_cfg: Union[str, mmcv.Config],
                            model_checkpoint: Optional[str] = None,
                            dataset_cfg: Optional[Union[str,
                                                        mmcv.Config]] = None,
                            dataset_type: str = 'val',
                            device: str = 'cpu',
                            metas: list = [None],
                            calib_num = None) -> None:
    """Build the calibration dataset used for post-training quantization.

    Args:
        calib_file (str): Path of the calibration data file to write.
        deploy_cfg (str | mmcv.Config): Deployment config file or
            Config object.
        model_cfg (str | mmcv.Config): Model config file or Config object.
        model_checkpoint (str): Checkpoint path of the PyTorch model.
            Not read by this function. Defaults to `None`.
        dataset_cfg (Optional[Union[str, mmcv.Config]], optional): Config
            providing the calibration dataset. `model_cfg` is used when it
            is None. Defaults to None.
        dataset_type (str, optional): The dataset type. Defaults to 'val'.
        device (str, optional): Device to create dataset. Defaults to 'cpu'.
        metas (list): Forwarded unchanged to create_calib_input_data_impl.
        calib_num: Forwarded unchanged to create_calib_input_data_impl.
    """
    with no_mp():
        # Fall back to the model config before and after it is loaded.
        dataset_cfg = model_cfg if dataset_cfg is None else dataset_cfg
        deploy_cfg, model_cfg = load_config(deploy_cfg, model_cfg)
        dataset_cfg = model_cfg if dataset_cfg is None else dataset_cfg
        dataset_cfg = load_config(dataset_cfg)[0]

        from mmdeploy.apis.utils import build_task_processor
        processor = build_task_processor(model_cfg, deploy_cfg, device)
        calib_dataset = processor.build_dataset(dataset_cfg, dataset_type)
        calib_loader = processor.build_dataloader(
            calib_dataset, 1, 1, dist=False, shuffle=False)

        create_calib_input_data_impl(
            calib_file,
            calib_loader,
            model_partition=False,
            metas=metas,
            calib_num=calib_num)
def from_onnx(onnx_model: Union[str, onnx.ModelProto],
              output_file_prefix: str,
              input_shapes: Dict[str, Sequence[int]],
              max_workspace_size: int = 0,
              fp16_mode: bool = False,
              int8_mode: bool = False,
              int8_param: Optional[dict] = None,
              device_id: int = 0,
              log_level: trt.Logger.Severity = trt.Logger.ERROR,
              **kwargs) -> trt.ICudaEngine:
    """Build a TensorRT engine from an ONNX model and save it to disk.

    Modified from mmdeploy.backend.tensorrt.utils.from_onnx

    Args:
        onnx_model: path of an ONNX file or an already loaded model.
        output_file_prefix: the engine is saved as ``<prefix>.engine``.
        input_shapes: per input name, a dict with 'min_shape', 'opt_shape'
            and 'max_shape' for the optimization profile.
        max_workspace_size: workspace size handed to the builder config.
        fp16_mode: enable the FP16 builder flag.
        int8_mode: enable the INT8 builder flag and attach a calibrator.
        int8_param: dict with 'calib_file' and 'model_type' (optionally
            'algorithm'); required when ``int8_mode`` is set.
        device_id: CUDA device index.
        log_level: severity threshold of the TensorRT logger.

    Returns:
        The built engine.

    Raises:
        RuntimeError: if the ONNX model cannot be parsed.
    """
    import os

    # Expose the requested device through CUDA_DEVICE only while
    # pycuda.autoinit is imported, then restore the previous state.
    saved_device = os.environ.get('CUDA_DEVICE', None)
    os.environ['CUDA_DEVICE'] = str(device_id)
    import pycuda.autoinit  # noqa:F401
    if saved_device is None:
        os.environ.pop('CUDA_DEVICE')
    else:
        os.environ['CUDA_DEVICE'] = saved_device

    load_tensorrt_plugin()

    # create builder and network
    trt_logger = trt.Logger(log_level)
    builder = trt.Builder(trt_logger)
    explicit_batch = 1 << (int)(
        trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
    network = builder.create_network(explicit_batch)

    # parse onnx
    parser = trt.OnnxParser(network, trt_logger)
    if isinstance(onnx_model, str):
        onnx_model = onnx.load(onnx_model)
    parsed = parser.parse(onnx_model.SerializeToString())
    if not parsed:
        error_msgs = ''.join(f'{parser.get_error(idx)}\n'
                             for idx in range(parser.num_errors))
        raise RuntimeError(f'Failed to parse onnx, {error_msgs}')

    # config builder
    if version.parse(trt.__version__) < version.parse('8'):
        builder.max_workspace_size = max_workspace_size

    config = builder.create_builder_config()
    config.max_workspace_size = max_workspace_size

    cuda_version = search_cuda_version()
    if cuda_version is not None:
        cuda_major = int(cuda_version.split('.')[0])
        if cuda_major < 11:
            # cu11 support cublasLt, so cudnn heuristic tactic should disable CUBLAS_LT # noqa E501
            cublas_lt_bit = 1 << int(trt.TacticSource.CUBLAS_LT)
            config.set_tactic_sources(
                config.get_tactic_sources() - cublas_lt_bit)

    profile = builder.create_optimization_profile()
    for input_name, shape_spec in input_shapes.items():
        profile.set_shape(input_name, shape_spec['min_shape'],
                          shape_spec['opt_shape'], shape_spec['max_shape'])
    config.add_optimization_profile(profile)

    if fp16_mode:
        if version.parse(trt.__version__) < version.parse('8'):
            builder.fp16_mode = fp16_mode
        config.set_flag(trt.BuilderFlag.FP16)

    if int8_mode:
        config.set_flag(trt.BuilderFlag.INT8)
        assert int8_param is not None
        calib_algorithm = int8_param.get(
            'algorithm', trt.CalibrationAlgoType.ENTROPY_CALIBRATION_2)
        config.int8_calibrator = HDF5CalibratorBEVDet(
            int8_param['calib_file'],
            input_shapes,
            model_type=int8_param['model_type'],
            device_id=device_id,
            algorithm=calib_algorithm)
        if version.parse(trt.__version__) < version.parse('8'):
            builder.int8_mode = int8_mode
            builder.int8_calibrator = config.int8_calibrator

    # create engine
    engine = builder.build_engine(network, config)
    assert engine is not None, 'Failed to create TensorRT engine'

    engine_path = output_file_prefix + '.engine'
    save(engine, engine_path)
    print('Save engine at ', engine_path)
    return engine
def _export_and_check_onnx(model, onnx_input, onnx_path, dynamic_axes,
                           input_names, output_names):
    """Export ``model`` to ``onnx_path`` and report whether the file validates."""
    torch.onnx.export(
        model,
        onnx_input,
        onnx_path,
        opset_version=11,
        dynamic_axes=dynamic_axes,
        input_names=input_names,
        output_names=output_names)
    print('output_names:', output_names)
    print('====== onnx is saved at : ', onnx_path)

    # check onnx model
    onnx_model = onnx.load(onnx_path)
    try:
        onnx.checker.check_model(onnx_model)
    except Exception:
        # Best effort: a failed check is reported, not fatal.
        print('ONNX Model Incorrect')
    else:
        print('ONNX Model Correct')


def main():
    """Export a BEVDet-style occupancy model to ONNX and build a TRT engine.

    Steps: load the config and plugins, build the test dataloader and the
    ``*TRT`` variant of the model, export two ONNX files from the first
    sample (raw outputs and the argmax variant), optionally create INT8
    calibration data, and finally build and save the TensorRT engine.

    All output files are placed inside ``args.work_dir``.
    """
    args = parse_args()
    max_workspace_size = 200*200*256*(2**8)

    os.makedirs(args.work_dir, exist_ok=True)

    load_tensorrt_plugin()
    assert 'bev_pool_v2' in get_plugin_names(), \
        'bev_pool_v2 is not in the plugin list of tensorrt, ' \
        'please install mmdeploy from ' \
        'https://github.com/HuangJunJie2017/mmdeploy.git'

    model_prefix = args.prefix
    if args.int8:
        model_prefix = model_prefix + '_int8'
    elif args.fp16:
        model_prefix = model_prefix + '_fp16'

    cfg = Config.fromfile(args.config)
    cfg.model.pretrained = None
    cfg.model.type = cfg.model.type + 'TRT'
    cfg = compat_cfg(cfg)
    cfg.gpu_ids = [0]

    # import modules from plugin/xx, registry will be updated
    if hasattr(cfg, 'plugin') and cfg.plugin:
        import importlib
        if hasattr(cfg, 'plugin_dir'):
            # 'a/b/c.py' -> module path 'a.b'
            _module_path = '.'.join(
                os.path.dirname(cfg.plugin_dir).split('/'))
            print(_module_path)
        else:
            # import dir is the dirpath for the config file
            _module_path = '.'.join(
                os.path.dirname(args.config).split('/'))
        importlib.import_module(_module_path)

    # build the dataloader
    test_dataloader_default_args = dict(
        samples_per_gpu=1, workers_per_gpu=2, dist=False, shuffle=False)

    if isinstance(cfg.data.test, dict):
        cfg.data.test.test_mode = True
        if cfg.data.test_dataloader.get('samples_per_gpu', 1) > 1:
            # Replace 'ImageToTensor' to 'DefaultFormatBundle'
            cfg.data.test.pipeline = replace_ImageToTensor(
                cfg.data.test.pipeline)
    elif isinstance(cfg.data.test, list):
        for ds_cfg in cfg.data.test:
            ds_cfg.test_mode = True
        if cfg.data.test_dataloader.get('samples_per_gpu', 1) > 1:
            for ds_cfg in cfg.data.test:
                ds_cfg.pipeline = replace_ImageToTensor(ds_cfg.pipeline)

    test_loader_cfg = {
        **test_dataloader_default_args,
        **cfg.data.get('test_dataloader', {})
    }

    dataset = build_dataset(cfg.data.test)
    data_loader = build_dataloader(dataset, **test_loader_cfg)

    # build the model and load checkpoint
    cfg.model.train_cfg = None
    model = build_model(cfg.model, test_cfg=cfg.get('test_cfg'))

    if os.path.exists(args.checkpoint):
        load_checkpoint(model, args.checkpoint, map_location='cpu')
    else:
        print(args.checkpoint, " does not exists!")

    if args.fuse_conv_bn:
        model_prefix = model_prefix + '_fuse'
        model = fuse_module(model)
    model.cuda()
    model.eval()

    # Join with os.path.join so that a work dir given without a trailing
    # slash still receives the files (plain string concatenation wrote
    # '<work_dir><prefix>.onnx' next to the directory instead).
    output_prefix = os.path.join(args.work_dir, model_prefix)
    onnx_path = output_prefix + '.onnx'
    argmax_onnx_path = output_prefix + '_with_argmax.onnx'

    model_name = model.__class__.__name__
    for i, data in enumerate(data_loader):
        inputs = [t.cuda() for t in data['img_inputs'][0]]
        img = inputs[0].squeeze(0)
        if img.shape[0] > 6:
            img = img[:6]

        mlp_input = None
        if model_name in ('FBOCCTRT', 'FBOCC2DTRT'):
            metas = model.get_bev_pool_input(
                inputs, img_metas=data['img_metas'])
        elif model_name == 'BEVDetOCCTRT':
            metas = model.get_bev_pool_input(inputs)
        elif model_name == 'BEVDepthOCCTRT':
            metas, mlp_input = model.get_bev_pool_input(inputs)
        else:
            # Previously surfaced later as a NameError on ``metas``.
            raise NotImplementedError(
                f'Unsupported model type for TensorRT export: {model_name}')

        # The export order of the index tensors (metas[1], metas[2],
        # metas[0], ...) must match ``input_names`` below.
        onnx_input = (img.float().contiguous(), metas[1].int().contiguous(),
                      metas[2].int().contiguous(), metas[0].int().contiguous(),
                      metas[3].int().contiguous(), metas[4].int().contiguous())
        dynamic_axes = {
            "ranks_depth": {0: 'M'},
            "ranks_feat": {0: 'M'},
            "ranks_bev": {0: 'M'},
            "interval_starts": {0: 'N'},
            "interval_lengths": {0: 'N'},
        }
        input_names = [
            'img', 'ranks_depth', 'ranks_feat', 'ranks_bev',
            'interval_starts', 'interval_lengths'
        ]
        if model_name == 'BEVDepthOCCTRT':
            onnx_input = onnx_input + (mlp_input, )
            input_names.append('mlp_input')

        with torch.no_grad():
            # Comparisons are kept explicit so that only real True/False
            # flag values select a branch.
            if (model.wdet3d == True) and (model.wocc == False):  # noqa: E712
                num_outputs = 6 * len(model.pts_bbox_head.task_heads)
            elif (model.wdet3d == True) and (model.wocc == True):  # noqa: E712
                num_outputs = 1 + 6 * len(model.pts_bbox_head.task_heads)
            elif (model.wdet3d == False) and (model.wocc == True):  # noqa: E712
                num_outputs = 1
            else:
                # Raising a bare string is itself a TypeError; raise a real
                # exception carrying the intended message.
                raise ValueError(
                    " At least one of wdet3d and wocc is set as True!! ")
            output_names = [f'output_{j}' for j in range(num_outputs)]

            # First export: the model's regular outputs.
            model.forward = model.forward_ori
            _export_and_check_onnx(model, onnx_input, onnx_path,
                                   dynamic_axes, input_names, output_names)

            # Second export: occupancy labels after argmax.
            model.forward = model.forward_with_argmax
            _export_and_check_onnx(model, onnx_input, argmax_onnx_path,
                                   dynamic_axes, input_names,
                                   ['cls_occ_label'])
        # Only the first sample is needed for tracing.
        break

    # convert to tensorrt
    num_points = metas[0].shape[0]
    num_intervals = metas[3].shape[0]
    img_shape = img.shape

    def _fixed_shape(shape):
        """Optimization-profile entry whose min/opt/max shapes coincide."""
        return dict(min_shape=shape, opt_shape=shape, max_shape=shape)

    input_shapes = dict(
        img=_fixed_shape(img_shape),
        ranks_depth=_fixed_shape([num_points]),
        ranks_feat=_fixed_shape([num_points]),
        ranks_bev=_fixed_shape([num_points]),
        interval_starts=_fixed_shape([num_intervals]),
        interval_lengths=_fixed_shape([num_intervals]))
    deploy_cfg = dict(
        backend_config=dict(
            type='tensorrt',
            common_config=dict(
                fp16_mode=args.fp16,
                max_workspace_size=max_workspace_size,
                int8_mode=args.int8),
            model_inputs=[dict(input_shapes=input_shapes)]),
        codebase_config=dict(
            type='mmdet3d', task='VoxelDetection', model_type='end2end'))

    calib_path = os.path.join(args.work_dir, 'calib_data.h5')
    if args.int8:
        create_calib_input_data(
            calib_path,
            deploy_cfg,
            args.config,
            args.checkpoint,
            dataset_cfg=None,
            dataset_type='val',
            device='cuda:0',
            metas=metas,
            calib_num=args.calib_num)

    from_onnx(
        onnx_path,
        output_prefix,
        fp16_mode=args.fp16,
        int8_mode=args.int8,
        int8_param=dict(calib_file=calib_path, model_type='end2end'),
        max_workspace_size=max_workspace_size,
        input_shapes=input_shapes)
if __name__ == '__main__':
main()
# Copyright (c) OpenMMLab. All rights reserved.
import pickle
import numpy as np
from nuscenes import NuScenes
from nuscenes.utils.data_classes import Box
from pyquaternion import Quaternion
from tools.data_converter import nuscenes_converter as nuscenes_converter
# Maps a nuScenes annotation category name to a detection class name.
# Categories mapped to 'ignore' are dropped by get_gt(), because 'ignore'
# is not listed in `classes`.
map_name_from_general_to_detection = {
'human.pedestrian.adult': 'pedestrian',
'human.pedestrian.child': 'pedestrian',
'human.pedestrian.wheelchair': 'ignore',
'human.pedestrian.stroller': 'ignore',
'human.pedestrian.personal_mobility': 'ignore',
'human.pedestrian.police_officer': 'pedestrian',
'human.pedestrian.construction_worker': 'pedestrian',
'animal': 'ignore',
'vehicle.car': 'car',
'vehicle.motorcycle': 'motorcycle',
'vehicle.bicycle': 'bicycle',
'vehicle.bus.bendy': 'bus',
'vehicle.bus.rigid': 'bus',
'vehicle.truck': 'truck',
'vehicle.construction': 'construction_vehicle',
'vehicle.emergency.ambulance': 'ignore',
'vehicle.emergency.police': 'ignore',
'vehicle.trailer': 'trailer',
'movable_object.barrier': 'barrier',
'movable_object.trafficcone': 'traffic_cone',
'movable_object.pushable_pullable': 'ignore',
'movable_object.debris': 'ignore',
'static_object.bicycle_rack': 'ignore',
}
# Detection classes; the position in this list is the integer label that
# get_gt() stores for a box.
classes = [
'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',
'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
]
def get_gt(info):
    """Build ground-truth boxes and labels for one sample.

    Annotations are moved from the global frame into the ego frame taken
    from the CAM_FRONT entry. Annotations whose category does not map to a
    detection class, or that contain no lidar and radar points, are skipped.

    Args:
        info (dict): Sample info with 'cams' and 'ann_infos' entries.

    Returns:
        list[np.ndarray]: One 9-value array per kept box:
            (x, y, z, dx, dy, dz, yaw, vx, vy).
        list[int]: Index into ``classes`` for every kept box.
    """
    front_cam = info['cams']['CAM_FRONT']
    # Inverse of the ego->global transform: translate first, then rotate.
    global2ego_trans = -np.array(front_cam['ego2global_translation'])
    global2ego_rot = Quaternion(front_cam['ego2global_rotation']).inverse

    gt_boxes = []
    gt_labels = []
    for ann_info in info['ann_infos']:
        det_name = map_name_from_general_to_detection[
            ann_info['category_name']]
        if det_name not in classes:
            continue
        if ann_info['num_lidar_pts'] + ann_info['num_radar_pts'] <= 0:
            continue

        # Use ego coordinate.
        box = Box(
            ann_info['translation'],
            ann_info['size'],
            Quaternion(ann_info['rotation']),
            velocity=ann_info['velocity'],
        )
        box.translate(global2ego_trans)
        box.rotate(global2ego_rot)

        components = [
            np.array(box.center),
            np.array(box.wlh)[[1, 0, 2]],  # reorder the first two extents
            np.array([box.orientation.yaw_pitch_roll[0]]),
            np.array(box.velocity[:2]),
        ]
        gt_boxes.append(np.concatenate(components))
        gt_labels.append(classes.index(det_name))
    return gt_boxes, gt_labels
def nuscenes_data_prep(root_path, info_prefix, version, max_sweeps=10):
    """Create the nuScenes info files.

    Thin wrapper that forwards all arguments to
    ``nuscenes_converter.create_nuscenes_infos``.

    Args:
        root_path (str): Path of dataset root.
        info_prefix (str): The prefix of info filenames.
        version (str): Dataset version.
        max_sweeps (int, optional): Number of input consecutive frames.
            Default: 10
    """
    converter_options = dict(version=version, max_sweeps=max_sweeps)
    nuscenes_converter.create_nuscenes_infos(root_path, info_prefix,
                                             **converter_options)
def add_ann_adj_info(extra_tag):
    """Augment the train/val info pickles with GT, scene and occupancy data.

    For each of the ``train`` and ``val`` info files under
    ``./data/nuscenes/`` the file is loaded, every sample info is extended
    in place, and the file is written back to the same path. Per sample the
    following keys are set: ``ann_infos`` (the result of ``get_gt``),
    ``scene_token``, ``scene_name`` and ``occ_path``.

    Args:
        extra_tag (str): Prefix of the info filenames, i.e. the files are
            ``<extra_tag>_infos_train.pkl`` and ``<extra_tag>_infos_val.pkl``.
    """
    nuscenes_version = 'v1.0-trainval'
    dataroot = './data/nuscenes/'
    nuscenes = NuScenes(nuscenes_version, dataroot)
    for split in ['train', 'val']:
        info_path = '%s/%s_infos_%s.pkl' % (dataroot, extra_tag, split)
        # NOTE: pickle.load executes arbitrary code from the file; only use
        # this on info files generated locally by the data-prep step.
        with open(info_path, 'rb') as fid:
            dataset = pickle.load(fid)

        infos = dataset['infos']
        num_infos = len(infos)
        for idx, info in enumerate(infos):
            if idx % 10 == 0:
                print('%d/%d' % (idx, num_infos))

            sample = nuscenes.get('sample', info['token'])
            ann_infos = []
            for ann_token in sample['anns']:
                ann_info = nuscenes.get('sample_annotation', ann_token)
                velocity = nuscenes.box_velocity(ann_info['token'])
                # Replace an undefined velocity by zero.
                if np.any(np.isnan(velocity)):
                    velocity = np.zeros(3)
                ann_info['velocity'] = velocity
                ann_infos.append(ann_info)

            # get_gt() reads the raw annotations from info['ann_infos'], so
            # they are stored first and then replaced by its result.
            info['ann_infos'] = ann_infos
            info['ann_infos'] = get_gt(info)
            info['scene_token'] = sample['scene_token']

            scene = nuscenes.get('scene', sample['scene_token'])
            info['scene_name'] = scene['name']
            info['occ_path'] = \
                './data/nuscenes/gts/%s/%s' % (scene['name'], info['token'])

        with open(info_path, 'wb') as fid:
            pickle.dump(dataset, fid)
if __name__ == '__main__':
    # Fixed settings of this script.
    dataset = 'nuscenes'
    version = 'v1.0'
    train_version = f'{version}-trainval'
    root_path = 'data/nuscenes'
    extra_tag = 'bevdetv2-nuscenes'

    # Step 1: create the basic info files (no sweeps).
    nuscenes_data_prep(
        root_path, extra_tag, train_version, max_sweeps=0)

    # Step 2: extend the info files written in step 1.
    print('add_ann_infos')
    add_ann_adj_info(extra_tag)
# Copyright (c) OpenMMLab. All rights reserved.
# Copyright (c) OpenMMLab. All rights reserved.
import pickle
from os import path as osp
import mmcv
import numpy as np
from mmcv import track_iter_progress
from mmcv.ops import roi_align
from pycocotools import mask as maskUtils
from pycocotools.coco import COCO
from mmdet3d.core.bbox import box_np_ops as box_np_ops
from mmdet3d.datasets import build_dataset
from mmdet.core.evaluation.bbox_overlaps import bbox_overlaps
def _poly2mask(mask_ann, img_h, img_w):
    """Decode a COCO-style mask annotation with ``pycocotools``.

    Args:
        mask_ann (list | dict): A list of polygons, or an RLE dict whose
            ``counts`` entry is either a list (uncompressed) or already
            encoded.
        img_h (int): Image height passed to ``frPyObjects``.
        img_w (int): Image width passed to ``frPyObjects``.

    Returns:
        The result of ``maskUtils.decode`` on the resulting RLE.
    """
    if isinstance(mask_ann, list):
        # Polygons: an object may consist of several parts, so every part
        # is encoded and the parts are merged into a single RLE.
        part_rles = maskUtils.frPyObjects(mask_ann, img_h, img_w)
        return maskUtils.decode(maskUtils.merge(part_rles))

    if isinstance(mask_ann['counts'], list):
        # Uncompressed RLE: needs encoding first.
        encoded = maskUtils.frPyObjects(mask_ann, img_h, img_w)
    else:
        # Already an encoded RLE.
        encoded = mask_ann
    return maskUtils.decode(encoded)
def _parse_coco_ann_info(ann_info):
    """Split COCO annotations into kept boxes, ignored boxes and masks.

    Annotations flagged ``ignore`` or with a non-positive ``area`` are
    dropped. ``iscrowd`` annotations go to ``bboxes_ignore``; all others go
    to ``bboxes`` and contribute their ``segmentation`` to ``masks`` (so
    ``masks`` is aligned with ``bboxes``). Boxes are converted from
    ``[x, y, w, h]`` to ``[x1, y1, x2, y2]``.

    Args:
        ann_info (list[dict]): COCO annotation records.

    Returns:
        dict: ``bboxes`` and ``bboxes_ignore`` as float32 arrays of shape
            (n, 4), and ``masks`` as a list of segmentation entries.
    """
    gt_bboxes = []
    gt_bboxes_ignore = []
    gt_masks_ann = []
    for ann in ann_info:
        if ann.get('ignore', False):
            continue
        x1, y1, w, h = ann['bbox']
        if ann['area'] <= 0:
            continue
        bbox = [x1, y1, x1 + w, y1 + h]
        if ann.get('iscrowd', False):
            gt_bboxes_ignore.append(bbox)
        else:
            gt_bboxes.append(bbox)
            gt_masks_ann.append(ann['segmentation'])

    def _as_box_array(boxes):
        # An empty list must still yield a (0, 4) array, not shape (0,).
        if boxes:
            return np.array(boxes, dtype=np.float32)
        return np.zeros((0, 4), dtype=np.float32)

    return dict(
        bboxes=_as_box_array(gt_bboxes),
        bboxes_ignore=_as_box_array(gt_bboxes_ignore),
        masks=gt_masks_ann)
def crop_image_patch_v2(pos_proposals, pos_assigned_gt_inds, gt_masks):
    """Crop fixed-size mask patches for proposals with ``roi_align``.

    Args:
        pos_proposals (torch.Tensor): Proposal boxes; a per-row index
            column is prepended to form the Nx5 RoIs.
        pos_assigned_gt_inds (torch.Tensor): Indices used to select, along
            dim 0, the entry of ``gt_masks`` for each proposal.
        gt_masks (np.ndarray): Masks, converted with ``torch.from_numpy``.

    Returns:
        torch.Tensor: Output of ``roi_align`` with dim 1 squeezed.
    """
    import torch
    from torch.nn.modules.utils import _pair

    device = pos_proposals.device
    out_size = _pair(28)

    # RoI batch index column: 0..N-1, same dtype as the proposals.
    roi_index = torch.arange(pos_proposals.size(0), device=device)
    roi_index = roi_index.to(dtype=pos_proposals.dtype)[:, None]
    rois = torch.cat([roi_index, pos_proposals], dim=1)  # Nx5
    rois = rois.to(device=device)

    selected_masks = torch.from_numpy(gt_masks).to(device)
    selected_masks = selected_masks.index_select(0, pos_assigned_gt_inds)
    selected_masks = selected_masks.to(dtype=rois.dtype)

    # Use RoIAlign could apparently accelerate the training (~0.1s/iter)
    aligned = roi_align(selected_masks, rois, out_size[::-1], 1.0, 0, True)
    return aligned.squeeze(1)
def crop_image_patch(pos_proposals, gt_masks, pos_assigned_gt_inds, org_img):
    """Crop masked image patches and mask patches for each proposal.

    For proposal ``i`` the mask ``gt_masks[pos_assigned_gt_inds[i]]`` is
    used. The box is truncated to int32 and the cropped window is
    ``[y1:y1 + h, x1:x1 + w]`` with ``w = max(x2 - x1 + 1, 1)`` and
    ``h = max(y2 - y1 + 1, 1)``.

    Args:
        pos_proposals (np.ndarray): Boxes, one ``x1, y1, x2, y2`` row per
            proposal.
        gt_masks (Sequence[np.ndarray]): Masks indexable by the entries of
            ``pos_assigned_gt_inds``.
        pos_assigned_gt_inds (Sequence[int]): Mask index for each proposal.
        org_img (np.ndarray): Image the masks are applied to; its leading
            two dimensions are assumed to match the masks.

    Returns:
        tuple[list, list]: ``(img_patches, masks)``, one entry per proposal.
    """
    num_pos = pos_proposals.shape[0]
    masks = []
    img_patches = []
    for i in range(num_pos):
        gt_mask = gt_masks[pos_assigned_gt_inds[i]]
        x1, y1, x2, y2 = pos_proposals[i, :].astype(np.int32)
        w = np.maximum(x2 - x1 + 1, 1)
        h = np.maximum(y2 - y1 + 1, 1)
        rows = slice(y1, y1 + h)
        cols = slice(x1, x1 + w)
        mask_patch = gt_mask[rows, cols]
        # Crop first, then mask: same values as masking the whole image and
        # cropping afterwards, without a full-image multiply per object.
        img_patch = mask_patch[..., None] * org_img[rows, cols]
        img_patches.append(img_patch)
        masks.append(mask_patch)
    return img_patches, masks
def create_groundtruth_database(dataset_class_name,
                                data_path,
                                info_prefix,
                                info_path=None,
                                mask_anno_path=None,
                                used_classes=None,
                                database_save_path=None,
                                db_info_save_path=None,
                                relative_path=True,
                                add_rgb=False,
                                lidar_only=False,
                                bev_only=False,
                                coors_range=None,
                                with_mask=False):
    """Given the raw data, generate the ground truth database.

    For every sample the points inside each GT box are shifted so the box
    center is the origin and written to ``<sample_idx>_<name>_<i>.bin`` in
    the database directory. One info dict per object is collected by class
    name and the whole mapping is pickled to ``db_info_save_path``.

    Args:
        dataset_class_name (str): Name of the input dataset. A loading
            pipeline is configured for 'KittiDataset', 'NuScenesDataset'
            and 'WaymoDataset'.
        data_path (str): Path of the data.
        info_prefix (str): Prefix of the info file.
        info_path (str, optional): Path of the info file.
            Default: None.
        mask_anno_path (str, optional): Path of the mask_anno, relative to
            ``data_path``. Only read when ``with_mask`` is True.
            Default: None.
        used_classes (list[str], optional): Classes have been used. When
            None, every class gets a db info entry. Default: None.
        database_save_path (str, optional): Path to save database.
            Default: None, i.e. ``<data_path>/<info_prefix>_gt_database``.
        db_info_save_path (str, optional): Path to save db_info.
            Default: None, i.e.
            ``<data_path>/<info_prefix>_dbinfos_train.pkl``.
        relative_path (bool, optional): Whether to use relative path.
            Not read by this function. Default: True.
        add_rgb (bool, optional): Not read by this function.
        lidar_only (bool, optional): Not read by this function.
        bev_only (bool, optional): Not read by this function.
        coors_range (optional): Not read by this function.
        with_mask (bool, optional): Whether to use mask.
            Default: False.
    """
    print(f'Create GT Database of {dataset_class_name}')
    dataset_cfg = dict(
        type=dataset_class_name, data_root=data_path, ann_file=info_path)
    if dataset_class_name == 'KittiDataset':
        file_client_args = dict(backend='disk')
        dataset_cfg.update(
            test_mode=False,
            split='training',
            modality=dict(
                use_lidar=True,
                use_depth=False,
                use_lidar_intensity=True,
                use_camera=with_mask,
            ),
            pipeline=[
                dict(
                    type='LoadPointsFromFile',
                    coord_type='LIDAR',
                    load_dim=4,
                    use_dim=4,
                    file_client_args=file_client_args),
                dict(
                    type='LoadAnnotations3D',
                    with_bbox_3d=True,
                    with_label_3d=True,
                    file_client_args=file_client_args)
            ])

    elif dataset_class_name == 'NuScenesDataset':
        dataset_cfg.update(
            use_valid_flag=True,
            pipeline=[
                dict(
                    type='LoadPointsFromFile',
                    coord_type='LIDAR',
                    load_dim=5,
                    use_dim=5),
                dict(
                    type='LoadPointsFromMultiSweeps',
                    sweeps_num=10,
                    use_dim=[0, 1, 2, 3, 4],
                    pad_empty_sweeps=True,
                    remove_close=True),
                dict(
                    type='LoadAnnotations3D',
                    with_bbox_3d=True,
                    with_label_3d=True)
            ])

    elif dataset_class_name == 'WaymoDataset':
        file_client_args = dict(backend='disk')
        dataset_cfg.update(
            test_mode=False,
            split='training',
            modality=dict(
                use_lidar=True,
                use_depth=False,
                use_lidar_intensity=True,
                use_camera=False,
            ),
            pipeline=[
                dict(
                    type='LoadPointsFromFile',
                    coord_type='LIDAR',
                    load_dim=6,
                    use_dim=6,
                    file_client_args=file_client_args),
                dict(
                    type='LoadAnnotations3D',
                    with_bbox_3d=True,
                    with_label_3d=True,
                    file_client_args=file_client_args)
            ])

    dataset = build_dataset(dataset_cfg)

    if database_save_path is None:
        database_save_path = osp.join(data_path, f'{info_prefix}_gt_database')
    if db_info_save_path is None:
        db_info_save_path = osp.join(data_path,
                                     f'{info_prefix}_dbinfos_train.pkl')
    mmcv.mkdir_or_exist(database_save_path)
    all_db_infos = dict()
    if with_mask:
        # Map image file names to COCO image ids for the mask lookup.
        coco = COCO(osp.join(data_path, mask_anno_path))
        imgIds = coco.getImgIds()
        file2id = dict()
        for i in imgIds:
            info = coco.loadImgs([i])[0]
            file2id[info['file_name']] = i

    # Group ids are unique across the whole dataset.
    group_counter = 0
    for j in track_iter_progress(list(range(len(dataset)))):
        input_dict = dataset.get_data_info(j)
        dataset.pre_pipeline(input_dict)
        example = dataset.pipeline(input_dict)
        annos = example['ann_info']
        image_idx = example['sample_idx']
        points = example['points'].tensor.numpy()
        gt_boxes_3d = annos['gt_bboxes_3d'].tensor.numpy()
        names = annos['gt_names']
        group_dict = dict()
        if 'group_ids' in annos:
            group_ids = annos['group_ids']
        else:
            # Without explicit groups every object is its own group.
            group_ids = np.arange(gt_boxes_3d.shape[0], dtype=np.int64)
        difficulty = np.zeros(gt_boxes_3d.shape[0], dtype=np.int32)
        if 'difficulty' in annos:
            difficulty = annos['difficulty']

        num_obj = gt_boxes_3d.shape[0]
        point_indices = box_np_ops.points_in_rbbox(points, gt_boxes_3d)

        if with_mask:
            # prepare masks
            gt_boxes = annos['gt_bboxes']
            img_path = osp.split(example['img_info']['filename'])[-1]
            if img_path not in file2id:
                print(f'skip image {img_path} for empty mask')
                continue
            img_id = file2id[img_path]
            kins_annIds = coco.getAnnIds(imgIds=img_id)
            kins_raw_info = coco.loadAnns(kins_annIds)
            kins_ann_info = _parse_coco_ann_info(kins_raw_info)
            h, w = annos['img_shape'][:2]
            gt_masks = [
                _poly2mask(mask, h, w) for mask in kins_ann_info['masks']
            ]
            # get mask inds based on iou mapping
            bbox_iou = bbox_overlaps(kins_ann_info['bboxes'], gt_boxes)
            mask_inds = bbox_iou.argmax(axis=0)
            valid_inds = (bbox_iou.max(axis=0) > 0.5)

            # mask the image
            # TODO: use a more precise (roi_align based) crop when it is
            # ready.
            object_img_patches, object_masks = crop_image_patch(
                gt_boxes, gt_masks, mask_inds, annos['img'])

        for i in range(num_obj):
            filename = f'{image_idx}_{names[i]}_{i}.bin'
            abs_filepath = osp.join(database_save_path, filename)
            rel_filepath = osp.join(f'{info_prefix}_gt_database', filename)

            # save point clouds and image patches for each object
            gt_points = points[point_indices[:, i]]
            # Boolean indexing returned a copy, so ``points`` is untouched.
            gt_points[:, :3] -= gt_boxes_3d[i, :3]

            if with_mask:
                if object_masks[i].sum() == 0 or not valid_inds[i]:
                    # Skip object for empty or invalid mask
                    continue
                img_patch_path = abs_filepath + '.png'
                mask_patch_path = abs_filepath + '.mask.png'
                mmcv.imwrite(object_img_patches[i], img_patch_path)
                mmcv.imwrite(object_masks[i], mask_patch_path)

            # Raw array bytes: the file must be opened in binary mode.
            with open(abs_filepath, 'wb') as f:
                gt_points.tofile(f)

            if (used_classes is None) or names[i] in used_classes:
                db_info = {
                    'name': names[i],
                    'path': rel_filepath,
                    'image_idx': image_idx,
                    'gt_idx': i,
                    'box3d_lidar': gt_boxes_3d[i],
                    'num_points_in_gt': gt_points.shape[0],
                    'difficulty': difficulty[i],
                }
                local_group_id = group_ids[i]
                # First occurrence of a per-sample group gets the next
                # dataset-wide id.
                if local_group_id not in group_dict:
                    group_dict[local_group_id] = group_counter
                    group_counter += 1
                db_info['group_id'] = group_dict[local_group_id]
                if 'score' in annos:
                    db_info['score'] = annos['score'][i]
                if with_mask:
                    db_info.update({'box2d_camera': gt_boxes[i]})
                all_db_infos.setdefault(names[i], []).append(db_info)

    for k, v in all_db_infos.items():
        print(f'load {len(v)} {k} database infos')

    with open(db_info_save_path, 'wb') as f:
        pickle.dump(all_db_infos, f)
class GTDatabaseCreater:
    """Given the raw data, generate the ground truth database. This is the
    parallel version. For serialized version, please refer to
    `create_groundtruth_database`

    Args:
        dataset_class_name (str): Name of the input dataset. A loading
            pipeline is configured for 'KittiDataset', 'NuScenesDataset'
            and 'WaymoDataset'.
        data_path (str): Path of the data.
        info_prefix (str): Prefix of the info file.
        info_path (str, optional): Path of the info file.
            Default: None.
        mask_anno_path (str, optional): Path of the mask_anno, relative to
            ``data_path``. Only read when ``with_mask`` is True.
            Default: None.
        used_classes (list[str], optional): Classes have been used. When
            None, every class gets a db info entry. Default: None.
        database_save_path (str, optional): Path to save database.
            Default: None, i.e. ``<data_path>/<info_prefix>_gt_database``.
        db_info_save_path (str, optional): Path to save db_info.
            Default: None, i.e.
            ``<data_path>/<info_prefix>_dbinfos_train.pkl``.
        relative_path (bool, optional): Whether to use relative path.
            Stored but not read by the methods of this class.
            Default: True.
        add_rgb (bool, optional): Stored but not read by this class.
        lidar_only (bool, optional): Stored but not read by this class.
        bev_only (bool, optional): Stored but not read by this class.
        coors_range (optional): Stored but not read by this class.
        with_mask (bool, optional): Whether to use mask.
            Default: False.
        num_worker (int, optional): the number of parallel workers to use.
            Default: 8.
    """

    def __init__(self,
                 dataset_class_name,
                 data_path,
                 info_prefix,
                 info_path=None,
                 mask_anno_path=None,
                 used_classes=None,
                 database_save_path=None,
                 db_info_save_path=None,
                 relative_path=True,
                 add_rgb=False,
                 lidar_only=False,
                 bev_only=False,
                 coors_range=None,
                 with_mask=False,
                 num_worker=8) -> None:
        self.dataset_class_name = dataset_class_name
        self.data_path = data_path
        self.info_prefix = info_prefix
        self.info_path = info_path
        self.mask_anno_path = mask_anno_path
        self.used_classes = used_classes
        self.database_save_path = database_save_path
        self.db_info_save_path = db_info_save_path
        self.relative_path = relative_path
        self.add_rgb = add_rgb
        self.lidar_only = lidar_only
        self.bev_only = bev_only
        self.coors_range = coors_range
        self.with_mask = with_mask
        self.num_worker = num_worker
        # Set by create() once the dataset has been built.
        self.pipeline = None

    def create_single(self, input_dict):
        """Write the GT database files of one sample.

        Args:
            input_dict (dict): Pre-processed data info of one sample, fed
                to ``self.pipeline``.

        Returns:
            dict[str, list[dict]]: Db infos of this sample keyed by class
                name. ``group_id`` values start at 0 for every sample;
                ``create`` makes them globally unique afterwards. Empty
                when ``with_mask`` is set and the image has no mask entry.
        """
        group_counter = 0
        single_db_infos = dict()
        example = self.pipeline(input_dict)
        annos = example['ann_info']
        image_idx = example['sample_idx']
        points = example['points'].tensor.numpy()
        gt_boxes_3d = annos['gt_bboxes_3d'].tensor.numpy()
        names = annos['gt_names']
        group_dict = dict()
        if 'group_ids' in annos:
            group_ids = annos['group_ids']
        else:
            # Without explicit groups every object is its own group.
            group_ids = np.arange(gt_boxes_3d.shape[0], dtype=np.int64)
        difficulty = np.zeros(gt_boxes_3d.shape[0], dtype=np.int32)
        if 'difficulty' in annos:
            difficulty = annos['difficulty']

        num_obj = gt_boxes_3d.shape[0]
        point_indices = box_np_ops.points_in_rbbox(points, gt_boxes_3d)

        if self.with_mask:
            # prepare masks
            gt_boxes = annos['gt_bboxes']
            img_path = osp.split(example['img_info']['filename'])[-1]
            if img_path not in self.file2id:
                print(f'skip image {img_path} for empty mask')
                return single_db_infos
            img_id = self.file2id[img_path]
            kins_annIds = self.coco.getAnnIds(imgIds=img_id)
            kins_raw_info = self.coco.loadAnns(kins_annIds)
            kins_ann_info = _parse_coco_ann_info(kins_raw_info)
            h, w = annos['img_shape'][:2]
            gt_masks = [
                _poly2mask(mask, h, w) for mask in kins_ann_info['masks']
            ]
            # get mask inds based on iou mapping
            bbox_iou = bbox_overlaps(kins_ann_info['bboxes'], gt_boxes)
            mask_inds = bbox_iou.argmax(axis=0)
            valid_inds = (bbox_iou.max(axis=0) > 0.5)

            # mask the image
            # TODO: use a more precise (roi_align based) crop when it is
            # ready.
            object_img_patches, object_masks = crop_image_patch(
                gt_boxes, gt_masks, mask_inds, annos['img'])

        for i in range(num_obj):
            filename = f'{image_idx}_{names[i]}_{i}.bin'
            abs_filepath = osp.join(self.database_save_path, filename)
            rel_filepath = osp.join(f'{self.info_prefix}_gt_database',
                                    filename)

            # save point clouds and image patches for each object
            gt_points = points[point_indices[:, i]]
            # Boolean indexing returned a copy, so ``points`` is untouched.
            gt_points[:, :3] -= gt_boxes_3d[i, :3]

            if self.with_mask:
                if object_masks[i].sum() == 0 or not valid_inds[i]:
                    # Skip object for empty or invalid mask
                    continue
                img_patch_path = abs_filepath + '.png'
                mask_patch_path = abs_filepath + '.mask.png'
                mmcv.imwrite(object_img_patches[i], img_patch_path)
                mmcv.imwrite(object_masks[i], mask_patch_path)

            # Raw array bytes: the file must be opened in binary mode.
            with open(abs_filepath, 'wb') as f:
                gt_points.tofile(f)

            if (self.used_classes is None) or names[i] in self.used_classes:
                db_info = {
                    'name': names[i],
                    'path': rel_filepath,
                    'image_idx': image_idx,
                    'gt_idx': i,
                    'box3d_lidar': gt_boxes_3d[i],
                    'num_points_in_gt': gt_points.shape[0],
                    'difficulty': difficulty[i],
                }
                local_group_id = group_ids[i]
                # First occurrence of a group gets the next per-sample id.
                if local_group_id not in group_dict:
                    group_dict[local_group_id] = group_counter
                    group_counter += 1
                db_info['group_id'] = group_dict[local_group_id]
                if 'score' in annos:
                    db_info['score'] = annos['score'][i]
                if self.with_mask:
                    db_info.update({'box2d_camera': gt_boxes[i]})
                single_db_infos.setdefault(names[i], []).append(db_info)

        return single_db_infos

    def create(self):
        """Build the dataset, process all samples in parallel and pickle
        the merged db infos to ``self.db_info_save_path``."""
        print(f'Create GT Database of {self.dataset_class_name}')
        dataset_cfg = dict(
            type=self.dataset_class_name,
            data_root=self.data_path,
            ann_file=self.info_path)
        if self.dataset_class_name == 'KittiDataset':
            file_client_args = dict(backend='disk')
            dataset_cfg.update(
                test_mode=False,
                split='training',
                modality=dict(
                    use_lidar=True,
                    use_depth=False,
                    use_lidar_intensity=True,
                    use_camera=self.with_mask,
                ),
                pipeline=[
                    dict(
                        type='LoadPointsFromFile',
                        coord_type='LIDAR',
                        load_dim=4,
                        use_dim=4,
                        file_client_args=file_client_args),
                    dict(
                        type='LoadAnnotations3D',
                        with_bbox_3d=True,
                        with_label_3d=True,
                        file_client_args=file_client_args)
                ])

        elif self.dataset_class_name == 'NuScenesDataset':
            dataset_cfg.update(
                use_valid_flag=True,
                pipeline=[
                    dict(
                        type='LoadPointsFromFile',
                        coord_type='LIDAR',
                        load_dim=5,
                        use_dim=5),
                    dict(
                        type='LoadPointsFromMultiSweeps',
                        sweeps_num=10,
                        use_dim=[0, 1, 2, 3, 4],
                        pad_empty_sweeps=True,
                        remove_close=True),
                    dict(
                        type='LoadAnnotations3D',
                        with_bbox_3d=True,
                        with_label_3d=True)
                ])

        elif self.dataset_class_name == 'WaymoDataset':
            file_client_args = dict(backend='disk')
            dataset_cfg.update(
                test_mode=False,
                split='training',
                modality=dict(
                    use_lidar=True,
                    use_depth=False,
                    use_lidar_intensity=True,
                    use_camera=False,
                ),
                pipeline=[
                    dict(
                        type='LoadPointsFromFile',
                        coord_type='LIDAR',
                        load_dim=6,
                        use_dim=6,
                        file_client_args=file_client_args),
                    dict(
                        type='LoadAnnotations3D',
                        with_bbox_3d=True,
                        with_label_3d=True,
                        file_client_args=file_client_args)
                ])

        dataset = build_dataset(dataset_cfg)
        self.pipeline = dataset.pipeline
        if self.database_save_path is None:
            self.database_save_path = osp.join(
                self.data_path, f'{self.info_prefix}_gt_database')
        if self.db_info_save_path is None:
            self.db_info_save_path = osp.join(
                self.data_path, f'{self.info_prefix}_dbinfos_train.pkl')
        mmcv.mkdir_or_exist(self.database_save_path)
        if self.with_mask:
            # Map image file names to COCO image ids for the mask lookup.
            self.coco = COCO(osp.join(self.data_path, self.mask_anno_path))
            imgIds = self.coco.getImgIds()
            self.file2id = dict()
            for i in imgIds:
                info = self.coco.loadImgs([i])[0]
                self.file2id[info['file_name']] = i

        def loop_dataset(i):
            input_dict = dataset.get_data_info(i)
            dataset.pre_pipeline(input_dict)
            return input_dict

        # The tasks are passed as (iterator, length).
        multi_db_infos = mmcv.track_parallel_progress(
            self.create_single, ((loop_dataset(i)
                                  for i in range(len(dataset))), len(dataset)),
            self.num_worker)
        print('Make global unique group id')
        group_counter_offset = 0
        all_db_infos = dict()
        for single_db_infos in track_iter_progress(multi_db_infos):
            # Largest per-sample group id seen; -1 means the sample is empty.
            group_id = -1
            for name, name_db_infos in single_db_infos.items():
                for db_info in name_db_infos:
                    group_id = max(group_id, db_info['group_id'])
                    db_info['group_id'] += group_counter_offset
                if name not in all_db_infos:
                    all_db_infos[name] = []
                all_db_infos[name].extend(name_db_infos)
            group_counter_offset += (group_id + 1)

        for k, v in all_db_infos.items():
            print(f'load {len(v)} {k} database infos')

        with open(self.db_info_save_path, 'wb') as f:
            pickle.dump(all_db_infos, f)
# Copyright (c) OpenMMLab. All rights reserved.
import os
import mmcv
import numpy as np
from tools.data_converter.s3dis_data_utils import S3DISData, S3DISSegData
from tools.data_converter.scannet_data_utils import ScanNetData, ScanNetSegData
from tools.data_converter.sunrgbd_data_utils import SUNRGBDData
def create_indoor_info_file(data_path,
                            pkl_prefix='sunrgbd',
                            save_path=None,
                            workers=4,
                            **kwargs):
    """Create the info pkl files of an indoor dataset.

    Collects the information of the raw data and dumps it as pkl files.

    Args:
        data_path (str): Path of the data. Must exist.
        pkl_prefix (str, optional): Prefix of the pkl to be saved; one of
            'sunrgbd', 'scannet' or 's3dis'. Default: 'sunrgbd'.
        save_path (str, optional): Directory of the pkl to be saved; falls
            back to ``data_path``. Must exist. Default: None.
        workers (int, optional): Number of threads to be used. Default: 4.
        kwargs (dict): Additional parameters for dataset-specific Data class.
            May include `use_v1` for SUN RGB-D and `num_points`.
    """
    assert os.path.exists(data_path)
    assert pkl_prefix in ['sunrgbd', 'scannet', 's3dis'], \
        f'unsupported indoor dataset {pkl_prefix}'
    if save_path is None:
        save_path = data_path
    assert os.path.exists(save_path)

    def _label_weight(x):
        # label weight computation function is adopted from
        # https://github.com/charlesq34/pointnet2/blob/master/scannet/scannet_dataset.py#L24
        return 1.0 / np.log(1.2 + x)

    is_scannet = pkl_prefix == 'scannet'

    # generate infos for both detection and segmentation task
    if pkl_prefix in ['sunrgbd', 'scannet']:
        train_filename = os.path.join(save_path,
                                      f'{pkl_prefix}_infos_train.pkl')
        val_filename = os.path.join(save_path, f'{pkl_prefix}_infos_val.pkl')
        if is_scannet:
            # ScanNet has a train-val-test split
            train_dataset = ScanNetData(root_path=data_path, split='train')
            val_dataset = ScanNetData(root_path=data_path, split='val')
            test_dataset = ScanNetData(root_path=data_path, split='test')
            test_filename = os.path.join(save_path,
                                         f'{pkl_prefix}_infos_test.pkl')
        else:
            # SUN RGB-D has a train-val split
            num_points = kwargs.get('num_points', -1)
            use_v1 = kwargs.get('use_v1', False)
            sunrgbd_options = dict(
                root_path=data_path, use_v1=use_v1, num_points=num_points)
            train_dataset = SUNRGBDData(split='train', **sunrgbd_options)
            val_dataset = SUNRGBDData(split='val', **sunrgbd_options)

        infos_train = train_dataset.get_infos(
            num_workers=workers, has_label=True)
        mmcv.dump(infos_train, train_filename, 'pkl')
        print(f'{pkl_prefix} info train file is saved to {train_filename}')

        infos_val = val_dataset.get_infos(num_workers=workers, has_label=True)
        mmcv.dump(infos_val, val_filename, 'pkl')
        print(f'{pkl_prefix} info val file is saved to {val_filename}')

    if is_scannet:
        infos_test = test_dataset.get_infos(
            num_workers=workers, has_label=False)
        mmcv.dump(infos_test, test_filename, 'pkl')
        print(f'{pkl_prefix} info test file is saved to {test_filename}')

        # generate infos for the semantic segmentation task
        # e.g. re-sampled scene indexes and label weights
        # scene indexes are used to re-sample rooms with different number of
        # points, label weights are used to balance classes with different
        # number of points
        num_points = kwargs.get('num_points', 8192)
        seg_train = ScanNetSegData(
            data_root=data_path,
            ann_file=train_filename,
            split='train',
            num_points=num_points,
            label_weight_func=_label_weight)
        # TODO: do we need to generate on val set?
        seg_val = ScanNetSegData(
            data_root=data_path,
            ann_file=val_filename,
            split='val',
            num_points=num_points,
            label_weight_func=_label_weight)
        # no need to generate for test set
        seg_train.get_seg_infos()
        seg_val.get_seg_infos()
    elif pkl_prefix == 's3dis':
        # S3DIS doesn't have a fixed train-val split
        # it has 6 areas instead, so we generate info file for each of them
        # in training, we will use dataset to wrap different areas
        for split in [f'Area_{i}' for i in range(1, 7)]:
            area_data = S3DISData(root_path=data_path, split=split)
            area_info = area_data.get_infos(
                num_workers=workers, has_label=True)
            filename = os.path.join(save_path,
                                    f'{pkl_prefix}_infos_{split}.pkl')
            mmcv.dump(area_info, filename, 'pkl')
            print(f'{pkl_prefix} info {split} file is saved to {filename}')
            num_points = kwargs.get('num_points', 4096)
            area_seg = S3DISSegData(
                data_root=data_path,
                ann_file=filename,
                split=split,
                num_points=num_points,
                label_weight_func=_label_weight)
            area_seg.get_seg_infos()
# Copyright (c) OpenMMLab. All rights reserved.
from collections import OrderedDict
from pathlib import Path
import mmcv
import numpy as np
from nuscenes.utils.geometry_utils import view_points
from mmdet3d.core.bbox import box_np_ops, points_cam2img
from .kitti_data_utils import WaymoInfoGatherer, get_kitti_image_info
from .nuscenes_converter import post_process_coords
# Category names kept as an immutable module-level constant.
# NOTE(review): presumably the KITTI classes of interest; the code that reads
# this tuple is outside the visible part of the file - confirm before editing.
kitti_categories = ('Pedestrian', 'Cyclist', 'Car')
def convert_to_kitti_info_version2(info):
    """Upgrade a flat (v1) KITTI info dict to the nested (v2) layout in place.

    Nothing happens when ``info`` already has all of the ``image``,
    ``calib`` and ``point_cloud`` keys. Otherwise the three nested dicts
    are (re)built from the flat v1 keys; the flat keys are left in place.

    Args:
        info (dict): Info of the input kitti data.

            - image (dict): image info
            - calib (dict): calibration info
            - point_cloud (dict): point cloud info
    """
    v2_keys = ('image', 'calib', 'point_cloud')
    if all(key in info for key in v2_keys):
        return

    info['image'] = dict(
        image_shape=info['img_shape'],
        image_idx=info['image_idx'],
        image_path=info['img_path'],
    )
    info['calib'] = dict(
        R0_rect=info['calib/R0_rect'],
        Tr_velo_to_cam=info['calib/Tr_velo_to_cam'],
        P2=info['calib/P2'],
    )
    info['point_cloud'] = dict(velodyne_path=info['velodyne_path'])
def _read_imageset_file(path):
    """Read an ImageSets split file into a list of integer sample ids.

    Each non-blank line is parsed with ``int``; blank or whitespace-only
    lines (e.g. a stray empty line at the end of the file) are skipped
    instead of raising ``ValueError``.

    Args:
        path (str): Path of the text file, one id per line.

    Returns:
        list[int]: The ids in file order.
    """
    with open(path, 'r') as f:
        return [int(line) for line in f if line.strip()]
class _NumPointsInGTCalculater:
    """Count the points that fall inside each ground truth box. This is
    the parallel version. For the serialized version, please refer to
    `_calculate_num_points_in_gt`.

    Args:
        data_path (str): Path of the data.
        relative_path (bool): Whether the velodyne paths in the infos are
            relative to ``data_path``.
        remove_outside (bool, optional): Whether to remove points which are
            outside of image. Default: True.
        num_features (int, optional): Number of features per point.
            Default: 4.
        num_worker (int, optional): the number of parallel workers to use.
            Default: 8.
    """

    def __init__(self,
                 data_path,
                 relative_path,
                 remove_outside=True,
                 num_features=4,
                 num_worker=8) -> None:
        self.num_worker = num_worker
        self.num_features = num_features
        self.remove_outside = remove_outside
        self.relative_path = relative_path
        self.data_path = data_path

    def calculate_single(self, info):
        """Add ``num_points_in_gt`` to ``info['annos']`` and return ``info``.

        Objects named 'DontCare' are expected at the end of the
        annotations; they get a count of -1.
        """
        pc_info = info['point_cloud']
        image_info = info['image']
        calib = info['calib']

        velodyne_file = pc_info['velodyne_path']
        if self.relative_path:
            velodyne_file = str(Path(self.data_path) / velodyne_file)
        raw_points = np.fromfile(velodyne_file, dtype=np.float32, count=-1)
        points_v = raw_points.reshape([-1, self.num_features])

        rect = calib['R0_rect']
        Trv2c = calib['Tr_velo_to_cam']
        P2 = calib['P2']
        if self.remove_outside:
            points_v = box_np_ops.remove_outside_points(
                points_v, rect, Trv2c, P2, image_info['image_shape'])

        annos = info['annos']
        num_obj = len([n for n in annos['name'] if n != 'DontCare'])
        # Camera-frame boxes: location, dimensions, rotation_y.
        gt_boxes_camera = np.concatenate(
            [
                annos['location'][:num_obj],
                annos['dimensions'][:num_obj],
                annos['rotation_y'][:num_obj][..., np.newaxis],
            ],
            axis=1)
        gt_boxes_lidar = box_np_ops.box_camera_to_lidar(
            gt_boxes_camera, rect, Trv2c)
        inside = box_np_ops.points_in_rbbox(points_v[:, :3], gt_boxes_lidar)
        counts = inside.sum(0)

        # Pad with -1 for the ignored objects after the first num_obj ones.
        num_ignored = len(annos['dimensions']) - num_obj
        counts = np.concatenate([counts, -np.ones([num_ignored])])
        annos['num_points_in_gt'] = counts.astype(np.int32)
        return info

    def calculate(self, infos):
        """Run ``calculate_single`` on every info in parallel and store the
        results back into ``infos`` position by position."""
        results = mmcv.track_parallel_progress(self.calculate_single, infos,
                                               self.num_worker)
        for position, updated_info in enumerate(results):
            infos[position] = updated_info
def _calculate_num_points_in_gt(data_path,
                                infos,
                                relative_path,
                                remove_outside=True,
                                num_features=4):
    """Add ``num_points_in_gt`` to the annotations of every info, in place.

    Args:
        data_path (str): Path of the data.
        infos (list[dict]): Infos with ``point_cloud``, ``image``,
            ``calib`` and ``annos`` entries.
        relative_path (bool): Whether the velodyne paths in the infos are
            relative to ``data_path``.
        remove_outside (bool, optional): Whether to remove points which are
            outside of image. Default: True.
        num_features (int, optional): Number of features per point.
            Default: 4.
    """
    for info in mmcv.track_iter_progress(infos):
        pc_info = info['point_cloud']
        image_info = info['image']
        calib = info['calib']

        velodyne_file = pc_info['velodyne_path']
        if relative_path:
            velodyne_file = str(Path(data_path) / velodyne_file)
        raw_points = np.fromfile(velodyne_file, dtype=np.float32, count=-1)
        points_v = raw_points.reshape([-1, num_features])

        rect = calib['R0_rect']
        Trv2c = calib['Tr_velo_to_cam']
        P2 = calib['P2']
        if remove_outside:
            points_v = box_np_ops.remove_outside_points(
                points_v, rect, Trv2c, P2, image_info['image_shape'])

        annos = info['annos']
        # 'DontCare' objects are expected at the end of the annotations.
        num_obj = len([n for n in annos['name'] if n != 'DontCare'])
        # Camera-frame boxes: location, dimensions, rotation_y.
        gt_boxes_camera = np.concatenate(
            [
                annos['location'][:num_obj],
                annos['dimensions'][:num_obj],
                annos['rotation_y'][:num_obj][..., np.newaxis],
            ],
            axis=1)
        gt_boxes_lidar = box_np_ops.box_camera_to_lidar(
            gt_boxes_camera, rect, Trv2c)
        inside = box_np_ops.points_in_rbbox(points_v[:, :3], gt_boxes_lidar)
        counts = inside.sum(0)

        # Ignored objects get a count of -1.
        num_ignored = len(annos['dimensions']) - num_obj
        counts = np.concatenate([counts, -np.ones([num_ignored])])
        annos['num_points_in_gt'] = counts.astype(np.int32)
def create_kitti_info_file(data_path,
                           pkl_prefix='kitti',
                           with_plane=False,
                           save_path=None,
                           relative_path=True):
    """Create info file of KITTI dataset.

    Reads the train/val/test id lists from ``<data_path>/ImageSets`` and
    dumps the train, val, trainval and test info files in pkl format.

    Args:
        data_path (str): Path of the data root.
        pkl_prefix (str, optional): Prefix of the info file to be generated.
            Default: 'kitti'.
        with_plane (bool, optional): Whether to use plane information
            (train and val only; the test infos never use it).
            Default: False.
        save_path (str, optional): Path to save the info file; falls back
            to ``data_path``. Default: None.
        relative_path (bool, optional): Whether to use relative path.
            Default: True.
    """
    imageset_folder = Path(data_path) / 'ImageSets'
    train_img_ids = _read_imageset_file(str(imageset_folder / 'train.txt'))
    val_img_ids = _read_imageset_file(str(imageset_folder / 'val.txt'))
    test_img_ids = _read_imageset_file(str(imageset_folder / 'test.txt'))

    print('Generate info. this may take several minutes.')
    out_dir = Path(data_path) if save_path is None else Path(save_path)

    # Options shared by every split.
    shared = dict(velodyne=True, calib=True, relative_path=relative_path)

    # train
    kitti_infos_train = get_kitti_image_info(
        data_path,
        training=True,
        with_plane=with_plane,
        image_ids=train_img_ids,
        **shared)
    _calculate_num_points_in_gt(data_path, kitti_infos_train, relative_path)
    filename = out_dir / f'{pkl_prefix}_infos_train.pkl'
    print(f'Kitti info train file is saved to {filename}')
    mmcv.dump(kitti_infos_train, filename)

    # val
    kitti_infos_val = get_kitti_image_info(
        data_path,
        training=True,
        with_plane=with_plane,
        image_ids=val_img_ids,
        **shared)
    _calculate_num_points_in_gt(data_path, kitti_infos_val, relative_path)
    filename = out_dir / f'{pkl_prefix}_infos_val.pkl'
    print(f'Kitti info val file is saved to {filename}')
    mmcv.dump(kitti_infos_val, filename)

    # trainval = train followed by val
    filename = out_dir / f'{pkl_prefix}_infos_trainval.pkl'
    print(f'Kitti info trainval file is saved to {filename}')
    mmcv.dump(kitti_infos_train + kitti_infos_val, filename)

    # test: no labels, no plane, no point counting
    kitti_infos_test = get_kitti_image_info(
        data_path,
        training=False,
        label_info=False,
        with_plane=False,
        image_ids=test_img_ids,
        **shared)
    filename = out_dir / f'{pkl_prefix}_infos_test.pkl'
    print(f'Kitti info test file is saved to {filename}')
    mmcv.dump(kitti_infos_test, filename)
def create_waymo_info_file(data_path,
                           pkl_prefix='waymo',
                           save_path=None,
                           relative_path=True,
                           max_sweeps=5,
                           workers=8):
    """Create info file of waymo dataset.

    Reads the train/val/test id lists from ``<data_path>/ImageSets`` and
    dumps the train, val, trainval and test info files in pkl format.

    Args:
        data_path (str): Path of the data root.
        pkl_prefix (str, optional): Prefix of the info file to be generated.
            Default: 'waymo'.
        save_path (str, optional): Path to save the info file; falls back
            to ``data_path``. Default: None.
        relative_path (bool, optional): Whether to use relative path.
            Default: True.
        max_sweeps (int, optional): Max sweeps before the detection frame
            to be used. Default: 5.
        workers (int, optional): Number of parallel workers handed to the
            info gatherers and the point counter. Default: 8.
    """
    imageset_folder = Path(data_path) / 'ImageSets'
    train_img_ids = _read_imageset_file(str(imageset_folder / 'train.txt'))
    val_img_ids = _read_imageset_file(str(imageset_folder / 'val.txt'))
    test_img_ids = _read_imageset_file(str(imageset_folder / 'test.txt'))

    print('Generate info. this may take several minutes.')
    out_dir = Path(data_path) if save_path is None else Path(save_path)

    # Options shared by both gatherers.
    gather_options = dict(
        velodyne=True,
        calib=True,
        pose=True,
        relative_path=relative_path,
        max_sweeps=max_sweeps,
        num_worker=workers)
    waymo_infos_gatherer_trainval = WaymoInfoGatherer(
        data_path, training=True, **gather_options)
    waymo_infos_gatherer_test = WaymoInfoGatherer(
        data_path, training=False, label_info=False, **gather_options)
    num_points_in_gt_calculater = _NumPointsInGTCalculater(
        data_path,
        relative_path,
        num_features=6,
        remove_outside=False,
        num_worker=workers)

    # train
    waymo_infos_train = waymo_infos_gatherer_trainval.gather(train_img_ids)
    num_points_in_gt_calculater.calculate(waymo_infos_train)
    filename = out_dir / f'{pkl_prefix}_infos_train.pkl'
    print(f'Waymo info train file is saved to {filename}')
    mmcv.dump(waymo_infos_train, filename)

    # val
    waymo_infos_val = waymo_infos_gatherer_trainval.gather(val_img_ids)
    num_points_in_gt_calculater.calculate(waymo_infos_val)
    filename = out_dir / f'{pkl_prefix}_infos_val.pkl'
    print(f'Waymo info val file is saved to {filename}')
    mmcv.dump(waymo_infos_val, filename)

    # trainval = train followed by val
    filename = out_dir / f'{pkl_prefix}_infos_trainval.pkl'
    print(f'Waymo info trainval file is saved to {filename}')
    mmcv.dump(waymo_infos_train + waymo_infos_val, filename)

    # test: gathered without labels, no point counting
    waymo_infos_test = waymo_infos_gatherer_test.gather(test_img_ids)
    filename = out_dir / f'{pkl_prefix}_infos_test.pkl'
    print(f'Waymo info test file is saved to {filename}')
    mmcv.dump(waymo_infos_test, filename)
def _create_reduced_point_cloud(data_path,
                                info_path,
                                save_path=None,
                                back=False,
                                num_features=4,
                                front_camera_id=2):
    """Create reduced point clouds for given info.

    For every sample in the info file the point cloud is loaded, passed
    through ``box_np_ops.remove_outside_points`` with the sample's
    calibration and image shape, and written back to disk.

    Args:
        data_path (str): Path of original data.
        info_path (str): Path of data info.
        save_path (str, optional): Path to save reduced point cloud
            data. If None, a sibling directory named
            ``<velodyne dir>_reduced`` is used. Default: None.
        back (bool, optional): Whether to flip the points to back.
            Default: False.
        num_features (int, optional): Number of point features. Default: 4.
        front_camera_id (int, optional): The referenced/front camera ID.
            Default: 2.
    """
    kitti_infos = mmcv.load(info_path)

    for info in mmcv.track_iter_progress(kitti_infos):
        pc_info = info['point_cloud']
        image_info = info['image']
        calib = info['calib']

        v_path = Path(data_path) / pc_info['velodyne_path']
        points_v = np.fromfile(
            str(v_path), dtype=np.float32,
            count=-1).reshape([-1, num_features])
        rect = calib['R0_rect']
        if front_camera_id == 2:
            P2 = calib['P2']
        else:
            P2 = calib[f'P{str(front_camera_id)}']
        Trv2c = calib['Tr_velo_to_cam']

        # Mirror the x axis so that the region behind the sensor is kept.
        if back:
            points_v[:, 0] = -points_v[:, 0]
        points_v = box_np_ops.remove_outside_points(points_v, rect, Trv2c, P2,
                                                    image_info['image_shape'])

        if save_path is None:
            save_dir = v_path.parent.parent / (v_path.parent.stem + '_reduced')
            # exist_ok avoids a race between the existence check and mkdir.
            save_dir.mkdir(exist_ok=True)
            target_dir = save_dir
        else:
            target_dir = Path(save_path)
        # Work on a str: the '_back' suffix is appended to the complete file
        # name, and ``Path += str`` would raise a TypeError.
        save_filename = str(target_dir / v_path.name)
        if back:
            save_filename += '_back'

        # The payload is raw float32 data, so open the file in binary mode.
        with open(save_filename, 'wb') as f:
            points_v.tofile(f)
def create_reduced_point_cloud(data_path,
                               pkl_prefix,
                               train_info_path=None,
                               val_info_path=None,
                               test_info_path=None,
                               save_path=None,
                               with_back=False):
    """Create reduced point clouds for training/validation/testing.

    Info paths that are not given default to
    ``<data_path>/<pkl_prefix>_infos_{train,val,test}.pkl``.

    Args:
        data_path (str): Path of original data.
        pkl_prefix (str): Prefix of info files.
        train_info_path (str, optional): Path of training set info.
            Default: None.
        val_info_path (str, optional): Path of validation set info.
            Default: None.
        test_info_path (str, optional): Path of test set info.
            Default: None.
        save_path (str, optional): Path to save reduced point cloud data.
            Default: None.
        with_back (bool, optional): Whether to flip the points to back.
            Default: False.
    """
    root = Path(data_path)
    if train_info_path is None:
        train_info_path = root / f'{pkl_prefix}_infos_train.pkl'
    if val_info_path is None:
        val_info_path = root / f'{pkl_prefix}_infos_val.pkl'
    if test_info_path is None:
        test_info_path = root / f'{pkl_prefix}_infos_test.pkl'

    jobs = (
        ('create reduced point cloud for training set', train_info_path),
        ('create reduced point cloud for validation set', val_info_path),
        ('create reduced point cloud for testing set', test_info_path),
    )

    # Forward-facing pass over every split.
    for message, split_info_path in jobs:
        print(message)
        _create_reduced_point_cloud(data_path, split_info_path, save_path)

    # Optional second pass producing the flipped ("back") point clouds.
    if with_back:
        for _, split_info_path in jobs:
            _create_reduced_point_cloud(
                data_path, split_info_path, save_path, back=True)
def export_2d_annotation(root_path, info_path, mono3d=True):
    """Export 2d annotation from the info file and raw data.

    The result is a COCO-style dict with ``annotations``, ``images`` and
    ``categories``, dumped next to the info file as
    ``<info_path without its last 4 characters>[_mono3d].coco.json``.

    Args:
        root_path (str): Root path of the raw data.
        info_path (str): Path of the info file.
        mono3d (bool, optional): Whether to export mono3d annotation.
            Default: True.
    """
    from os import path as osp

    kitti_infos = mmcv.load(info_path)
    categories = [
        dict(id=kitti_categories.index(cat_name), name=cat_name)
        for cat_name in kitti_categories
    ]
    coco_2d_dict = dict(annotations=[], images=[], categories=categories)
    next_ann_id = 0

    for info in mmcv.track_iter_progress(kitti_infos):
        coco_infos = get_2d_boxes(info, occluded=[0, 1, 2, 3], mono3d=mono3d)

        image_meta = info['image']
        calib = info['calib']
        # The image is read only to obtain its height and width.
        image = mmcv.imread(osp.join(root_path, image_meta['image_path']))
        height, width, _ = image.shape
        coco_2d_dict['images'].append(
            dict(
                file_name=image_meta['image_path'],
                id=image_meta['image_idx'],
                Tri2v=calib['Tr_imu_to_velo'],
                Trv2c=calib['Tr_velo_to_cam'],
                rect=calib['R0_rect'],
                cam_intrinsic=calib['P2'],
                width=width,
                height=height))

        for coco_info in coco_infos:
            # get_2d_boxes may yield None entries; they are dropped here.
            if coco_info is None:
                continue
            # add an empty key for coco format
            coco_info['segmentation'] = []
            coco_info['id'] = next_ann_id
            coco_2d_dict['annotations'].append(coco_info)
            next_ann_id += 1

    suffix = '_mono3d' if mono3d else ''
    json_prefix = f'{info_path[:-4]}{suffix}'
    mmcv.dump(coco_2d_dict, f'{json_prefix}.coco.json')
def get_2d_boxes(info, occluded, mono3d=True):
    """Get the 2D annotation records for a given info.

    Unlike a plain in-place filter, this function leaves ``info['annos']``
    untouched: the occlusion filter is applied to a local copy.

    Args:
        info (dict): Information of the given sample data. Reads
            ``info['calib']`` (``P0``, ``P2``), ``info['image']``
            (``image_idx``, ``image_path``) and, if present,
            ``info['annos']``.
        occluded (Container[int]): Occlusion states to keep. The KITTI
            states are 0 = fully visible, 1 = partly occluded,
            2 = largely occluded, 3 = unknown, -1 = DontCare.
        mono3d (bool): Whether to get boxes with mono3d annotation.

    Return:
        list[dict]: List of 2D annotation records belonging to the input
            sample. An entry is None when ``generate_record`` returns
            None for that annotation.
    """
    # Get calibration information
    camera_intrinsic = info['calib']['P2']

    repro_recs = []
    # if no annotations in info (test dataset), then return
    if 'annos' not in info:
        return repro_recs

    # Keep only the annotations with the requested occlusion states. The
    # filtered arrays go into a new dict so the caller's info is not mutated.
    annos = info['annos']
    keep = np.array([ocld in occluded for ocld in annos['occluded']],
                    dtype=bool)
    ann_dicts = {key: value[keep] for key, value in annos.items()}

    # convert dict of list to list of dict
    ann_recs = [{key: value[i]
                 for key, value in ann_dicts.items()}
                for i in range(len(ann_dicts['occluded']))]

    # Shift from the bottom-centre origin [0.5, 1.0, 0.5] to the geometric
    # centre [0.5, 0.5, 0.5].
    center_shift = np.array([0.5, 0.5, 0.5]) - np.array([0.5, 1.0, 0.5])
    sample_data_token = info['image']['image_idx']

    for ann_idx, ann_rec in enumerate(ann_recs):
        # Augment sample_annotation with token information.
        ann_rec['sample_annotation_token'] = \
            f"{info['image']['image_idx']}.{ann_idx}"
        ann_rec['sample_data_token'] = sample_data_token

        loc = ann_rec['location'][np.newaxis, :]
        dim = ann_rec['dimensions'][np.newaxis, :]
        rot = ann_rec['rotation_y'][np.newaxis, np.newaxis]
        loc = loc + dim * center_shift

        # Horizontal offset derived from the P2 and P0 projection matrices;
        # applied only to the exported 3D location.
        offset = (info['calib']['P2'][0, 3] - info['calib']['P0'][0, 3]) \
            / info['calib']['P2'][0, 0]
        loc_3d = np.copy(loc)
        loc_3d[0, 0] += offset
        gt_bbox_3d = np.concatenate([loc, dim, rot], axis=1).astype(np.float32)

        # Filter out the corners that are not in front of the calibrated
        # sensor.
        corners_3d = box_np_ops.center_to_corner_box3d(
            gt_bbox_3d[:, :3],
            gt_bbox_3d[:, 3:6],
            gt_bbox_3d[:, 6], [0.5, 0.5, 0.5],
            axis=1)
        corners_3d = corners_3d[0].T  # (1, 8, 3) -> (3, 8)
        in_front = np.argwhere(corners_3d[2, :] > 0).flatten()
        corners_3d = corners_3d[:, in_front]

        # Project 3d box to 2d.
        corner_coords = view_points(corners_3d, camera_intrinsic,
                                    True).T[:, :2].tolist()

        # Keep only corners that fall within the image.
        final_coords = post_process_coords(corner_coords)

        # Skip if the convex hull of the re-projected corners
        # does not intersect the image canvas.
        if final_coords is None:
            continue
        min_x, min_y, max_x, max_y = final_coords

        # Generate dictionary record to be included in the .json file.
        repro_rec = generate_record(ann_rec, min_x, min_y, max_x, max_y,
                                    sample_data_token,
                                    info['image']['image_path'])

        # If mono3d=True, add 3D annotations in camera coordinates
        if mono3d and (repro_rec is not None):
            repro_rec['bbox_cam3d'] = np.concatenate(
                [loc_3d, dim, rot],
                axis=1).astype(np.float32).squeeze().tolist()
            repro_rec['velo_cam3d'] = -1  # no velocity in KITTI

            center3d = np.array(loc).reshape([1, 3])
            center2d = points_cam2img(
                center3d, camera_intrinsic, with_depth=True)
            repro_rec['center2d'] = center2d.squeeze().tolist()
            # normalized center2D + depth
            # samples with depth <= 0 are dropped
            if repro_rec['center2d'][2] <= 0:
                continue

            repro_rec['attribute_name'] = -1  # no attribute in KITTI
            repro_rec['attribute_id'] = -1

        repro_recs.append(repro_rec)

    return repro_recs
def generate_record(ann_rec, x1, y1, x2, y2, sample_data_token, filename):
    """Generate one 2D annotation record given various information on top of
    the 2D bounding box coordinates.

    Args:
        ann_rec (dict): Original 3d annotation record.
        x1 (float): Minimum value of the x coordinate.
        y1 (float): Minimum value of the y coordinate.
        x2 (float): Maximum value of the x coordinate.
        y2 (float): Maximum value of the y coordinate.
        sample_data_token (str): Sample data token.
        filename (str): The corresponding image file where the annotation
            is present.

    Returns:
        dict | None: A sample 2D annotation record, or None when the
            category of ``ann_rec`` is not in ``kitti_categories``.

            - file_name (str): file name
            - image_id (str): sample data token
            - area (float): 2d box area
            - category_name (str): category name
            - category_id (int): category id
            - bbox (list[float]): left x, top y, x_size, y_size of 2d box
            - iscrowd (int): whether the area is crowd
    """
    key_mapping = {
        'name': 'category_name',
        'num_points_in_gt': 'num_lidar_pts',
        'sample_annotation_token': 'sample_annotation_token',
        'sample_data_token': 'sample_data_token',
    }

    # Intermediate record with renamed keys; only its category name feeds the
    # returned COCO record.
    repro_rec = OrderedDict()
    repro_rec['sample_data_token'] = sample_data_token
    for src_key, value in ann_rec.items():
        if src_key in key_mapping:
            repro_rec[key_mapping[src_key]] = value
    repro_rec['bbox_corners'] = [x1, y1, x2, y2]
    repro_rec['filename'] = filename

    area = (y2 - y1) * (x2 - x1)
    cat_name = repro_rec['category_name']
    if cat_name not in kitti_categories:
        return None

    return dict(
        file_name=filename,
        image_id=sample_data_token,
        area=area,
        category_name=cat_name,
        category_id=kitti_categories.index(cat_name),
        bbox=[x1, y1, x2 - x1, y2 - y1],
        iscrowd=0)
# Copyright (c) OpenMMLab. All rights reserved.
from collections import OrderedDict
from concurrent import futures as futures
from os import path as osp
from pathlib import Path
import mmcv
import numpy as np
from PIL import Image
from skimage import io
def get_image_index_str(img_idx, use_prefix_id=False):
    """Format an image index as a zero-padded decimal string.

    Args:
        img_idx (int): Index of the sample.
        use_prefix_id (bool, optional): Pad to 7 digits instead of 6.
            Default: False.

    Returns:
        str: The zero-padded index.
    """
    width = 7 if use_prefix_id else 6
    return f'{img_idx:0{width}d}'
def get_kitti_info_path(idx,
                        prefix,
                        info_type='image_2',
                        file_tail='.png',
                        training=True,
                        relative_path=True,
                        exist_check=True,
                        use_prefix_id=False):
    """Build the path ``{training|testing}/<info_type>/<index><file_tail>``.

    Args:
        idx (int): Index of the sample.
        prefix (str): Root directory of the dataset.
        info_type (str, optional): Sub-directory name. Default: 'image_2'.
        file_tail (str, optional): File extension. Default: '.png'.
        training (bool, optional): Use the 'training' split directory,
            otherwise 'testing'. Default: True.
        relative_path (bool, optional): Return the path relative to
            ``prefix`` instead of joined with it. Default: True.
        exist_check (bool, optional): Raise if the file is missing under
            ``prefix``. Default: True.
        use_prefix_id (bool, optional): Use the 7-digit index format.
            Default: False.

    Returns:
        str: The requested path.

    Raises:
        ValueError: If ``exist_check`` is set and the file does not exist.
    """
    root = Path(prefix)
    split_dir = 'training' if training else 'testing'
    file_name = get_image_index_str(idx, use_prefix_id) + file_tail
    file_path = Path(split_dir) / info_type / file_name

    if exist_check and not (root / file_path).exists():
        raise ValueError('file not exist: {}'.format(file_path))

    return str(file_path) if relative_path else str(root / file_path)
def get_image_path(idx,
                   prefix,
                   training=True,
                   relative_path=True,
                   exist_check=True,
                   info_type='image_2',
                   use_prefix_id=False):
    """Return the path of the ``.png`` image for sample ``idx``."""
    return get_kitti_info_path(
        idx,
        prefix,
        info_type=info_type,
        file_tail='.png',
        training=training,
        relative_path=relative_path,
        exist_check=exist_check,
        use_prefix_id=use_prefix_id)
def get_label_path(idx,
                   prefix,
                   training=True,
                   relative_path=True,
                   exist_check=True,
                   info_type='label_2',
                   use_prefix_id=False):
    """Return the path of the ``.txt`` label file for sample ``idx``."""
    return get_kitti_info_path(
        idx,
        prefix,
        info_type=info_type,
        file_tail='.txt',
        training=training,
        relative_path=relative_path,
        exist_check=exist_check,
        use_prefix_id=use_prefix_id)
def get_plane_path(idx,
                   prefix,
                   training=True,
                   relative_path=True,
                   exist_check=True,
                   info_type='planes',
                   use_prefix_id=False):
    """Return the path of the ``.txt`` plane file for sample ``idx``."""
    return get_kitti_info_path(
        idx,
        prefix,
        info_type=info_type,
        file_tail='.txt',
        training=training,
        relative_path=relative_path,
        exist_check=exist_check,
        use_prefix_id=use_prefix_id)
def get_velodyne_path(idx,
                      prefix,
                      training=True,
                      relative_path=True,
                      exist_check=True,
                      use_prefix_id=False):
    """Return the path of the ``velodyne/*.bin`` file for sample ``idx``."""
    return get_kitti_info_path(
        idx,
        prefix,
        info_type='velodyne',
        file_tail='.bin',
        training=training,
        relative_path=relative_path,
        exist_check=exist_check,
        use_prefix_id=use_prefix_id)
def get_calib_path(idx,
                   prefix,
                   training=True,
                   relative_path=True,
                   exist_check=True,
                   use_prefix_id=False):
    """Return the path of the ``calib/*.txt`` file for sample ``idx``."""
    return get_kitti_info_path(
        idx,
        prefix,
        info_type='calib',
        file_tail='.txt',
        training=training,
        relative_path=relative_path,
        exist_check=exist_check,
        use_prefix_id=use_prefix_id)
def get_pose_path(idx,
                  prefix,
                  training=True,
                  relative_path=True,
                  exist_check=True,
                  use_prefix_id=False):
    """Return the path of the ``pose/*.txt`` file for sample ``idx``."""
    return get_kitti_info_path(
        idx,
        prefix,
        info_type='pose',
        file_tail='.txt',
        training=training,
        relative_path=relative_path,
        exist_check=exist_check,
        use_prefix_id=use_prefix_id)
def get_timestamp_path(idx,
                       prefix,
                       training=True,
                       relative_path=True,
                       exist_check=True,
                       use_prefix_id=False):
    """Return the path of the ``timestamp/*.txt`` file for sample ``idx``."""
    return get_kitti_info_path(
        idx,
        prefix,
        info_type='timestamp',
        file_tail='.txt',
        training=training,
        relative_path=relative_path,
        exist_check=exist_check,
        use_prefix_id=use_prefix_id)
def get_label_anno(label_path):
    """Parse a KITTI-format label file into a dict of numpy arrays.

    Each non-blank line is split on whitespace into the fields
    ``name truncated occluded alpha bbox(4) dimensions(3) location(3)
    rotation_y [score]``. Blank lines and repeated separators are
    tolerated.

    Args:
        label_path (str): Path of the label file.

    Returns:
        dict: Annotation arrays with keys ``name``, ``truncated``,
            ``occluded``, ``alpha``, ``bbox`` (N, 4), ``dimensions`` (N, 3),
            ``location`` (N, 3), ``rotation_y``, ``score``, ``index`` and
            ``group_ids``.
    """
    with open(label_path, 'r') as f:
        # Skip blank lines (e.g. a trailing empty line) instead of crashing
        # on the missing fields.
        content = [line.split() for line in f if line.strip()]

    names = [x[0] for x in content]
    num_gt = len(names)
    num_objects = sum(1 for name in names if name != 'DontCare')

    annotations = {}
    annotations['name'] = np.array(names)
    annotations['truncated'] = np.array([float(x[1]) for x in content])
    annotations['occluded'] = np.array([int(x[2]) for x in content])
    annotations['alpha'] = np.array([float(x[3]) for x in content])
    annotations['bbox'] = np.array([[float(v) for v in x[4:8]]
                                    for x in content]).reshape(-1, 4)
    # dimensions will convert hwl format to standard lhw(camera) format.
    annotations['dimensions'] = np.array([[float(v) for v in x[8:11]]
                                          for x in content
                                          ]).reshape(-1, 3)[:, [2, 0, 1]]
    annotations['location'] = np.array([[float(v) for v in x[11:14]]
                                        for x in content]).reshape(-1, 3)
    annotations['rotation_y'] = np.array([float(x[14])
                                          for x in content]).reshape(-1)
    if content and len(content[0]) == 16:  # have score
        annotations['score'] = np.array([float(x[15]) for x in content])
    else:
        annotations['score'] = np.zeros((annotations['bbox'].shape[0], ))

    # The first num_objects entries are numbered 0..num_objects-1 and the
    # remaining ones get -1.
    # NOTE(review): this presumes DontCare rows come last in the file.
    index = list(range(num_objects)) + [-1] * (num_gt - num_objects)
    annotations['index'] = np.array(index, dtype=np.int32)
    annotations['group_ids'] = np.arange(num_gt, dtype=np.int32)
    return annotations
def _extend_matrix(mat):
mat = np.concatenate([mat, np.array([[0., 0., 0., 1.]])], axis=0)
return mat
def get_kitti_image_info(path,
                         training=True,
                         label_info=True,
                         velodyne=False,
                         calib=False,
                         with_plane=False,
                         image_ids=7481,
                         extend_matrix=True,
                         num_worker=8,
                         relative_path=True,
                         with_imageshape=True):
    """
    KITTI annotation format version 2:
    {
        [optional]points: [N, 3+] point cloud
        [optional, for kitti]image: {
            image_idx: ...
            image_path: ...
            image_shape: ...
        }
        point_cloud: {
            num_features: 4
            velodyne_path: ...
        }
        [optional, for kitti]calib: {
            R0_rect: ...
            Tr_velo_to_cam: ...
            P2: ...
        }
        annos: {
            location: [num_gt, 3] array
            dimensions: [num_gt, 3] array
            rotation_y: [num_gt] angle array
            name: [num_gt] ground truth name array
            [optional]difficulty: kitti difficulty
            [optional]group_ids: used for multi-part object
        }
    }

    The per-sample infos are gathered with a thread pool of ``num_worker``
    threads and returned as a list, one dict per entry of ``image_ids``
    (an int ``image_ids`` is expanded to ``range(image_ids)``).
    """
    root_path = Path(path)
    if not isinstance(image_ids, list):
        image_ids = list(range(image_ids))

    def _read_matrix(line, num_values, shape):
        # The first space-separated token of a calibration line is its name;
        # the following num_values tokens are the matrix entries.
        tokens = line.split(' ')[1:1 + num_values]
        return np.array([float(tok) for tok in tokens]).reshape(shape)

    def _maybe_extend(mat):
        return _extend_matrix(mat) if extend_matrix else mat

    def map_func(idx):
        pc_info = {'num_features': 4}
        image_info = {'image_idx': idx}
        annotations = None

        if velodyne:
            pc_info['velodyne_path'] = get_velodyne_path(
                idx, path, training, relative_path)
        image_info['image_path'] = get_image_path(idx, path, training,
                                                  relative_path)
        if with_imageshape:
            img_path = image_info['image_path']
            if relative_path:
                img_path = str(root_path / img_path)
            image_info['image_shape'] = np.array(
                io.imread(img_path).shape[:2], dtype=np.int32)
        if label_info:
            label_path = get_label_path(idx, path, training, relative_path)
            if relative_path:
                label_path = str(root_path / label_path)
            annotations = get_label_anno(label_path)

        info = {'image': image_info, 'point_cloud': pc_info}

        if calib:
            calib_path = get_calib_path(
                idx, path, training, relative_path=False)
            with open(calib_path, 'r') as f:
                lines = f.readlines()

            # Lines 0-3: P0..P3, line 4: R0_rect, lines 5-6: Tr_* matrices.
            projections = [
                _maybe_extend(_read_matrix(lines[row], 12, [3, 4]))
                for row in range(4)
            ]
            R0_rect = _read_matrix(lines[4], 9, [3, 3])
            if extend_matrix:
                rect_4x4 = np.eye(4, dtype=R0_rect.dtype)
                rect_4x4[:3, :3] = R0_rect
            else:
                rect_4x4 = R0_rect
            Tr_velo_to_cam = _maybe_extend(_read_matrix(lines[5], 12, [3, 4]))
            Tr_imu_to_velo = _maybe_extend(_read_matrix(lines[6], 12, [3, 4]))

            calib_info = {
                f'P{cam}': mat
                for cam, mat in enumerate(projections)
            }
            calib_info['R0_rect'] = rect_4x4
            calib_info['Tr_velo_to_cam'] = Tr_velo_to_cam
            calib_info['Tr_imu_to_velo'] = Tr_imu_to_velo
            info['calib'] = calib_info

        if with_plane:
            plane_path = get_plane_path(idx, path, training, relative_path)
            if relative_path:
                plane_path = str(root_path / plane_path)
            plane_lines = mmcv.list_from_file(plane_path)
            # The plane coefficients are read from the fourth line.
            info['plane'] = np.array(
                [float(i) for i in plane_lines[3].split()])

        if annotations is not None:
            info['annos'] = annotations
            add_difficulty_to_annos(info)
        return info

    with futures.ThreadPoolExecutor(num_worker) as executor:
        return list(executor.map(map_func, image_ids))
class WaymoInfoGatherer:
    """
    Parallel version of waymo dataset information gathering.
    Waymo annotation format version like KITTI:
    {
        [optional]points: [N, 3+] point cloud
        [optional, for kitti]image: {
            image_idx: ...
            image_path: ...
            image_shape: ...
        }
        point_cloud: {
            num_features: 6
            velodyne_path: ...
        }
        [optional, for kitti]calib: {
            R0_rect: ...
            Tr_velo_to_cam0: ...
            P0: ...
        }
        annos: {
            location: [num_gt, 3] array
            dimensions: [num_gt, 3] array
            rotation_y: [num_gt] angle array
            name: [num_gt] ground truth name array
            [optional]difficulty: kitti difficulty
            [optional]group_ids: used for multi-part object
        }
    }

    Args:
        path (str): Root directory of the converted dataset.
        training (bool, optional): Read from the 'training' split
            directory, otherwise 'testing'. Default: True.
        label_info (bool, optional): Load label files. Default: True.
        velodyne (bool, optional): Record the point cloud path and the
            frame timestamp. Default: False.
        calib (bool, optional): Load calibration matrices. Default: False.
        pose (bool, optional): Load the pose file. Default: False.
        extend_matrix (bool, optional): Extend calibration matrices to
            4x4. Default: True.
        num_worker (int, optional): Number of parallel workers used by
            :meth:`gather`. Default: 8.
        relative_path (bool, optional): Store paths relative to ``path``.
            Default: True.
        with_imageshape (bool, optional): Record the image shape.
            Default: True.
        max_sweeps (int, optional): Max number of previous frames recorded
            as sweeps. Default: 5.
    """

    def __init__(self,
                 path,
                 training=True,
                 label_info=True,
                 velodyne=False,
                 calib=False,
                 pose=False,
                 extend_matrix=True,
                 num_worker=8,
                 relative_path=True,
                 with_imageshape=True,
                 max_sweeps=5) -> None:
        self.path = path
        self.training = training
        self.label_info = label_info
        self.velodyne = velodyne
        self.calib = calib
        self.pose = pose
        self.extend_matrix = extend_matrix
        self.num_worker = num_worker
        self.relative_path = relative_path
        self.with_imageshape = with_imageshape
        self.max_sweeps = max_sweeps

    @staticmethod
    def _read_matrix(line, num_values, shape):
        """Parse ``num_values`` floats following the name token of a line."""
        tokens = line.split(' ')[1:1 + num_values]
        return np.array([float(tok) for tok in tokens]).reshape(shape)

    def _maybe_extend(self, mat):
        """Extend ``mat`` with a ``[0, 0, 0, 1]`` row if configured to."""
        return _extend_matrix(mat) if self.extend_matrix else mat

    def _read_timestamp(self, idx):
        """Read the integer stored in the timestamp file of frame ``idx``."""
        timestamp_path = get_timestamp_path(
            idx,
            self.path,
            self.training,
            relative_path=False,
            use_prefix_id=True)
        with open(timestamp_path) as f:
            return np.int64(f.read())

    def _read_pose(self, idx):
        """Load the pose file of frame ``idx`` with ``np.loadtxt``."""
        pose_path = get_pose_path(
            idx,
            self.path,
            self.training,
            relative_path=False,
            use_prefix_id=True)
        return np.loadtxt(pose_path)

    def _read_calib(self, idx):
        """Load P0..P4, R0_rect and Tr_velo_to_cam of frame ``idx``."""
        calib_path = get_calib_path(
            idx,
            self.path,
            self.training,
            relative_path=False,
            use_prefix_id=True)
        with open(calib_path, 'r') as f:
            lines = f.readlines()

        # Lines 0-4: P0..P4, line 5: R0_rect, line 6: Tr_velo_to_cam.
        calib_info = {}
        for cam in range(5):
            calib_info[f'P{cam}'] = self._maybe_extend(
                self._read_matrix(lines[cam], 12, [3, 4]))

        R0_rect = self._read_matrix(lines[5], 9, [3, 3])
        if self.extend_matrix:
            rect_4x4 = np.zeros([4, 4], dtype=R0_rect.dtype)
            rect_4x4[3, 3] = 1.
            rect_4x4[:3, :3] = R0_rect
        else:
            rect_4x4 = R0_rect
        calib_info['R0_rect'] = rect_4x4
        calib_info['Tr_velo_to_cam'] = self._maybe_extend(
            self._read_matrix(lines[6], 12, [3, 4]))
        return calib_info

    def _gather_sweeps(self, idx):
        """Collect up to ``max_sweeps`` consecutive frames preceding ``idx``.

        Walks backwards one index at a time and stops at the first frame
        whose point cloud file does not exist.
        """
        root_path = Path(self.path)
        sweeps = []
        prev_idx = idx
        while len(sweeps) < self.max_sweeps:
            prev_idx -= 1
            velodyne_path = get_velodyne_path(
                prev_idx,
                self.path,
                self.training,
                self.relative_path,
                exist_check=False,
                use_prefix_id=True)
            # With relative_path=False the returned path already contains
            # self.path; joining it again would point to a wrong location
            # whenever self.path is itself relative.
            if self.relative_path:
                on_disk = root_path / velodyne_path
            else:
                on_disk = velodyne_path
            if not osp.exists(on_disk):
                break
            sweeps.append({
                'velodyne_path': velodyne_path,
                'timestamp': self._read_timestamp(prev_idx),
                'pose': self._read_pose(prev_idx),
            })
        return sweeps

    def gather_single(self, idx):
        """Gather the info dict of one frame.

        Args:
            idx (int): Frame index.

        Returns:
            dict: Frame info. Always holds ``image``, ``point_cloud`` and
                ``sweeps``; ``timestamp``, ``calib``, ``pose`` and ``annos``
                are present when the matching option is enabled.
        """
        root_path = Path(self.path)
        info = {}
        pc_info = {'num_features': 6}
        image_info = {'image_idx': idx}
        annotations = None

        if self.velodyne:
            pc_info['velodyne_path'] = get_velodyne_path(
                idx,
                self.path,
                self.training,
                self.relative_path,
                use_prefix_id=True)
            info['timestamp'] = self._read_timestamp(idx)

        image_info['image_path'] = get_image_path(
            idx,
            self.path,
            self.training,
            self.relative_path,
            info_type='image_0',
            use_prefix_id=True)
        if self.with_imageshape:
            img_path = image_info['image_path']
            if self.relative_path:
                img_path = str(root_path / img_path)
            # io using PIL is significantly faster than skimage; the context
            # manager closes the file handle once the size has been read.
            with Image.open(img_path) as img:
                w, h = img.size
            image_info['image_shape'] = np.array((h, w), dtype=np.int32)

        if self.label_info:
            label_path = get_label_path(
                idx,
                self.path,
                self.training,
                self.relative_path,
                info_type='label_all',
                use_prefix_id=True)
            if self.relative_path:
                label_path = str(root_path / label_path)
            annotations = get_label_anno(label_path)

        info['image'] = image_info
        info['point_cloud'] = pc_info

        if self.calib:
            info['calib'] = self._read_calib(idx)
        if self.pose:
            info['pose'] = self._read_pose(idx)

        if annotations is not None:
            info['annos'] = annotations
            # The label field parsed as 'score' is stored under 'camera_id'.
            info['annos']['camera_id'] = info['annos'].pop('score')
            add_difficulty_to_annos(info)

        info['sweeps'] = self._gather_sweeps(idx)
        return info

    def gather(self, image_ids):
        """Gather infos for all given frames in parallel.

        Args:
            image_ids (list[int] | int): Frame indices; an int ``n`` is
                expanded to ``range(n)``.

        Returns:
            list[dict]: One info dict per frame.
        """
        if not isinstance(image_ids, list):
            image_ids = list(range(image_ids))
        image_infos = mmcv.track_parallel_progress(self.gather_single,
                                                   image_ids, self.num_worker)
        return list(image_infos)
def kitti_anno_to_label_file(annos, folder):
    """Write each annotation dict to ``<folder>/<image index>.txt``.

    Args:
        annos (list[dict]): Annotations; each must provide
            ``metadata.image_idx`` plus per-object ``name``, ``alpha``,
            ``bbox``, ``location``, ``dimensions``, ``rotation_y`` and
            ``score``.
        folder (str): Output directory.
    """
    out_dir = Path(folder)
    exported_fields = ('name', 'alpha', 'bbox', 'location', 'dimensions',
                       'rotation_y', 'score')

    for anno in annos:
        image_idx = anno['metadata']['image_idx']
        num_objects = anno['bbox'].shape[0]
        label_lines = [
            kitti_result_line(
                {field: anno[field][j]
                 for field in exported_fields}) for j in range(num_objects)
        ]
        label_file = out_dir / f'{get_image_index_str(image_idx)}.txt'
        with open(label_file, 'w') as f:
            f.write('\n'.join(label_lines))
def add_difficulty_to_annos(info):
    """Compute the KITTI difficulty level of every annotation.

    An object gets the lowest level (0 = easy, 1 = moderate, 2 = hard)
    whose thresholds on 2D box height, occlusion and truncation it
    satisfies, or -1 if it satisfies none. The result is stored in
    ``info['annos']['difficulty']`` as an int32 array.

    Args:
        info (dict): Sample info; ``info['annos']`` must provide
            ``dimensions``, ``bbox``, ``occluded`` and ``truncated``.

    Returns:
        list[int]: Difficulty level per annotation.
    """
    # minimum height for evaluated groundtruth/detections
    min_height = (40, 25, 25)
    # maximum occlusion level of the groundtruth used for evaluation
    max_occlusion = (0, 1, 2)
    # maximum truncation level of the groundtruth used for evaluation
    max_trunc = (0.15, 0.3, 0.5)

    annos = info['annos']
    dims = annos['dimensions']  # lhw format
    bbox = annos['bbox']
    height = bbox[:, 3] - bbox[:, 1]
    occlusion = np.asarray(annos['occluded'])
    truncation = np.asarray(annos['truncated'])

    difficulty = np.full((len(dims), ), -1, dtype=np.int32)
    # The thresholds only get looser from easy to hard, so writing the levels
    # from hardest to easiest leaves each object with the easiest level it
    # qualifies for. Plain ``bool`` masks are used because the ``np.bool``
    # alias no longer exists in current NumPy releases.
    for level in (2, 1, 0):
        rejected = ((occlusion > max_occlusion[level])
                    | (height <= min_height[level])
                    | (truncation > max_trunc[level]))
        difficulty[~rejected] = level

    annos['difficulty'] = difficulty
    return difficulty.tolist()
def kitti_result_line(result_dict, precision=4):
    """Format one detection/annotation as a KITTI label line.

    Fields are emitted in the fixed order name, truncated, occluded, alpha,
    bbox, dimensions, location, rotation_y, score and joined with single
    spaces. Fields missing from ``result_dict`` fall back to their default.

    Args:
        result_dict (dict): Field values keyed by field name.
        precision (int, optional): Number of decimals for float fields.
            Default: 4.

    Returns:
        str: The formatted label line.

    Raises:
        ValueError: If a field without default is explicitly set to None.
    """
    prec_float = f'{{:.{precision}f}}'
    all_field_default = OrderedDict([
        ('name', None),
        ('truncated', -1),
        ('occluded', -1),
        ('alpha', -10),
        ('bbox', None),
        ('dimensions', [-1, -1, -1]),
        ('location', [-1000, -1000, -1000]),
        ('rotation_y', -10),
        ('score', 0.0),
    ])
    scalar_fields = ('truncated', 'alpha', 'rotation_y', 'score')
    vector_fields = ('bbox', 'dimensions', 'location')

    # Start from "all unset" in the canonical field order, then overlay the
    # values supplied by the caller.
    res_dict = OrderedDict((field, None) for field in all_field_default)
    for key, val in result_dict.items():
        if all_field_default[key] is None and val is None:
            raise ValueError('you must specify a value for {}'.format(key))
        res_dict[key] = val

    res_line = []
    for key, val in res_dict.items():
        if key == 'name':
            res_line.append(val)
        elif key in scalar_fields:
            res_line.append(
                str(all_field_default[key]) if val is None else prec_float.
                format(val))
        elif key == 'occluded':
            res_line.append(
                str(all_field_default[key]) if val is None else '{}'.
                format(val))
        elif key in vector_fields:
            if val is None:
                res_line.extend(str(v) for v in all_field_default[key])
            else:
                res_line.extend(prec_float.format(v) for v in val)
        else:
            raise ValueError('unknown key. supported key:{}'.format(
                res_dict.keys()))
    return ' '.join(res_line)
# Copyright (c) OpenMMLab. All rights reserved.
import os
from logging import warning
from os import path as osp
import mmcv
import numpy as np
from lyft_dataset_sdk.lyftdataset import LyftDataset as Lyft
from pyquaternion import Quaternion
from mmdet3d.datasets import LyftDataset
from .nuscenes_converter import (get_2d_boxes, get_available_scenes,
obtain_sensor2top)
# Lyft category names. A name's position in this tuple is used as its id when
# export_2d_annotation builds the COCO-style category list.
lyft_categories = ('car', 'truck', 'bus', 'emergency_vehicle', 'other_vehicle',
                   'motorcycle', 'bicycle', 'pedestrian', 'animal')
def create_lyft_infos(root_path,
                      info_prefix,
                      version='v1.01-train',
                      max_sweeps=10):
    """Create info file of lyft dataset.

    Given the raw data, generate its related info file in pkl format.
    Scene lists are read from ``data/lyft/{train,val,test}.txt`` relative
    to the current working directory, and the info files are written to
    ``<root_path>/<info_prefix>_infos_{train,val,test}.pkl``.

    Args:
        root_path (str): Path of the data root.
        info_prefix (str): Prefix of the info file to be generated.
        version (str, optional): Version of the data.
            Default: 'v1.01-train'.
        max_sweeps (int, optional): Max number of sweeps.
            Default: 10.
    """
    lyft = Lyft(
        data_path=osp.join(root_path, version),
        json_path=osp.join(root_path, version, version),
        verbose=True)

    available_vers = ['v1.01-train', 'v1.01-test']
    assert version in available_vers
    if version == 'v1.01-train':
        train_scenes = mmcv.list_from_file('data/lyft/train.txt')
        val_scenes = mmcv.list_from_file('data/lyft/val.txt')
    elif version == 'v1.01-test':
        train_scenes = mmcv.list_from_file('data/lyft/test.txt')
        val_scenes = []
    else:
        raise ValueError('unknown')

    # filter existing scenes.
    available_scenes = get_available_scenes(lyft)
    available_scene_names = [scene['name'] for scene in available_scenes]

    def _to_tokens(scene_names):
        # Drop the scenes that are not available and map names to tokens.
        return {
            available_scenes[available_scene_names.index(name)]['token']
            for name in scene_names if name in available_scene_names
        }

    train_scenes = _to_tokens(train_scenes)
    val_scenes = _to_tokens(val_scenes)

    test = 'test' in version
    if test:
        print(f'test scene: {len(train_scenes)}')
    else:
        print(f'train scene: {len(train_scenes)}, '
              f'val scene: {len(val_scenes)}')

    train_lyft_infos, val_lyft_infos = _fill_trainval_infos(
        lyft, train_scenes, val_scenes, test, max_sweeps=max_sweeps)
    metadata = dict(version=version)

    if test:
        print(f'test sample: {len(train_lyft_infos)}')
        mmcv.dump(
            dict(infos=train_lyft_infos, metadata=metadata),
            osp.join(root_path, f'{info_prefix}_infos_test.pkl'))
        return

    print(f'train sample: {len(train_lyft_infos)}, '
          f'val sample: {len(val_lyft_infos)}')
    mmcv.dump(
        dict(infos=train_lyft_infos, metadata=metadata),
        osp.join(root_path, f'{info_prefix}_infos_train.pkl'))
    mmcv.dump(
        dict(infos=val_lyft_infos, metadata=metadata),
        osp.join(root_path, f'{info_prefix}_infos_val.pkl'))
def _fill_trainval_infos(lyft,
                         train_scenes,
                         val_scenes,
                         test=False,
                         max_sweeps=10):
    """Generate the train/val infos from the raw data.

    Args:
        lyft (:obj:`LyftDataset`): Dataset class in the Lyft dataset.
        train_scenes (list[str]): Basic information of training scenes.
        val_scenes (list[str]): Basic information of validation scenes.
        test (bool, optional): Whether use the test mode. In the test mode, no
            annotations can be accessed. Default: False.
        max_sweeps (int, optional): Max number of sweeps. Default: 10.

    Returns:
        tuple[list[dict]]: Information of training set and
            validation set that will be saved to the info file. A sample
            whose scene token is not in ``train_scenes`` goes to the
            validation list.
    """
    # the 6 cameras recorded for every frame
    camera_types = [
        'CAM_FRONT',
        'CAM_FRONT_RIGHT',
        'CAM_FRONT_LEFT',
        'CAM_BACK',
        'CAM_BACK_LEFT',
        'CAM_BACK_RIGHT',
    ]
    train_lyft_infos = []
    val_lyft_infos = []

    for sample in mmcv.track_iter_progress(lyft.sample):
        lidar_token = sample['data']['LIDAR_TOP']
        lidar_rec = lyft.get('sample_data', lidar_token)
        cs_record = lyft.get('calibrated_sensor',
                             lidar_rec['calibrated_sensor_token'])
        pose_record = lyft.get('ego_pose', lidar_rec['ego_pose_token'])

        abs_lidar_path, boxes, _ = lyft.get_sample_data(lidar_token)
        # nuScenes devkit returns more convenient relative paths while
        # lyft devkit returns absolute paths, so strip the working directory.
        lidar_path = str(abs_lidar_path).split(f'{os.getcwd()}/')[-1]
        mmcv.check_file_exist(lidar_path)

        l2e_t = cs_record['translation']
        l2e_r = cs_record['rotation']
        e2g_t = pose_record['translation']
        e2g_r = pose_record['rotation']
        info = {
            'lidar_path': lidar_path,
            'token': sample['token'],
            'sweeps': [],
            'cams': dict(),
            'lidar2ego_translation': l2e_t,
            'lidar2ego_rotation': l2e_r,
            'ego2global_translation': e2g_t,
            'ego2global_rotation': e2g_r,
            'timestamp': sample['timestamp'],
        }
        l2e_r_mat = Quaternion(l2e_r).rotation_matrix
        e2g_r_mat = Quaternion(e2g_r).rotation_matrix

        # obtain 6 image's information per frame
        for cam in camera_types:
            cam_token = sample['data'][cam]
            _, _, cam_intrinsic = lyft.get_sample_data(cam_token)
            cam_info = obtain_sensor2top(lyft, cam_token, l2e_t, l2e_r_mat,
                                         e2g_t, e2g_r_mat, cam)
            cam_info.update(cam_intrinsic=cam_intrinsic)
            info['cams'][cam] = cam_info

        # obtain sweeps for a single key-frame by following the 'prev' links
        cursor = lyft.get('sample_data', sample['data']['LIDAR_TOP'])
        sweeps = []
        while len(sweeps) < max_sweeps:
            prev_token = cursor['prev']
            if prev_token == '':
                break
            sweeps.append(
                obtain_sensor2top(lyft, prev_token, l2e_t, l2e_r_mat, e2g_t,
                                  e2g_r_mat, 'lidar'))
            cursor = lyft.get('sample_data', prev_token)
        info['sweeps'] = sweeps

        # obtain annotation
        if not test:
            annotations = [
                lyft.get('sample_annotation', token)
                for token in sample['anns']
            ]
            locs = np.array([box.center for box in boxes]).reshape(-1, 3)
            dims = np.array([box.wlh for box in boxes]).reshape(-1, 3)
            rots = np.array([
                box.orientation.yaw_pitch_roll[0] for box in boxes
            ]).reshape(-1, 1)

            name_mapping = LyftDataset.NameMapping
            names = np.array([
                name_mapping[box.name] if box.name in name_mapping else
                box.name for box in boxes
            ])

            # we need to convert box size to
            # the format of our lidar coordinate system
            # which is x_size, y_size, z_size (corresponding to l, w, h)
            gt_boxes = np.concatenate([locs, dims[:, [1, 0, 2]], rots], axis=1)
            assert len(gt_boxes) == len(
                annotations), f'{len(gt_boxes)}, {len(annotations)}'

            info['gt_boxes'] = gt_boxes
            info['gt_names'] = names
            info['num_lidar_pts'] = np.array(
                [ann['num_lidar_pts'] for ann in annotations])
            info['num_radar_pts'] = np.array(
                [ann['num_radar_pts'] for ann in annotations])

        target = (
            train_lyft_infos
            if sample['scene_token'] in train_scenes else val_lyft_infos)
        target.append(info)

    return train_lyft_infos, val_lyft_infos
def export_2d_annotation(root_path, info_path, version):
    """Export 2d annotation from the info file and raw data.

    For every camera of every entry in the info file, the records returned
    by ``get_2d_boxes`` are gathered into one COCO-style dict with the keys
    ``annotations``, ``images`` and ``categories``. The dict is dumped next
    to the info file as ``<info path without extension>.coco.json``.

    Args:
        root_path (str): Root path of the raw data.
        info_path (str): Path of the info file.
        version (str): Dataset version.
    """
    # The notice used to be emitted through ``warning.warn``; the warning API
    # of the standard library lives in the ``warnings`` module, so it is
    # imported here to keep this function self-contained.
    import warnings

    warnings.warn('DeprecationWarning: 2D annotations are not used on the '
                  'Lyft dataset. The function export_2d_annotation will be '
                  'deprecated.')
    # get bbox annotations for camera
    camera_types = [
        'CAM_FRONT',
        'CAM_FRONT_RIGHT',
        'CAM_FRONT_LEFT',
        'CAM_BACK',
        'CAM_BACK_LEFT',
        'CAM_BACK_RIGHT',
    ]
    lyft_infos = mmcv.load(info_path)['infos']
    lyft = Lyft(
        data_path=osp.join(root_path, version),
        json_path=osp.join(root_path, version, version),
        verbose=True)
    # The category id is the position of the name in ``lyft_categories``.
    cat2Ids = [
        dict(id=lyft_categories.index(cat_name), name=cat_name)
        for cat_name in lyft_categories
    ]
    coco_ann_id = 0
    coco_2d_dict = dict(annotations=[], images=[], categories=cat2Ids)
    for info in mmcv.track_iter_progress(lyft_infos):
        for cam in camera_types:
            cam_info = info['cams'][cam]
            coco_infos = get_2d_boxes(
                lyft,
                cam_info['sample_data_token'],
                visibilities=['', '1', '2', '3', '4'])
            # The image is read only to obtain its size.
            (height, width, _) = mmcv.imread(cam_info['data_path']).shape
            coco_2d_dict['images'].append(
                dict(
                    file_name=cam_info['data_path'],
                    id=cam_info['sample_data_token'],
                    width=width,
                    height=height))
            for coco_info in coco_infos:
                if coco_info is None:
                    continue
                # add an empty key for coco format
                coco_info['segmentation'] = []
                coco_info['id'] = coco_ann_id
                coco_2d_dict['annotations'].append(coco_info)
                coco_ann_id += 1
    # Strip the real extension instead of blindly cutting four characters, so
    # info files that do not end in a 4-character suffix keep a sane name.
    mmcv.dump(coco_2d_dict, f'{osp.splitext(info_path)[0]}.coco.json')
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import os
import numpy as np
def fix_lyft(root_folder='./data/lyft', version='v1.01'):
    """Pad one known Lyft lidar file so that it holds rows of five floats.

    The file ``lidar/host-a011_lidar1_1233090652702363606.bin`` inside
    ``<root_folder>/<version>-train`` is read as float32 values. If the
    values cannot be reshaped to ``(-1, 5)``, ``100.0`` and ``1.0`` are
    appended and the file is rewritten in place; otherwise it is left alone.

    Args:
        root_folder (str): Root path of the Lyft dataset.
        version (str): Dataset version used to build the train folder name.
    """
    # refer to https://www.kaggle.com/c/3d-object-detection-for-autonomous-vehicles/discussion/110000  # noqa
    train_root = os.path.join(root_folder, f'{version}-train')
    lidar_path = os.path.join(
        train_root, 'lidar/host-a011_lidar1_1233090652702363606.bin')
    assert os.path.isfile(lidar_path), (
        f'Please download the complete Lyft '
        f'dataset and make sure {lidar_path} is present.')

    points = np.fromfile(lidar_path, dtype=np.float32, count=-1)
    try:
        # Only the success/failure of the reshape matters here.
        points.reshape([-1, 5])
    except ValueError:
        padding = np.array([100.0, 1.0], dtype='float32')
        np.append(points, padding).tofile(lidar_path)
        print(f'Appended 100.0 and 1.0 to the end of {lidar_path}.')
    else:
        print(f'This fix is not required for version {version}.')
# Command-line interface of the Lyft fixer. The arguments are parsed at module
# level, i.e. as soon as this file is executed or imported.
parser = argparse.ArgumentParser(description='Lyft dataset fixer arg parser')
parser.add_argument('--root-folder',
                    default='./data/lyft',
                    type=str,
                    help='specify the root path of Lyft dataset')
parser.add_argument('--version',
                    default='v1.01',
                    type=str,
                    help='specify Lyft dataset version')
args = parser.parse_args()

# Only run the fix when the file is executed as a script.
if __name__ == '__main__':
    fix_lyft(root_folder=args.root_folder, version=args.version)
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import base64
from os import path as osp
import mmcv
import numpy as np
from nuimages import NuImages
from nuimages.utils.utils import mask_decode, name_to_index_mapping
# Category names written to the exported COCO json. ``export_nuim_to_coco``
# uses the position of a name in this tuple as its category id, so the order
# of the entries is significant.
nus_categories = ('car', 'truck', 'trailer', 'bus', 'construction_vehicle',
'bicycle', 'motorcycle', 'pedestrian', 'traffic_cone',
'barrier')
# Maps raw nuImages category names to the names in ``nus_categories``.
# ``get_img_annos`` only creates an instance annotation for an object whose
# raw category name is a key of this dict.
NAME_MAPPING = {
'movable_object.barrier': 'barrier',
'vehicle.bicycle': 'bicycle',
'vehicle.bus.bendy': 'bus',
'vehicle.bus.rigid': 'bus',
'vehicle.car': 'car',
'vehicle.construction': 'construction_vehicle',
'vehicle.motorcycle': 'motorcycle',
'human.pedestrian.adult': 'pedestrian',
'human.pedestrian.child': 'pedestrian',
'human.pedestrian.construction_worker': 'pedestrian',
'human.pedestrian.police_officer': 'pedestrian',
'movable_object.trafficcone': 'traffic_cone',
'vehicle.trailer': 'trailer',
'vehicle.truck': 'truck',
}
def parse_args():
    """Parse the command-line options of the nuImages converter.

    Returns:
        argparse.Namespace: Namespace with the attributes ``data_root``,
            ``version`` (a list with one or more entries), ``out_dir``,
            ``nproc`` and ``extra_tag``.
    """
    cli = argparse.ArgumentParser(description='Data converter arg parser')
    cli.add_argument(
        '--data-root',
        default='./data/nuimages',
        type=str,
        help='specify the root path of dataset')
    # Several versions may be given at once; each one is converted in turn.
    cli.add_argument(
        '--version',
        default=['v1.0-mini'],
        nargs='+',
        type=str,
        help='specify the dataset version')
    cli.add_argument(
        '--out-dir',
        default='./data/nuimages/annotations/',
        type=str,
        help='path to save the exported json')
    cli.add_argument(
        '--nproc',
        default=4,
        type=int,
        help='workers to process semantic masks')
    cli.add_argument('--extra-tag', default='nuimages', type=str)
    return cli.parse_args()
def get_img_annos(nuim, img_info, cat2id, out_dir, data_root, seg_root):
    """Collect instance annotations of one image and save its semantic mask.

    Surface annotations are painted into the semantic mask first, then the
    object annotations (sorted by token). Objects whose category appears in
    ``NAME_MAPPING`` additionally yield one COCO-style annotation dict.

    Args:
        nuim (obj:`NuImages`): NuImages dataset object
        img_info (dict): Meta information of img. The keys ``token``, ``id``,
            ``width``, ``height`` and ``file_name`` are read.
        cat2id (dict): Mapping from category name to category id.
        out_dir (str): Not used inside this function.
        data_root (str): Not used inside this function.
        seg_root (str): Directory the semantic mask image is written to.

    Returns:
        tuple: ``(annotations, max_id)`` where ``annotations`` is the list of
            annotation dicts of this image and ``max_id`` is the largest
            value present in the saved semantic mask.
    """
    sd_token = img_info['token']
    image_id = img_info['id']
    name_to_index = name_to_index_mapping(nuim.category)

    # Start from an all-background mask of the image size.
    width, height = img_info['width'], img_info['height']
    semseg_mask = np.zeros((height, width)).astype('uint8')

    # Stuff / surface regions are drawn first.
    surface_anns = [
        rec for rec in nuim.surface_ann if rec['sample_data_token'] == sd_token
    ]
    for surface in surface_anns:
        surface_name = nuim.get('category', surface['category_token'])['name']
        if surface['mask'] is None:
            continue
        surface_mask = mask_decode(surface['mask'])
        semseg_mask[surface_mask == 1] = name_to_index[surface_name]

    # Object instances are drawn on top, in a token-sorted and therefore
    # reproducible order.
    object_anns = [
        rec for rec in nuim.object_ann if rec['sample_data_token'] == sd_token
    ]
    object_anns.sort(key=lambda rec: rec['token'])

    annotations = []
    for obj in object_anns:
        category_name = nuim.get('category', obj['category_token'])['name']
        if obj['mask'] is None:
            continue
        obj_mask = mask_decode(obj['mask'])
        semseg_mask[obj_mask == 1] = name_to_index[category_name]

        # Only mapped categories become instance annotations.
        if category_name not in NAME_MAPPING:
            continue
        cat_id = cat2id[NAME_MAPPING[category_name]]
        x_min, y_min, x_max, y_max = obj['bbox']
        # encode calibrated instance mask
        mask_anno = {
            'counts': base64.b64decode(obj['mask']['counts']).decode(),
            'size': obj['mask']['size'],
        }
        annotations.append({
            'image_id': image_id,
            'category_id': cat_id,
            'bbox': [x_min, y_min, x_max - x_min, y_max - y_min],
            'area': (x_max - x_min) * (y_max - y_min),
            'segmentation': mask_anno,
            'iscrowd': 0,
        })

    # after process, save semantic masks
    seg_filename = osp.join(seg_root,
                            img_info['file_name'].replace('jpg', 'png'))
    mmcv.imwrite(semseg_mask, seg_filename)
    return annotations, np.max(semseg_mask)
def export_nuim_to_coco(nuim, data_root, out_dir, extra_tag, version, nproc):
    """Export the key-frame annotations of a nuImages split to a COCO json.

    The function collects the key-frame images, lets ``get_img_annos``
    produce the per-image annotations (which also writes one semantic mask
    per image below ``<out_dir>/semantic_masks``) and finally dumps
    ``<out_dir>/<extra_tag>_<version>.json``.

    Args:
        nuim (obj:`NuImages`): NuImages dataset object.
        data_root (str): Root path of the dataset; a ``calibrated``
            sub-directory is created inside it.
        out_dir (str): Directory that receives the json file and the
            semantic masks.
        extra_tag (str): Prefix of the json file name.
        version (str): Dataset version, used in the json file name.
        nproc (int): Number of workers. With a value greater than 1 the
            images are processed with ``mmcv.track_parallel_progress``.
    """
    print('Process category information')
    # The category id is the position of the name in ``nus_categories``.
    categories = [
        dict(id=nus_categories.index(cat_name), name=cat_name)
        for cat_name in nus_categories
    ]
    cat2id = {k_v['name']: k_v['id'] for k_v in categories}

    images = []
    print('Process image meta information...')
    for sample_info in mmcv.track_iter_progress(nuim.sample_data):
        if sample_info['is_key_frame']:
            img_idx = len(images)
            images.append(
                dict(
                    id=img_idx,
                    token=sample_info['token'],
                    file_name=sample_info['filename'],
                    width=sample_info['width'],
                    height=sample_info['height']))

    # Join the path components instead of concatenating strings, so an
    # ``out_dir`` given without a trailing separator still points inside it.
    seg_root = osp.join(out_dir, 'semantic_masks')
    mmcv.mkdir_or_exist(seg_root)
    mmcv.mkdir_or_exist(osp.join(data_root, 'calibrated'))

    # The worker is published as a module-level name; presumably this is
    # required so the parallel workers can reference it - TODO confirm.
    global process_img_anno

    def process_img_anno(img_info):
        single_img_annos, max_cls_id = get_img_annos(nuim, img_info, cat2id,
                                                     out_dir, data_root,
                                                     seg_root)
        return single_img_annos, max_cls_id

    print('Process img annotations...')
    if nproc > 1:
        outputs = mmcv.track_parallel_progress(
            process_img_anno, images, nproc=nproc)
    else:
        outputs = []
        for img_info in mmcv.track_iter_progress(images):
            outputs.append(process_img_anno(img_info))

    # Determine the index of object annotation
    print('Process annotation information...')
    annotations = []
    max_cls_ids = []
    for single_img_annos, max_cls_id in outputs:
        max_cls_ids.append(max_cls_id)
        for img_anno in single_img_annos:
            # Annotation ids are consecutive over the whole split.
            img_anno.update(id=len(annotations))
            annotations.append(img_anno)

    # ``default`` keeps a split without any key frame from raising.
    max_cls_id = max(max_cls_ids, default=0)
    print(f'Max ID of class in the semantic map: {max_cls_id}')

    coco_format_json = dict(
        images=images, annotations=annotations, categories=categories)

    mmcv.mkdir_or_exist(out_dir)
    out_file = osp.join(out_dir, f'{extra_tag}_{version}.json')
    print(f'Annotation dumped to {out_file}')
    mmcv.dump(coco_format_json, out_file)
def main():
    """Convert every requested nuImages version to a COCO-style json."""
    cli_args = parse_args()
    for split_version in cli_args.version:
        # One lazily loaded dataset object per requested version.
        dataset = NuImages(
            dataroot=cli_args.data_root,
            version=split_version,
            verbose=True,
            lazy=True)
        export_nuim_to_coco(dataset, cli_args.data_root, cli_args.out_dir,
                            cli_args.extra_tag, split_version, cli_args.nproc)


# Script entry point.
if __name__ == '__main__':
    main()
# Copyright (c) OpenMMLab. All rights reserved.
import os
from collections import OrderedDict
from os import path as osp
from typing import List, Tuple, Union
import mmcv
import numpy as np
from nuscenes.nuscenes import NuScenes
from nuscenes.utils.geometry_utils import view_points
from pyquaternion import Quaternion
from shapely.geometry import MultiPoint, box
from mmdet3d.core.bbox import points_cam2img
from mmdet3d.datasets import NuScenesDataset
# Detection category names. ``export_2d_annotation`` and ``generate_record``
# use the position of a name in this tuple as its category id, so the order
# of the entries is significant.
nus_categories = ('car', 'truck', 'trailer', 'bus', 'construction_vehicle',
'bicycle', 'motorcycle', 'pedestrian', 'traffic_cone',
'barrier')
# Attribute names. ``get_2d_boxes`` stores the position of a name in this
# tuple as ``attribute_id``; the last entry 'None' is used for boxes that
# carry no attribute token.
nus_attributes = ('cycle.with_rider', 'cycle.without_rider',
'pedestrian.moving', 'pedestrian.standing',
'pedestrian.sitting_lying_down', 'vehicle.moving',
'vehicle.parked', 'vehicle.stopped', 'None')
def create_nuscenes_infos(root_path,
                          info_prefix,
                          version='v1.0-trainval',
                          max_sweeps=10):
    """Create info file of nuscene dataset.

    Loads the dataset, selects the scenes of the official split that are
    reported by ``get_available_scenes``, builds the per-sample infos and
    dumps them as pkl files into ``root_path``: one ``*_infos_test.pkl`` for
    a test version, otherwise ``*_infos_train.pkl`` and ``*_infos_val.pkl``.

    Args:
        root_path (str): Path of the data root.
        info_prefix (str): Prefix of the info file to be generated.
        version (str, optional): Version of the data.
            Default: 'v1.0-trainval'.
        max_sweeps (int, optional): Max number of sweeps.
            Default: 10.
    """
    from nuscenes.nuscenes import NuScenes
    nusc = NuScenes(version=version, dataroot=root_path, verbose=True)
    from nuscenes.utils import splits

    assert version in ['v1.0-trainval', 'v1.0-test', 'v1.0-mini']
    if version == 'v1.0-trainval':
        train_scenes, val_scenes = splits.train, splits.val
    elif version == 'v1.0-test':
        train_scenes, val_scenes = splits.test, []
    elif version == 'v1.0-mini':
        train_scenes, val_scenes = splits.mini_train, splits.mini_val
    else:
        raise ValueError('unknown')

    # filter existing scenes.
    available_scenes = get_available_scenes(nusc)
    # Scene name -> scene token; the first scene with a given name wins.
    token_of = {}
    for scene in available_scenes:
        token_of.setdefault(scene['name'], scene['token'])
    train_scenes = {
        token_of[name] for name in train_scenes if name in token_of
    }
    val_scenes = {token_of[name] for name in val_scenes if name in token_of}

    test = 'test' in version
    if test:
        print('test scene: {}'.format(len(train_scenes)))
    else:
        print('train scene: {}, val scene: {}'.format(
            len(train_scenes), len(val_scenes)))

    train_nusc_infos, val_nusc_infos = _fill_trainval_infos(
        nusc, train_scenes, val_scenes, test, max_sweeps=max_sweeps)
    metadata = dict(version=version)

    if test:
        # In test mode all infos were collected in the "train" list.
        print('test sample: {}'.format(len(train_nusc_infos)))
        mmcv.dump(
            dict(infos=train_nusc_infos, metadata=metadata),
            osp.join(root_path, '{}_infos_test.pkl'.format(info_prefix)))
        return

    print('train sample: {}, val sample: {}'.format(
        len(train_nusc_infos), len(val_nusc_infos)))
    mmcv.dump(
        dict(infos=train_nusc_infos, metadata=metadata),
        osp.join(root_path, '{}_infos_train.pkl'.format(info_prefix)))
    mmcv.dump(
        dict(infos=val_nusc_infos, metadata=metadata),
        osp.join(root_path, '{}_infos_val.pkl'.format(info_prefix)))
def get_available_scenes(nusc):
    """Get available scenes from the input nuscenes class.

    For every scene, the lidar path of its first sample is looked up and
    passed through ``mmcv.is_filepath``; scenes passing that check are kept.

    Args:
        nusc (class): Dataset class in the nuScenes dataset.

    Returns:
        available_scenes (list[dict]): List of basic information for the
            available scenes.
    """
    available_scenes = []
    print('total scene num: {}'.format(len(nusc.scene)))
    for scene in nusc.scene:
        scene_rec = nusc.get('scene', scene['token'])
        first_sample = nusc.get('sample', scene_rec['first_sample_token'])
        sd_rec = nusc.get('sample_data', first_sample['data']['LIDAR_TOP'])

        # Only the first frame of a scene is inspected.
        lidar_path = str(nusc.get_sample_data(sd_rec['token'])[0])
        cwd = os.getcwd()
        if cwd in lidar_path:
            # path from lyftdataset is absolute path -> make it relative
            lidar_path = lidar_path.split(f'{cwd}/')[-1]

        # NOTE(review): ``mmcv.is_filepath`` may only test the type of its
        # argument rather than whether the file exists - confirm.
        if mmcv.is_filepath(lidar_path):
            available_scenes.append(scene)

    print('exist scene num: {}'.format(len(available_scenes)))
    return available_scenes
def _fill_trainval_infos(nusc,
                         train_scenes,
                         val_scenes,
                         test=False,
                         max_sweeps=10):
    """Generate the train/val infos from the raw data.

    Every sample yields one info dict holding the lidar path, the calibration
    and pose of the lidar, the six camera infos, up to ``max_sweeps`` previous
    lidar sweeps and, unless ``test`` is set, the ground-truth boxes.

    Args:
        nusc (:obj:`NuScenes`): Dataset class in the nuScenes dataset.
        train_scenes (list[str]): Basic information of training scenes.
            A sample goes to the training list when its ``scene_token`` is
            contained in this collection.
        val_scenes (list[str]): Basic information of validation scenes.
            It is not consulted here: every sample that is not a training
            sample is put into the validation list.
        test (bool, optional): Whether use the test mode. In test mode, no
            annotations can be accessed. Default: False.
        max_sweeps (int, optional): Max number of sweeps. Default: 10.

    Returns:
        tuple[list[dict]]: Information of training set and validation set
            that will be saved to the info file.
    """
    train_nusc_infos = []
    val_nusc_infos = []

    # obtain 6 image's information per frame
    camera_types = [
        'CAM_FRONT',
        'CAM_FRONT_RIGHT',
        'CAM_FRONT_LEFT',
        'CAM_BACK',
        'CAM_BACK_LEFT',
        'CAM_BACK_RIGHT',
    ]

    for sample in mmcv.track_iter_progress(nusc.sample):
        lidar_token = sample['data']['LIDAR_TOP']
        sd_rec = nusc.get('sample_data', lidar_token)
        cs_record = nusc.get('calibrated_sensor',
                             sd_rec['calibrated_sensor_token'])
        pose_record = nusc.get('ego_pose', sd_rec['ego_pose_token'])
        lidar_path, boxes, _ = nusc.get_sample_data(lidar_token)

        mmcv.check_file_exist(lidar_path)

        info = {
            'lidar_path': lidar_path,
            'token': sample['token'],
            'sweeps': [],
            'cams': dict(),
            'lidar2ego_translation': cs_record['translation'],
            'lidar2ego_rotation': cs_record['rotation'],
            'ego2global_translation': pose_record['translation'],
            'ego2global_rotation': pose_record['rotation'],
            'timestamp': sample['timestamp'],
        }

        l2e_r = info['lidar2ego_rotation']
        l2e_t = info['lidar2ego_translation']
        e2g_r = info['ego2global_rotation']
        e2g_t = info['ego2global_translation']
        l2e_r_mat = Quaternion(l2e_r).rotation_matrix
        e2g_r_mat = Quaternion(e2g_r).rotation_matrix

        for cam in camera_types:
            cam_token = sample['data'][cam]
            _, _, cam_intrinsic = nusc.get_sample_data(cam_token)
            cam_info = obtain_sensor2top(nusc, cam_token, l2e_t, l2e_r_mat,
                                         e2g_t, e2g_r_mat, cam)
            cam_info.update(cam_intrinsic=cam_intrinsic)
            info['cams'].update({cam: cam_info})

        # obtain sweeps for a single key-frame: walk the ``prev`` links of
        # the lidar record until the chain ends or the budget is used up.
        sweeps = []
        while len(sweeps) < max_sweeps and not sd_rec['prev'] == '':
            sweep = obtain_sensor2top(nusc, sd_rec['prev'], l2e_t,
                                      l2e_r_mat, e2g_t, e2g_r_mat, 'lidar')
            sweeps.append(sweep)
            sd_rec = nusc.get('sample_data', sd_rec['prev'])
        info['sweeps'] = sweeps

        # obtain annotation
        if not test:
            annotations = [
                nusc.get('sample_annotation', token)
                for token in sample['anns']
            ]
            locs = np.array([b.center for b in boxes]).reshape(-1, 3)
            dims = np.array([b.wlh for b in boxes]).reshape(-1, 3)
            rots = np.array([b.orientation.yaw_pitch_roll[0]
                             for b in boxes]).reshape(-1, 1)
            velocity = np.array(
                [nusc.box_velocity(token)[:2] for token in sample['anns']])
            valid_flag = np.array(
                [(anno['num_lidar_pts'] + anno['num_radar_pts']) > 0
                 for anno in annotations],
                dtype=bool).reshape(-1)

            # convert velo from global to lidar. Both inverse rotations are
            # the same for every box of the sample, so they are computed once
            # here instead of once per box.
            g2e_r_mat_t = np.linalg.inv(e2g_r_mat).T
            e2l_r_mat_t = np.linalg.inv(l2e_r_mat).T
            for i in range(len(boxes)):
                velo = np.array([*velocity[i], 0.0])
                velo = velo @ g2e_r_mat_t @ e2l_r_mat_t
                velocity[i] = velo[:2]

            # Raw category names are replaced by their mapped name when the
            # dataset class provides one.
            name_mapping = NuScenesDataset.NameMapping
            names = np.array([
                name_mapping[b.name] if b.name in name_mapping else b.name
                for b in boxes
            ])
            # we need to convert box size to
            # the format of our lidar coordinate system
            # which is x_size, y_size, z_size (corresponding to l, w, h)
            gt_boxes = np.concatenate([locs, dims[:, [1, 0, 2]], rots], axis=1)
            assert len(gt_boxes) == len(
                annotations), f'{len(gt_boxes)}, {len(annotations)}'
            info['gt_boxes'] = gt_boxes
            info['gt_names'] = names
            info['gt_velocity'] = velocity.reshape(-1, 2)
            info['num_lidar_pts'] = np.array(
                [a['num_lidar_pts'] for a in annotations])
            info['num_radar_pts'] = np.array(
                [a['num_radar_pts'] for a in annotations])
            info['valid_flag'] = valid_flag

        if sample['scene_token'] in train_scenes:
            train_nusc_infos.append(info)
        else:
            val_nusc_infos.append(info)

    return train_nusc_infos, val_nusc_infos
def obtain_sensor2top(nusc,
                      sensor_token,
                      l2e_t,
                      l2e_r_mat,
                      e2g_t,
                      e2g_r_mat,
                      sensor_type='lidar'):
    """Obtain the info with RT matrix from general sensor to Top LiDAR.

    Args:
        nusc (class): Dataset class in the nuScenes dataset.
        sensor_token (str): Sample data token corresponding to the
            specific sensor type.
        l2e_t (np.ndarray): Translation from lidar to ego in shape (1, 3).
        l2e_r_mat (np.ndarray): Rotation matrix from lidar to ego
            in shape (3, 3).
        e2g_t (np.ndarray): Translation from ego to global in shape (1, 3).
        e2g_r_mat (np.ndarray): Rotation matrix from ego to global
            in shape (3, 3).
        sensor_type (str, optional): Sensor to calibrate. Default: 'lidar'.

    Returns:
        sweep (dict): Sweep information after transformation. Besides the
            raw calibration and pose of the sensor it holds
            ``sensor2lidar_rotation`` and ``sensor2lidar_translation``.
    """
    sd_rec = nusc.get('sample_data', sensor_token)
    cs_record = nusc.get('calibrated_sensor',
                         sd_rec['calibrated_sensor_token'])
    pose_record = nusc.get('ego_pose', sd_rec['ego_pose_token'])
    data_path = str(nusc.get_sample_data_path(sd_rec['token']))
    if os.getcwd() in data_path:  # path from lyftdataset is absolute path
        data_path = data_path.split(f'{os.getcwd()}/')[-1]  # relative path
    sweep = {
        'data_path': data_path,
        'type': sensor_type,
        'sample_data_token': sd_rec['token'],
        'sensor2ego_translation': cs_record['translation'],
        'sensor2ego_rotation': cs_record['rotation'],
        'ego2global_translation': pose_record['translation'],
        'ego2global_rotation': pose_record['rotation'],
        'timestamp': sd_rec['timestamp']
    }
    l2e_r_s = sweep['sensor2ego_rotation']
    l2e_t_s = sweep['sensor2ego_translation']
    e2g_r_s = sweep['ego2global_rotation']
    e2g_t_s = sweep['ego2global_translation']

    # obtain the RT from sensor to Top LiDAR
    # sweep->ego->global->ego'->lidar
    l2e_r_s_mat = Quaternion(l2e_r_s).rotation_matrix
    e2g_r_s_mat = Quaternion(e2g_r_s).rotation_matrix

    # The inverse rotations of the reference lidar frame appear in every
    # term below; invert each matrix once and reuse the products.
    e2l_r_mat_t = np.linalg.inv(l2e_r_mat).T
    g2l_r_mat_t = np.linalg.inv(e2g_r_mat).T @ e2l_r_mat_t

    R = (l2e_r_s_mat.T @ e2g_r_s_mat.T) @ g2l_r_mat_t
    T = (l2e_t_s @ e2g_r_s_mat.T + e2g_t_s) @ g2l_r_mat_t
    T -= e2g_t @ g2l_r_mat_t + l2e_t @ e2l_r_mat_t
    sweep['sensor2lidar_rotation'] = R.T  # points @ R.T + T
    sweep['sensor2lidar_translation'] = T
    return sweep
def export_2d_annotation(root_path, info_path, version, mono3d=True):
    """Export 2d annotation from the info file and raw data.

    For every camera of every entry in the info file, the records returned
    by ``get_2d_boxes`` are gathered into one COCO-style dict with the keys
    ``annotations``, ``images`` and ``categories``. The dict is dumped next
    to the info file as ``<info path without extension>.coco.json``, with
    ``_mono3d`` inserted before ``.coco.json`` when ``mono3d`` is set.

    Args:
        root_path (str): Root path of the raw data.
        info_path (str): Path of the info file.
        version (str): Dataset version.
        mono3d (bool, optional): Whether to export mono3d annotation.
            Default: True.
    """
    # get bbox annotations for camera
    camera_types = [
        'CAM_FRONT',
        'CAM_FRONT_RIGHT',
        'CAM_FRONT_LEFT',
        'CAM_BACK',
        'CAM_BACK_LEFT',
        'CAM_BACK_RIGHT',
    ]
    nusc_infos = mmcv.load(info_path)['infos']
    nusc = NuScenes(version=version, dataroot=root_path, verbose=True)
    # The category id is the position of the name in ``nus_categories``.
    cat2Ids = [
        dict(id=nus_categories.index(cat_name), name=cat_name)
        for cat_name in nus_categories
    ]
    coco_ann_id = 0
    coco_2d_dict = dict(annotations=[], images=[], categories=cat2Ids)
    for info in mmcv.track_iter_progress(nusc_infos):
        for cam in camera_types:
            cam_info = info['cams'][cam]
            coco_infos = get_2d_boxes(
                nusc,
                cam_info['sample_data_token'],
                visibilities=['', '1', '2', '3', '4'],
                mono3d=mono3d)
            # The image is read only to obtain its size.
            (height, width, _) = mmcv.imread(cam_info['data_path']).shape
            coco_2d_dict['images'].append(
                dict(
                    file_name=cam_info['data_path'].split('data/nuscenes/')
                    [-1],
                    id=cam_info['sample_data_token'],
                    token=info['token'],
                    cam2ego_rotation=cam_info['sensor2ego_rotation'],
                    cam2ego_translation=cam_info['sensor2ego_translation'],
                    ego2global_rotation=info['ego2global_rotation'],
                    ego2global_translation=info['ego2global_translation'],
                    cam_intrinsic=cam_info['cam_intrinsic'],
                    width=width,
                    height=height))
            for coco_info in coco_infos:
                if coco_info is None:
                    continue
                # add an empty key for coco format
                coco_info['segmentation'] = []
                coco_info['id'] = coco_ann_id
                coco_2d_dict['annotations'].append(coco_info)
                coco_ann_id += 1

    # Strip the real extension instead of blindly cutting four characters, so
    # info files that do not end in a 4-character suffix keep a sane name.
    json_prefix = osp.splitext(info_path)[0]
    if mono3d:
        json_prefix = f'{json_prefix}_mono3d'
    mmcv.dump(coco_2d_dict, f'{json_prefix}.coco.json')
def get_2d_boxes(nusc,
                 sample_data_token: str,
                 visibilities: List[str],
                 mono3d=True):
    """Get the 2D annotation records for a given `sample_data_token`.

    Each annotation of the sample is moved into the camera frame, its
    corners in front of the camera are projected into the image and the
    result is turned into a record by ``generate_record``.

    Args:
        nusc (:obj:`NuScenes`): Dataset object the records are read from.
        sample_data_token (str): Sample data token belonging to a camera
            keyframe.
        visibilities (list[str]): Visibility filter.
        mono3d (bool): Whether to get boxes with mono3d annotation.

    Return:
        list[dict]: List of 2D annotation record that belongs to the input
            `sample_data_token`. An entry is ``None`` when
            ``generate_record`` rejected the annotation.

    Raises:
        ValueError: If the sample data is not a keyframe.
    """
    # Get the sample data and the sample corresponding to that sample data.
    sd_rec = nusc.get('sample_data', sample_data_token)

    assert sd_rec[
        'sensor_modality'] == 'camera', 'Error: get_2d_boxes only works' \
        ' for camera sample_data!'
    if not sd_rec['is_key_frame']:
        raise ValueError(
            'The 2D re-projections are available only for keyframes.')

    s_rec = nusc.get('sample', sd_rec['sample_token'])

    # Get the calibrated sensor and ego pose
    # record to get the transformation matrices.
    cs_rec = nusc.get('calibrated_sensor', sd_rec['calibrated_sensor_token'])
    pose_rec = nusc.get('ego_pose', sd_rec['ego_pose_token'])
    camera_intrinsic = np.array(cs_rec['camera_intrinsic'])

    if mono3d:
        # The rotations taking a global velocity into the camera frame only
        # depend on the pose and calibration of this image, so they are
        # inverted once here rather than once per annotation.
        g2e_r_mat_t = np.linalg.inv(
            Quaternion(pose_rec['rotation']).rotation_matrix).T
        e2c_r_mat_t = np.linalg.inv(
            Quaternion(cs_rec['rotation']).rotation_matrix).T

    # Get all the annotation with the specified visibilties.
    ann_recs = [
        nusc.get('sample_annotation', token) for token in s_rec['anns']
    ]
    ann_recs = [
        ann_rec for ann_rec in ann_recs
        if (ann_rec['visibility_token'] in visibilities)
    ]

    repro_recs = []

    for ann_rec in ann_recs:
        # Augment sample_annotation with token information.
        ann_rec['sample_annotation_token'] = ann_rec['token']
        ann_rec['sample_data_token'] = sample_data_token

        # Get the box in global coordinates.
        box = nusc.get_box(ann_rec['token'])

        # Move them to the ego-pose frame.
        box.translate(-np.array(pose_rec['translation']))
        box.rotate(Quaternion(pose_rec['rotation']).inverse)

        # Move them to the calibrated sensor frame.
        box.translate(-np.array(cs_rec['translation']))
        box.rotate(Quaternion(cs_rec['rotation']).inverse)

        # Filter out the corners that are not in front of the calibrated
        # sensor.
        corners_3d = box.corners()
        in_front = np.argwhere(corners_3d[2, :] > 0).flatten()
        corners_3d = corners_3d[:, in_front]

        # Project 3d box to 2d.
        corner_coords = view_points(corners_3d, camera_intrinsic,
                                    True).T[:, :2].tolist()

        # Keep only corners that fall within the image.
        final_coords = post_process_coords(corner_coords)

        # Skip if the convex hull of the re-projected corners
        # does not intersect the image canvas.
        if final_coords is None:
            continue
        min_x, min_y, max_x, max_y = final_coords

        # Generate dictionary record to be included in the .json file.
        repro_rec = generate_record(ann_rec, min_x, min_y, max_x, max_y,
                                    sample_data_token, sd_rec['filename'])

        # If mono3d=True, add 3D annotations in camera coordinates
        if mono3d and (repro_rec is not None):
            loc = box.center.tolist()

            dim = box.wlh
            dim[[0, 1, 2]] = dim[[1, 2, 0]]  # convert wlh to our lhw
            dim = dim.tolist()

            rot = box.orientation.yaw_pitch_roll[0]
            rot = [-rot]  # convert the rot to our cam coordinate

            global_velo2d = nusc.box_velocity(box.token)[:2]
            global_velo3d = np.array([*global_velo2d, 0.0])
            cam_velo3d = global_velo3d @ g2e_r_mat_t @ e2c_r_mat_t
            # Components 0 and 2 of the camera-frame velocity are kept.
            velo = cam_velo3d[0::2].tolist()

            repro_rec['bbox_cam3d'] = loc + dim + rot
            repro_rec['velo_cam3d'] = velo

            center3d = np.array(loc).reshape([1, 3])
            center2d = points_cam2img(
                center3d, camera_intrinsic, with_depth=True)
            repro_rec['center2d'] = center2d.squeeze().tolist()
            # normalized center2D + depth
            # if samples with depth < 0 will be removed
            if repro_rec['center2d'][2] <= 0:
                continue

            ann_token = nusc.get('sample_annotation',
                                 box.token)['attribute_tokens']
            if len(ann_token) == 0:
                attr_name = 'None'
            else:
                # Only the first attribute of a box is exported.
                attr_name = nusc.get('attribute', ann_token[0])['name']
            attr_id = nus_attributes.index(attr_name)
            repro_rec['attribute_name'] = attr_name
            repro_rec['attribute_id'] = attr_id

        repro_recs.append(repro_rec)

    return repro_recs
def post_process_coords(
    corner_coords: List, imsize: Tuple[int, int] = (1600, 900)
) -> Union[Tuple[float, float, float, float], None]:
    """Get the intersection of the convex hull of the reprojected bbox corners
    and the image canvas, return None if no intersection.

    Args:
        corner_coords (list[int]): Corner coordinates of reprojected
            bounding box.
        imsize (tuple[int]): Size of the image canvas.

    Return:
        tuple [float]: Intersection of the convex hull of the 2D box
            corners and the image canvas, as ``(min_x, min_y, max_x,
            max_y)``. ``None`` is returned when the hull misses the canvas
            or the overlap is not a polygon.
    """
    polygon_from_2d_box = MultiPoint(corner_coords).convex_hull
    img_canvas = box(0, 0, imsize[0], imsize[1])

    if not polygon_from_2d_box.intersects(img_canvas):
        return None

    img_intersection = polygon_from_2d_box.intersection(img_canvas)
    # With one or two projected corners, or a hull that merely touches the
    # canvas border, the overlap degenerates to a point or a line. Such a
    # geometry has no ``exterior`` and encloses no area, so it is treated
    # like a missing intersection instead of failing below.
    if img_intersection.geom_type != 'Polygon':
        return None

    intersection_coords = np.array(
        [coord for coord in img_intersection.exterior.coords])

    min_x = min(intersection_coords[:, 0])
    min_y = min(intersection_coords[:, 1])
    max_x = max(intersection_coords[:, 0])
    max_y = max(intersection_coords[:, 1])

    return min_x, min_y, max_x, max_y
def generate_record(ann_rec: dict, x1: float, y1: float, x2: float, y2: float,
                    sample_data_token: str,
                    filename: str) -> Union[dict, None]:
    """Generate one 2D annotation record given various information on top of
    the 2D bounding box coordinates.

    Args:
        ann_rec (dict): Original 3d annotation record.
        x1 (float): Minimum value of the x coordinate.
        y1 (float): Minimum value of the y coordinate.
        x2 (float): Maximum value of the x coordinate.
        y2 (float): Maximum value of the y coordinate.
        sample_data_token (str): Sample data token.
        filename (str):The corresponding image file where the annotation
            is present.

    Returns:
        dict: A sample 2D annotation record, or ``None`` when the category
            of ``ann_rec`` is not a key of ``NuScenesDataset.NameMapping``.
            - file_name (str): file name
            - image_id (str): sample data token
            - area (float): 2d box area
            - category_name (str): category name
            - category_id (int): category id
            - bbox (list[float]): left x, top y, dx, dy of 2d box
            - iscrowd (int): whether the area is crowd
    """
    # Only the category of the 3D record is needed for the COCO record; the
    # intermediate re-projection record that used to be assembled here was
    # never returned, so it is no longer built.
    category_name = ann_rec['category_name']
    if category_name not in NuScenesDataset.NameMapping:
        return None
    cat_name = NuScenesDataset.NameMapping[category_name]

    coco_rec = dict()
    coco_rec['file_name'] = filename
    coco_rec['image_id'] = sample_data_token
    coco_rec['area'] = (y2 - y1) * (x2 - x1)
    coco_rec['category_name'] = cat_name
    # The category id is the position of the name in ``nus_categories``.
    coco_rec['category_id'] = nus_categories.index(cat_name)
    coco_rec['bbox'] = [x1, y1, x2 - x1, y2 - y1]
    coco_rec['iscrowd'] = 0

    return coco_rec
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment