Commit 4cd43886 authored by lishj6

init

parent a9a1fe81
import torch
import torch.nn as nn
import math
class RelPositionEmbedding(nn.Module):
def __init__(self, num_pos_feats=64, pos_norm=True):
super().__init__()
self.num_pos_feats = num_pos_feats
        self.fc = nn.Linear(4, self.num_pos_feats, bias=False)
#nn.init.orthogonal_(self.fc.weight)
#self.fc.weight.requires_grad = False
self.pos_norm = pos_norm
if self.pos_norm:
self.norm = nn.LayerNorm(self.num_pos_feats)
def forward(self, tensor):
#mask = nesttensor.mask
B,C,H,W = tensor.shape
#print('tensor.shape', tensor.shape)
y_range = (torch.arange(H) / float(H - 1)).to(tensor.device)
#y_axis = torch.stack((y_range, 1-y_range),dim=1)
y_axis = torch.stack((torch.cos(y_range * math.pi), torch.sin(y_range * math.pi)), dim=1)
y_axis = y_axis.reshape(H, 1, 2).repeat(1, W, 1).reshape(H * W, 2)
x_range = (torch.arange(W) / float(W - 1)).to(tensor.device)
#x_axis =torch.stack((x_range,1-x_range),dim=1)
x_axis = torch.stack((torch.cos(x_range * math.pi), torch.sin(x_range * math.pi)), dim=1)
x_axis = x_axis.reshape(1, W, 2).repeat(H, 1, 1).reshape(H * W, 2)
x_pos = torch.cat((y_axis, x_axis), dim=1)
x_pos = self.fc(x_pos)
if self.pos_norm:
x_pos = self.norm(x_pos)
#print('xpos,', x_pos.max(),x_pos.min())
return x_pos
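
# --- Illustrative usage sketch (added for clarity; not part of the original file). ---
# RelPositionEmbedding maps every (y, x) location of a B x C x H x W feature map to a
# num_pos_feats-dimensional vector: normalized coordinates are encoded as cos/sin pairs
# and projected by a learned linear layer. The shapes below are only an example.
if __name__ == '__main__':
    dummy_feat = torch.randn(2, 256, 8, 16)        # (B, C, H, W) dummy feature map
    embed = RelPositionEmbedding(num_pos_feats=64)
    pos = embed(dummy_feat)                        # -> (H * W, 64) == (128, 64)
    print(pos.shape)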
import torch
from torchvision.utils import make_grid
import torchvision
import matplotlib.pyplot as plt
import cv2
def convert_color(img_path):
plt.figure()
img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
plt.imsave(img_path, img, cmap=plt.get_cmap('viridis'))
plt.close()
def save_tensor(tensor, path, pad_value=254.0):
    print('save_tensor', path)
    # Scale boolean masks to 0-255 before casting to float; the original check ran
    # after the cast, so it could never match a bool tensor.
    if tensor.dtype == torch.bool:
        tensor = tensor * 255
    tensor = tensor.to(torch.float).detach().cpu()
if len(tensor.shape) == 3:
tensor = tensor.unsqueeze(1)
tensor = make_grid(tensor, pad_value=pad_value, normalize=False).permute(1, 2, 0).numpy().copy()
torchvision.utils.save_image(torch.tensor(tensor).permute(2, 0, 1), path)
convert_color(path)
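
# --- Illustrative usage sketch (added for clarity; not part of the original file). ---
# save_tensor tiles a batch of 2D maps (B, H, W) or images (B, C, H, W) into a single
# grid image, writes it to disk, and then recolors it with the viridis colormap.
# The output filename below is only an example.
if __name__ == '__main__':
    attention_maps = torch.rand(4, 32, 32)         # four single-channel dummy maps
    save_tensor(attention_maps, 'attention_grid.png')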
albumentations>=0.3.2 --no-binary qudida,albumentations
# These must be installed before building mmdetection
cython
numpy
docutils==0.16.0
myst-parser
-e git+https://github.com/open-mmlab/pytorch_sphinx_theme.git#egg=pytorch_sphinx_theme
sphinx==4.0.2
sphinx-copybutton
sphinx_markdown_tables
sphinx_rtd_theme==0.5.2
urllib3<2.0.0
mmcv>=2.0.0rc4,<2.2.0
mmengine>=0.7.1,<1.0.0
fairscale
jsonlines
nltk
pycocoevalcap
transformers
cityscapesscripts
emoji
fairscale
imagecorruptions
scikit-learn
mmcv>=2.0.0rc4,<2.2.0
mmengine>=0.7.1,<1.0.0
scipy
torch
torchvision
urllib3<2.0.0
matplotlib
numpy
pycocotools
scipy
shapely
six
terminaltables
tqdm
asynctest
cityscapesscripts
codecov
flake8
imagecorruptions
instaboostfast
interrogate
isort==4.3.21
# Note: used for kwarray.group_items, this may be ported to mmcv in the future.
kwarray
memory_profiler
-e git+https://github.com/open-mmlab/mmtracking@dev-1.x#egg=mmtrack
nltk
onnx==1.7.0
onnxruntime>=1.8.0
parameterized
prettytable
protobuf<=3.20.1
psutil
pytest
transformers
ubelt
xdoctest>=0.10.0
yapf
mmpretrain
motmetrics
numpy<1.24.0
scikit-learn
seaborn
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import json
import numpy as np
import seaborn as sns
from collections import defaultdict
from matplotlib import pyplot as plt
def cal_train_time(log_dicts, args):
for i, log_dict in enumerate(log_dicts):
print(f'{"-" * 5}Analyze train time of {args.json_logs[i]}{"-" * 5}')
all_times = []
for epoch in log_dict.keys():
if args.include_outliers:
all_times.append(log_dict[epoch]['time'])
else:
all_times.append(log_dict[epoch]['time'][1:])
all_times = np.array(all_times)
epoch_ave_time = all_times.mean(-1)
slowest_epoch = epoch_ave_time.argmax()
fastest_epoch = epoch_ave_time.argmin()
std_over_epoch = epoch_ave_time.std()
print(f'slowest epoch {slowest_epoch + 1}, '
f'average time is {epoch_ave_time[slowest_epoch]:.4f}')
print(f'fastest epoch {fastest_epoch + 1}, '
f'average time is {epoch_ave_time[fastest_epoch]:.4f}')
print(f'time std over epochs is {std_over_epoch:.4f}')
print(f'average iter time: {np.mean(all_times):.4f} s/iter')
print()
def plot_curve(log_dicts, args):
if args.backend is not None:
plt.switch_backend(args.backend)
sns.set_style(args.style)
# if legend is None, use {filename}_{key} as legend
legend = args.legend
if legend is None:
legend = []
for json_log in args.json_logs:
for metric in args.keys:
legend.append(f'{json_log}_{metric}')
assert len(legend) == (len(args.json_logs) * len(args.keys))
metrics = args.keys
num_metrics = len(metrics)
for i, log_dict in enumerate(log_dicts):
epochs = list(log_dict.keys())
for j, metric in enumerate(metrics):
print(f'plot curve of {args.json_logs[i]}, metric is {metric}')
if metric not in log_dict[epochs[args.interval - 1]]:
raise KeyError(
f'{args.json_logs[i]} does not contain metric {metric}')
if args.mode == 'eval':
if min(epochs) == args.interval:
x0 = args.interval
else:
# if current training is resumed from previous checkpoint
# we lost information in early epochs
# `xs` should start according to `min(epochs)`
if min(epochs) % args.interval == 0:
x0 = min(epochs)
else:
                        # find the first epoch that does eval
x0 = min(epochs) + args.interval - \
min(epochs) % args.interval
xs = np.arange(x0, max(epochs) + 1, args.interval)
ys = []
for epoch in epochs[args.interval - 1::args.interval]:
ys += log_dict[epoch][metric]
# if training is aborted before eval of the last epoch
# `xs` and `ys` will have different length and cause an error
# check if `ys[-1]` is empty here
if not log_dict[epoch][metric]:
xs = xs[:-1]
ax = plt.gca()
ax.set_xticks(xs)
plt.xlabel('epoch')
plt.plot(xs, ys, label=legend[i * num_metrics + j], marker='o')
else:
xs = []
ys = []
num_iters_per_epoch = \
log_dict[epochs[args.interval-1]]['iter'][-1]
for epoch in epochs[args.interval - 1::args.interval]:
iters = log_dict[epoch]['iter']
if log_dict[epoch]['mode'][-1] == 'val':
iters = iters[:-1]
xs.append(
np.array(iters) + (epoch - 1) * num_iters_per_epoch)
ys.append(np.array(log_dict[epoch][metric][:len(iters)]))
xs = np.concatenate(xs)
ys = np.concatenate(ys)
plt.xlabel('iter')
plt.plot(
xs, ys, label=legend[i * num_metrics + j], linewidth=0.5)
plt.legend()
if args.title is not None:
plt.title(args.title)
if args.out is None:
plt.show()
else:
print(f'save curve to: {args.out}')
plt.savefig(args.out)
plt.cla()
def add_plot_parser(subparsers):
parser_plt = subparsers.add_parser(
'plot_curve', help='parser for plotting curves')
parser_plt.add_argument(
'json_logs',
type=str,
nargs='+',
help='path of train log in json format')
parser_plt.add_argument(
'--keys',
type=str,
nargs='+',
default=['mAP_0.25'],
help='the metric that you want to plot')
parser_plt.add_argument('--title', type=str, help='title of figure')
parser_plt.add_argument(
'--legend',
type=str,
nargs='+',
default=None,
help='legend of each plot')
parser_plt.add_argument(
'--backend', type=str, default=None, help='backend of plt')
parser_plt.add_argument(
'--style', type=str, default='dark', help='style of plt')
parser_plt.add_argument('--out', type=str, default=None)
parser_plt.add_argument('--mode', type=str, default='train')
parser_plt.add_argument('--interval', type=int, default=1)
def add_time_parser(subparsers):
parser_time = subparsers.add_parser(
'cal_train_time',
help='parser for computing the average time per training iteration')
parser_time.add_argument(
'json_logs',
type=str,
nargs='+',
help='path of train log in json format')
parser_time.add_argument(
'--include-outliers',
action='store_true',
help='include the first value of every epoch when computing '
'the average time')
def parse_args():
parser = argparse.ArgumentParser(description='Analyze Json Log')
    # currently only supports plotting curves and calculating average train time
subparsers = parser.add_subparsers(dest='task', help='task parser')
add_plot_parser(subparsers)
add_time_parser(subparsers)
args = parser.parse_args()
return args
def load_json_logs(json_logs):
# load and convert json_logs to log_dict, key is epoch, value is a sub dict
# keys of sub dict is different metrics, e.g. memory, bbox_mAP
# value of sub dict is a list of corresponding values of all iterations
log_dicts = [dict() for _ in json_logs]
for json_log, log_dict in zip(json_logs, log_dicts):
with open(json_log, 'r') as log_file:
for line in log_file:
log = json.loads(line.strip())
# skip lines without `epoch` field
if 'epoch' not in log:
continue
epoch = log.pop('epoch')
if epoch not in log_dict:
log_dict[epoch] = defaultdict(list)
for k, v in log.items():
log_dict[epoch][k].append(v)
return log_dicts
def main():
args = parse_args()
json_logs = args.json_logs
for json_log in json_logs:
assert json_log.endswith('.json')
log_dicts = load_json_logs(json_logs)
eval(args.task)(log_dicts, args)
if __name__ == '__main__':
main()
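
# --- Example invocations (added for clarity; the script path and log file names are illustrative). ---
#   python analyze_logs.py plot_curve work_dirs/run/20220609.log.json --keys loss --out loss.png
#   python analyze_logs.py cal_train_time work_dirs/run/20220609.log.json --include-outliers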
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import time
import torch
from mmcv import Config
from mmcv.parallel import MMDataParallel
from mmcv.runner import load_checkpoint, wrap_fp16_model
import sys
sys.path.append('.')
from projects.mmdet3d_plugin.datasets.builder import build_dataloader
from projects.mmdet3d_plugin.datasets import custom_build_dataset
# from mmdet3d.datasets import build_dataloader, build_dataset
from mmdet3d.models import build_detector
#from tools.misc.fuse_conv_bn import fuse_module
def parse_args():
parser = argparse.ArgumentParser(description='MMDet benchmark a model')
parser.add_argument('config', help='test config file path')
parser.add_argument('--checkpoint', default=None, help='checkpoint file')
    parser.add_argument(
        '--samples', type=int, default=2000, help='samples to benchmark')
    parser.add_argument(
        '--log-interval', type=int, default=50, help='interval of logging')
parser.add_argument(
'--fuse-conv-bn',
action='store_true',
        help='Whether to fuse conv and bn; this will slightly increase '
        'the inference speed')
args = parser.parse_args()
return args
def main():
args = parse_args()
cfg = Config.fromfile(args.config)
# set cudnn_benchmark
if cfg.get('cudnn_benchmark', False):
torch.backends.cudnn.benchmark = True
cfg.model.pretrained = None
cfg.data.test.test_mode = True
# build the dataloader
# TODO: support multiple images per gpu (only minor changes are needed)
print(cfg.data.test)
dataset = custom_build_dataset(cfg.data.test)
data_loader = build_dataloader(
dataset,
samples_per_gpu=1,
workers_per_gpu=cfg.data.workers_per_gpu,
dist=False,
shuffle=False)
# build the model and load checkpoint
cfg.model.train_cfg = None
model = build_detector(cfg.model, test_cfg=cfg.get('test_cfg'))
fp16_cfg = cfg.get('fp16', None)
if fp16_cfg is not None:
wrap_fp16_model(model)
if args.checkpoint is not None:
load_checkpoint(model, args.checkpoint, map_location='cpu')
#if args.fuse_conv_bn:
# model = fuse_module(model)
model = MMDataParallel(model, device_ids=[0])
model.eval()
# the first several iterations may be very slow so skip them
num_warmup = 5
pure_inf_time = 0
# benchmark with several samples and take the average
for i, data in enumerate(data_loader):
torch.cuda.synchronize()
start_time = time.perf_counter()
with torch.no_grad():
model(return_loss=False, rescale=True, **data)
torch.cuda.synchronize()
elapsed = time.perf_counter() - start_time
if i >= num_warmup:
pure_inf_time += elapsed
if (i + 1) % args.log_interval == 0:
fps = (i + 1 - num_warmup) / pure_inf_time
print(f'Done image [{i + 1:<3}/ {args.samples}], '
f'fps: {fps:.1f} img / s')
if (i + 1) == args.samples:
pure_inf_time += elapsed
fps = (i + 1 - num_warmup) / pure_inf_time
print(f'Overall fps: {fps:.1f} img / s')
break
if __name__ == '__main__':
main()
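
# --- Example invocation (added for clarity; config and checkpoint paths are illustrative). ---
#   python benchmark.py path/to/config.py --checkpoint path/to/checkpoint.pth --samples 500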
import torch

file_path = './ckpts/bevformer_v4.pth'
checkpoint = torch.load(file_path, map_location='cpu')
# Sum the element counts of every parameter tensor stored in the checkpoint.
total = 0
for key in list(checkpoint['state_dict'].keys()):
    total += checkpoint['state_dict'][key].nelement()
print(total)
# smaller 63374123
# v4 69140395
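
# --- Optional per-module breakdown (added for clarity; a minimal sketch that reuses the same
# checkpoint path and assumes parameters can be grouped by the first component of each
# state_dict key). ---
from collections import defaultdict

state_dict = torch.load(file_path, map_location='cpu')['state_dict']
per_module = defaultdict(int)
for key, value in state_dict.items():
    per_module[key.split('.')[0]] += value.nelement()
for name, count in sorted(per_module.items(), key=lambda kv: -kv[1]):
    print(f'{name}: {count}')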
# Based on https://github.com/nutonomy/nuscenes-devkit
# ---------------------------------------------
# Modified by Zhiqi Li
# ---------------------------------------------
import mmcv
import matplotlib.pyplot as plt
import numpy as np
from matplotlib import rcParams
from matplotlib.axes import Axes
from PIL import Image
from pyquaternion import Quaternion
from tqdm import tqdm
from typing import Tuple, List, Iterable

from nuscenes.nuscenes import NuScenes
from nuscenes.utils.data_classes import LidarPointCloud, RadarPointCloud, Box
from nuscenes.utils.geometry_utils import view_points, box_in_image, BoxVisibility, transform_matrix
from nuscenes.eval.common.data_classes import EvalBoxes, EvalBox
from nuscenes.eval.detection.data_classes import DetectionBox
from nuscenes.eval.detection.utils import category_to_detection_name
from nuscenes.eval.detection.render import visualize_sample

cams = ['CAM_FRONT',
        'CAM_FRONT_RIGHT',
        'CAM_BACK_RIGHT',
        'CAM_BACK',
        'CAM_BACK_LEFT',
        'CAM_FRONT_LEFT']
def render_annotation(
anntoken: str,
margin: float = 10,
view: np.ndarray = np.eye(4),
box_vis_level: BoxVisibility = BoxVisibility.ANY,
out_path: str = 'render.png',
extra_info: bool = False) -> None:
"""
Render selected annotation.
:param anntoken: Sample_annotation token.
:param margin: How many meters in each direction to include in LIDAR view.
:param view: LIDAR view point.
:param box_vis_level: If sample_data is an image, this sets required visibility for boxes.
:param out_path: Optional path to save the rendered figure to disk.
:param extra_info: Whether to render extra information below camera view.
"""
ann_record = nusc.get('sample_annotation', anntoken)
sample_record = nusc.get('sample', ann_record['sample_token'])
assert 'LIDAR_TOP' in sample_record['data'].keys(), 'Error: No LIDAR_TOP in data, unable to render.'
# Figure out which camera the object is fully visible in (this may return nothing).
boxes, cam = [], []
cams = [key for key in sample_record['data'].keys() if 'CAM' in key]
all_bboxes = []
select_cams = []
for cam in cams:
_, boxes, _ = nusc.get_sample_data(sample_record['data'][cam], box_vis_level=box_vis_level,
selected_anntokens=[anntoken])
if len(boxes) > 0:
all_bboxes.append(boxes)
select_cams.append(cam)
# We found an image that matches. Let's abort.
# assert len(boxes) > 0, 'Error: Could not find image where annotation is visible. ' \
# 'Try using e.g. BoxVisibility.ANY.'
# assert len(boxes) < 2, 'Error: Found multiple annotations. Something is wrong!'
num_cam = len(all_bboxes)
fig, axes = plt.subplots(1, num_cam + 1, figsize=(18, 9))
select_cams = [sample_record['data'][cam] for cam in select_cams]
print('bbox in cams:', select_cams)
# Plot LIDAR view.
lidar = sample_record['data']['LIDAR_TOP']
data_path, boxes, camera_intrinsic = nusc.get_sample_data(lidar, selected_anntokens=[anntoken])
LidarPointCloud.from_file(data_path).render_height(axes[0], view=view)
for box in boxes:
c = np.array(get_color(box.name)) / 255.0
box.render(axes[0], view=view, colors=(c, c, c))
corners = view_points(boxes[0].corners(), view, False)[:2, :]
axes[0].set_xlim([np.min(corners[0, :]) - margin, np.max(corners[0, :]) + margin])
axes[0].set_ylim([np.min(corners[1, :]) - margin, np.max(corners[1, :]) + margin])
axes[0].axis('off')
axes[0].set_aspect('equal')
# Plot CAMERA view.
for i in range(1, num_cam + 1):
cam = select_cams[i - 1]
data_path, boxes, camera_intrinsic = nusc.get_sample_data(cam, selected_anntokens=[anntoken])
im = Image.open(data_path)
axes[i].imshow(im)
axes[i].set_title(nusc.get('sample_data', cam)['channel'])
axes[i].axis('off')
axes[i].set_aspect('equal')
for box in boxes:
c = np.array(get_color(box.name)) / 255.0
box.render(axes[i], view=camera_intrinsic, normalize=True, colors=(c, c, c))
# Print extra information about the annotation below the camera view.
axes[i].set_xlim(0, im.size[0])
axes[i].set_ylim(im.size[1], 0)
if extra_info:
rcParams['font.family'] = 'monospace'
w, l, h = ann_record['size']
category = ann_record['category_name']
lidar_points = ann_record['num_lidar_pts']
radar_points = ann_record['num_radar_pts']
sample_data_record = nusc.get('sample_data', sample_record['data']['LIDAR_TOP'])
pose_record = nusc.get('ego_pose', sample_data_record['ego_pose_token'])
dist = np.linalg.norm(np.array(pose_record['translation']) - np.array(ann_record['translation']))
information = ' \n'.join(['category: {}'.format(category),
'',
'# lidar points: {0:>4}'.format(lidar_points),
'# radar points: {0:>4}'.format(radar_points),
'',
'distance: {:>7.3f}m'.format(dist),
'',
'width: {:>7.3f}m'.format(w),
'length: {:>7.3f}m'.format(l),
'height: {:>7.3f}m'.format(h)])
plt.annotate(information, (0, 0), (0, -20), xycoords='axes fraction', textcoords='offset points', va='top')
if out_path is not None:
plt.savefig(out_path)
def get_sample_data(sample_data_token: str,
box_vis_level: BoxVisibility = BoxVisibility.ANY,
selected_anntokens=None,
use_flat_vehicle_coordinates: bool = False):
"""
Returns the data path as well as all annotations related to that sample_data.
Note that the boxes are transformed into the current sensor's coordinate frame.
:param sample_data_token: Sample_data token.
:param box_vis_level: If sample_data is an image, this sets required visibility for boxes.
:param selected_anntokens: If provided only return the selected annotation.
:param use_flat_vehicle_coordinates: Instead of the current sensor's coordinate frame, use ego frame which is
aligned to z-plane in the world.
:return: (data_path, boxes, camera_intrinsic <np.array: 3, 3>)
"""
# Retrieve sensor & pose records
sd_record = nusc.get('sample_data', sample_data_token)
cs_record = nusc.get('calibrated_sensor', sd_record['calibrated_sensor_token'])
sensor_record = nusc.get('sensor', cs_record['sensor_token'])
pose_record = nusc.get('ego_pose', sd_record['ego_pose_token'])
data_path = nusc.get_sample_data_path(sample_data_token)
if sensor_record['modality'] == 'camera':
cam_intrinsic = np.array(cs_record['camera_intrinsic'])
imsize = (sd_record['width'], sd_record['height'])
else:
cam_intrinsic = None
imsize = None
# Retrieve all sample annotations and map to sensor coordinate system.
if selected_anntokens is not None:
boxes = list(map(nusc.get_box, selected_anntokens))
else:
boxes = nusc.get_boxes(sample_data_token)
# Make list of Box objects including coord system transforms.
box_list = []
for box in boxes:
if use_flat_vehicle_coordinates:
# Move box to ego vehicle coord system parallel to world z plane.
yaw = Quaternion(pose_record['rotation']).yaw_pitch_roll[0]
box.translate(-np.array(pose_record['translation']))
box.rotate(Quaternion(scalar=np.cos(yaw / 2), vector=[0, 0, np.sin(yaw / 2)]).inverse)
else:
# Move box to ego vehicle coord system.
box.translate(-np.array(pose_record['translation']))
box.rotate(Quaternion(pose_record['rotation']).inverse)
# Move box to sensor coord system.
box.translate(-np.array(cs_record['translation']))
box.rotate(Quaternion(cs_record['rotation']).inverse)
if sensor_record['modality'] == 'camera' and not \
box_in_image(box, cam_intrinsic, imsize, vis_level=box_vis_level):
continue
box_list.append(box)
return data_path, box_list, cam_intrinsic
def get_predicted_data(sample_data_token: str,
box_vis_level: BoxVisibility = BoxVisibility.ANY,
selected_anntokens=None,
use_flat_vehicle_coordinates: bool = False,
pred_anns=None
):
"""
Returns the data path as well as all annotations related to that sample_data.
Note that the boxes are transformed into the current sensor's coordinate frame.
:param sample_data_token: Sample_data token.
:param box_vis_level: If sample_data is an image, this sets required visibility for boxes.
:param selected_anntokens: If provided only return the selected annotation.
:param use_flat_vehicle_coordinates: Instead of the current sensor's coordinate frame, use ego frame which is
aligned to z-plane in the world.
:return: (data_path, boxes, camera_intrinsic <np.array: 3, 3>)
"""
# Retrieve sensor & pose records
sd_record = nusc.get('sample_data', sample_data_token)
cs_record = nusc.get('calibrated_sensor', sd_record['calibrated_sensor_token'])
sensor_record = nusc.get('sensor', cs_record['sensor_token'])
pose_record = nusc.get('ego_pose', sd_record['ego_pose_token'])
data_path = nusc.get_sample_data_path(sample_data_token)
if sensor_record['modality'] == 'camera':
cam_intrinsic = np.array(cs_record['camera_intrinsic'])
imsize = (sd_record['width'], sd_record['height'])
else:
cam_intrinsic = None
imsize = None
# Retrieve all sample annotations and map to sensor coordinate system.
# if selected_anntokens is not None:
# boxes = list(map(nusc.get_box, selected_anntokens))
# else:
# boxes = nusc.get_boxes(sample_data_token)
boxes = pred_anns
# Make list of Box objects including coord system transforms.
box_list = []
for box in boxes:
if use_flat_vehicle_coordinates:
# Move box to ego vehicle coord system parallel to world z plane.
yaw = Quaternion(pose_record['rotation']).yaw_pitch_roll[0]
box.translate(-np.array(pose_record['translation']))
box.rotate(Quaternion(scalar=np.cos(yaw / 2), vector=[0, 0, np.sin(yaw / 2)]).inverse)
else:
# Move box to ego vehicle coord system.
box.translate(-np.array(pose_record['translation']))
box.rotate(Quaternion(pose_record['rotation']).inverse)
# Move box to sensor coord system.
box.translate(-np.array(cs_record['translation']))
box.rotate(Quaternion(cs_record['rotation']).inverse)
if sensor_record['modality'] == 'camera' and not \
box_in_image(box, cam_intrinsic, imsize, vis_level=box_vis_level):
continue
box_list.append(box)
return data_path, box_list, cam_intrinsic
def lidar_render(sample_token, data, out_path=None):
bbox_gt_list = []
bbox_pred_list = []
anns = nusc.get('sample', sample_token)['anns']
for ann in anns:
content = nusc.get('sample_annotation', ann)
try:
bbox_gt_list.append(DetectionBox(
sample_token=content['sample_token'],
translation=tuple(content['translation']),
size=tuple(content['size']),
rotation=tuple(content['rotation']),
velocity=nusc.box_velocity(content['token'])[:2],
ego_translation=(0.0, 0.0, 0.0) if 'ego_translation' not in content
else tuple(content['ego_translation']),
num_pts=-1 if 'num_pts' not in content else int(content['num_pts']),
detection_name=category_to_detection_name(content['category_name']),
detection_score=-1.0 if 'detection_score' not in content else float(content['detection_score']),
attribute_name=''))
except:
pass
bbox_anns = data['results'][sample_token]
for content in bbox_anns:
bbox_pred_list.append(DetectionBox(
sample_token=content['sample_token'],
translation=tuple(content['translation']),
size=tuple(content['size']),
rotation=tuple(content['rotation']),
velocity=tuple(content['velocity']),
ego_translation=(0.0, 0.0, 0.0) if 'ego_translation' not in content
else tuple(content['ego_translation']),
num_pts=-1 if 'num_pts' not in content else int(content['num_pts']),
detection_name=content['detection_name'],
detection_score=-1.0 if 'detection_score' not in content else float(content['detection_score']),
attribute_name=content['attribute_name']))
gt_annotations = EvalBoxes()
pred_annotations = EvalBoxes()
gt_annotations.add_boxes(sample_token, bbox_gt_list)
pred_annotations.add_boxes(sample_token, bbox_pred_list)
print('green is ground truth')
    print('blue is the predicted result')
visualize_sample(nusc, sample_token, gt_annotations, pred_annotations, savepath=out_path+'_bev')
def get_color(category_name: str):
"""
Provides the default colors based on the category names.
This method works for the general nuScenes categories, as well as the nuScenes detection categories.
"""
a = ['noise', 'animal', 'human.pedestrian.adult', 'human.pedestrian.child', 'human.pedestrian.construction_worker',
'human.pedestrian.personal_mobility', 'human.pedestrian.police_officer', 'human.pedestrian.stroller',
'human.pedestrian.wheelchair', 'movable_object.barrier', 'movable_object.debris',
'movable_object.pushable_pullable', 'movable_object.trafficcone', 'static_object.bicycle_rack', 'vehicle.bicycle',
'vehicle.bus.bendy', 'vehicle.bus.rigid', 'vehicle.car', 'vehicle.construction', 'vehicle.emergency.ambulance',
'vehicle.emergency.police', 'vehicle.motorcycle', 'vehicle.trailer', 'vehicle.truck', 'flat.driveable_surface',
'flat.other', 'flat.sidewalk', 'flat.terrain', 'static.manmade', 'static.other', 'static.vegetation',
'vehicle.ego']
class_names = [
'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',
'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
]
#print(category_name)
if category_name == 'bicycle':
return nusc.colormap['vehicle.bicycle']
elif category_name == 'construction_vehicle':
return nusc.colormap['vehicle.construction']
elif category_name == 'traffic_cone':
return nusc.colormap['movable_object.trafficcone']
for key in nusc.colormap.keys():
if category_name in key:
return nusc.colormap[key]
return [0, 0, 0]
def render_sample_data(
        sample_token: str,
with_anns: bool = True,
box_vis_level: BoxVisibility = BoxVisibility.ANY,
axes_limit: float = 40,
ax=None,
nsweeps: int = 1,
out_path: str = None,
underlay_map: bool = True,
use_flat_vehicle_coordinates: bool = True,
show_lidarseg: bool = False,
show_lidarseg_legend: bool = False,
filter_lidarseg_labels=None,
lidarseg_preds_bin_path: str = None,
verbose: bool = True,
show_panoptic: bool = False,
pred_data=None,
) -> None:
"""
Render sample data onto axis.
    :param sample_token: Sample token.
:param with_anns: Whether to draw box annotations.
:param box_vis_level: If sample_data is an image, this sets required visibility for boxes.
:param axes_limit: Axes limit for lidar and radar (measured in meters).
:param ax: Axes onto which to render.
:param nsweeps: Number of sweeps for lidar and radar.
:param out_path: Optional path to save the rendered figure to disk.
:param underlay_map: When set to true, lidar data is plotted onto the map. This can be slow.
:param use_flat_vehicle_coordinates: Instead of the current sensor's coordinate frame, use ego frame which is
aligned to z-plane in the world. Note: Previously this method did not use flat vehicle coordinates, which
can lead to small errors when the vertical axis of the global frame and lidar are not aligned. The new
setting is more correct and rotates the plot by ~90 degrees.
:param show_lidarseg: When set to True, the lidar data is colored with the segmentation labels. When set
to False, the colors of the lidar data represent the distance from the center of the ego vehicle.
:param show_lidarseg_legend: Whether to display the legend for the lidarseg labels in the frame.
:param filter_lidarseg_labels: Only show lidar points which belong to the given list of classes. If None
or the list is empty, all classes will be displayed.
:param lidarseg_preds_bin_path: A path to the .bin file which contains the user's lidar segmentation
predictions for the sample.
:param verbose: Whether to display the image after it is rendered.
:param show_panoptic: When set to True, the lidar data is colored with the panoptic labels. When set
to False, the colors of the lidar data represent the distance from the center of the ego vehicle.
If show_lidarseg is True, show_panoptic will be set to False.
"""
    lidar_render(sample_token, pred_data, out_path=out_path)
    sample = nusc.get('sample', sample_token)
# sample = data['results'][sample_token_list[0]][0]
cams = [
'CAM_FRONT_LEFT',
'CAM_FRONT',
'CAM_FRONT_RIGHT',
'CAM_BACK_LEFT',
'CAM_BACK',
'CAM_BACK_RIGHT',
]
if ax is None:
_, ax = plt.subplots(4, 3, figsize=(24, 18))
j = 0
for ind, cam in enumerate(cams):
sample_data_token = sample['data'][cam]
sd_record = nusc.get('sample_data', sample_data_token)
sensor_modality = sd_record['sensor_modality']
if sensor_modality in ['lidar', 'radar']:
assert False
elif sensor_modality == 'camera':
# Load boxes and image.
boxes = [Box(record['translation'], record['size'], Quaternion(record['rotation']),
name=record['detection_name'], token='predicted') for record in
                     pred_data['results'][sample_token] if record['detection_score'] > 0.2]
data_path, boxes_pred, camera_intrinsic = get_predicted_data(sample_data_token,
box_vis_level=box_vis_level, pred_anns=boxes)
_, boxes_gt, _ = nusc.get_sample_data(sample_data_token, box_vis_level=box_vis_level)
if ind == 3:
j += 1
ind = ind % 3
data = Image.open(data_path)
# mmcv.imwrite(np.array(data)[:,:,::-1], f'{cam}.png')
# Init axes.
# Show image.
ax[j, ind].imshow(data)
ax[j + 2, ind].imshow(data)
# Show boxes.
if with_anns:
for box in boxes_pred:
c = np.array(get_color(box.name)) / 255.0
box.render(ax[j, ind], view=camera_intrinsic, normalize=True, colors=(c, c, c))
for box in boxes_gt:
c = np.array(get_color(box.name)) / 255.0
box.render(ax[j + 2, ind], view=camera_intrinsic, normalize=True, colors=(c, c, c))
# Limit visible range.
ax[j, ind].set_xlim(0, data.size[0])
ax[j, ind].set_ylim(data.size[1], 0)
ax[j + 2, ind].set_xlim(0, data.size[0])
ax[j + 2, ind].set_ylim(data.size[1], 0)
else:
raise ValueError("Error: Unknown sensor modality!")
ax[j, ind].axis('off')
ax[j, ind].set_title('PRED: {} {labels_type}'.format(
sd_record['channel'], labels_type='(predictions)' if lidarseg_preds_bin_path else ''))
ax[j, ind].set_aspect('equal')
ax[j + 2, ind].axis('off')
ax[j + 2, ind].set_title('GT:{} {labels_type}'.format(
sd_record['channel'], labels_type='(predictions)' if lidarseg_preds_bin_path else ''))
ax[j + 2, ind].set_aspect('equal')
if out_path is not None:
plt.savefig(out_path+'_camera', bbox_inches='tight', pad_inches=0, dpi=200)
if verbose:
plt.show()
plt.close()
if __name__ == '__main__':
nusc = NuScenes(version='v1.0-trainval', dataroot='./data/nuscenes', verbose=True)
# render_annotation('7603b030b42a4b1caa8c443ccc1a7d52')
bevformer_results = mmcv.load('test/bevformer_base/Thu_Jun__9_16_22_37_2022/pts_bbox/results_nusc.json')
sample_token_list = list(bevformer_results['results'].keys())
for id in range(0, 10):
render_sample_data(sample_token_list[id], pred_data=bevformer_results, out_path=sample_token_list[id])
# ---------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
# ---------------------------------------------
# Modified by Zhiqi Li
# ---------------------------------------------
from data_converter.create_gt_database import create_groundtruth_database
from data_converter import nuscenes_converter as nuscenes_converter
from data_converter import lyft_converter as lyft_converter
from data_converter import kitti_converter as kitti
from data_converter import indoor_converter as indoor
import argparse
from os import path as osp
import sys
sys.path.append('.')
def kitti_data_prep(root_path, info_prefix, version, out_dir):
"""Prepare data related to Kitti dataset.
Related data consists of '.pkl' files recording basic infos,
2D annotations and groundtruth database.
Args:
root_path (str): Path of dataset root.
info_prefix (str): The prefix of info filenames.
version (str): Dataset version.
out_dir (str): Output directory of the groundtruth database info.
"""
kitti.create_kitti_info_file(root_path, info_prefix)
kitti.create_reduced_point_cloud(root_path, info_prefix)
info_train_path = osp.join(root_path, f'{info_prefix}_infos_train.pkl')
info_val_path = osp.join(root_path, f'{info_prefix}_infos_val.pkl')
info_trainval_path = osp.join(root_path,
f'{info_prefix}_infos_trainval.pkl')
info_test_path = osp.join(root_path, f'{info_prefix}_infos_test.pkl')
kitti.export_2d_annotation(root_path, info_train_path)
kitti.export_2d_annotation(root_path, info_val_path)
kitti.export_2d_annotation(root_path, info_trainval_path)
kitti.export_2d_annotation(root_path, info_test_path)
create_groundtruth_database(
'KittiDataset',
root_path,
info_prefix,
f'{out_dir}/{info_prefix}_infos_train.pkl',
relative_path=False,
mask_anno_path='instances_train.json',
with_mask=(version == 'mask'))
def nuscenes_data_prep(root_path,
can_bus_root_path,
info_prefix,
version,
dataset_name,
out_dir,
max_sweeps=10):
"""Prepare data related to nuScenes dataset.
Related data consists of '.pkl' files recording basic infos,
2D annotations and groundtruth database.
Args:
root_path (str): Path of dataset root.
info_prefix (str): The prefix of info filenames.
version (str): Dataset version.
dataset_name (str): The dataset class name.
out_dir (str): Output directory of the groundtruth database info.
max_sweeps (int): Number of input consecutive frames. Default: 10
"""
nuscenes_converter.create_nuscenes_infos(
root_path, out_dir, can_bus_root_path, info_prefix, version=version, max_sweeps=max_sweeps)
if version == 'v1.0-test':
info_test_path = osp.join(
out_dir, f'{info_prefix}_infos_temporal_test.pkl')
nuscenes_converter.export_2d_annotation(
root_path, info_test_path, version=version)
else:
info_train_path = osp.join(
out_dir, f'{info_prefix}_infos_temporal_train.pkl')
info_val_path = osp.join(
out_dir, f'{info_prefix}_infos_temporal_val.pkl')
nuscenes_converter.export_2d_annotation(
root_path, info_train_path, version=version)
nuscenes_converter.export_2d_annotation(
root_path, info_val_path, version=version)
# create_groundtruth_database(dataset_name, root_path, info_prefix,
# f'{out_dir}/{info_prefix}_infos_train.pkl')
def lyft_data_prep(root_path, info_prefix, version, max_sweeps=10):
"""Prepare data related to Lyft dataset.
Related data consists of '.pkl' files recording basic infos.
    Although the ground truth database and 2D annotations are not used in
    Lyft, they can also be generated like nuScenes.
Args:
root_path (str): Path of dataset root.
info_prefix (str): The prefix of info filenames.
version (str): Dataset version.
max_sweeps (int, optional): Number of input consecutive frames.
Defaults to 10.
"""
lyft_converter.create_lyft_infos(
root_path, info_prefix, version=version, max_sweeps=max_sweeps)
def scannet_data_prep(root_path, info_prefix, out_dir, workers):
"""Prepare the info file for scannet dataset.
Args:
root_path (str): Path of dataset root.
info_prefix (str): The prefix of info filenames.
out_dir (str): Output directory of the generated info file.
workers (int): Number of threads to be used.
"""
indoor.create_indoor_info_file(
root_path, info_prefix, out_dir, workers=workers)
def s3dis_data_prep(root_path, info_prefix, out_dir, workers):
"""Prepare the info file for s3dis dataset.
Args:
root_path (str): Path of dataset root.
info_prefix (str): The prefix of info filenames.
out_dir (str): Output directory of the generated info file.
workers (int): Number of threads to be used.
"""
indoor.create_indoor_info_file(
root_path, info_prefix, out_dir, workers=workers)
def sunrgbd_data_prep(root_path, info_prefix, out_dir, workers):
"""Prepare the info file for sunrgbd dataset.
Args:
root_path (str): Path of dataset root.
info_prefix (str): The prefix of info filenames.
out_dir (str): Output directory of the generated info file.
workers (int): Number of threads to be used.
"""
indoor.create_indoor_info_file(
root_path, info_prefix, out_dir, workers=workers)
def waymo_data_prep(root_path,
info_prefix,
version,
out_dir,
workers,
max_sweeps=5):
"""Prepare the info file for waymo dataset.
Args:
root_path (str): Path of dataset root.
info_prefix (str): The prefix of info filenames.
out_dir (str): Output directory of the generated info file.
workers (int): Number of threads to be used.
        max_sweeps (int): Number of input consecutive frames. Default: 5.
            Here we store pose information of these frames for later use.
"""
from tools.data_converter import waymo_converter as waymo
splits = ['training', 'validation', 'testing']
for i, split in enumerate(splits):
load_dir = osp.join(root_path, 'waymo_format', split)
if split == 'validation':
save_dir = osp.join(out_dir, 'kitti_format', 'training')
else:
save_dir = osp.join(out_dir, 'kitti_format', split)
converter = waymo.Waymo2KITTI(
load_dir,
save_dir,
prefix=str(i),
workers=workers,
            test_mode=(split == 'testing'))  # 'testing' matches the split names above
converter.convert()
# Generate waymo infos
out_dir = osp.join(out_dir, 'kitti_format')
kitti.create_waymo_info_file(out_dir, info_prefix, max_sweeps=max_sweeps)
create_groundtruth_database(
'WaymoDataset',
out_dir,
info_prefix,
f'{out_dir}/{info_prefix}_infos_train.pkl',
relative_path=False,
with_mask=False)
parser = argparse.ArgumentParser(description='Data converter arg parser')
parser.add_argument('dataset', metavar='kitti', help='name of the dataset')
parser.add_argument(
'--root-path',
type=str,
default='./data/kitti',
help='specify the root path of dataset')
parser.add_argument(
'--canbus',
type=str,
default='./data',
help='specify the root path of nuScenes canbus')
parser.add_argument(
'--version',
type=str,
default='v1.0',
required=False,
help='specify the dataset version, no need for kitti')
parser.add_argument(
'--max-sweeps',
type=int,
default=10,
required=False,
help='specify sweeps of lidar per example')
parser.add_argument(
'--out-dir',
type=str,
default='./data/kitti',
    required=False,
help='name of info pkl')
parser.add_argument('--extra-tag', type=str, default='kitti')
parser.add_argument(
'--workers', type=int, default=4, help='number of threads to be used')
args = parser.parse_args()
if __name__ == '__main__':
if args.dataset == 'kitti':
kitti_data_prep(
root_path=args.root_path,
info_prefix=args.extra_tag,
version=args.version,
out_dir=args.out_dir)
elif args.dataset == 'nuscenes' and args.version != 'v1.0-mini':
train_version = f'{args.version}-trainval'
nuscenes_data_prep(
root_path=args.root_path,
can_bus_root_path=args.canbus,
info_prefix=args.extra_tag,
version=train_version,
dataset_name='NuScenesDataset',
out_dir=args.out_dir,
max_sweeps=args.max_sweeps)
test_version = f'{args.version}-test'
nuscenes_data_prep(
root_path=args.root_path,
can_bus_root_path=args.canbus,
info_prefix=args.extra_tag,
version=test_version,
dataset_name='NuScenesDataset',
out_dir=args.out_dir,
max_sweeps=args.max_sweeps)
elif args.dataset == 'nuscenes' and args.version == 'v1.0-mini':
train_version = f'{args.version}'
nuscenes_data_prep(
root_path=args.root_path,
can_bus_root_path=args.canbus,
info_prefix=args.extra_tag,
version=train_version,
dataset_name='NuScenesDataset',
out_dir=args.out_dir,
max_sweeps=args.max_sweeps)
elif args.dataset == 'lyft':
train_version = f'{args.version}-train'
lyft_data_prep(
root_path=args.root_path,
info_prefix=args.extra_tag,
version=train_version,
max_sweeps=args.max_sweeps)
test_version = f'{args.version}-test'
lyft_data_prep(
root_path=args.root_path,
info_prefix=args.extra_tag,
version=test_version,
max_sweeps=args.max_sweeps)
elif args.dataset == 'waymo':
waymo_data_prep(
root_path=args.root_path,
info_prefix=args.extra_tag,
version=args.version,
out_dir=args.out_dir,
workers=args.workers,
max_sweeps=args.max_sweeps)
elif args.dataset == 'scannet':
scannet_data_prep(
root_path=args.root_path,
info_prefix=args.extra_tag,
out_dir=args.out_dir,
workers=args.workers)
elif args.dataset == 's3dis':
s3dis_data_prep(
root_path=args.root_path,
info_prefix=args.extra_tag,
out_dir=args.out_dir,
workers=args.workers)
elif args.dataset == 'sunrgbd':
sunrgbd_data_prep(
root_path=args.root_path,
info_prefix=args.extra_tag,
out_dir=args.out_dir,
workers=args.workers)
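
# --- Example invocation (added for clarity; dataset roots and tags are illustrative). ---
#   python create_data.py nuscenes --root-path ./data/nuscenes --out-dir ./data/nuscenes \
#       --extra-tag nuscenes --version v1.0 --canbus ./data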
# Copyright (c) OpenMMLab. All rights reserved.
# Copyright (c) OpenMMLab. All rights reserved.
import mmcv
import numpy as np
import pickle
from mmcv import track_iter_progress
from mmcv.ops import roi_align
from os import path as osp
from pycocotools import mask as maskUtils
from pycocotools.coco import COCO
from mmdet3d.core.bbox import box_np_ops as box_np_ops
from mmdet3d.datasets import build_dataset
from mmdet.core.evaluation.bbox_overlaps import bbox_overlaps
def _poly2mask(mask_ann, img_h, img_w):
if isinstance(mask_ann, list):
# polygon -- a single object might consist of multiple parts
# we merge all parts into one mask rle code
rles = maskUtils.frPyObjects(mask_ann, img_h, img_w)
rle = maskUtils.merge(rles)
elif isinstance(mask_ann['counts'], list):
# uncompressed RLE
rle = maskUtils.frPyObjects(mask_ann, img_h, img_w)
else:
# rle
rle = mask_ann
mask = maskUtils.decode(rle)
return mask
def _parse_coco_ann_info(ann_info):
gt_bboxes = []
gt_labels = []
gt_bboxes_ignore = []
gt_masks_ann = []
for i, ann in enumerate(ann_info):
if ann.get('ignore', False):
continue
x1, y1, w, h = ann['bbox']
if ann['area'] <= 0:
continue
bbox = [x1, y1, x1 + w, y1 + h]
if ann.get('iscrowd', False):
gt_bboxes_ignore.append(bbox)
else:
gt_bboxes.append(bbox)
gt_masks_ann.append(ann['segmentation'])
if gt_bboxes:
gt_bboxes = np.array(gt_bboxes, dtype=np.float32)
gt_labels = np.array(gt_labels, dtype=np.int64)
else:
gt_bboxes = np.zeros((0, 4), dtype=np.float32)
gt_labels = np.array([], dtype=np.int64)
if gt_bboxes_ignore:
gt_bboxes_ignore = np.array(gt_bboxes_ignore, dtype=np.float32)
else:
gt_bboxes_ignore = np.zeros((0, 4), dtype=np.float32)
ann = dict(
bboxes=gt_bboxes, bboxes_ignore=gt_bboxes_ignore, masks=gt_masks_ann)
return ann
def crop_image_patch_v2(pos_proposals, pos_assigned_gt_inds, gt_masks):
import torch
from torch.nn.modules.utils import _pair
device = pos_proposals.device
num_pos = pos_proposals.size(0)
fake_inds = (
torch.arange(num_pos,
device=device).to(dtype=pos_proposals.dtype)[:, None])
rois = torch.cat([fake_inds, pos_proposals], dim=1) # Nx5
mask_size = _pair(28)
rois = rois.to(device=device)
gt_masks_th = (
torch.from_numpy(gt_masks).to(device).index_select(
0, pos_assigned_gt_inds).to(dtype=rois.dtype))
    # Using RoIAlign can apparently accelerate training (~0.1 s/iter)
targets = (
roi_align(gt_masks_th, rois, mask_size[::-1], 1.0, 0, True).squeeze(1))
return targets
def crop_image_patch(pos_proposals, gt_masks, pos_assigned_gt_inds, org_img):
num_pos = pos_proposals.shape[0]
masks = []
img_patches = []
for i in range(num_pos):
gt_mask = gt_masks[pos_assigned_gt_inds[i]]
bbox = pos_proposals[i, :].astype(np.int32)
x1, y1, x2, y2 = bbox
w = np.maximum(x2 - x1 + 1, 1)
h = np.maximum(y2 - y1 + 1, 1)
mask_patch = gt_mask[y1:y1 + h, x1:x1 + w]
masked_img = gt_mask[..., None] * org_img
img_patch = masked_img[y1:y1 + h, x1:x1 + w]
img_patches.append(img_patch)
masks.append(mask_patch)
return img_patches, masks
def create_groundtruth_database(dataset_class_name,
data_path,
info_prefix,
info_path=None,
mask_anno_path=None,
used_classes=None,
database_save_path=None,
db_info_save_path=None,
relative_path=True,
add_rgb=False,
lidar_only=False,
bev_only=False,
coors_range=None,
with_mask=False):
"""Given the raw data, generate the ground truth database.
Args:
dataset_class_name (str): Name of the input dataset.
data_path (str): Path of the data.
info_prefix (str): Prefix of the info file.
info_path (str): Path of the info file.
Default: None.
mask_anno_path (str): Path of the mask_anno.
Default: None.
used_classes (list[str]): Classes have been used.
Default: None.
database_save_path (str): Path to save database.
Default: None.
db_info_save_path (str): Path to save db_info.
Default: None.
relative_path (bool): Whether to use relative path.
Default: True.
with_mask (bool): Whether to use mask.
Default: False.
"""
print(f'Create GT Database of {dataset_class_name}')
dataset_cfg = dict(
type=dataset_class_name, data_root=data_path, ann_file=info_path)
if dataset_class_name == 'KittiDataset':
file_client_args = dict(backend='disk')
dataset_cfg.update(
test_mode=False,
split='training',
modality=dict(
use_lidar=True,
use_depth=False,
use_lidar_intensity=True,
use_camera=with_mask,
),
pipeline=[
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=4,
use_dim=4,
file_client_args=file_client_args),
dict(
type='LoadAnnotations3D',
with_bbox_3d=True,
with_label_3d=True,
file_client_args=file_client_args)
])
elif dataset_class_name == 'NuScenesDataset':
dataset_cfg.update(
use_valid_flag=True,
pipeline=[
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=5,
use_dim=5),
dict(
type='LoadPointsFromMultiSweeps',
sweeps_num=10,
use_dim=[0, 1, 2, 3, 4],
pad_empty_sweeps=True,
remove_close=True),
dict(
type='LoadAnnotations3D',
with_bbox_3d=True,
with_label_3d=True)
])
elif dataset_class_name == 'WaymoDataset':
file_client_args = dict(backend='disk')
dataset_cfg.update(
test_mode=False,
split='training',
modality=dict(
use_lidar=True,
use_depth=False,
use_lidar_intensity=True,
use_camera=False,
),
pipeline=[
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=6,
use_dim=5,
file_client_args=file_client_args),
dict(
type='LoadAnnotations3D',
with_bbox_3d=True,
with_label_3d=True,
file_client_args=file_client_args)
])
dataset = build_dataset(dataset_cfg)
if database_save_path is None:
database_save_path = osp.join(data_path, f'{info_prefix}_gt_database')
if db_info_save_path is None:
db_info_save_path = osp.join(data_path,
f'{info_prefix}_dbinfos_train.pkl')
mmcv.mkdir_or_exist(database_save_path)
all_db_infos = dict()
if with_mask:
coco = COCO(osp.join(data_path, mask_anno_path))
imgIds = coco.getImgIds()
file2id = dict()
for i in imgIds:
info = coco.loadImgs([i])[0]
file2id.update({info['file_name']: i})
group_counter = 0
for j in track_iter_progress(list(range(len(dataset)))):
input_dict = dataset.get_data_info(j)
dataset.pre_pipeline(input_dict)
example = dataset.pipeline(input_dict)
annos = example['ann_info']
image_idx = example['sample_idx']
points = example['points'].tensor.numpy()
gt_boxes_3d = annos['gt_bboxes_3d'].tensor.numpy()
names = annos['gt_names']
group_dict = dict()
if 'group_ids' in annos:
group_ids = annos['group_ids']
else:
group_ids = np.arange(gt_boxes_3d.shape[0], dtype=np.int64)
difficulty = np.zeros(gt_boxes_3d.shape[0], dtype=np.int32)
if 'difficulty' in annos:
difficulty = annos['difficulty']
num_obj = gt_boxes_3d.shape[0]
point_indices = box_np_ops.points_in_rbbox(points, gt_boxes_3d)
if with_mask:
# prepare masks
gt_boxes = annos['gt_bboxes']
img_path = osp.split(example['img_info']['filename'])[-1]
if img_path not in file2id.keys():
print(f'skip image {img_path} for empty mask')
continue
img_id = file2id[img_path]
kins_annIds = coco.getAnnIds(imgIds=img_id)
kins_raw_info = coco.loadAnns(kins_annIds)
kins_ann_info = _parse_coco_ann_info(kins_raw_info)
h, w = annos['img_shape'][:2]
gt_masks = [
_poly2mask(mask, h, w) for mask in kins_ann_info['masks']
]
# get mask inds based on iou mapping
bbox_iou = bbox_overlaps(kins_ann_info['bboxes'], gt_boxes)
mask_inds = bbox_iou.argmax(axis=0)
valid_inds = (bbox_iou.max(axis=0) > 0.5)
# mask the image
# use more precise crop when it is ready
# object_img_patches = np.ascontiguousarray(
# np.stack(object_img_patches, axis=0).transpose(0, 3, 1, 2))
# crop image patches using roi_align
# object_img_patches = crop_image_patch_v2(
# torch.Tensor(gt_boxes),
# torch.Tensor(mask_inds).long(), object_img_patches)
object_img_patches, object_masks = crop_image_patch(
gt_boxes, gt_masks, mask_inds, annos['img'])
for i in range(num_obj):
filename = f'{image_idx}_{names[i]}_{i}.bin'
abs_filepath = osp.join(database_save_path, filename)
rel_filepath = osp.join(f'{info_prefix}_gt_database', filename)
# save point clouds and image patches for each object
gt_points = points[point_indices[:, i]]
gt_points[:, :3] -= gt_boxes_3d[i, :3]
if with_mask:
if object_masks[i].sum() == 0 or not valid_inds[i]:
# Skip object for empty or invalid mask
continue
img_patch_path = abs_filepath + '.png'
mask_patch_path = abs_filepath + '.mask.png'
mmcv.imwrite(object_img_patches[i], img_patch_path)
mmcv.imwrite(object_masks[i], mask_patch_path)
with open(abs_filepath, 'w') as f:
gt_points.tofile(f)
if (used_classes is None) or names[i] in used_classes:
db_info = {
'name': names[i],
'path': rel_filepath,
'image_idx': image_idx,
'gt_idx': i,
'box3d_lidar': gt_boxes_3d[i],
'num_points_in_gt': gt_points.shape[0],
'difficulty': difficulty[i],
}
local_group_id = group_ids[i]
# if local_group_id >= 0:
if local_group_id not in group_dict:
group_dict[local_group_id] = group_counter
group_counter += 1
db_info['group_id'] = group_dict[local_group_id]
if 'score' in annos:
db_info['score'] = annos['score'][i]
if with_mask:
db_info.update({'box2d_camera': gt_boxes[i]})
if names[i] in all_db_infos:
all_db_infos[names[i]].append(db_info)
else:
all_db_infos[names[i]] = [db_info]
for k, v in all_db_infos.items():
print(f'load {len(v)} {k} database infos')
with open(db_info_save_path, 'wb') as f:
pickle.dump(all_db_infos, f)
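
# --- Example call (added for clarity; dataset name, paths, and prefix are illustrative). ---
#   create_groundtruth_database('NuScenesDataset', './data/nuscenes', 'nuscenes',
#                               './data/nuscenes/nuscenes_infos_train.pkl')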