Commit 7c0c60e3 authored by change3n8's avatar change3n8
Browse files

init

parents
*.pyc
*.npy
*.pth
\ No newline at end of file
MIT License
Copyright (c) 2024 Horizon Robotics
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
# build
cd projects/mmdet3d_plugin/ops/
python3 -m pip install -e . --no-build-isolation
# run the demo
python3 forward.py
\ No newline at end of file
import torch
import deformable_aggregation_ext.deformable_aggregation_ext as da
# Problem sizes for checking the deformable-aggregation CUDA operator
# against its reference implementation.
bs = 6          # batch size
cam = 6         # number of cameras
num_feat = 89760  # total flattened feature length across all cameras/scales
c = 256         # feature channels
scale = 4       # number of feature-map scales
group = 8       # attention groups
anchor = 1220   # number of anchors
pts = 13        # sampling points per anchor
H, W = 44, 85  # H*W = 3740, 3740 * 6 * 4 = 89760
spatial_shape = torch.tensor([[[H, W]] * scale] * cam, dtype=torch.int32)  # [cam, scale, 2]
scale_start_index = torch.zeros((cam, scale), dtype=torch.int32)
feat_area_per_map = H * W  # 3740
# Flattened start offset of each (camera, scale) feature map.
for i in range(cam):
    for s in range(scale):
        idx = i * scale + s
        scale_start_index[i, s] = idx * feat_area_per_map
# Verify that the last offset plus one map's area equals num_feat.
last_offset = scale_start_index[cam - 1, scale - 1]
assert last_offset + feat_area_per_map == num_feat, "Total feature size mismatch!"
# print("scale_start_index[0]:", scale_start_index[0])  # [0, 3740, 7480, 11220]
# print("Total area:", last_offset + feat_area_per_map)
# Random inputs of the shapes the operator expects.
feature_maps = torch.rand(bs, num_feat, c).float().cuda()
sampling_location = torch.rand(bs, anchor, pts, cam, 2).float().cuda()
# print(f"size of feature_maps is {feature_maps.size()}")
weights = torch.rand(bs, anchor, pts, cam, scale, group).float().cuda()
# print(feature_maps.size())
# print(spatial_shape.size())
# print(scale_start_index.size())
# print(sampling_location.size())
# print(weights.size())
# print("#######################")
# Invoke the fused CUDA operator and the reference implementation.
out = da.deformable_aggregation_forward(
    feature_maps, spatial_shape.cuda(), scale_start_index.cuda(),
    sampling_location, weights
)
new_out = da.deformable_aggregation_forward_ref(
    feature_maps, spatial_shape.cuda(), scale_start_index.cuda(),
    sampling_location, weights
)
# -------- 计算整体误差 --------
def compare_tensors(t1, t2, name="", rtol=1e-4, atol=1e-6):
    """Print and return the max absolute difference and allclose verdict.

    Args:
        t1, t2: tensors of identical shape to compare.
        name: label printed with the report.
        rtol, atol: tolerances forwarded to ``torch.allclose``.

    Returns:
        tuple: (max_abs_diff, is_close).
    """
    abs_diff = torch.abs(t1 - t2)
    worst = abs_diff.max().item()
    verdict = torch.allclose(t1, t2, rtol=rtol, atol=atol)
    print(f"{name}: max_abs_diff={worst}, allclose={verdict}")
    return worst, verdict
# Compare the fused CUDA output against the reference implementation.
compare_tensors(out, new_out, "OUTPUT")
print("finish")
"""
mAP: 0.4647
mATE: 0.5403
mASE: 0.2623
mAOE: 0.4590
mAVE: 0.2198
mAAE: 0.2059
NDS: 0.5636
Eval time: 176.9s
Per-class results:
Object Class AP ATE ASE AOE AVE AAE
car 0.668 0.357 0.142 0.054 0.184 0.195
truck 0.394 0.528 0.187 0.052 0.163 0.210
bus 0.451 0.681 0.196 0.070 0.383 0.243
trailer 0.185 0.971 0.247 0.634 0.175 0.202
construction_vehicle 0.122 0.879 0.496 1.200 0.136 0.406
pedestrian 0.559 0.517 0.287 0.513 0.282 0.151
motorcycle 0.497 0.462 0.238 0.536 0.293 0.236
bicycle 0.426 0.441 0.257 0.951 0.142 0.004
traffic_cone 0.697 0.275 0.299 nan nan nan
barrier 0.648 0.292 0.275 0.122 nan nan
"""
"""
Per-class results:
AMOTA AMOTP RECALL MOTAR GT MOTA MOTP MT ML FAF TP FP FN IDS FRAG TID LGD
bicycle 0.444 1.169 0.533 0.733 1993 0.389 0.566 53 57 19.3 1059 283 931 3 8 1.60 1.75
bus 0.559 1.175 0.626 0.824 2112 0.515 0.751 42 35 14.8 1321 233 790 1 20 1.13 1.95
car 0.678 0.755 0.733 0.819 58317 0.599 0.470 2053 1073 134.2 42626 7706 15565 126 295 0.76 1.03
motorcy 0.522 1.060 0.609 0.823 1977 0.497 0.564 50 38 15.7 1194 211 773 10 17 1.97 2.17
pedestr 0.548 1.059 0.652 0.791 25423 0.506 0.678 677 467 77.6 16274 3404 8854 295 225 1.33 1.85
trailer 0.136 1.603 0.383 0.403 2425 0.154 0.981 30 79 52.6 926 553 1496 3 13 1.49 2.64
truck 0.454 1.132 0.577 0.691 9650 0.399 0.594 210 214 45.7 5569 1723 4078 3 50 1.35 1.85
Aggregated results:
AMOTA 0.477
AMOTP 1.136
RECALL 0.588
MOTAR 0.726
GT 14556
MOTA 0.437
MOTP 0.658
MT 3115
ML 1963
FAF 51.4
TP 68969
FP 14113
FN 32487
IDS 441
FRAG 628
TID 1.37
LGD 1.89
"""
# ================ base config ===================
plugin = True
plugin_dir = "projects/mmdet3d_plugin/"
dist_params = dict(backend="nccl")
log_level = "INFO"
work_dir = None

# Global batch size is split evenly across GPUs.
total_batch_size = 48
num_gpus = 8
batch_size = total_batch_size // num_gpus
# 28130: samples per epoch — presumably the nuScenes train split size;
# NOTE(review): confirm against the annotation pkl.
num_iters_per_epoch = int(28130 // (num_gpus * batch_size))
num_epochs = 100
checkpoint_epoch_interval = 20
checkpoint_config = dict(
    interval=num_iters_per_epoch * checkpoint_epoch_interval
)
log_config = dict(
    interval=51,
    hooks=[
        dict(type="TextLoggerHook", by_epoch=False),
        dict(type="TensorboardLoggerHook"),
    ],
)
load_from = None
resume_from = None
workflow = [("train", 1)]
fp16 = dict(loss_scale=32.0)  # static loss scaling for mixed precision
# Image size; reversed into (H, W) via data_aug_conf["final_dim"] below.
input_shape = (704, 256)
tracking_test = True
tracking_threshold = 0.2

# ================== model ========================
class_names = [
    "car",
    "truck",
    "construction_vehicle",
    "bus",
    "trailer",
    "barrier",
    "motorcycle",
    "bicycle",
    "pedestrian",
    "traffic_cone",
]
num_classes = len(class_names)
embed_dims = 256
num_groups = 8
num_decoder = 6
# Number of decoder layers that run without temporal attention.
num_single_frame_decoder = 1
use_deformable_func = True  # mmdet3d_plugin/ops/setup.py needs to be executed
strides = [4, 8, 16, 32]
num_levels = len(strides)
num_depth_layers = 3
drop_out = 0.1
temporal = True
decouple_attn = True
with_quality_estimation = True
# Full Sparse4D model definition (backbone + FPN neck + sparse head).
model = dict(
    type="Sparse4D",
    use_grid_mask=True,
    use_deformable_func=use_deformable_func,
    img_backbone=dict(
        type="ResNet",
        depth=50,
        num_stages=4,
        frozen_stages=-1,
        norm_eval=False,
        style="pytorch",
        with_cp=True,  # gradient checkpointing to save memory
        out_indices=(0, 1, 2, 3),
        norm_cfg=dict(type="BN", requires_grad=True),
        pretrained="ckpt/resnet50-19c8e357.pth",
    ),
    img_neck=dict(
        type="FPN",
        num_outs=num_levels,
        start_level=0,
        out_channels=embed_dims,
        add_extra_convs="on_output",
        relu_before_extra_convs=True,
        in_channels=[256, 512, 1024, 2048],
    ),
    depth_branch=dict(  # for auxiliary supervision only
        type="DenseDepthNet",
        embed_dims=embed_dims,
        num_depth_layers=num_depth_layers,
        loss_weight=0.2,
    ),
    head=dict(
        type="Sparse4DHead",
        cls_threshold_to_reg=0.05,
        decouple_attn=decouple_attn,
        instance_bank=dict(
            type="InstanceBank",
            num_anchor=900,
            embed_dims=embed_dims,
            anchor="nuscenes_kmeans900.npy",
            anchor_handler=dict(type="SparseBox3DKeyPointsGenerator"),
            # 600 instances are propagated across frames when temporal.
            num_temp_instances=600 if temporal else -1,
            confidence_decay=0.6,
            feat_grad=False,
        ),
        anchor_encoder=dict(
            type="SparseBox3DEncoder",
            vel_dims=3,
            embed_dims=[128, 32, 32, 64] if decouple_attn else 256,
            mode="cat" if decouple_attn else "add",
            output_fc=not decouple_attn,
            in_loops=1,
            out_loops=4 if decouple_attn else 2,
        ),
        num_single_frame_decoder=num_single_frame_decoder,
        # Per-layer op sequence; the trailing [2:] drops the leading
        # "gnn","norm" of the first (single-frame) decoder layer.
        operation_order=(
            [
                "gnn",
                "norm",
                "deformable",
                "ffn",
                "norm",
                "refine",
            ]
            * num_single_frame_decoder
            + [
                "temp_gnn",
                "gnn",
                "norm",
                "deformable",
                "ffn",
                "norm",
                "refine",
            ]
            * (num_decoder - num_single_frame_decoder)
        )[2:],
        # Cross-frame attention; only instantiated when temporal=True.
        temp_graph_model=dict(
            type="MultiheadAttention",
            embed_dims=embed_dims if not decouple_attn else embed_dims * 2,
            num_heads=num_groups,
            batch_first=True,
            dropout=drop_out,
        )
        if temporal
        else None,
        graph_model=dict(
            type="MultiheadAttention",
            embed_dims=embed_dims if not decouple_attn else embed_dims * 2,
            num_heads=num_groups,
            batch_first=True,
            dropout=drop_out,
        ),
        norm_layer=dict(type="LN", normalized_shape=embed_dims),
        ffn=dict(
            type="AsymmetricFFN",
            in_channels=embed_dims * 2,
            pre_norm=dict(type="LN"),
            embed_dims=embed_dims,
            feedforward_channels=embed_dims * 4,
            num_fcs=2,
            ffn_drop=drop_out,
            act_cfg=dict(type="ReLU", inplace=True),
        ),
        deformable_model=dict(
            type="DeformableFeatureAggregation",
            embed_dims=embed_dims,
            num_groups=num_groups,
            num_levels=num_levels,
            num_cams=6,
            attn_drop=0.15,
            use_deformable_func=use_deformable_func,
            use_camera_embed=True,
            residual_mode="cat",
            kps_generator=dict(
                type="SparseBox3DKeyPointsGenerator",
                num_learnable_pts=6,
                # Fixed key points in normalized box coordinates.
                fix_scale=[
                    [0, 0, 0],
                    [0.45, 0, 0],
                    [-0.45, 0, 0],
                    [0, 0.45, 0],
                    [0, -0.45, 0],
                    [0, 0, 0.45],
                    [0, 0, -0.45],
                ],
            ),
        ),
        refine_layer=dict(
            type="SparseBox3DRefinementModule",
            embed_dims=embed_dims,
            num_cls=num_classes,
            refine_yaw=True,
            with_quality_estimation=with_quality_estimation,
        ),
        sampler=dict(
            type="SparseBox3DTarget",
            num_dn_groups=5,
            num_temp_dn_groups=3,
            dn_noise_scale=[2.0] * 3 + [0.5] * 7,
            max_dn_gt=32,
            add_neg_dn=True,
            cls_weight=2.0,
            box_weight=0.25,
            reg_weights=[2.0] * 3 + [0.5] * 3 + [0.0] * 4,
            # Traffic cones get custom per-dimension regression weights.
            cls_wise_reg_weights={
                class_names.index("traffic_cone"): [
                    2.0,
                    2.0,
                    2.0,
                    1.0,
                    1.0,
                    1.0,
                    0.0,
                    0.0,
                    1.0,
                    1.0,
                ],
            },
        ),
        loss_cls=dict(
            type="FocalLoss",
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=2.0,
        ),
        loss_reg=dict(
            type="SparseBox3DLoss",
            loss_box=dict(type="L1Loss", loss_weight=0.25),
            loss_centerness=dict(type="CrossEntropyLoss", use_sigmoid=True),
            loss_yawness=dict(type="GaussianFocalLoss"),
            # Barriers are symmetric, so a reversed yaw is not penalized.
            cls_allow_reverse=[class_names.index("barrier")],
        ),
        decoder=dict(type="SparseBox3DDecoder"),
        reg_weights=[2.0] * 3 + [1.0] * 7,
    ),
)
# ================== data ========================
dataset_type = "NuScenes3DDetTrackDataset"
data_root = "data/nuscenes/"
# Directory holding the annotation pkls. (A dead first assignment of
# "data/nuscenes_cam/" that was immediately overwritten has been removed.)
anno_root = "data/nuscenes_anno_pkls/"
file_client_args = dict(backend="disk")
# ImageNet mean/std used to normalize the multi-view camera images.
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True
)
# Training-time data pipeline: load images/points, augment, filter, collect.
train_pipeline = [
    dict(type="LoadMultiViewImageFromFiles", to_float32=True),
    dict(
        type="LoadPointsFromFile",
        coord_type="LIDAR",
        load_dim=5,
        use_dim=5,
        file_client_args=file_client_args,
    ),
    dict(type="ResizeCropFlipImage"),
    # Depth maps for auxiliary depth supervision, one per supervised stride.
    dict(
        type="MultiScaleDepthMapGenerator",
        downsample=strides[:num_depth_layers],
    ),
    dict(type="BBoxRotation"),
    dict(type="PhotoMetricDistortionMultiViewImage"),
    dict(type="NormalizeMultiviewImage", **img_norm_cfg),
    # Keep objects within 55 m of the ego vehicle for every class.
    dict(
        type="CircleObjectRangeFilter",
        class_dist_thred=[55] * len(class_names),
    ),
    dict(type="InstanceNameFilter", classes=class_names),
    dict(type="NuScenesSparse4DAdaptor"),
    dict(
        type="Collect",
        keys=[
            "img",
            "timestamp",
            "projection_mat",
            "image_wh",
            "gt_depth",
            "focal",
            "gt_bboxes_3d",
            "gt_labels_3d",
        ],
        meta_keys=["T_global", "T_global_inv", "timestamp", "instance_id"],
    ),
]
# Inference pipeline: no point loading, depth supervision, or GT handling.
test_pipeline = [
    dict(type="LoadMultiViewImageFromFiles", to_float32=True),
    dict(type="ResizeCropFlipImage"),
    dict(type="NormalizeMultiviewImage", **img_norm_cfg),
    dict(type="NuScenesSparse4DAdaptor"),
    dict(
        type="Collect",
        keys=[
            "img",
            "timestamp",
            "projection_mat",
            "image_wh",
        ],
        meta_keys=["T_global", "T_global_inv", "timestamp"],
    ),
]
# Camera-only input modality.
input_modality = dict(
    use_lidar=False,
    use_camera=True,
    use_radar=False,
    use_map=False,
    use_external=False,
)
# Shared kwargs for the train/val/test dataset dicts below.
data_basic_config = dict(
    type=dataset_type,
    data_root=data_root,
    classes=class_names,
    modality=input_modality,
    version="v1.0-trainval",
)
# Resize/crop/flip/rotation augmentation ranges for the source 1600x900
# nuScenes images.
data_aug_conf = {
    "resize_lim": (0.40, 0.47),
    "final_dim": input_shape[::-1],
    "bot_pct_lim": (0.0, 0.0),
    "rot_lim": (-5.4, 5.4),
    "H": 900,
    "W": 1600,
    "rand_flip": True,
    "rot3d_range": [-0.3925, 0.3925],
}
# Dataloader settings for each split; val/test share the val annotations.
data = dict(
    samples_per_gpu=batch_size,
    workers_per_gpu=batch_size,
    train=dict(
        **data_basic_config,
        ann_file=anno_root + "nuscenes_infos_train.pkl",
        pipeline=train_pipeline,
        test_mode=False,
        data_aug_conf=data_aug_conf,
        with_seq_flag=True,
        sequences_split_num=2,
        keep_consistent_seq_aug=True,
    ),
    val=dict(
        **data_basic_config,
        ann_file=anno_root + "nuscenes_infos_val.pkl",
        pipeline=test_pipeline,
        data_aug_conf=data_aug_conf,
        test_mode=True,
        tracking=tracking_test,
        tracking_threshold=tracking_threshold,
    ),
    test=dict(
        **data_basic_config,
        ann_file=anno_root + "nuscenes_infos_val.pkl",
        pipeline=test_pipeline,
        data_aug_conf=data_aug_conf,
        test_mode=True,
        tracking=tracking_test,
        tracking_threshold=tracking_threshold,
    ),
)

# ================== training ========================
# AdamW with a halved learning rate for the pretrained backbone.
optimizer = dict(
    type="AdamW",
    lr=6e-4,
    weight_decay=0.001,
    paramwise_cfg=dict(
        custom_keys={
            "img_backbone": dict(lr_mult=0.5),
        }
    ),
)
optimizer_config = dict(grad_clip=dict(max_norm=25, norm_type=2))
# Cosine decay after a short linear warmup.
lr_config = dict(
    policy="CosineAnnealing",
    warmup="linear",
    warmup_iters=500,
    warmup_ratio=1.0 / 3,
    min_lr_ratio=1e-3,
)
runner = dict(
    type="IterBasedRunner",
    max_iters=num_iters_per_epoch * num_epochs,
)

# ================== eval ========================
# Minimal pipeline used only when visualizing predictions.
vis_pipeline = [
    dict(type="LoadMultiViewImageFromFiles", to_float32=True),
    dict(
        type="Collect",
        keys=["img"],
        meta_keys=["timestamp", "lidar2img"],
    ),
]
# Evaluate at the same cadence as checkpointing.
evaluation = dict(
    interval=num_iters_per_epoch * checkpoint_epoch_interval,
    pipeline=vis_pipeline,
    # out_dir="./vis", # for visualization
)
from .datasets import *
from .models import *
from .apis import *
from .core.evaluation import *
from .train import custom_train_model
from .mmdet_train import custom_train_detector
# from .test import custom_multi_gpu_test
# ---------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
# ---------------------------------------------
# Modified by Zhiqi Li
# ---------------------------------------------
import random
import warnings
import numpy as np
import torch
import torch.distributed as dist
from mmcv.parallel import MMDataParallel, MMDistributedDataParallel
from mmcv.runner import (
HOOKS,
DistSamplerSeedHook,
EpochBasedRunner,
Fp16OptimizerHook,
OptimizerHook,
build_optimizer,
build_runner,
get_dist_info,
)
from mmcv.utils import build_from_cfg
from mmdet.core import EvalHook
from mmdet.datasets import build_dataset, replace_ImageToTensor
from mmdet.utils import get_root_logger
import time
import os.path as osp
from projects.mmdet3d_plugin.datasets.builder import build_dataloader
from projects.mmdet3d_plugin.core.evaluation.eval_hooks import (
CustomDistEvalHook,
)
from projects.mmdet3d_plugin.datasets import custom_build_dataset
def custom_train_detector(
    model,
    dataset,
    cfg,
    distributed=False,
    validate=False,
    timestamp=None,
    meta=None,
):
    """Build dataloaders, runner, and hooks, then launch detector training.

    Customized version of mmdet's ``train_detector`` that uses the plugin's
    ``build_dataloader`` / ``custom_build_dataset`` and ``CustomDistEvalHook``.

    Args:
        model (nn.Module): the detector to train.
        dataset (Dataset | list[Dataset]): training dataset(s).
        cfg (mmcv.Config): full training config.
        distributed (bool): whether to wrap the model in DDP.
        validate (bool): whether to register an evaluation hook.
        timestamp (str | None): run timestamp used for log file naming.
        meta (dict | None): metadata passed through to the runner.
    """
    logger = get_root_logger(cfg.log_level)

    # prepare data loaders
    dataset = dataset if isinstance(dataset, (list, tuple)) else [dataset]
    # assert len(dataset)==1s
    if "imgs_per_gpu" in cfg.data:
        logger.warning(
            '"imgs_per_gpu" is deprecated in MMDet V2.0. '
            'Please use "samples_per_gpu" instead'
        )
        if "samples_per_gpu" in cfg.data:
            # NOTE(review): this branch only warns — despite the message,
            # samples_per_gpu is NOT actually overwritten by imgs_per_gpu
            # here; confirm this matches the intended precedence.
            logger.warning(
                f'Got "imgs_per_gpu"={cfg.data.imgs_per_gpu} and '
                f'"samples_per_gpu"={cfg.data.samples_per_gpu}, "imgs_per_gpu"'
                f"={cfg.data.imgs_per_gpu} is used in this experiments"
            )
        else:
            logger.warning(
                'Automatically set "samples_per_gpu"="imgs_per_gpu"='
                f"{cfg.data.imgs_per_gpu} in this experiments"
            )
            cfg.data.samples_per_gpu = cfg.data.imgs_per_gpu

    if "runner" in cfg:
        runner_type = cfg.runner["type"]
    else:
        runner_type = "EpochBasedRunner"
    data_loaders = [
        build_dataloader(
            ds,
            cfg.data.samples_per_gpu,
            cfg.data.workers_per_gpu,
            # cfg.gpus will be ignored if distributed
            len(cfg.gpu_ids),
            dist=distributed,
            seed=cfg.seed,
            nonshuffler_sampler=dict(
                type="DistributedSampler"
            ),  # dict(type='DistributedSampler'),
            runner_type=runner_type,
        )
        for ds in dataset
    ]

    # put model on gpus
    if distributed:
        find_unused_parameters = cfg.get("find_unused_parameters", False)
        # Sets the `find_unused_parameters` parameter in
        # torch.nn.parallel.DistributedDataParallel
        model = MMDistributedDataParallel(
            model.cuda(),
            device_ids=[torch.cuda.current_device()],
            broadcast_buffers=False,
            find_unused_parameters=find_unused_parameters,
        )
    else:
        model = MMDataParallel(
            model.cuda(cfg.gpu_ids[0]), device_ids=cfg.gpu_ids
        )

    # build runner
    optimizer = build_optimizer(model, cfg.optimizer)
    if "runner" not in cfg:
        cfg.runner = {
            "type": "EpochBasedRunner",
            "max_epochs": cfg.total_epochs,
        }
        warnings.warn(
            "config is now expected to have a `runner` section, "
            "please set `runner` in your config.",
            UserWarning,
        )
    else:
        if "total_epochs" in cfg:
            assert cfg.total_epochs == cfg.runner.max_epochs
    runner = build_runner(
        cfg.runner,
        default_args=dict(
            model=model,
            optimizer=optimizer,
            work_dir=cfg.work_dir,
            logger=logger,
            meta=meta,
        ),
    )

    # an ugly workaround to make .log and .log.json filenames the same
    runner.timestamp = timestamp

    # fp16 setting
    fp16_cfg = cfg.get("fp16", None)
    if fp16_cfg is not None:
        optimizer_config = Fp16OptimizerHook(
            **cfg.optimizer_config, **fp16_cfg, distributed=distributed
        )
    elif distributed and "type" not in cfg.optimizer_config:
        optimizer_config = OptimizerHook(**cfg.optimizer_config)
    else:
        optimizer_config = cfg.optimizer_config

    # register hooks
    runner.register_training_hooks(
        cfg.lr_config,
        optimizer_config,
        cfg.checkpoint_config,
        cfg.log_config,
        cfg.get("momentum_config", None),
    )
    # register profiler hook
    # trace_config = dict(type='tb_trace', dir_name='work_dir')
    # profiler_config = dict(on_trace_ready=trace_config)
    # runner.register_profiler_hook(profiler_config)
    if distributed:
        if isinstance(runner, EpochBasedRunner):
            runner.register_hook(DistSamplerSeedHook())

    # register eval hooks
    if validate:
        # Support batch_size > 1 in validation
        val_samples_per_gpu = cfg.data.val.pop("samples_per_gpu", 1)
        if val_samples_per_gpu > 1:
            assert False
            # Replace 'ImageToTensor' to 'DefaultFormatBundle'
            cfg.data.val.pipeline = replace_ImageToTensor(
                cfg.data.val.pipeline
            )
        val_dataset = custom_build_dataset(cfg.data.val, dict(test_mode=True))
        val_dataloader = build_dataloader(
            val_dataset,
            samples_per_gpu=val_samples_per_gpu,
            workers_per_gpu=cfg.data.workers_per_gpu,
            dist=distributed,
            shuffle=False,
            nonshuffler_sampler=dict(type="DistributedSampler"),
        )
        eval_cfg = cfg.get("evaluation", {})
        eval_cfg["by_epoch"] = cfg.runner["type"] != "IterBasedRunner"
        # NOTE(review): "val" is the FIRST component here, so if work_dir is
        # an absolute path os.path.join discards it — verify the intended
        # prefix order (cfg.work_dir/"val"/... seems more likely).
        eval_cfg["jsonfile_prefix"] = osp.join(
            "val",
            cfg.work_dir,
            time.ctime().replace(" ", "_").replace(":", "_"),
        )
        eval_hook = CustomDistEvalHook if distributed else EvalHook
        runner.register_hook(eval_hook(val_dataloader, **eval_cfg))

    # user-defined hooks
    if cfg.get("custom_hooks", None):
        custom_hooks = cfg.custom_hooks
        assert isinstance(
            custom_hooks, list
        ), f"custom_hooks expect list type, but got {type(custom_hooks)}"
        for hook_cfg in cfg.custom_hooks:
            assert isinstance(hook_cfg, dict), (
                "Each item in custom_hooks expects dict type, but got "
                f"{type(hook_cfg)}"
            )
            hook_cfg = hook_cfg.copy()
            priority = hook_cfg.pop("priority", "NORMAL")
            hook = build_from_cfg(hook_cfg, HOOKS)
            runner.register_hook(hook, priority=priority)

    # resume takes precedence over load
    if cfg.resume_from:
        runner.resume(cfg.resume_from)
    elif cfg.load_from:
        runner.load_checkpoint(cfg.load_from)
    runner.run(data_loaders, cfg.workflow)
# ---------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
# ---------------------------------------------
# Modified by Zhiqi Li
# ---------------------------------------------
import os.path as osp
import pickle
import shutil
import tempfile
import time
import mmcv
import torch
import torch.distributed as dist
from mmcv.image import tensor2imgs
from mmcv.runner import get_dist_info
from mmdet.core import encode_mask_results
import mmcv
import numpy as np
import pycocotools.mask as mask_util
def custom_encode_mask_results(mask_results):
    """Encode bitmap mask to RLE code. Semantic Masks only

    Args:
        mask_results (list | tuple[list]): bitmap mask results.
            In mask scoring rcnn, mask_results is a tuple of (segm_results,
            segm_cls_score).

    Returns:
        list | tuple: RLE encoded mask.
    """
    # Iterate the masks directly instead of indexing, and drop the dead
    # `num_classes` local the original computed but never used.
    encoded_mask_results = [
        mask_util.encode(
            # pycocotools expects a Fortran-ordered uint8 HxWx1 array.
            np.array(segm[:, :, np.newaxis], order="F", dtype="uint8")
        )[0]  # encoded with RLE
        for segm in mask_results
    ]
    return [encoded_mask_results]
def custom_multi_gpu_test(model, data_loader, tmpdir=None, gpu_collect=False):
    """Test model with multiple gpus.
    This method tests model with multiple gpus and collects the results
    under two different modes: gpu and cpu modes. By setting 'gpu_collect=True'
    it encodes results to gpu tensors and use gpu communication for results
    collection. On cpu mode it saves the results on different gpus to 'tmpdir'
    and collects them by the rank 0 worker.
    Args:
        model (nn.Module): Model to be tested.
        data_loader (nn.Dataloader): Pytorch data loader.
        tmpdir (str): Path of directory to save the temporary results from
            different gpus under cpu mode.
        gpu_collect (bool): Option to use either gpu or cpu to collect results.
    Returns:
        list: The prediction results.
    """
    model.eval()
    bbox_results = []
    mask_results = []
    dataset = data_loader.dataset
    rank, world_size = get_dist_info()
    # Only rank 0 renders the progress bar; all ranks update their counts.
    if rank == 0:
        prog_bar = mmcv.ProgressBar(len(dataset))
    time.sleep(2)  # This line can prevent deadlock problem in some cases.
    have_mask = False
    for i, data in enumerate(data_loader):
        with torch.no_grad():
            result = model(return_loss=False, rescale=True, **data)
            # encode mask results
            if isinstance(result, dict):
                if "bbox_results" in result.keys():
                    bbox_result = result["bbox_results"]
                    batch_size = len(result["bbox_results"])
                    bbox_results.extend(bbox_result)
                if (
                    "mask_results" in result.keys()
                    and result["mask_results"] is not None
                ):
                    mask_result = custom_encode_mask_results(
                        result["mask_results"]
                    )
                    mask_results.extend(mask_result)
                    have_mask = True
            else:
                batch_size = len(result)
                bbox_results.extend(result)

        if rank == 0:
            # Advance by the global batch (all ranks process in lockstep).
            for _ in range(batch_size * world_size):
                prog_bar.update()

    # collect results from all ranks
    if gpu_collect:
        bbox_results = collect_results_gpu(bbox_results, len(dataset))
        if have_mask:
            mask_results = collect_results_gpu(mask_results, len(dataset))
        else:
            mask_results = None
    else:
        bbox_results = collect_results_cpu(bbox_results, len(dataset), tmpdir)
        # Masks are gathered into a sibling "<tmpdir>_mask" directory.
        tmpdir = tmpdir + "_mask" if tmpdir is not None else None
        if have_mask:
            mask_results = collect_results_cpu(
                mask_results, len(dataset), tmpdir
            )
        else:
            mask_results = None

    if mask_results is None:
        return bbox_results
    return {"bbox_results": bbox_results, "mask_results": mask_results}
def collect_results_cpu(result_part, size, tmpdir=None):
    """Gather per-rank result lists on rank 0 via a shared temp directory.

    Each rank dumps its partial results to ``tmpdir/part_<rank>.pkl``; after
    a barrier, rank 0 loads and concatenates all parts.

    Args:
        result_part (list): this rank's partial results.
        size (int): total number of samples (used to trim padding).
        tmpdir (str | None): shared directory; if None, rank 0 creates one
            and broadcasts its path to all ranks.

    Returns:
        list | None: ordered results on rank 0, ``None`` on other ranks.
    """
    rank, world_size = get_dist_info()
    # create a tmp dir if it is not specified
    if tmpdir is None:
        MAX_LEN = 512
        # 32 is whitespace
        # The path is broadcast as a fixed-size uint8 tensor padded with
        # spaces, then stripped back to a string on every rank.
        dir_tensor = torch.full(
            (MAX_LEN,), 32, dtype=torch.uint8, device="cuda"
        )
        if rank == 0:
            mmcv.mkdir_or_exist(".dist_test")
            tmpdir = tempfile.mkdtemp(dir=".dist_test")
            tmpdir = torch.tensor(
                bytearray(tmpdir.encode()), dtype=torch.uint8, device="cuda"
            )
            dir_tensor[: len(tmpdir)] = tmpdir
        dist.broadcast(dir_tensor, 0)
        tmpdir = dir_tensor.cpu().numpy().tobytes().decode().rstrip()
    else:
        mmcv.mkdir_or_exist(tmpdir)
    # dump the part result to the dir
    mmcv.dump(result_part, osp.join(tmpdir, f"part_{rank}.pkl"))
    dist.barrier()
    # collect all parts
    if rank != 0:
        return None
    else:
        # load results of all parts from tmp dir
        part_list = []
        for i in range(world_size):
            part_file = osp.join(tmpdir, f"part_{i}.pkl")
            part_list.append(mmcv.load(part_file))
        # sort the results
        ordered_results = []
        """
        bacause we change the sample of the evaluation stage to make sure that
        each gpu will handle continuous sample,
        """
        # Parts are concatenated in rank order (not interleaved) because the
        # eval sampler assigns each GPU a contiguous span of samples.
        # for res in zip(*part_list):
        for res in part_list:
            ordered_results.extend(list(res))
        # the dataloader may pad some samples
        ordered_results = ordered_results[:size]
        # remove tmp dir
        shutil.rmtree(tmpdir)
        return ordered_results
def collect_results_gpu(result_part, size):
    """Collect results across ranks when ``gpu_collect=True`` is requested.

    GPU-side gathering is not implemented here; this delegates to the
    tmpdir-based CPU gather.

    Args:
        result_part (list): this rank's partial results.
        size (int): total number of samples.

    Returns:
        list | None: ordered results on rank 0, ``None`` on other ranks.
    """
    # Bug fix: the gathered results were previously discarded (no return),
    # so gpu_collect=True always yielded None on every rank.
    return collect_results_cpu(result_part, size)
# ---------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
# ---------------------------------------------
# Modified by Zhiqi Li
# ---------------------------------------------
from .mmdet_train import custom_train_detector
# from mmseg.apis import train_segmentor
from mmdet.apis import train_detector
def custom_train_model(
    model,
    dataset,
    cfg,
    distributed=False,
    validate=False,
    timestamp=None,
    meta=None,
):
    """A function wrapper for launching model training according to cfg.

    Because we need different eval_hook in runner. Should be deprecated in the
    future.

    Raises:
        NotImplementedError: for 3D segmentation models ("EncoderDecoder3D"),
            which this plugin does not support.
    """
    if cfg.model.type in ["EncoderDecoder3D"]:
        # `assert False` is stripped under `python -O`; raise explicitly so
        # unsupported configs always fail loudly.
        raise NotImplementedError(
            f"training model type {cfg.model.type} is not supported"
        )
    custom_train_detector(
        model,
        dataset,
        cfg,
        distributed=distributed,
        validate=validate,
        timestamp=timestamp,
        meta=meta,
    )
def train_model(
    model,
    dataset,
    cfg,
    distributed=False,
    validate=False,
    timestamp=None,
    meta=None,
):
    """A function wrapper for launching model training according to cfg.

    Because we need different eval_hook in runner. Should be deprecated in the
    future.
    """
    # Thin pass-through to the stock mmdet trainer, kept for API symmetry
    # with `custom_train_model`.
    options = dict(
        distributed=distributed,
        validate=validate,
        timestamp=timestamp,
        meta=meta,
    )
    train_detector(model, dataset, cfg, **options)
# Indices into the 11-D undecoded box state: position (X, Y, Z),
# size (W, L, H), yaw as sin/cos, and velocity (VX, VY, VZ).
X, Y, Z, W, L, H, SIN_YAW, COS_YAW, VX, VY, VZ = list(range(11))  # undecoded
CNS, YNS = 0, 1  # centerness and yawness indices in quality
YAW = 6  # index of the decoded yaw angle
from .eval_hooks import CustomDistEvalHook
\ No newline at end of file
# Note: Considering that MMCV's EvalHook updated its interface in V1.3.16,
# in order to avoid strong version dependency, we did not directly
# inherit EvalHook but BaseDistEvalHook.
import bisect
import os.path as osp
import mmcv
import torch.distributed as dist
from mmcv.runner import DistEvalHook as BaseDistEvalHook
from mmcv.runner import EvalHook as BaseEvalHook
from torch.nn.modules.batchnorm import _BatchNorm
from mmdet.core.evaluation.eval_hooks import DistEvalHook
def _calc_dynamic_intervals(start_interval, dynamic_interval_list):
    """Split (milestone, interval) pairs into two aligned lists.

    Args:
        start_interval (int): interval used before the first milestone.
        dynamic_interval_list (list[tuple]): (milestone, interval) pairs.

    Returns:
        tuple[list, list]: milestones (prefixed with 0) and the interval
            active from each corresponding milestone onward.
    """
    assert mmcv.is_list_of(dynamic_interval_list, tuple)
    milestones = [0] + [pair[0] for pair in dynamic_interval_list]
    intervals = [start_interval] + [pair[1] for pair in dynamic_interval_list]
    return milestones, intervals
class CustomDistEvalHook(BaseDistEvalHook):
    """Distributed eval hook supporting dynamically changing eval intervals.

    ``dynamic_intervals`` is a list of (milestone, interval) pairs: once
    training progress passes a milestone, the corresponding interval takes
    effect. Evaluation itself is delegated to ``custom_multi_gpu_test``.
    """

    def __init__(self, *args, dynamic_intervals=None, **kwargs):
        super(CustomDistEvalHook, self).__init__(*args, **kwargs)
        self.use_dynamic_intervals = dynamic_intervals is not None
        if self.use_dynamic_intervals:
            (
                self.dynamic_milestones,
                self.dynamic_intervals,
            ) = _calc_dynamic_intervals(self.interval, dynamic_intervals)

    def _decide_interval(self, runner):
        # Pick the interval matching the current progress (epoch or iter).
        if self.use_dynamic_intervals:
            progress = runner.epoch if self.by_epoch else runner.iter
            step = bisect.bisect(self.dynamic_milestones, (progress + 1))
            # Dynamically modify the evaluation interval
            self.interval = self.dynamic_intervals[step - 1]

    def before_train_epoch(self, runner):
        """Evaluate the model only at the start of training by epoch."""
        self._decide_interval(runner)
        super().before_train_epoch(runner)

    def before_train_iter(self, runner):
        self._decide_interval(runner)
        super().before_train_iter(runner)

    def _do_evaluate(self, runner):
        """perform evaluation and save ckpt."""
        # Synchronization of BatchNorm's buffer (running_mean
        # and running_var) is not supported in the DDP of pytorch,
        # which may cause the inconsistent performance of models in
        # different ranks, so we broadcast BatchNorm's buffers
        # of rank 0 to other ranks to avoid this.
        if self.broadcast_bn_buffer:
            model = runner.model
            for name, module in model.named_modules():
                if (
                    isinstance(module, _BatchNorm)
                    and module.track_running_stats
                ):
                    dist.broadcast(module.running_var, 0)
                    dist.broadcast(module.running_mean, 0)

        if not self._should_evaluate(runner):
            return

        tmpdir = self.tmpdir
        if tmpdir is None:
            tmpdir = osp.join(runner.work_dir, ".eval_hook")

        from projects.mmdet3d_plugin.apis.test import (
            custom_multi_gpu_test,
        )  # to solve circlur import

        results = custom_multi_gpu_test(
            runner.model,
            self.dataloader,
            tmpdir=tmpdir,
            gpu_collect=self.gpu_collect,
        )
        # Only rank 0 aggregates metrics and (optionally) saves the best ckpt.
        if runner.rank == 0:
            print("\n")
            runner.log_buffer.output["eval_iter_num"] = len(self.dataloader)
            key_score = self.evaluate(runner, results)

            if self.save_best:
                self._save_ckpt(runner, key_score)
from .nuscenes_3d_det_track_dataset import NuScenes3DDetTrackDataset
from .builder import *
from .pipelines import *
from .samplers import *
# Public API of the datasets package; `custom_build_dataset` is re-exported
# from `.builder` via the wildcard import above.
__all__ = [
    'NuScenes3DDetTrackDataset',
    "custom_build_dataset",
]
import copy
import platform
import random
from functools import partial
import numpy as np
from mmcv.parallel import collate
from mmcv.runner import get_dist_info
from mmcv.utils import Registry, build_from_cfg
from torch.utils.data import DataLoader
from mmdet.datasets.samplers import GroupSampler
from projects.mmdet3d_plugin.datasets.samplers import (
GroupInBatchSampler,
DistributedGroupSampler,
DistributedSampler,
build_sampler
)
def build_dataloader(
    dataset,
    samples_per_gpu,
    workers_per_gpu,
    num_gpus=1,
    dist=True,
    shuffle=True,
    seed=None,
    shuffler_sampler=None,
    nonshuffler_sampler=None,
    runner_type="EpochBasedRunner",
    **kwargs
):
    """Build PyTorch DataLoader.
    In distributed training, each GPU/process has a dataloader.
    In non-distributed training, there is only one dataloader for all GPUs.
    Args:
        dataset (Dataset): A PyTorch dataset.
        samples_per_gpu (int): Number of training samples on each GPU, i.e.,
            batch size of each GPU.
        workers_per_gpu (int): How many subprocesses to use for data loading
            for each GPU.
        num_gpus (int): Number of GPUs. Only used in non-distributed training.
        dist (bool): Distributed training/test or not. Default: True.
        shuffle (bool): Whether to shuffle the data at every epoch.
            Default: True.
        seed (int | None): Base seed for sampler and worker seeding.
        shuffler_sampler (dict | None): Sampler config used when shuffling;
            defaults to DistributedGroupSampler.
        nonshuffler_sampler (dict | None): Sampler config used when not
            shuffling; defaults to DistributedSampler.
        runner_type (str): "IterBasedRunner" selects GroupInBatchSampler.
        kwargs: any keyword argument to be used to initialize DataLoader
    Returns:
        DataLoader: A PyTorch dataloader.
    """
    rank, world_size = get_dist_info()
    batch_sampler = None
    if runner_type == 'IterBasedRunner':
        # Iteration-based training batches whole sequence groups; the
        # DataLoader then gets batch_size=1 with no plain sampler, because
        # sampler and batch_sampler are mutually exclusive in PyTorch.
        print("Use GroupInBatchSampler !!!")
        batch_sampler = GroupInBatchSampler(
            dataset,
            samples_per_gpu,
            world_size,
            rank,
            seed=seed,
        )
        batch_size = 1
        sampler = None
        num_workers = workers_per_gpu
    elif dist:
        # DistributedGroupSampler will definitely shuffle the data to satisfy
        # that images on each GPU are in the same group
        if shuffle:
            print("Use DistributedGroupSampler !!!")
            sampler = build_sampler(
                shuffler_sampler
                if shuffler_sampler is not None
                else dict(type="DistributedGroupSampler"),
                dict(
                    dataset=dataset,
                    samples_per_gpu=samples_per_gpu,
                    num_replicas=world_size,
                    rank=rank,
                    seed=seed,
                ),
            )
        else:
            sampler = build_sampler(
                nonshuffler_sampler
                if nonshuffler_sampler is not None
                else dict(type="DistributedSampler"),
                dict(
                    dataset=dataset,
                    num_replicas=world_size,
                    rank=rank,
                    shuffle=shuffle,
                    seed=seed,
                ),
            )
        batch_size = samples_per_gpu
        num_workers = workers_per_gpu
    else:
        # assert False, 'not support in bevformer'
        print("WARNING!!!!, Only can be used for obtain inference speed!!!!")
        sampler = GroupSampler(dataset, samples_per_gpu) if shuffle else None
        batch_size = num_gpus * samples_per_gpu
        num_workers = num_gpus * workers_per_gpu

    # Deterministic per-worker seeding when a base seed is provided.
    init_fn = (
        partial(worker_init_fn, num_workers=num_workers, rank=rank, seed=seed)
        if seed is not None
        else None
    )

    data_loader = DataLoader(
        dataset,
        batch_size=batch_size,
        sampler=sampler,
        batch_sampler=batch_sampler,
        num_workers=num_workers,
        collate_fn=partial(collate, samples_per_gpu=samples_per_gpu),
        pin_memory=False,
        worker_init_fn=init_fn,
        **kwargs
    )
    return data_loader
def worker_init_fn(worker_id, num_workers, rank, seed):
    """Seed `random` and `numpy` deterministically in a dataloader worker.

    The per-worker seed is ``num_workers * rank + worker_id + seed`` so every
    (rank, worker) pair draws a distinct, reproducible stream.
    """
    worker_seed = seed + rank * num_workers + worker_id
    for seeder in (np.random.seed, random.seed):
        seeder(worker_seed)
# Copyright (c) OpenMMLab. All rights reserved.
import platform
from mmcv.utils import Registry, build_from_cfg
from mmdet.datasets import DATASETS
from mmdet.datasets.builder import _concat_dataset
if platform.system() != "Windows":
    # https://github.com/pytorch/pytorch/issues/973
    # Dataloader workers can exhaust the default open-file limit; raise the
    # soft RLIMIT_NOFILE to at least 4096 (capped by the hard limit).
    import resource

    rlimit = resource.getrlimit(resource.RLIMIT_NOFILE)
    base_soft_limit = rlimit[0]
    hard_limit = rlimit[1]
    soft_limit = min(max(4096, base_soft_limit), hard_limit)
    resource.setrlimit(resource.RLIMIT_NOFILE, (soft_limit, hard_limit))

# Registry for ground-truth object samplers.
# NOTE(review): nothing visible here registers into it — confirm it is used
# elsewhere before removing.
OBJECTSAMPLERS = Registry("Object sampler")
def custom_build_dataset(cfg, default_args=None):
    """Recursively build a (possibly wrapped) dataset from config.

    Mirrors mmdet's ``build_dataset`` while additionally supporting the
    optional ``CBGSDataset`` wrapper from mmdet3d.

    Args:
        cfg (dict | list | tuple): Dataset config, or a sequence of configs
            to concatenate.
        default_args (dict | None): Default kwargs forwarded to the dataset
            constructor.

    Returns:
        Dataset: the constructed dataset instance.
    """
    # mmdet3d is optional; catch only the missing-module case instead of a
    # bare `except:`, which would also hide real errors inside mmdet3d (and
    # even swallow KeyboardInterrupt/SystemExit).
    try:
        from mmdet3d.datasets.dataset_wrappers import CBGSDataset
    except ImportError:
        CBGSDataset = None
    from mmdet.datasets.dataset_wrappers import (
        ClassBalancedDataset,
        ConcatDataset,
        RepeatDataset,
    )

    if isinstance(cfg, (list, tuple)):
        dataset = ConcatDataset(
            [custom_build_dataset(c, default_args) for c in cfg]
        )
    elif cfg["type"] == "ConcatDataset":
        dataset = ConcatDataset(
            [custom_build_dataset(c, default_args) for c in cfg["datasets"]],
            cfg.get("separate_eval", True),
        )
    elif cfg["type"] == "RepeatDataset":
        dataset = RepeatDataset(
            custom_build_dataset(cfg["dataset"], default_args), cfg["times"]
        )
    elif cfg["type"] == "ClassBalancedDataset":
        dataset = ClassBalancedDataset(
            custom_build_dataset(cfg["dataset"], default_args),
            cfg["oversample_thr"],
        )
    elif cfg["type"] == "CBGSDataset":
        # Will raise TypeError (None is not callable) if mmdet3d is absent.
        dataset = CBGSDataset(
            custom_build_dataset(cfg["dataset"], default_args)
        )
    elif isinstance(cfg.get("ann_file"), (list, tuple)):
        dataset = _concat_dataset(cfg, default_args)
    else:
        dataset = build_from_cfg(cfg, DATASETS, default_args)

    return dataset
import random
import math
import os
from os import path as osp
import cv2
import tempfile
import copy
import numpy as np
import torch
from torch.utils.data import Dataset
import pyquaternion
from nuscenes.utils.data_classes import Box as NuScenesBox
from nuscenes.eval.detection.config import config_factory as det_configs
from nuscenes.eval.common.config import config_factory as track_configs
import mmcv
from mmcv.utils import print_log
from mmdet.datasets import DATASETS
from mmdet.datasets.pipelines import Compose
from .utils import (
draw_lidar_bbox3d_on_img,
draw_lidar_bbox3d_on_bev,
)
@DATASETS.register_module()
class NuScenes3DDetTrackDataset(Dataset):
    """nuScenes dataset for joint 3D detection and tracking.

    Loads the pickled info file produced by the data converter, builds the
    per-frame input dicts (camera projections, annotations), and implements
    nuScenes detection/tracking evaluation plus video visualization.
    """

    # Fallback attribute per class when velocity gives no better cue.
    DefaultAttribute = {
        "car": "vehicle.parked",
        "pedestrian": "pedestrian.moving",
        "trailer": "vehicle.parked",
        "truck": "vehicle.parked",
        "bus": "vehicle.moving",
        "motorcycle": "cycle.without_rider",
        "construction_vehicle": "vehicle.parked",
        "bicycle": "cycle.without_rider",
        "barrier": "",
        "traffic_cone": "",
    }
    # Mapping from nuScenes TP-error keys to the conventional metric names.
    ErrNameMapping = {
        "trans_err": "mATE",
        "scale_err": "mASE",
        "orient_err": "mAOE",
        "vel_err": "mAVE",
        "attr_err": "mAAE",
    }
    CLASSES = (
        "car",
        "truck",
        "trailer",
        "bus",
        "construction_vehicle",
        "bicycle",
        "motorcycle",
        "pedestrian",
        "traffic_cone",
        "barrier",
    )
    # BGR colors used by `show` for per-instance / per-class box drawing.
    ID_COLOR_MAP = [
        (59, 59, 238),
        (0, 255, 0),
        (0, 0, 255),
        (255, 255, 0),
        (0, 255, 255),
        (255, 0, 255),
        (255, 255, 255),
        (0, 127, 255),
        (71, 130, 255),
        (127, 127, 0),
    ]

    def __init__(
        self,
        ann_file,
        pipeline=None,
        data_root=None,
        classes=None,
        load_interval=1,
        with_velocity=True,
        modality=None,
        test_mode=False,
        det3d_eval_version="detection_cvpr_2019",
        track3d_eval_version="tracking_nips_2019",
        version="v1.0-trainval",
        use_valid_flag=False,
        vis_score_threshold=0.25,
        data_aug_conf=None,
        sequences_split_num=1,
        with_seq_flag=False,
        keep_consistent_seq_aug=True,
        tracking=False,
        tracking_threshold=0.2,
    ):
        """
        Args:
            ann_file (str): Path to the pickled annotation info file.
            pipeline (list, optional): Processing pipeline configs.
            data_root (str, optional): Dataset root (needed for evaluation).
            classes (tuple, optional): Override for the default CLASSES.
            load_interval (int): Keep every ``load_interval``-th frame.
            with_velocity (bool): Append (vx, vy) to gt boxes.
            modality (dict, optional): Sensor usage flags; defaults to lidar.
            test_mode (bool): Skip annotation loading in ``get_data_info``.
            use_valid_flag (bool): Filter gt by the precomputed valid flag
                instead of ``num_lidar_pts > 0``.
            vis_score_threshold (float): Score cutoff for visualization.
            data_aug_conf (dict, optional): Image augmentation config.
            sequences_split_num (int | "all"): Split each scene into this
                many sub-sequences for group sampling.
            with_seq_flag (bool): Compute the per-sequence group flag.
            keep_consistent_seq_aug (bool): Reuse one augmentation per seq.
            tracking (bool): Also run tracking evaluation.
            tracking_threshold (float): Score cutoff for tracking output.
        """
        self.version = version
        self.load_interval = load_interval
        self.use_valid_flag = use_valid_flag
        super().__init__()
        self.data_root = data_root
        self.ann_file = ann_file
        self.test_mode = test_mode
        self.modality = modality
        self.box_mode_3d = 0
        if classes is not None:
            self.CLASSES = classes
        self.cat2id = {name: i for i, name in enumerate(self.CLASSES)}
        self.data_infos = self.load_annotations(self.ann_file)
        if pipeline is not None:
            self.pipeline = Compose(pipeline)
        self.with_velocity = with_velocity
        self.det3d_eval_version = det3d_eval_version
        self.det3d_eval_configs = det_configs(self.det3d_eval_version)
        self.track3d_eval_version = track3d_eval_version
        self.track3d_eval_configs = track_configs(self.track3d_eval_version)
        if self.modality is None:
            self.modality = dict(
                use_camera=False,
                use_lidar=True,
                use_radar=False,
                use_map=False,
                use_external=False,
            )
        self.vis_score_threshold = vis_score_threshold
        self.data_aug_conf = data_aug_conf
        self.tracking = tracking
        self.tracking_threshold = tracking_threshold
        self.sequences_split_num = sequences_split_num
        self.keep_consistent_seq_aug = keep_consistent_seq_aug
        self.current_aug = None
        self.last_id = None
        if with_seq_flag:
            self._set_sequence_group_flag()

    def __len__(self):
        """Number of (interval-subsampled) frames."""
        return len(self.data_infos)

    def _set_sequence_group_flag(self):
        """
        Set each sequence to be a different group
        """
        res = []
        curr_sequence = 0
        for idx in range(len(self.data_infos)):
            if idx != 0 and len(self.data_infos[idx]["sweeps"]) == 0:
                # Not first frame and # of sweeps is 0 -> new sequence
                curr_sequence += 1
            res.append(curr_sequence)
        self.flag = np.array(res, dtype=np.int64)
        if self.sequences_split_num != 1:
            if self.sequences_split_num == "all":
                self.flag = np.array(
                    range(len(self.data_infos)), dtype=np.int64
                )
            else:
                # Split every sequence into `sequences_split_num` roughly
                # equal chunks, each becoming its own group flag.
                bin_counts = np.bincount(self.flag)
                new_flags = []
                curr_new_flag = 0
                for curr_flag in range(len(bin_counts)):
                    curr_sequence_length = np.array(
                        list(
                            range(
                                0,
                                bin_counts[curr_flag],
                                math.ceil(
                                    bin_counts[curr_flag]
                                    / self.sequences_split_num
                                ),
                            )
                        )
                        + [bin_counts[curr_flag]]
                    )
                    for sub_seq_idx in (
                        curr_sequence_length[1:] - curr_sequence_length[:-1]
                    ):
                        for _ in range(sub_seq_idx):
                            new_flags.append(curr_new_flag)
                        curr_new_flag += 1
                assert len(new_flags) == len(self.flag)
                assert (
                    len(np.bincount(new_flags))
                    == len(np.bincount(self.flag)) * self.sequences_split_num
                )
                self.flag = np.array(new_flags, dtype=np.int64)

    def get_augmentation(self):
        """Sample (train) or fix (test) an image augmentation config.

        Returns:
            dict | None: resize / crop / flip / rotate parameters, or None
            when ``data_aug_conf`` is unset.
        """
        if self.data_aug_conf is None:
            return None
        H, W = self.data_aug_conf["H"], self.data_aug_conf["W"]
        fH, fW = self.data_aug_conf["final_dim"]
        if not self.test_mode:
            resize = np.random.uniform(*self.data_aug_conf["resize_lim"])
            resize_dims = (int(W * resize), int(H * resize))
            newW, newH = resize_dims
            crop_h = (
                int(
                    (1 - np.random.uniform(*self.data_aug_conf["bot_pct_lim"]))
                    * newH
                )
                - fH
            )
            crop_w = int(np.random.uniform(0, max(0, newW - fW)))
            crop = (crop_w, crop_h, crop_w + fW, crop_h + fH)
            flip = False
            if self.data_aug_conf["rand_flip"] and np.random.choice([0, 1]):
                flip = True
            rotate = np.random.uniform(*self.data_aug_conf["rot_lim"])
            rotate_3d = np.random.uniform(*self.data_aug_conf["rot3d_range"])
        else:
            # Deterministic center-ish crop at test time.
            resize = max(fH / H, fW / W)
            resize_dims = (int(W * resize), int(H * resize))
            newW, newH = resize_dims
            crop_h = (
                int((1 - np.mean(self.data_aug_conf["bot_pct_lim"])) * newH)
                - fH
            )
            crop_w = int(max(0, newW - fW) / 2)
            crop = (crop_w, crop_h, crop_w + fW, crop_h + fH)
            flip = False
            rotate = 0
            rotate_3d = 0
        aug_config = {
            "resize": resize,
            "resize_dims": resize_dims,
            "crop": crop,
            "flip": flip,
            "rotate": rotate,
            "rotate_3d": rotate_3d,
        }
        return aug_config

    def __getitem__(self, idx):
        """Return the pipeline output for frame ``idx``.

        ``idx`` may also be a dict carrying a precomputed ``aug_config``
        (used to keep augmentation consistent within a sequence).
        """
        if isinstance(idx, dict):
            aug_config = idx["aug_config"]
            idx = idx["idx"]
        else:
            aug_config = self.get_augmentation()
        data = self.get_data_info(idx)
        data["aug_config"] = aug_config
        data = self.pipeline(data)
        return data

    def get_cat_ids(self, idx):
        """Return the set of category ids present in frame ``idx``."""
        info = self.data_infos[idx]
        if self.use_valid_flag:
            mask = info["valid_flag"]
            gt_names = set(info["gt_names"][mask])
        else:
            gt_names = set(info["gt_names"])
        cat_ids = []
        for name in gt_names:
            if name in self.CLASSES:
                cat_ids.append(self.cat2id[name])
        return cat_ids

    def load_annotations(self, ann_file):
        """Load and timestamp-sort the pickled info list."""
        data = mmcv.load(ann_file, file_format="pkl")
        data_infos = list(sorted(data["infos"], key=lambda e: e["timestamp"]))
        data_infos = data_infos[:: self.load_interval]
        self.metadata = data["metadata"]
        self.version = self.metadata["version"]
        print(self.metadata)
        return data_infos

    def get_data_info(self, index):
        """Build the raw input dict (paths, transforms, annos) for a frame."""
        info = self.data_infos[index]
        # standard protocol modified from SECOND.Pytorch
        input_dict = dict(
            sample_idx=info["token"],
            pts_filename=info["lidar_path"],
            sweeps=info["sweeps"],
            timestamp=info["timestamp"] / 1e6,
            lidar2ego_translation=info["lidar2ego_translation"],
            lidar2ego_rotation=info["lidar2ego_rotation"],
            ego2global_translation=info["ego2global_translation"],
            ego2global_rotation=info["ego2global_rotation"],
        )
        lidar2ego = np.eye(4)
        lidar2ego[:3, :3] = pyquaternion.Quaternion(
            info["lidar2ego_rotation"]
        ).rotation_matrix
        lidar2ego[:3, 3] = np.array(info["lidar2ego_translation"])
        ego2global = np.eye(4)
        ego2global[:3, :3] = pyquaternion.Quaternion(
            info["ego2global_rotation"]
        ).rotation_matrix
        ego2global[:3, 3] = np.array(info["ego2global_translation"])
        input_dict["lidar2global"] = ego2global @ lidar2ego
        if self.modality["use_camera"]:
            image_paths = []
            lidar2img_rts = []
            cam_intrinsic = []
            for cam_type, cam_info in info["cams"].items():
                image_paths.append(cam_info["data_path"])
                # obtain lidar to image transformation matrix
                lidar2cam_r = np.linalg.inv(cam_info["sensor2lidar_rotation"])
                lidar2cam_t = (
                    cam_info["sensor2lidar_translation"] @ lidar2cam_r.T
                )
                lidar2cam_rt = np.eye(4)
                lidar2cam_rt[:3, :3] = lidar2cam_r.T
                lidar2cam_rt[3, :3] = -lidar2cam_t
                intrinsic = copy.deepcopy(cam_info["cam_intrinsic"])
                cam_intrinsic.append(intrinsic)
                viewpad = np.eye(4)
                viewpad[: intrinsic.shape[0], : intrinsic.shape[1]] = intrinsic
                lidar2img_rt = viewpad @ lidar2cam_rt.T
                lidar2img_rts.append(lidar2img_rt)
            input_dict.update(
                dict(
                    img_filename=image_paths,
                    lidar2img=lidar2img_rts,
                    cam_intrinsic=cam_intrinsic,
                )
            )
        if not self.test_mode:
            annos = self.get_ann_info(index)
            input_dict.update(annos)
        return input_dict

    def get_ann_info(self, index):
        """Return gt boxes/labels/names (and instance ids) for a frame."""
        info = self.data_infos[index]
        if self.use_valid_flag:
            mask = info["valid_flag"]
        else:
            mask = info["num_lidar_pts"] > 0
        gt_bboxes_3d = info["gt_boxes"][mask]
        gt_names_3d = info["gt_names"][mask]
        gt_labels_3d = []
        for cat in gt_names_3d:
            if cat in self.CLASSES:
                gt_labels_3d.append(self.CLASSES.index(cat))
            else:
                gt_labels_3d.append(-1)
        gt_labels_3d = np.array(gt_labels_3d)
        if self.with_velocity:
            gt_velocity = info["gt_velocity"][mask]
            nan_mask = np.isnan(gt_velocity[:, 0])
            gt_velocity[nan_mask] = [0.0, 0.0]
            gt_bboxes_3d = np.concatenate([gt_bboxes_3d, gt_velocity], axis=-1)
        anns_results = dict(
            gt_bboxes_3d=gt_bboxes_3d,
            gt_labels_3d=gt_labels_3d,
            gt_names=gt_names_3d,
        )
        if "instance_inds" in info:
            # Fix: `np.int` was removed in NumPy 1.24; use a concrete dtype
            # consistent with the flags elsewhere in this class.
            instance_inds = np.array(info["instance_inds"], dtype=np.int64)[mask]
            anns_results["instance_inds"] = instance_inds
        return anns_results

    def _format_bbox(self, results, jsonfile_prefix=None, tracking=False):
        """Dump per-sample boxes to the nuScenes submission JSON format."""
        nusc_annos = {}
        mapped_class_names = self.CLASSES
        print("Start to convert detection format...")
        for sample_id, det in enumerate(mmcv.track_iter_progress(results)):
            annos = []
            boxes = output_to_nusc_box(
                det, threshold=self.tracking_threshold if tracking else None
            )
            sample_token = self.data_infos[sample_id]["token"]
            boxes = lidar_nusc_box_to_global(
                self.data_infos[sample_id],
                boxes,
                mapped_class_names,
                self.det3d_eval_configs,
                self.det3d_eval_version,
            )
            for i, box in enumerate(boxes):
                name = mapped_class_names[box.label]
                # Static classes are not tracked by the nuScenes benchmark.
                if tracking and name in [
                    "barrier",
                    "traffic_cone",
                    "construction_vehicle",
                ]:
                    continue
                # Pick an attribute from the speed; fall back to the default.
                if np.sqrt(box.velocity[0] ** 2 + box.velocity[1] ** 2) > 0.2:
                    if name in [
                        "car",
                        "construction_vehicle",
                        "bus",
                        "truck",
                        "trailer",
                    ]:
                        attr = "vehicle.moving"
                    elif name in ["bicycle", "motorcycle"]:
                        attr = "cycle.with_rider"
                    else:
                        attr = NuScenes3DDetTrackDataset.DefaultAttribute[name]
                else:
                    if name in ["pedestrian"]:
                        attr = "pedestrian.standing"
                    elif name in ["bus"]:
                        attr = "vehicle.stopped"
                    else:
                        attr = NuScenes3DDetTrackDataset.DefaultAttribute[name]
                nusc_anno = dict(
                    sample_token=sample_token,
                    translation=box.center.tolist(),
                    size=box.wlh.tolist(),
                    rotation=box.orientation.elements.tolist(),
                    velocity=box.velocity[:2].tolist(),
                )
                if not tracking:
                    nusc_anno.update(
                        dict(
                            detection_name=name,
                            detection_score=box.score,
                            attribute_name=attr,
                        )
                    )
                else:
                    nusc_anno.update(
                        dict(
                            tracking_name=name,
                            tracking_score=box.score,
                            tracking_id=str(box.token),
                        )
                    )
                annos.append(nusc_anno)
            nusc_annos[sample_token] = annos
        nusc_submissions = {
            "meta": self.modality,
            "results": nusc_annos,
        }
        mmcv.mkdir_or_exist(jsonfile_prefix)
        res_path = osp.join(jsonfile_prefix, "results_nusc.json")
        print("Results writes to", res_path)
        mmcv.dump(nusc_submissions, res_path)
        return res_path

    def _evaluate_single(
        self, result_path, logger=None, result_name="img_bbox", tracking=False
    ):
        """Run the official nuScenes detection or tracking evaluator."""
        from nuscenes import NuScenes

        output_dir = osp.join(*osp.split(result_path)[:-1])
        nusc = NuScenes(
            version=self.version, dataroot=self.data_root, verbose=False
        )
        eval_set_map = {
            "v1.0-mini": "mini_val",
            "v1.0-trainval": "val",
        }
        if not tracking:
            from nuscenes.eval.detection.evaluate import NuScenesEval

            nusc_eval = NuScenesEval(
                nusc,
                config=self.det3d_eval_configs,
                result_path=result_path,
                eval_set=eval_set_map[self.version],
                output_dir=output_dir,
                verbose=True,
            )
            nusc_eval.main(render_curves=False)
            # record metrics
            metrics = mmcv.load(osp.join(output_dir, "metrics_summary.json"))
            detail = dict()
            metric_prefix = f"{result_name}_NuScenes"
            for name in self.CLASSES:
                for k, v in metrics["label_aps"][name].items():
                    val = float("{:.4f}".format(v))
                    detail[
                        "{}/{}_AP_dist_{}".format(metric_prefix, name, k)
                    ] = val
                for k, v in metrics["label_tp_errors"][name].items():
                    val = float("{:.4f}".format(v))
                    detail["{}/{}_{}".format(metric_prefix, name, k)] = val
                for k, v in metrics["tp_errors"].items():
                    val = float("{:.4f}".format(v))
                    detail[
                        "{}/{}".format(metric_prefix, self.ErrNameMapping[k])
                    ] = val
            detail["{}/NDS".format(metric_prefix)] = metrics["nd_score"]
            detail["{}/mAP".format(metric_prefix)] = metrics["mean_ap"]
        else:
            from nuscenes.eval.tracking.evaluate import TrackingEval

            nusc_eval = TrackingEval(
                config=self.track3d_eval_configs,
                result_path=result_path,
                eval_set=eval_set_map[self.version],
                output_dir=output_dir,
                verbose=True,
                nusc_version=self.version,
                nusc_dataroot=self.data_root,
            )
            metrics = nusc_eval.main()
            # record metrics
            metrics = mmcv.load(osp.join(output_dir, "metrics_summary.json"))
            print(metrics)
            detail = dict()
            metric_prefix = f"{result_name}_NuScenes"
            keys = [
                "amota",
                "amotp",
                "recall",
                "motar",
                "gt",
                "mota",
                "motp",
                "mt",
                "ml",
                "faf",
                "tp",
                "fp",
                "fn",
                "ids",
                "frag",
                "tid",
                "lgd",
            ]
            for key in keys:
                detail["{}/{}".format(metric_prefix, key)] = metrics[key]
        return detail

    def format_results(self, results, jsonfile_prefix=None, tracking=False):
        """Write results to submission JSON(s); returns (paths, tmp_dir)."""
        assert isinstance(results, list), "results must be a list"
        if jsonfile_prefix is None:
            tmp_dir = tempfile.TemporaryDirectory()
            jsonfile_prefix = osp.join(tmp_dir.name, "results")
        else:
            tmp_dir = None
        if not ("pts_bbox" in results[0] or "img_bbox" in results[0]):
            result_files = self._format_bbox(
                results, jsonfile_prefix, tracking=tracking
            )
        else:
            result_files = dict()
            for name in results[0]:
                print(f"\nFormating bboxes of {name}")
                results_ = [out[name] for out in results]
                tmp_file_ = osp.join(jsonfile_prefix, name)
                result_files.update(
                    {
                        name: self._format_bbox(
                            results_, tmp_file_, tracking=tracking
                        )
                    }
                )
        return result_files, tmp_dir

    def evaluate(
        self,
        results,
        metric=None,
        logger=None,
        jsonfile_prefix=None,
        result_names=["img_bbox"],
        show=False,
        out_dir=None,
        pipeline=None,
    ):
        """Evaluate detection (always) and tracking (when enabled).

        Note: the loop variable deliberately shadows the unused ``metric``
        parameter, which is kept for interface compatibility.
        """
        for metric in ["detection", "tracking"]:
            tracking = metric == "tracking"
            if tracking and not self.tracking:
                continue
            result_files, tmp_dir = self.format_results(
                results, jsonfile_prefix, tracking=tracking
            )
            if isinstance(result_files, dict):
                results_dict = dict()
                for name in result_names:
                    ret_dict = self._evaluate_single(
                        result_files[name], tracking=tracking
                    )
                results_dict.update(ret_dict)
            elif isinstance(result_files, str):
                results_dict = self._evaluate_single(
                    result_files, tracking=tracking
                )
            if tmp_dir is not None:
                tmp_dir.cleanup()
        if show or out_dir:
            self.show(results, save_dir=out_dir, show=show, pipeline=pipeline)
        return results_dict

    def show(self, results, save_dir=None, show=False, pipeline=None):
        """Render predictions on the 6 camera views + BEV and save a video."""
        save_dir = "./" if save_dir is None else save_dir
        save_dir = os.path.join(save_dir, "visual")
        print_log(os.path.abspath(save_dir))
        pipeline = Compose(pipeline)
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)
        fourcc = cv2.VideoWriter_fourcc(*"MJPG")
        videoWriter = None
        for i, result in enumerate(results):
            if "img_bbox" in result.keys():
                result = result["img_bbox"]
            data_info = pipeline(self.get_data_info(i))
            imgs = []
            raw_imgs = data_info["img"]
            lidar2img = data_info["img_metas"].data["lidar2img"]
            pred_bboxes_3d = result["boxes_3d"][
                result["scores_3d"] > self.vis_score_threshold
            ]
            if "instance_ids" in result and self.tracking:
                color = []
                for id in result["instance_ids"].cpu().numpy().tolist():
                    color.append(
                        self.ID_COLOR_MAP[int(id % len(self.ID_COLOR_MAP))]
                    )
            elif "labels_3d" in result:
                color = []
                for id in result["labels_3d"].cpu().numpy().tolist():
                    color.append(self.ID_COLOR_MAP[id])
            else:
                color = (255, 0, 0)
            # ===== draw boxes_3d to images =====
            for j, img_origin in enumerate(raw_imgs):
                img = img_origin.copy()
                if len(pred_bboxes_3d) != 0:
                    img = draw_lidar_bbox3d_on_img(
                        pred_bboxes_3d,
                        img,
                        lidar2img[j],
                        img_metas=None,
                        color=color,
                        thickness=3,
                    )
                imgs.append(img)
            # ===== draw boxes_3d to BEV =====
            bev = draw_lidar_bbox3d_on_bev(
                pred_bboxes_3d,
                bev_size=img.shape[0] * 2,
                color=color,
            )
            # ===== put text and concat =====
            for j, name in enumerate(
                [
                    "front",
                    "front right",
                    "front left",
                    "rear",
                    "rear left",
                    "rear right",
                ]
            ):
                imgs[j] = cv2.rectangle(
                    imgs[j],
                    (0, 0),
                    (440, 80),
                    color=(255, 255, 255),
                    thickness=-1,
                )
                w, h = cv2.getTextSize(name, cv2.FONT_HERSHEY_SIMPLEX, 2, 2)[0]
                text_x = int(220 - w / 2)
                text_y = int(40 + h / 2)
                imgs[j] = cv2.putText(
                    imgs[j],
                    name,
                    (text_x, text_y),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    2,
                    (0, 0, 0),
                    2,
                    cv2.LINE_AA,
                )
            image = np.concatenate(
                [
                    np.concatenate([imgs[2], imgs[0], imgs[1]], axis=1),
                    np.concatenate([imgs[5], imgs[3], imgs[4]], axis=1),
                ],
                axis=0,
            )
            image = np.concatenate([image, bev], axis=1)
            # ===== save video =====
            if videoWriter is None:
                videoWriter = cv2.VideoWriter(
                    os.path.join(save_dir, "video.avi"),
                    fourcc,
                    7,
                    image.shape[:2][::-1],
                )
            cv2.imwrite(os.path.join(save_dir, f"{i}.jpg"), image)
            videoWriter.write(image)
        # Fix: avoid AttributeError when `results` is empty and no writer
        # was ever created.
        if videoWriter is not None:
            videoWriter.release()
def output_to_nusc_box(detection, threshold=None):
    """Convert a detection dict into a list of ``NuScenesBox``.

    Args:
        detection (dict): Contains "boxes_3d", "scores_3d", "labels_3d" and
            optionally "instance_ids" / "cls_scores".
        threshold (float, optional): When given, drop boxes whose score
            ("cls_scores" if present, else "scores_3d") is below it.

    Returns:
        list[NuScenesBox]: One box per kept detection. ``box.token`` carries
        the instance id when tracking ids are available.
    """
    box3d = detection["boxes_3d"]
    scores = detection["scores_3d"].numpy()
    labels = detection["labels_3d"].numpy()
    ids = None
    if "instance_ids" in detection:
        ids = detection["instance_ids"]  # .numpy()
    if threshold is not None:
        if "cls_scores" in detection:
            mask = detection["cls_scores"].numpy() >= threshold
        else:
            mask = scores >= threshold
        box3d = box3d[mask]
        scores = scores[mask]
        labels = labels[mask]
        # Bug fix: the original indexed `ids` unconditionally here, raising
        # a NameError when thresholding without instance ids.
        if ids is not None:
            ids = ids[mask]
    if hasattr(box3d, "gravity_center"):
        box_gravity_center = box3d.gravity_center.numpy()
        box_dims = box3d.dims.numpy()
        nus_box_dims = box_dims[:, [1, 0, 2]]
        box_yaw = box3d.yaw.numpy()
    else:
        box3d = box3d.numpy()
        box_gravity_center = box3d[..., :3].copy()
        box_dims = box3d[..., 3:6].copy()
        # Swap the first two size dims for the nuScenes box convention.
        nus_box_dims = box_dims[..., [1, 0, 2]]
        box_yaw = box3d[..., 6].copy()
    # TODO: check whether this is necessary
    # with dir_offset & dir_limit in the head
    # box_yaw = -box_yaw - np.pi / 2
    box_list = []
    for i in range(len(box3d)):
        quat = pyquaternion.Quaternion(axis=[0, 0, 1], radians=box_yaw[i])
        if hasattr(box3d, "gravity_center"):
            velocity = (*box3d.tensor[i, 7:9], 0.0)
        else:
            velocity = (*box3d[i, 7:9], 0.0)
        box = NuScenesBox(
            box_gravity_center[i],
            nus_box_dims[i],
            quat,
            label=labels[i],
            score=scores[i],
            velocity=velocity,
        )
        if ids is not None:
            box.token = ids[i]
        box_list.append(box)
    return box_list
def lidar_nusc_box_to_global(
    info,
    boxes,
    classes,
    eval_configs,
    eval_version="detection_cvpr_2019",
):
    """Map boxes from the lidar frame to the global frame.

    Each box is first moved into the ego frame, filtered against the
    per-class evaluation range (XY distance from the ego), and the
    survivors are then moved into the global frame.
    """
    cls_range_map = eval_configs.class_range
    kept_boxes = []
    for box in boxes:
        # lidar -> ego
        box.rotate(pyquaternion.Quaternion(info["lidar2ego_rotation"]))
        box.translate(np.array(info["lidar2ego_translation"]))
        # Drop boxes outside the class-specific evaluation radius.
        radius = np.linalg.norm(box.center[:2], 2)
        if radius > cls_range_map[classes[box.label]]:
            continue
        # ego -> global
        box.rotate(pyquaternion.Quaternion(info["ego2global_rotation"]))
        box.translate(np.array(info["ego2global_translation"]))
        kept_boxes.append(box)
    return kept_boxes
from .transform import (
InstanceNameFilter,
CircleObjectRangeFilter,
NormalizeMultiviewImage,
NuScenesSparse4DAdaptor,
MultiScaleDepthMapGenerator,
)
from .augment import (
ResizeCropFlipImage,
BBoxRotation,
PhotoMetricDistortionMultiViewImage,
)
from .loading import LoadMultiViewImageFromFiles, LoadPointsFromFile
__all__ = [
"InstanceNameFilter",
"ResizeCropFlipImage",
"BBoxRotation",
"CircleObjectRangeFilter",
"MultiScaleDepthMapGenerator",
"NormalizeMultiviewImage",
"PhotoMetricDistortionMultiViewImage",
"NuScenesSparse4DAdaptor",
"LoadMultiViewImageFromFiles",
"LoadPointsFromFile",
]
import torch
import numpy as np
from numpy import random
import mmcv
from mmdet.datasets.builder import PIPELINES
from PIL import Image
@PIPELINES.register_module()
class ResizeCropFlipImage(object):
    """Apply the sampled resize/crop/flip/rotate augmentation to every view.

    Reads ``results['aug_config']`` (as produced by
    ``NuScenes3DDetTrackDataset.get_augmentation``) and updates each image
    together with its ``lidar2img`` projection so 3D points still project to
    the augmented pixels.
    """

    def __call__(self, results):
        """Transform all images in ``results['img']`` in place.

        Returns ``results`` unchanged when no ``aug_config`` is present.
        """
        aug_config = results.get("aug_config")
        if aug_config is None:
            return results
        imgs = results["img"]
        N = len(imgs)
        new_imgs = []
        for i in range(N):
            # Images arrive as float arrays; cast to uint8 for PIL.
            img, mat = self._img_transform(
                np.uint8(imgs[i]), aug_config,
            )
            new_imgs.append(np.array(img).astype(np.float32))
            # Compose the 2D augmentation into the camera projection.
            results["lidar2img"][i] = mat @ results["lidar2img"][i]
            if "cam_intrinsic" in results:
                # NOTE(review): only the resize is folded into the intrinsics
                # here (crop/flip/rotate are not) — the commented alternative
                # below would apply the full 2D transform; confirm intended.
                results["cam_intrinsic"][i][:3, :3] *= aug_config["resize"]
                # results["cam_intrinsic"][i][:3, :3] = (
                #     mat[:3, :3] @ results["cam_intrinsic"][i][:3, :3]
                # )
        results["img"] = new_imgs
        results["img_shape"] = [x.shape[:2] for x in new_imgs]
        return results

    def _img_transform(self, img, aug_configs):
        """Apply resize -> crop -> flip -> rotate to one image.

        Returns the transformed PIL-derived array and the matching 4x4
        homogeneous 2D transform (top-left 3x3 is the pixel mapping).
        """
        H, W = img.shape[:2]
        resize = aug_configs.get("resize", 1)
        resize_dims = (int(W * resize), int(H * resize))
        crop = aug_configs.get("crop", [0, 0, *resize_dims])
        flip = aug_configs.get("flip", False)
        rotate = aug_configs.get("rotate", 0)
        origin_dtype = img.dtype
        if origin_dtype != np.uint8:
            # Rescale arbitrary-range input into uint8 for PIL processing;
            # NOTE(review): divides by (max - min), so a constant image
            # would divide by zero — confirm inputs always have spread.
            min_value = img.min()
            max_vaule = img.max()
            scale = 255 / (max_vaule - min_value)
            img = (img - min_value) * scale
            img = np.uint8(img)
        img = Image.fromarray(img)
        img = img.resize(resize_dims).crop(crop)
        if flip:
            img = img.transpose(method=Image.FLIP_LEFT_RIGHT)
        img = img.rotate(rotate)
        img = np.array(img).astype(np.float32)
        if origin_dtype != np.uint8:
            # Undo the uint8 rescaling to restore the original value range.
            img = img.astype(np.float32)
            img = img / scale + min_value
        # Build the matching pixel transform in the same order as above:
        # resize, then crop translation, then flip, then rotation about the
        # crop center.
        transform_matrix = np.eye(3)
        transform_matrix[:2, :2] *= resize
        transform_matrix[:2, 2] -= np.array(crop[:2])
        if flip:
            flip_matrix = np.array(
                [[-1, 0, crop[2] - crop[0]], [0, 1, 0], [0, 0, 1]]
            )
            transform_matrix = flip_matrix @ transform_matrix
        rotate = rotate / 180 * np.pi
        rot_matrix = np.array(
            [
                [np.cos(rotate), np.sin(rotate), 0],
                [-np.sin(rotate), np.cos(rotate), 0],
                [0, 0, 1],
            ]
        )
        rot_center = np.array([crop[2] - crop[0], crop[3] - crop[1]]) / 2
        rot_matrix[:2, 2] = -rot_matrix[:2, :2] @ rot_center + rot_center
        transform_matrix = rot_matrix @ transform_matrix
        extend_matrix = np.eye(4)
        extend_matrix[:3, :3] = transform_matrix
        return img, extend_matrix
@PIPELINES.register_module()
class BBoxRotation(object):
    """Rotate the scene about the z-axis by ``aug_config['rotate_3d']``.

    Projection matrices (and ``lidar2global`` when present) are
    post-multiplied by the inverse rotation, and ground-truth boxes are
    rotated in place.
    """

    def __call__(self, results):
        """Apply the 3D rotation augmentation to ``results`` in place."""
        angle = results["aug_config"]["rotate_3d"]
        cos_a = np.cos(angle)
        sin_a = np.sin(angle)
        rot_mat = np.array(
            [
                [cos_a, -sin_a, 0, 0],
                [sin_a, cos_a, 0, 0],
                [0, 0, 1, 0],
                [0, 0, 0, 1],
            ]
        )
        rot_mat_inv = np.linalg.inv(rot_mat)

        for view, proj in enumerate(results["lidar2img"]):
            results["lidar2img"][view] = proj @ rot_mat_inv
        if "lidar2global" in results:
            results["lidar2global"] = results["lidar2global"] @ rot_mat_inv
        if "gt_bboxes_3d" in results:
            results["gt_bboxes_3d"] = self.box_rotate(
                results["gt_bboxes_3d"], angle
            )
        return results

    @staticmethod
    def box_rotate(bbox_3d, angle):
        """Rotate boxes (x, y, z, ..., yaw[, vel...]) about z by ``angle``."""
        cos_a = np.cos(angle)
        sin_a = np.sin(angle)
        rot_mat_T = np.array(
            [[cos_a, sin_a, 0], [-sin_a, cos_a, 0], [0, 0, 1]]
        )
        bbox_3d[:, :3] = bbox_3d[:, :3] @ rot_mat_T
        bbox_3d[:, 6] += angle
        if bbox_3d.shape[-1] > 7:
            vel_dims = bbox_3d[:, 7:].shape[-1]
            bbox_3d[:, 7:] = bbox_3d[:, 7:] @ rot_mat_T[:vel_dims, :vel_dims]
        return bbox_3d
@PIPELINES.register_module()
class PhotoMetricDistortionMultiViewImage:
    """Apply photometric distortion to image sequentially, every transformation
    is applied with a probability of 0.5. The position of random contrast is in
    second or second to last.
    1. random brightness
    2. random contrast (mode 0)
    3. convert color from BGR to HSV
    4. random saturation
    5. random hue
    6. convert color from HSV to BGR
    7. random contrast (mode 1)
    8. randomly swap channels
    Args:
        brightness_delta (int): delta of brightness.
        contrast_range (tuple): range of contrast.
        saturation_range (tuple): range of saturation.
        hue_delta (int): delta of hue.
    """

    def __init__(
        self,
        brightness_delta=32,
        contrast_range=(0.5, 1.5),
        saturation_range=(0.5, 1.5),
        hue_delta=18,
    ):
        # Store ranges unpacked so the hot loop reads plain attributes.
        self.brightness_delta = brightness_delta
        self.contrast_lower, self.contrast_upper = contrast_range
        self.saturation_lower, self.saturation_upper = saturation_range
        self.hue_delta = hue_delta

    def __call__(self, results):
        """Call function to perform photometric distortion on images.
        Args:
            results (dict): Result dict from loading pipeline.
        Returns:
            dict: Result dict with images distorted.
        """
        # NOTE: `random` here is `numpy.random` (module-level import), so
        # `random.randint(2)` draws 0 or 1 — i.e. a fair coin flip.
        imgs = results["img"]
        new_imgs = []
        for img in imgs:
            assert img.dtype == np.float32, (
                "PhotoMetricDistortion needs the input image of dtype np.float32,"
                ' please set "to_float32=True" in "LoadImageFromFile" pipeline'
            )
            # random brightness
            if random.randint(2):
                delta = random.uniform(
                    -self.brightness_delta, self.brightness_delta
                )
                img += delta
            # mode == 0 --> do random contrast first
            # mode == 1 --> do random contrast last
            mode = random.randint(2)
            if mode == 1:
                if random.randint(2):
                    alpha = random.uniform(
                        self.contrast_lower, self.contrast_upper
                    )
                    img *= alpha
            # convert color from BGR to HSV
            img = mmcv.bgr2hsv(img)
            # random saturation
            if random.randint(2):
                img[..., 1] *= random.uniform(
                    self.saturation_lower, self.saturation_upper
                )
            # random hue (channel 0 is hue in degrees; wrap into [0, 360))
            if random.randint(2):
                img[..., 0] += random.uniform(-self.hue_delta, self.hue_delta)
                img[..., 0][img[..., 0] > 360] -= 360
                img[..., 0][img[..., 0] < 0] += 360
            # convert color from HSV to BGR
            img = mmcv.hsv2bgr(img)
            # random contrast
            if mode == 0:
                if random.randint(2):
                    alpha = random.uniform(
                        self.contrast_lower, self.contrast_upper
                    )
                    img *= alpha
            # randomly swap channels
            if random.randint(2):
                img = img[..., random.permutation(3)]
            new_imgs.append(img)
        results["img"] = new_imgs
        return results

    def __repr__(self):
        """str: Human-readable summary of the configured ranges."""
        repr_str = self.__class__.__name__
        repr_str += f"(\nbrightness_delta={self.brightness_delta},\n"
        repr_str += "contrast_range="
        repr_str += f"{(self.contrast_lower, self.contrast_upper)},\n"
        repr_str += "saturation_range="
        repr_str += f"{(self.saturation_lower, self.saturation_upper)},\n"
        repr_str += f"hue_delta={self.hue_delta})"
        return repr_str
import numpy as np
import mmcv
from mmdet.datasets.builder import PIPELINES
@PIPELINES.register_module()
class LoadMultiViewImageFromFiles(object):
    """Load one image per camera view from ``results['img_filename']``.

    Args:
        to_float32 (bool, optional): Whether to convert the images to
            float32. Defaults to False.
        color_type (str, optional): Color type passed to ``mmcv.imread``.
            Defaults to 'unchanged'.
    """

    def __init__(self, to_float32=False, color_type="unchanged"):
        self.to_float32 = to_float32
        self.color_type = color_type

    def __call__(self, results):
        """Read every view and populate the image keys in ``results``.

        Adds: ``filename``, ``img`` (list of per-view arrays),
        ``img_shape``, ``ori_shape``, ``pad_shape``, ``scale_factor`` and
        ``img_norm_cfg`` (identity normalization).
        """
        filenames = results["img_filename"]
        # Stack into (h, w, c, num_views) so the shape keys cover all views.
        stacked = np.stack(
            [mmcv.imread(fname, self.color_type) for fname in filenames],
            axis=-1,
        )
        if self.to_float32:
            stacked = stacked.astype(np.float32)
        results["filename"] = filenames
        # Unravel into a per-view list: `DefaultFormatBundle` transposes
        # each image separately before stacking them back into one array.
        results["img"] = [stacked[..., i] for i in range(stacked.shape[-1])]
        results["img_shape"] = stacked.shape
        results["ori_shape"] = stacked.shape
        # Initial values for the default meta keys.
        results["pad_shape"] = stacked.shape
        results["scale_factor"] = 1.0
        channels = 1 if len(stacked.shape) < 3 else stacked.shape[2]
        results["img_norm_cfg"] = dict(
            mean=np.zeros(channels, dtype=np.float32),
            std=np.ones(channels, dtype=np.float32),
            to_rgb=False,
        )
        return results

    def __repr__(self):
        """str: Return a string that describes the module."""
        return (
            f"{self.__class__.__name__}"
            f"(to_float32={self.to_float32}, "
            f"color_type='{self.color_type}')"
        )
@PIPELINES.register_module()
class LoadPointsFromFile(object):
    """Load Points From File.
    Load points from file.
    Args:
        coord_type (str): The type of coordinates of points cloud.
            Available options includes:
            - 'LIDAR': Points in LiDAR coordinates.
            - 'DEPTH': Points in depth coordinates, usually for indoor dataset.
            - 'CAMERA': Points in camera coordinates.
        load_dim (int, optional): The dimension of the loaded points.
            Defaults to 6.
        use_dim (list[int], optional): Which dimensions of the points to use.
            Defaults to [0, 1, 2]. For KITTI dataset, set use_dim=4
            or use_dim=[0, 1, 2, 3] to use the intensity dimension.
        shift_height (bool, optional): Whether to use shifted height.
            Defaults to False.
        use_color (bool, optional): Whether to use color features.
            Defaults to False.
        file_client_args (dict, optional): Config dict of file clients,
            refer to
            https://github.com/open-mmlab/mmcv/blob/master/mmcv/fileio/file_client.py
            for more details. Defaults to dict(backend='disk').
    """

    def __init__(
        self,
        coord_type,
        load_dim=6,
        use_dim=[0, 1, 2],
        shift_height=False,
        use_color=False,
        file_client_args=dict(backend="disk"),
    ):
        self.shift_height = shift_height
        self.use_color = use_color
        # An int `use_dim` is shorthand for the first N dimensions.
        if isinstance(use_dim, int):
            use_dim = list(range(use_dim))
        assert (
            max(use_dim) < load_dim
        ), f"Expect all used dimensions < {load_dim}, got {use_dim}"
        assert coord_type in ["CAMERA", "LIDAR", "DEPTH"]
        self.coord_type = coord_type
        self.load_dim = load_dim
        self.use_dim = use_dim
        self.file_client_args = file_client_args.copy()
        # Lazily constructed on first use (workers get their own client).
        self.file_client = None

    def _load_points(self, pts_filename):
        """Private function to load point clouds data.
        Args:
            pts_filename (str): Filename of point clouds data.
        Returns:
            np.ndarray: An array containing point clouds data.
        """
        if self.file_client is None:
            self.file_client = mmcv.FileClient(**self.file_client_args)
        try:
            raw = self.file_client.get(pts_filename)
            return np.frombuffer(raw, dtype=np.float32)
        except ConnectionError:
            # Fall back to the local filesystem when the backend is down.
            mmcv.check_file_exist(pts_filename)
            if pts_filename.endswith(".npy"):
                return np.load(pts_filename)
            return np.fromfile(pts_filename, dtype=np.float32)

    def __call__(self, results):
        """Call function to load points data from file.
        Args:
            results (dict): Result dict containing point clouds data.
        Returns:
            dict: The result dict containing the point clouds data.
                Added key and value are described below.
                - points (:obj:`BasePoints`): Point clouds data.
        """
        pts_filename = results["pts_filename"]
        points = self._load_points(pts_filename)
        points = points.reshape(-1, self.load_dim)[:, self.use_dim]
        # NOTE(review): `attribute_dims` is computed below but never written
        # into `results` — confirm whether downstream expects it.
        attribute_dims = None
        if self.shift_height:
            # NOTE(review): 0.99 is a *percentile* argument (≈ minimum
            # height), not the 99th percentile; this matches upstream
            # mmdet3d, so the established behavior is preserved here.
            floor_height = np.percentile(points[:, 2], 0.99)
            height = points[:, 2] - floor_height
            points = np.concatenate(
                [points[:, :3], np.expand_dims(height, 1), points[:, 3:]], 1
            )
            attribute_dims = dict(height=3)
        if self.use_color:
            assert len(self.use_dim) >= 6
            if attribute_dims is None:
                attribute_dims = dict()
            # Colors occupy the last three loaded columns.
            attribute_dims.update(
                dict(
                    color=[
                        points.shape[1] - 3,
                        points.shape[1] - 2,
                        points.shape[1] - 1,
                    ]
                )
            )
        results["points"] = points
        return results
import numpy as np
import mmcv
from mmcv.parallel import DataContainer as DC
from mmdet.datasets.builder import PIPELINES
from mmdet.datasets.pipelines import to_tensor
@PIPELINES.register_module()
class MultiScaleDepthMapGenerator(object):
    """Project lidar points through every camera and rasterize sparse depth
    maps at one or more downsample factors (empty pixels hold -1)."""

    def __init__(self, downsample=1, max_depth=60):
        # Accept a scalar factor as shorthand for a one-element list.
        if isinstance(downsample, (list, tuple)):
            self.downsample = downsample
        else:
            self.downsample = [downsample]
        self.max_depth = max_depth

    def __call__(self, input_dict):
        """Write ``input_dict['gt_depth']`` from points and ``lidar2img``."""
        points = input_dict["points"][..., :3, None]
        per_scale_maps = [[] for _ in self.downsample]
        for view_idx, lidar2img in enumerate(input_dict["lidar2img"]):
            H, W = input_dict["img_shape"][view_idx][:2]
            pts_2d = (
                np.squeeze(lidar2img[:3, :3] @ points, axis=-1)
                + lidar2img[:3, 3]
            )
            # Perspective divide to pixel coordinates.
            pts_2d[:, :2] /= pts_2d[:, 2:3]
            cols = np.round(pts_2d[:, 0]).astype(np.int32)
            rows = np.round(pts_2d[:, 1]).astype(np.int32)
            depths = pts_2d[:, 2]
            # Keep points inside the image and in front of the camera.
            valid = np.logical_and.reduce(
                [
                    rows >= 0,
                    rows < H,
                    cols >= 0,
                    cols < W,
                    depths >= 0.1,
                    # depths <= self.max_depth,
                ]
            )
            rows, cols, depths = rows[valid], cols[valid], depths[valid]
            # Rasterize far points first so nearer ones overwrite them.
            order = np.argsort(depths)[::-1]
            rows, cols, depths = rows[order], cols[order], depths[order]
            depths = np.clip(depths, 0.1, self.max_depth)
            for scale_idx, factor in enumerate(self.downsample):
                h, w = (int(H / factor), int(W / factor))
                u = np.floor(cols / factor).astype(np.int32)
                v = np.floor(rows / factor).astype(np.int32)
                depth_map = np.ones([h, w], dtype=np.float32) * -1
                depth_map[v, u] = depths
                per_scale_maps[scale_idx].append(depth_map)
        input_dict["gt_depth"] = [np.stack(maps) for maps in per_scale_maps]
        return input_dict
@PIPELINES.register_module()
class NuScenesSparse4DAdaptor(object):
    """Adapt a loaded nuScenes sample into Sparse4D model inputs.

    Stacks per-camera projection matrices, derives image sizes and focal
    lengths, normalizes box yaw, and wraps ground-truth arrays and images
    into ``DataContainer`` tensors.
    """

    def __init__(self):
        # BUG FIX: this was spelled ``__init`` (missing trailing
        # underscores), so it was a plain method and never ran as the
        # constructor. Harmless here (empty body) but misleading.
        pass

    def __call__(self, input_dict):
        """Transform ``input_dict`` in place and return it.

        Args:
            input_dict (dict): Result dict with at least ``lidar2img``,
                ``img_shape``, ``lidar2global`` and ``img``; optionally
                ``cam_intrinsic``, ``instance_inds``, ``gt_bboxes_3d``
                and ``gt_labels_3d``.

        Returns:
            dict: The same dict with model-ready keys added/updated.
        """
        # Per-camera lidar->image matrices stacked into one float32 array.
        input_dict["projection_mat"] = np.float32(
            np.stack(input_dict["lidar2img"])
        )
        # img_shape rows start with (H, W); reverse to (W, H) per camera.
        input_dict["image_wh"] = np.ascontiguousarray(
            np.array(input_dict["img_shape"], dtype=np.float32)[:, :2][:, ::-1]
        )
        input_dict["T_global_inv"] = np.linalg.inv(input_dict["lidar2global"])
        input_dict["T_global"] = input_dict["lidar2global"]
        if "cam_intrinsic" in input_dict:
            input_dict["cam_intrinsic"] = np.float32(
                np.stack(input_dict["cam_intrinsic"])
            )
            # fx entry of each intrinsic matrix.
            input_dict["focal"] = input_dict["cam_intrinsic"][..., 0, 0]
        if "instance_inds" in input_dict:
            input_dict["instance_id"] = input_dict["instance_inds"]
        if "gt_bboxes_3d" in input_dict:
            # Wrap yaw (column 6) into [-pi, pi).
            input_dict["gt_bboxes_3d"][:, 6] = self.limit_period(
                input_dict["gt_bboxes_3d"][:, 6], offset=0.5, period=2 * np.pi
            )
            input_dict["gt_bboxes_3d"] = DC(
                to_tensor(input_dict["gt_bboxes_3d"]).float()
            )
        if "gt_labels_3d" in input_dict:
            input_dict["gt_labels_3d"] = DC(
                to_tensor(input_dict["gt_labels_3d"]).long()
            )
        # HWC -> CHW per camera, then stack to (num_cams, C, H, W).
        imgs = [img.transpose(2, 0, 1) for img in input_dict["img"]]
        imgs = np.ascontiguousarray(np.stack(imgs, axis=0))
        input_dict["img"] = DC(to_tensor(imgs), stack=True)
        return input_dict

    def limit_period(
        self, val: np.ndarray, offset: float = 0.5, period: float = np.pi
    ) -> np.ndarray:
        """Wrap ``val`` into ``[-offset * period, (1 - offset) * period)``."""
        limited_val = val - np.floor(val / period + offset) * period
        return limited_val
@PIPELINES.register_module()
class InstanceNameFilter(object):
    """Filter GT objects by their names.

    Args:
        classes (list[str]): List of class names to be kept for training.
    """

    def __init__(self, classes):
        self.classes = classes
        # Valid label indices are simply 0..len(classes)-1.
        self.labels = [idx for idx, _ in enumerate(classes)]

    def __call__(self, input_dict):
        """Filter objects whose label is outside the configured classes.

        Args:
            input_dict (dict): Result dict from loading pipeline.

        Returns:
            dict: Results after filtering; 'gt_bboxes_3d' and
                'gt_labels_3d' (and 'instance_inds' when present) are
                updated in the result dict.
        """
        labels = input_dict["gt_labels_3d"]
        keep = np.array(
            [lbl in self.labels for lbl in labels], dtype=np.bool_
        )
        input_dict["gt_bboxes_3d"] = input_dict["gt_bboxes_3d"][keep]
        input_dict["gt_labels_3d"] = labels[keep]
        if "instance_inds" in input_dict:
            input_dict["instance_inds"] = input_dict["instance_inds"][keep]
        return input_dict

    def __repr__(self):
        """str: Return a string that describes the module."""
        return f"{self.__class__.__name__}(classes={self.classes})"
@PIPELINES.register_module()
class CircleObjectRangeFilter(object):
    """Drop GT boxes whose BEV (xy) distance from the origin exceeds a
    per-class radius threshold.

    Args:
        class_dist_thred (list[float]): Distance threshold per class
            label; boxes with labels outside this list are dropped.
    """

    def __init__(
        self, class_dist_thred=[52.5] * 5 + [31.5] + [42] * 3 + [31.5]
    ):
        self.class_dist_thred = class_dist_thred

    def __call__(self, input_dict):
        boxes = input_dict["gt_bboxes_3d"]
        labels = input_dict["gt_labels_3d"]
        # Euclidean distance in the BEV (xy) plane.
        dist = np.sqrt(np.sum(boxes[:, :2] ** 2, axis=-1))
        keep = np.zeros(len(dist), dtype=bool)
        for cls_idx, radius in enumerate(self.class_dist_thred):
            keep = keep | ((labels == cls_idx) & (dist <= radius))
        input_dict["gt_bboxes_3d"] = boxes[keep]
        input_dict["gt_labels_3d"] = labels[keep]
        if "instance_inds" in input_dict:
            input_dict["instance_inds"] = input_dict["instance_inds"][keep]
        return input_dict

    def __repr__(self):
        """str: Return a string that describes the module."""
        return (
            f"{self.__class__.__name__}"
            f"(class_dist_thred={self.class_dist_thred})"
        )
@PIPELINES.register_module()
class NormalizeMultiviewImage(object):
    """Normalize every view of a multi-view image sample.

    Added key is "img_norm_cfg".

    Args:
        mean (sequence): Mean values of 3 channels.
        std (sequence): Std values of 3 channels.
        to_rgb (bool): Whether to convert the image from BGR to RGB,
            default is true.
    """

    def __init__(self, mean, std, to_rgb=True):
        self.mean = np.array(mean, dtype=np.float32)
        self.std = np.array(std, dtype=np.float32)
        self.to_rgb = to_rgb

    def __call__(self, results):
        """Normalize each image in ``results['img']``.

        Args:
            results (dict): Result dict from loading pipeline.

        Returns:
            dict: Normalized results; an 'img_norm_cfg' key is added to
                the result dict.
        """
        normalized = []
        for img in results["img"]:
            normalized.append(
                mmcv.imnormalize(img, self.mean, self.std, self.to_rgb)
            )
        results["img"] = normalized
        results["img_norm_cfg"] = dict(
            mean=self.mean, std=self.std, to_rgb=self.to_rgb
        )
        return results

    def __repr__(self):
        return (
            f"{self.__class__.__name__}"
            f"(mean={self.mean}, std={self.std}, to_rgb={self.to_rgb})"
        )
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment