Commit a9dc86e9 authored by lishj6

init_0905

parent 18eda5c1
*.pyc
*.npy
*.pth
MIT License
Copyright (c) 2024 Horizon Robotics
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
# Quick Start
### Set up a new virtual environment
```bash
virtualenv mm_sparse4d --python=python3.8
source mm_sparse4d/bin/activate
```
### Install packages using pip3
```bash
sparse4d_path="path/to/sparse4d"
cd ${sparse4d_path}
pip3 install --upgrade pip
pip3 install -r requirement.txt
```
### Compile the deformable_aggregation CUDA op
```bash
cd projects/mmdet3d_plugin/ops
python3 setup.py develop
cd ../../../
```
### Prepare the data
Download the [NuScenes dataset](https://www.nuscenes.org/nuscenes#download) and create symbolic links.
```bash
cd ${sparse4d_path}
mkdir data
ln -s path/to/nuscenes ./data/nuscenes
```
Pack the meta-information and labels of the dataset, and generate the required .pkl files.
```bash
pkl_path="data/nuscenes_anno_pkls"
mkdir -p ${pkl_path}
python3 tools/nuscenes_converter.py --version v1.0-mini --info_prefix ${pkl_path}/nuscenes-mini
python3 tools/nuscenes_converter.py --version v1.0-trainval,v1.0-test --info_prefix ${pkl_path}/nuscenes
```
### Generate anchors by K-means
```bash
python3 tools/anchor_generator.py --ann_file ${pkl_path}/nuscenes_infos_train.pkl
```
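Under the hood, the generator clusters ground-truth box parameters from the training pkl into `num_anchor` cluster centers (900 in the provided configs) and saves them as the `.npy` file referenced by the config (`nuscenes_kmeans900.npy`). A minimal sketch of the idea, assuming the info pkl stores per-sample boxes under a `gt_boxes` key (see `tools/anchor_generator.py` for the actual field names):
```python
import pickle
import numpy as np
from sklearn.cluster import KMeans

# Collect ground-truth boxes from the training info pkl; the key names
# here are assumptions for illustration only.
with open("data/nuscenes_anno_pkls/nuscenes_infos_train.pkl", "rb") as f:
    data = pickle.load(f)
boxes = np.concatenate([info["gt_boxes"] for info in data["infos"]])

# Cluster into 900 anchors (num_anchor in the config) and save them where
# the config's instance_bank expects to find them.
anchors = KMeans(n_clusters=900, n_init=10).fit(boxes).cluster_centers_
np.save("nuscenes_kmeans900.npy", anchors)
```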
### Download pre-trained weights
Download the required backbone [pre-trained weights](https://download.pytorch.org/models/resnet50-19c8e357.pth).
```bash
mkdir ckpt
wget https://download.pytorch.org/models/resnet50-19c8e357.pth -O ckpt/resnet50-19c8e357.pth
```
### Commence training and testing
```bash
# train
bash local_train.sh sparse4dv3_temporal_r50_1x8_bs6_256x704
# test
bash local_test.sh sparse4dv3_temporal_r50_1x8_bs6_256x704 path/to/checkpoint
```
For inference-related guidelines, please refer to the [tutorial/tutorial.ipynb](../tutorial/tutorial.ipynb).
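#!/usr/bin/env bash
# local_test.sh <config-name> <checkpoint> [extra test args]
# Dispatches to dist_test.sh when CUDA_VISIBLE_DEVICES lists more than one GPU.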
export PYTHONPATH=$PYTHONPATH:./
export CUDA_VISIBLE_DEVICES=3
export PORT=29532
gpus=(${CUDA_VISIBLE_DEVICES//,/ })
gpu_num=${#gpus[@]}
config=projects/configs/$1.py
checkpoint=$2
echo "number of gpus: "${gpu_num}
echo "config file: "${config}
echo "checkpoint: "${checkpoint}
if [ ${gpu_num} -gt 1 ]
then
bash ./tools/dist_test.sh \
${config} \
${checkpoint} \
${gpu_num} \
--eval bbox \
"${@:3}"
else
python ./tools/test.py \
${config} \
${checkpoint} \
--eval bbox \
"${@:3}"
fi
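#!/usr/bin/env bash
# local_train.sh <config-name>
# Dispatches to dist_train.sh when HIP_VISIBLE_DEVICES lists more than one GPU.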
export HIP_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
export PYTHONPATH=$PYTHONPATH:./
gpus=(${HIP_VISIBLE_DEVICES//,/ })
gpu_num=${#gpus[@]}
echo "number of gpus: "${gpu_num}
config=projects/configs/$1.py
if [ ${gpu_num} -gt 1 ]
then
bash ./tools/dist_train.sh \
${config} \
${gpu_num} \
--work-dir=work_dirs/$1
else
python ./tools/train.py \
${config}
fi
"""
mAP: 0.4647
mATE: 0.5403
mASE: 0.2623
mAOE: 0.4590
mAVE: 0.2198
mAAE: 0.2059
NDS: 0.5636
Eval time: 176.9s
Per-class results:
Object Class AP ATE ASE AOE AVE AAE
car 0.668 0.357 0.142 0.054 0.184 0.195
truck 0.394 0.528 0.187 0.052 0.163 0.210
bus 0.451 0.681 0.196 0.070 0.383 0.243
trailer 0.185 0.971 0.247 0.634 0.175 0.202
construction_vehicle 0.122 0.879 0.496 1.200 0.136 0.406
pedestrian 0.559 0.517 0.287 0.513 0.282 0.151
motorcycle 0.497 0.462 0.238 0.536 0.293 0.236
bicycle 0.426 0.441 0.257 0.951 0.142 0.004
traffic_cone 0.697 0.275 0.299 nan nan nan
barrier 0.648 0.292 0.275 0.122 nan nan
"""
"""
Per-class results:
Class   AMOTA   AMOTP   RECALL  MOTAR   GT      MOTA    MOTP    MT      ML      FAF     TP      FP      FN      IDS     FRAG    TID     LGD
bicycle 0.444 1.169 0.533 0.733 1993 0.389 0.566 53 57 19.3 1059 283 931 3 8 1.60 1.75
bus 0.559 1.175 0.626 0.824 2112 0.515 0.751 42 35 14.8 1321 233 790 1 20 1.13 1.95
car 0.678 0.755 0.733 0.819 58317 0.599 0.470 2053 1073 134.2 42626 7706 15565 126 295 0.76 1.03
motorcy 0.522 1.060 0.609 0.823 1977 0.497 0.564 50 38 15.7 1194 211 773 10 17 1.97 2.17
pedestr 0.548 1.059 0.652 0.791 25423 0.506 0.678 677 467 77.6 16274 3404 8854 295 225 1.33 1.85
trailer 0.136 1.603 0.383 0.403 2425 0.154 0.981 30 79 52.6 926 553 1496 3 13 1.49 2.64
truck 0.454 1.132 0.577 0.691 9650 0.399 0.594 210 214 45.7 5569 1723 4078 3 50 1.35 1.85
Aggregated results:
AMOTA 0.477
AMOTP 1.136
RECALL 0.588
MOTAR 0.726
GT 14556
MOTA 0.437
MOTP 0.658
MT 3115
ML 1963
FAF 51.4
TP 68969
FP 14113
FN 32487
IDS 441
FRAG 628
TID 1.37
LGD 1.89
"""
# ================ base config ===================
plugin = True
plugin_dir = "projects/mmdet3d_plugin/"
dist_params = dict(backend="nccl")
log_level = "INFO"
work_dir = None
total_batch_size = 48
num_gpus = 8
batch_size = total_batch_size // num_gpus
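# 28130 is the number of samples in the nuScenes v1.0-trainval training split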
num_iters_per_epoch = int(28130 // (num_gpus * batch_size))
num_epochs = 100
checkpoint_epoch_interval = 20
checkpoint_config = dict(
interval=num_iters_per_epoch * checkpoint_epoch_interval
)
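# With these defaults: batch_size = 48 // 8 = 6 per GPU,
# num_iters_per_epoch = 28130 // 48 = 586, so checkpoints are saved every
# 586 * 20 = 11720 iterations and training runs 586 * 100 = 58600 iterations.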
log_config = dict(
interval=1,
hooks=[
dict(type="TextLoggerHook", by_epoch=False),
dict(type="TensorboardLoggerHook"),
],
)
load_from = None
resume_from = None
workflow = [("train", 1)]
fp16 = dict(loss_scale=32.0)
input_shape = (704, 256)
tracking_test = True
tracking_threshold = 0.2
# ================== model ========================
class_names = [
"car",
"truck",
"construction_vehicle",
"bus",
"trailer",
"barrier",
"motorcycle",
"bicycle",
"pedestrian",
"traffic_cone",
]
num_classes = len(class_names)
embed_dims = 256
num_groups = 8
num_decoder = 6
num_single_frame_decoder = 1
use_deformable_func = True # mmdet3d_plugin/ops/setup.py needs to be executed
strides = [4, 8, 16, 32]
num_levels = len(strides)
num_depth_layers = 3
drop_out = 0.1
temporal = True
decouple_attn = True
with_quality_estimation = True
model = dict(
type="Sparse4D",
use_grid_mask=True,
use_deformable_func=use_deformable_func,
img_backbone=dict(
type="ResNet",
depth=50,
num_stages=4,
frozen_stages=-1,
norm_eval=False,
style="pytorch",
with_cp=True,
out_indices=(0, 1, 2, 3),
norm_cfg=dict(type="BN", requires_grad=True),
pretrained="ckpt/resnet50-19c8e357.pth",
),
img_neck=dict(
type="FPN",
num_outs=num_levels,
start_level=0,
out_channels=embed_dims,
add_extra_convs="on_output",
relu_before_extra_convs=True,
in_channels=[256, 512, 1024, 2048],
),
depth_branch=dict( # for auxiliary supervision only
type="DenseDepthNet",
embed_dims=embed_dims,
num_depth_layers=num_depth_layers,
loss_weight=0.2,
),
head=dict(
type="Sparse4DHead",
cls_threshold_to_reg=0.05,
decouple_attn=decouple_attn,
instance_bank=dict(
type="InstanceBank",
num_anchor=900,
embed_dims=embed_dims,
anchor="nuscenes_kmeans900.npy",
anchor_handler=dict(type="SparseBox3DKeyPointsGenerator"),
num_temp_instances=600 if temporal else -1,
confidence_decay=0.6,
feat_grad=False,
),
anchor_encoder=dict(
type="SparseBox3DEncoder",
vel_dims=3,
embed_dims=[128, 32, 32, 64] if decouple_attn else 256,
mode="cat" if decouple_attn else "add",
output_fc=not decouple_attn,
in_loops=1,
out_loops=4 if decouple_attn else 2,
),
num_single_frame_decoder=num_single_frame_decoder,
operation_order=(
[
"gnn",
"norm",
"deformable",
"ffn",
"norm",
"refine",
]
* num_single_frame_decoder
+ [
"temp_gnn",
"gnn",
"norm",
"deformable",
"ffn",
"norm",
"refine",
]
* (num_decoder - num_single_frame_decoder)
)[2:],
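# The trailing [2:] drops the leading "gnn"/"norm", so the first
# (single-frame) layer starts with deformable aggregation.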
temp_graph_model=dict(
type="MultiheadAttention",
embed_dims=embed_dims if not decouple_attn else embed_dims * 2,
num_heads=num_groups,
batch_first=True,
dropout=drop_out,
)
if temporal
else None,
graph_model=dict(
type="MultiheadAttention",
embed_dims=embed_dims if not decouple_attn else embed_dims * 2,
num_heads=num_groups,
batch_first=True,
dropout=drop_out,
),
norm_layer=dict(type="LN", normalized_shape=embed_dims),
ffn=dict(
type="AsymmetricFFN",
in_channels=embed_dims * 2,
pre_norm=dict(type="LN"),
embed_dims=embed_dims,
feedforward_channels=embed_dims * 4,
num_fcs=2,
ffn_drop=drop_out,
act_cfg=dict(type="ReLU", inplace=True),
),
deformable_model=dict(
type="DeformableFeatureAggregation",
embed_dims=embed_dims,
num_groups=num_groups,
num_levels=num_levels,
num_cams=6,
attn_drop=0.15,
use_deformable_func=use_deformable_func,
use_camera_embed=True,
residual_mode="cat",
kps_generator=dict(
type="SparseBox3DKeyPointsGenerator",
num_learnable_pts=6,
fix_scale=[
[0, 0, 0],
[0.45, 0, 0],
[-0.45, 0, 0],
[0, 0.45, 0],
[0, -0.45, 0],
[0, 0, 0.45],
[0, 0, -0.45],
],
),
),
refine_layer=dict(
type="SparseBox3DRefinementModule",
embed_dims=embed_dims,
num_cls=num_classes,
refine_yaw=True,
with_quality_estimation=with_quality_estimation,
),
sampler=dict(
type="SparseBox3DTarget",
num_dn_groups=5,
num_temp_dn_groups=3,
dn_noise_scale=[2.0] * 3 + [0.5] * 7,
max_dn_gt=32,
add_neg_dn=True,
cls_weight=2.0,
box_weight=0.25,
reg_weights=[2.0] * 3 + [0.5] * 3 + [0.0] * 4,
cls_wise_reg_weights={
class_names.index("traffic_cone"): [
2.0,
2.0,
2.0,
1.0,
1.0,
1.0,
0.0,
0.0,
1.0,
1.0,
],
},
),
loss_cls=dict(
type="FocalLoss",
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=2.0,
),
loss_reg=dict(
type="SparseBox3DLoss",
loss_box=dict(type="L1Loss", loss_weight=0.25),
loss_centerness=dict(type="CrossEntropyLoss", use_sigmoid=True),
loss_yawness=dict(type="GaussianFocalLoss"),
cls_allow_reverse=[class_names.index("barrier")],
),
decoder=dict(type="SparseBox3DDecoder"),
reg_weights=[2.0] * 3 + [1.0] * 7,
),
)
# ================== data ========================
dataset_type = "NuScenes3DDetTrackDataset"
data_root = "data/nuscenes/"
anno_root = "data/nuscenes_cam/"
anno_root = "data/nuscenes_anno_pkls/"
file_client_args = dict(backend="disk")
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True
)
train_pipeline = [
dict(type="LoadMultiViewImageFromFiles", to_float32=True),
dict(
type="LoadPointsFromFile",
coord_type="LIDAR",
load_dim=5,
use_dim=5,
file_client_args=file_client_args,
),
dict(type="ResizeCropFlipImage"),
dict(
type="MultiScaleDepthMapGenerator",
downsample=strides[:num_depth_layers],
),
dict(type="BBoxRotation"),
dict(type="PhotoMetricDistortionMultiViewImage"),
dict(type="NormalizeMultiviewImage", **img_norm_cfg),
dict(
type="CircleObjectRangeFilter",
class_dist_thred=[55] * len(class_names),
),
dict(type="InstanceNameFilter", classes=class_names),
dict(type="NuScenesSparse4DAdaptor"),
dict(
type="Collect",
keys=[
"img",
"timestamp",
"projection_mat",
"image_wh",
"gt_depth",
"focal",
"gt_bboxes_3d",
"gt_labels_3d",
],
meta_keys=["T_global", "T_global_inv", "timestamp", "instance_id"],
),
]
test_pipeline = [
dict(type="LoadMultiViewImageFromFiles", to_float32=True),
dict(type="ResizeCropFlipImage"),
dict(type="NormalizeMultiviewImage", **img_norm_cfg),
dict(type="NuScenesSparse4DAdaptor"),
dict(
type="Collect",
keys=[
"img",
"timestamp",
"projection_mat",
"image_wh",
],
meta_keys=["T_global", "T_global_inv", "timestamp"],
),
]
input_modality = dict(
use_lidar=False,
use_camera=True,
use_radar=False,
use_map=False,
use_external=False,
)
data_basic_config = dict(
type=dataset_type,
data_root=data_root,
classes=class_names,
modality=input_modality,
version="v1.0-trainval",
)
data_aug_conf = {
"resize_lim": (0.40, 0.47),
"final_dim": input_shape[::-1],
"bot_pct_lim": (0.0, 0.0),
"rot_lim": (-5.4, 5.4),
"H": 900,
"W": 1600,
"rand_flip": True,
"rot3d_range": [-0.3925, 0.3925],
}
data = dict(
samples_per_gpu=batch_size,
workers_per_gpu=batch_size,
train=dict(
**data_basic_config,
ann_file=anno_root + "nuscenes_infos_train.pkl",
pipeline=train_pipeline,
test_mode=False,
data_aug_conf=data_aug_conf,
with_seq_flag=True,
sequences_split_num=2,
keep_consistent_seq_aug=True,
),
val=dict(
**data_basic_config,
ann_file=anno_root + "nuscenes_infos_val.pkl",
pipeline=test_pipeline,
data_aug_conf=data_aug_conf,
test_mode=True,
tracking=tracking_test,
tracking_threshold=tracking_threshold,
),
test=dict(
**data_basic_config,
ann_file=anno_root + "nuscenes_infos_val.pkl",
pipeline=test_pipeline,
data_aug_conf=data_aug_conf,
test_mode=True,
tracking=tracking_test,
tracking_threshold=tracking_threshold,
),
)
# ================== training ========================
optimizer = dict(
type="AdamW",
lr=6e-4,
weight_decay=0.001,
paramwise_cfg=dict(
custom_keys={
"img_backbone": dict(lr_mult=0.5),
}
),
)
optimizer_config = dict(grad_clip=dict(max_norm=25, norm_type=2))
lr_config = dict(
policy="CosineAnnealing",
warmup="linear",
warmup_iters=500,
warmup_ratio=1.0 / 3,
min_lr_ratio=1e-3,
)
runner = dict(
type="IterBasedRunner",
max_iters=num_iters_per_epoch * num_epochs,
)
# ================== eval ========================
vis_pipeline = [
dict(type="LoadMultiViewImageFromFiles", to_float32=True),
dict(
type="Collect",
keys=["img"],
meta_keys=["timestamp", "lidar2img"],
),
]
evaluation = dict(
interval=num_iters_per_epoch * checkpoint_epoch_interval,
pipeline=vis_pipeline,
# out_dir="./vis", # for visualization
)
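# Quick sanity check for this config (a sketch; the filename follows the
# README's projects/configs/$1.py convention):
#   from mmcv import Config
#   cfg = Config.fromfile("projects/configs/sparse4dv3_temporal_r50_1x8_bs6_256x704.py")
#   assert cfg.model.head.instance_bank.num_anchor == 900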
from .datasets import *
from .models import *
from .apis import *
from .core.evaluation import *
from .train import custom_train_model
from .mmdet_train import custom_train_detector
# from .test import custom_multi_gpu_test
# ---------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
# ---------------------------------------------
# Modified by Zhiqi Li
# ---------------------------------------------
import random
import warnings
import numpy as np
import torch
import torch.distributed as dist
from mmcv.parallel import MMDataParallel, MMDistributedDataParallel
from mmcv.runner import (
HOOKS,
DistSamplerSeedHook,
EpochBasedRunner,
Fp16OptimizerHook,
OptimizerHook,
build_optimizer,
build_runner,
get_dist_info,
)
from mmcv.utils import build_from_cfg
from mmdet.core import EvalHook
from mmdet.datasets import build_dataset, replace_ImageToTensor
from mmdet.utils import get_root_logger
import time
import os.path as osp
from projects.mmdet3d_plugin.datasets.builder import build_dataloader
from projects.mmdet3d_plugin.core.evaluation.eval_hooks import (
CustomDistEvalHook,
)
from projects.mmdet3d_plugin.datasets import custom_build_dataset
def custom_train_detector(
model,
dataset,
cfg,
distributed=False,
validate=False,
timestamp=None,
meta=None,
):
logger = get_root_logger(cfg.log_level)
# prepare data loaders
dataset = dataset if isinstance(dataset, (list, tuple)) else [dataset]
# assert len(dataset) == 1
if "imgs_per_gpu" in cfg.data:
logger.warning(
'"imgs_per_gpu" is deprecated in MMDet V2.0. '
'Please use "samples_per_gpu" instead'
)
if "samples_per_gpu" in cfg.data:
logger.warning(
f'Got "imgs_per_gpu"={cfg.data.imgs_per_gpu} and '
f'"samples_per_gpu"={cfg.data.samples_per_gpu}, "imgs_per_gpu"'
f"={cfg.data.imgs_per_gpu} is used in this experiments"
)
else:
logger.warning(
'Automatically set "samples_per_gpu"="imgs_per_gpu"='
f"{cfg.data.imgs_per_gpu} in this experiments"
)
cfg.data.samples_per_gpu = cfg.data.imgs_per_gpu
if "runner" in cfg:
runner_type = cfg.runner["type"]
else:
runner_type = "EpochBasedRunner"
data_loaders = [
build_dataloader(
ds,
cfg.data.samples_per_gpu,
cfg.data.workers_per_gpu,
# cfg.gpus will be ignored if distributed
len(cfg.gpu_ids),
dist=distributed,
seed=cfg.seed,
nonshuffler_sampler=dict(
type="DistributedSampler"
), # dict(type='DistributedSampler'),
runner_type=runner_type,
)
for ds in dataset
]
# put model on gpus
if distributed:
find_unused_parameters = cfg.get("find_unused_parameters", False)
# Sets the `find_unused_parameters` parameter in
# torch.nn.parallel.DistributedDataParallel
model = MMDistributedDataParallel(
model.cuda(),
device_ids=[torch.cuda.current_device()],
broadcast_buffers=False,
find_unused_parameters=find_unused_parameters,
)
else:
model = MMDataParallel(
model.cuda(cfg.gpu_ids[0]), device_ids=cfg.gpu_ids
)
# build runner
optimizer = build_optimizer(model, cfg.optimizer)
if "runner" not in cfg:
cfg.runner = {
"type": "EpochBasedRunner",
"max_epochs": cfg.total_epochs,
}
warnings.warn(
"config is now expected to have a `runner` section, "
"please set `runner` in your config.",
UserWarning,
)
else:
if "total_epochs" in cfg:
assert cfg.total_epochs == cfg.runner.max_epochs
runner = build_runner(
cfg.runner,
default_args=dict(
model=model,
optimizer=optimizer,
work_dir=cfg.work_dir,
logger=logger,
meta=meta,
),
)
# an ugly workaround to make .log and .log.json filenames the same
runner.timestamp = timestamp
# fp16 setting
fp16_cfg = cfg.get("fp16", None)
if fp16_cfg is not None:
optimizer_config = Fp16OptimizerHook(
**cfg.optimizer_config, **fp16_cfg, distributed=distributed
)
elif distributed and "type" not in cfg.optimizer_config:
optimizer_config = OptimizerHook(**cfg.optimizer_config)
else:
optimizer_config = cfg.optimizer_config
# register hooks
runner.register_training_hooks(
cfg.lr_config,
optimizer_config,
cfg.checkpoint_config,
cfg.log_config,
cfg.get("momentum_config", None),
)
# register profiler hook
# trace_config = dict(type='tb_trace', dir_name='work_dir')
# profiler_config = dict(on_trace_ready=trace_config)
# runner.register_profiler_hook(profiler_config)
if distributed:
if isinstance(runner, EpochBasedRunner):
runner.register_hook(DistSamplerSeedHook())
# register eval hooks
if validate:
# batch_size > 1 in validation is not supported here
val_samples_per_gpu = cfg.data.val.pop("samples_per_gpu", 1)
if val_samples_per_gpu > 1:
assert False, "samples_per_gpu > 1 is not supported for validation"
# Replace 'ImageToTensor' to 'DefaultFormatBundle'
cfg.data.val.pipeline = replace_ImageToTensor(
cfg.data.val.pipeline
)
val_dataset = custom_build_dataset(cfg.data.val, dict(test_mode=True))
val_dataloader = build_dataloader(
val_dataset,
samples_per_gpu=val_samples_per_gpu,
workers_per_gpu=cfg.data.workers_per_gpu,
dist=distributed,
shuffle=False,
nonshuffler_sampler=dict(type="DistributedSampler"),
)
eval_cfg = cfg.get("evaluation", {})
eval_cfg["by_epoch"] = cfg.runner["type"] != "IterBasedRunner"
eval_cfg["jsonfile_prefix"] = osp.join(
"val",
cfg.work_dir,
time.ctime().replace(" ", "_").replace(":", "_"),
)
eval_hook = CustomDistEvalHook if distributed else EvalHook
runner.register_hook(eval_hook(val_dataloader, **eval_cfg))
# user-defined hooks
if cfg.get("custom_hooks", None):
custom_hooks = cfg.custom_hooks
assert isinstance(
custom_hooks, list
), f"custom_hooks expect list type, but got {type(custom_hooks)}"
for hook_cfg in cfg.custom_hooks:
assert isinstance(hook_cfg, dict), (
"Each item in custom_hooks expects dict type, but got "
f"{type(hook_cfg)}"
)
hook_cfg = hook_cfg.copy()
priority = hook_cfg.pop("priority", "NORMAL")
hook = build_from_cfg(hook_cfg, HOOKS)
runner.register_hook(hook, priority=priority)
if cfg.resume_from:
runner.resume(cfg.resume_from)
elif cfg.load_from:
runner.load_checkpoint(cfg.load_from)
runner.run(data_loaders, cfg.workflow)
# ---------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
# ---------------------------------------------
# Modified by Zhiqi Li
# ---------------------------------------------
import os.path as osp
import pickle
import shutil
import tempfile
import time
import mmcv
import torch
import torch.distributed as dist
from mmcv.image import tensor2imgs
from mmcv.runner import get_dist_info
from mmdet.core import encode_mask_results
import mmcv
import numpy as np
import pycocotools.mask as mask_util
def custom_encode_mask_results(mask_results):
"""Encode bitmap mask to RLE code. Semantic Masks only
Args:
mask_results (list | tuple[list]): bitmap mask results.
In mask scoring rcnn, mask_results is a tuple of (segm_results,
segm_cls_score).
Returns:
list | tuple: RLE encoded mask.
"""
cls_segms = mask_results
num_classes = len(cls_segms)
encoded_mask_results = []
for i in range(len(cls_segms)):
encoded_mask_results.append(
mask_util.encode(
np.array(
cls_segms[i][:, :, np.newaxis], order="F", dtype="uint8"
)
)[0]
) # encoded with RLE
return [encoded_mask_results]
def custom_multi_gpu_test(model, data_loader, tmpdir=None, gpu_collect=False):
"""Test model with multiple gpus.
This method tests the model with multiple GPUs and collects the results
under two different modes: GPU and CPU. With 'gpu_collect=True' it
encodes results as GPU tensors and uses GPU communication to collect
them; in CPU mode it saves per-GPU results to 'tmpdir' and lets the
rank 0 worker collect them.
Args:
model (nn.Module): Model to be tested.
data_loader (nn.Dataloader): Pytorch data loader.
tmpdir (str): Path of directory to save the temporary results from
different gpus under cpu mode.
gpu_collect (bool): Option to use either gpu or cpu to collect results.
Returns:
list: The prediction results.
"""
model.eval()
bbox_results = []
mask_results = []
dataset = data_loader.dataset
rank, world_size = get_dist_info()
if rank == 0:
prog_bar = mmcv.ProgressBar(len(dataset))
time.sleep(2) # This line can prevent deadlock problem in some cases.
have_mask = False
for i, data in enumerate(data_loader):
with torch.no_grad():
result = model(return_loss=False, rescale=True, **data)
# encode mask results
if isinstance(result, dict):
if "bbox_results" in result.keys():
bbox_result = result["bbox_results"]
batch_size = len(result["bbox_results"])
bbox_results.extend(bbox_result)
if (
"mask_results" in result.keys()
and result["mask_results"] is not None
):
mask_result = custom_encode_mask_results(
result["mask_results"]
)
mask_results.extend(mask_result)
have_mask = True
else:
batch_size = len(result)
bbox_results.extend(result)
if rank == 0:
for _ in range(batch_size * world_size):
prog_bar.update()
# collect results from all ranks
if gpu_collect:
bbox_results = collect_results_gpu(bbox_results, len(dataset))
if have_mask:
mask_results = collect_results_gpu(mask_results, len(dataset))
else:
mask_results = None
else:
bbox_results = collect_results_cpu(bbox_results, len(dataset), tmpdir)
tmpdir = tmpdir + "_mask" if tmpdir is not None else None
if have_mask:
mask_results = collect_results_cpu(
mask_results, len(dataset), tmpdir
)
else:
mask_results = None
if mask_results is None:
return bbox_results
return {"bbox_results": bbox_results, "mask_results": mask_results}
def collect_results_cpu(result_part, size, tmpdir=None):
rank, world_size = get_dist_info()
# create a tmp dir if it is not specified
if tmpdir is None:
MAX_LEN = 512
# 32 is whitespace
dir_tensor = torch.full(
(MAX_LEN,), 32, dtype=torch.uint8, device="cuda"
)
if rank == 0:
mmcv.mkdir_or_exist(".dist_test")
tmpdir = tempfile.mkdtemp(dir=".dist_test")
tmpdir = torch.tensor(
bytearray(tmpdir.encode()), dtype=torch.uint8, device="cuda"
)
dir_tensor[: len(tmpdir)] = tmpdir
dist.broadcast(dir_tensor, 0)
tmpdir = dir_tensor.cpu().numpy().tobytes().decode().rstrip()
else:
mmcv.mkdir_or_exist(tmpdir)
# dump the part result to the dir
mmcv.dump(result_part, osp.join(tmpdir, f"part_{rank}.pkl"))
dist.barrier()
# collect all parts
if rank != 0:
return None
else:
# load results of all parts from tmp dir
part_list = []
for i in range(world_size):
part_file = osp.join(tmpdir, f"part_{i}.pkl")
part_list.append(mmcv.load(part_file))
# sort the results
ordered_results = []
"""
bacause we change the sample of the evaluation stage to make sure that
each gpu will handle continuous sample,
"""
# for res in zip(*part_list):
for res in part_list:
ordered_results.extend(list(res))
# the dataloader may pad some samples
ordered_results = ordered_results[:size]
# remove tmp dir
shutil.rmtree(tmpdir)
return ordered_results
def collect_results_gpu(result_part, size):
    # fall back to CPU collection and return its result (the original dropped it)
    return collect_results_cpu(result_part, size)
# ---------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
# ---------------------------------------------
# Modified by Zhiqi Li
# ---------------------------------------------
from .mmdet_train import custom_train_detector
# from mmseg.apis import train_segmentor
from mmdet.apis import train_detector
def custom_train_model(
model,
dataset,
cfg,
distributed=False,
validate=False,
timestamp=None,
meta=None,
):
"""A function wrapper for launching model training according to cfg.
Because we need different eval_hook in runner. Should be deprecated in the
future.
"""
if cfg.model.type in ["EncoderDecoder3D"]:
assert False, "EncoderDecoder3D (segmentor) training is not supported here"
else:
custom_train_detector(
model,
dataset,
cfg,
distributed=distributed,
validate=validate,
timestamp=timestamp,
meta=meta,
)
def train_model(
model,
dataset,
cfg,
distributed=False,
validate=False,
timestamp=None,
meta=None,
):
"""A function wrapper for launching model training according to cfg.
Because we need different eval_hook in runner. Should be deprecated in the
future.
"""
train_detector(
model,
dataset,
cfg,
distributed=distributed,
validate=validate,
timestamp=timestamp,
meta=meta,
)
X, Y, Z, W, L, H, SIN_YAW, COS_YAW, VX, VY, VZ = list(range(11)) # undecoded
CNS, YNS = 0, 1  # centerness and yawness indices in quality
YAW = 6 # decoded
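# Usage sketch: for an (N, 11) undecoded box tensor `boxes`,
# centers = boxes[:, [X, Y, Z]] and velocities = boxes[:, [VX, VY, VZ]];
# a quality tensor is indexed as quality[..., CNS] / quality[..., YNS].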
from .eval_hooks import CustomDistEvalHook
# Note: Considering that MMCV's EvalHook updated its interface in V1.3.16,
# in order to avoid strong version dependency, we did not directly
# inherit EvalHook but BaseDistEvalHook.
import bisect
import os.path as osp
import mmcv
import torch.distributed as dist
from mmcv.runner import DistEvalHook as BaseDistEvalHook
from mmcv.runner import EvalHook as BaseEvalHook
from torch.nn.modules.batchnorm import _BatchNorm
from mmdet.core.evaluation.eval_hooks import DistEvalHook
def _calc_dynamic_intervals(start_interval, dynamic_interval_list):
assert mmcv.is_list_of(dynamic_interval_list, tuple)
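# e.g. start_interval=1, dynamic_interval_list=[(8000, 2)]
# -> milestones [0, 8000], intervals [1, 2]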
dynamic_milestones = [0]
dynamic_milestones.extend(
[dynamic_interval[0] for dynamic_interval in dynamic_interval_list]
)
dynamic_intervals = [start_interval]
dynamic_intervals.extend(
[dynamic_interval[1] for dynamic_interval in dynamic_interval_list]
)
return dynamic_milestones, dynamic_intervals
class CustomDistEvalHook(BaseDistEvalHook):
def __init__(self, *args, dynamic_intervals=None, **kwargs):
super(CustomDistEvalHook, self).__init__(*args, **kwargs)
self.use_dynamic_intervals = dynamic_intervals is not None
if self.use_dynamic_intervals:
(
self.dynamic_milestones,
self.dynamic_intervals,
) = _calc_dynamic_intervals(self.interval, dynamic_intervals)
def _decide_interval(self, runner):
if self.use_dynamic_intervals:
progress = runner.epoch if self.by_epoch else runner.iter
step = bisect.bisect(self.dynamic_milestones, (progress + 1))
# Dynamically modify the evaluation interval
self.interval = self.dynamic_intervals[step - 1]
def before_train_epoch(self, runner):
"""Evaluate the model only at the start of training by epoch."""
self._decide_interval(runner)
super().before_train_epoch(runner)
def before_train_iter(self, runner):
self._decide_interval(runner)
super().before_train_iter(runner)
def _do_evaluate(self, runner):
"""perform evaluation and save ckpt."""
# Synchronization of BatchNorm's buffer (running_mean
# and running_var) is not supported in the DDP of pytorch,
# which may cause the inconsistent performance of models in
# different ranks, so we broadcast BatchNorm's buffers
# of rank 0 to other ranks to avoid this.
if self.broadcast_bn_buffer:
model = runner.model
for name, module in model.named_modules():
if (
isinstance(module, _BatchNorm)
and module.track_running_stats
):
dist.broadcast(module.running_var, 0)
dist.broadcast(module.running_mean, 0)
if not self._should_evaluate(runner):
return
tmpdir = self.tmpdir
if tmpdir is None:
tmpdir = osp.join(runner.work_dir, ".eval_hook")
from projects.mmdet3d_plugin.apis.test import (
custom_multi_gpu_test,
) # to avoid a circular import
results = custom_multi_gpu_test(
runner.model,
self.dataloader,
tmpdir=tmpdir,
gpu_collect=self.gpu_collect,
)
if runner.rank == 0:
print("\n")
runner.log_buffer.output["eval_iter_num"] = len(self.dataloader)
key_score = self.evaluate(runner, results)
if self.save_best:
self._save_ckpt(runner, key_score)
from .nuscenes_3d_det_track_dataset import NuScenes3DDetTrackDataset
from .builder import *
from .pipelines import *
from .samplers import *
__all__ = [
"NuScenes3DDetTrackDataset",
"custom_build_dataset",
]
import copy
import platform
import random
from functools import partial
import numpy as np
from mmcv.parallel import collate
from mmcv.runner import get_dist_info
from mmcv.utils import Registry, build_from_cfg
from torch.utils.data import DataLoader
from mmdet.datasets.samplers import GroupSampler
from projects.mmdet3d_plugin.datasets.samplers import (
GroupInBatchSampler,
DistributedGroupSampler,
DistributedSampler,
build_sampler
)
def build_dataloader(
dataset,
samples_per_gpu,
workers_per_gpu,
num_gpus=1,
dist=True,
shuffle=True,
seed=None,
shuffler_sampler=None,
nonshuffler_sampler=None,
runner_type="EpochBasedRunner",
**kwargs
):
"""Build PyTorch DataLoader.
In distributed training, each GPU/process has a dataloader.
In non-distributed training, there is only one dataloader for all GPUs.
Args:
dataset (Dataset): A PyTorch dataset.
samples_per_gpu (int): Number of training samples on each GPU, i.e.,
batch size of each GPU.
workers_per_gpu (int): How many subprocesses to use for data loading
for each GPU.
num_gpus (int): Number of GPUs. Only used in non-distributed training.
dist (bool): Distributed training/test or not. Default: True.
shuffle (bool): Whether to shuffle the data at every epoch.
Default: True.
kwargs: any keyword argument to be used to initialize DataLoader
Returns:
DataLoader: A PyTorch dataloader.
"""
rank, world_size = get_dist_info()
batch_sampler = None
if runner_type == 'IterBasedRunner':
print("Use GroupInBatchSampler !!!")
batch_sampler = GroupInBatchSampler(
dataset,
samples_per_gpu,
world_size,
rank,
seed=seed,
)
batch_size = 1
sampler = None
num_workers = workers_per_gpu
elif dist:
# DistributedGroupSampler will definitely shuffle the data to satisfy
# that images on each GPU are in the same group
if shuffle:
print("Use DistributedGroupSampler !!!")
sampler = build_sampler(
shuffler_sampler
if shuffler_sampler is not None
else dict(type="DistributedGroupSampler"),
dict(
dataset=dataset,
samples_per_gpu=samples_per_gpu,
num_replicas=world_size,
rank=rank,
seed=seed,
),
)
else:
sampler = build_sampler(
nonshuffler_sampler
if nonshuffler_sampler is not None
else dict(type="DistributedSampler"),
dict(
dataset=dataset,
num_replicas=world_size,
rank=rank,
shuffle=shuffle,
seed=seed,
),
)
batch_size = samples_per_gpu
num_workers = workers_per_gpu
else:
# assert False, 'not support in bevformer'
print("WARNING!!!!, Only can be used for obtain inference speed!!!!")
sampler = GroupSampler(dataset, samples_per_gpu) if shuffle else None
batch_size = num_gpus * samples_per_gpu
num_workers = num_gpus * workers_per_gpu
init_fn = (
partial(worker_init_fn, num_workers=num_workers, rank=rank, seed=seed)
if seed is not None
else None
)
data_loader = DataLoader(
dataset,
batch_size=batch_size,
sampler=sampler,
batch_sampler=batch_sampler,
num_workers=num_workers,
collate_fn=partial(collate, samples_per_gpu=samples_per_gpu),
pin_memory=False,
worker_init_fn=init_fn,
**kwargs
)
return data_loader
def worker_init_fn(worker_id, num_workers, rank, seed):
# The seed of each worker equals
# num_workers * rank + worker_id + user_seed
worker_seed = num_workers * rank + worker_id + seed
np.random.seed(worker_seed)
random.seed(worker_seed)
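if __name__ == "__main__":
    # Minimal usage sketch (illustration only, not part of the original repo):
    # exercise the non-distributed branch with a toy dataset. Real callers
    # pass datasets built by custom_build_dataset, usually with dist=True.
    from torch.utils.data import Dataset

    class _ToyDataset(Dataset):
        flag = np.zeros(8, dtype=np.uint8)  # group ids used by group samplers

        def __len__(self):
            return 8

        def __getitem__(self, idx):
            return {"img": np.zeros((3, 4, 4), dtype=np.float32)}

    loader = build_dataloader(
        _ToyDataset(), samples_per_gpu=2, workers_per_gpu=0,
        dist=False, shuffle=False,
    )
    print(len(loader))  # 4 batches of 2 samples each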
# Copyright (c) OpenMMLab. All rights reserved.
import platform
from mmcv.utils import Registry, build_from_cfg
from mmdet.datasets import DATASETS
from mmdet.datasets.builder import _concat_dataset
if platform.system() != "Windows":
# https://github.com/pytorch/pytorch/issues/973
import resource
rlimit = resource.getrlimit(resource.RLIMIT_NOFILE)
base_soft_limit = rlimit[0]
hard_limit = rlimit[1]
soft_limit = min(max(4096, base_soft_limit), hard_limit)
resource.setrlimit(resource.RLIMIT_NOFILE, (soft_limit, hard_limit))
OBJECTSAMPLERS = Registry("Object sampler")
def custom_build_dataset(cfg, default_args=None):
try:
from mmdet3d.datasets.dataset_wrappers import CBGSDataset
except ImportError:
CBGSDataset = None
from mmdet.datasets.dataset_wrappers import (
ClassBalancedDataset,
ConcatDataset,
RepeatDataset,
)
if isinstance(cfg, (list, tuple)):
dataset = ConcatDataset(
[custom_build_dataset(c, default_args) for c in cfg]
)
elif cfg["type"] == "ConcatDataset":
dataset = ConcatDataset(
[custom_build_dataset(c, default_args) for c in cfg["datasets"]],
cfg.get("separate_eval", True),
)
elif cfg["type"] == "RepeatDataset":
dataset = RepeatDataset(
custom_build_dataset(cfg["dataset"], default_args), cfg["times"]
)
elif cfg["type"] == "ClassBalancedDataset":
dataset = ClassBalancedDataset(
custom_build_dataset(cfg["dataset"], default_args),
cfg["oversample_thr"],
)
elif cfg["type"] == "CBGSDataset":
dataset = CBGSDataset(
custom_build_dataset(cfg["dataset"], default_args)
)
elif isinstance(cfg.get("ann_file"), (list, tuple)):
dataset = _concat_dataset(cfg, default_args)
else:
dataset = build_from_cfg(cfg, DATASETS, default_args)
return dataset
from .transform import (
InstanceNameFilter,
CircleObjectRangeFilter,
NormalizeMultiviewImage,
NuScenesSparse4DAdaptor,
MultiScaleDepthMapGenerator,
)
from .augment import (
ResizeCropFlipImage,
BBoxRotation,
PhotoMetricDistortionMultiViewImage,
)
from .loading import LoadMultiViewImageFromFiles, LoadPointsFromFile
__all__ = [
"InstanceNameFilter",
"ResizeCropFlipImage",
"BBoxRotation",
"CircleObjectRangeFilter",
"MultiScaleDepthMapGenerator",
"NormalizeMultiviewImage",
"PhotoMetricDistortionMultiViewImage",
"NuScenesSparse4DAdaptor",
"LoadMultiViewImageFromFiles",
"LoadPointsFromFile",
]
import torch
import numpy as np
from numpy import random
import mmcv
from mmdet.datasets.builder import PIPELINES
from PIL import Image
@PIPELINES.register_module()
class ResizeCropFlipImage(object):
def __call__(self, results):
aug_config = results.get("aug_config")
if aug_config is None:
return results
imgs = results["img"]
N = len(imgs)
new_imgs = []
for i in range(N):
img, mat = self._img_transform(
np.uint8(imgs[i]), aug_config,
)
new_imgs.append(np.array(img).astype(np.float32))
results["lidar2img"][i] = mat @ results["lidar2img"][i]
if "cam_intrinsic" in results:
results["cam_intrinsic"][i][:3, :3] *= aug_config["resize"]
# results["cam_intrinsic"][i][:3, :3] = (
# mat[:3, :3] @ results["cam_intrinsic"][i][:3, :3]
# )
results["img"] = new_imgs
results["img_shape"] = [x.shape[:2] for x in new_imgs]
return results
def _img_transform(self, img, aug_configs):
H, W = img.shape[:2]
resize = aug_configs.get("resize", 1)
resize_dims = (int(W * resize), int(H * resize))
crop = aug_configs.get("crop", [0, 0, *resize_dims])
flip = aug_configs.get("flip", False)
rotate = aug_configs.get("rotate", 0)
origin_dtype = img.dtype
if origin_dtype != np.uint8:
min_value = img.min()
max_value = img.max()
scale = 255 / (max_value - min_value)
img = (img - min_value) * scale
img = np.uint8(img)
img = Image.fromarray(img)
img = img.resize(resize_dims).crop(crop)
if flip:
img = img.transpose(method=Image.FLIP_LEFT_RIGHT)
img = img.rotate(rotate)
img = np.array(img).astype(np.float32)
if origin_dtype != np.uint8:
img = img.astype(np.float32)
img = img / scale + min_value
transform_matrix = np.eye(3)
transform_matrix[:2, :2] *= resize
transform_matrix[:2, 2] -= np.array(crop[:2])
if flip:
flip_matrix = np.array(
[[-1, 0, crop[2] - crop[0]], [0, 1, 0], [0, 0, 1]]
)
transform_matrix = flip_matrix @ transform_matrix
rotate = rotate / 180 * np.pi
rot_matrix = np.array(
[
[np.cos(rotate), np.sin(rotate), 0],
[-np.sin(rotate), np.cos(rotate), 0],
[0, 0, 1],
]
)
rot_center = np.array([crop[2] - crop[0], crop[3] - crop[1]]) / 2
rot_matrix[:2, 2] = -rot_matrix[:2, :2] @ rot_center + rot_center
transform_matrix = rot_matrix @ transform_matrix
extend_matrix = np.eye(4)
extend_matrix[:3, :3] = transform_matrix
return img, extend_matrix
@PIPELINES.register_module()
class BBoxRotation(object):
def __call__(self, results):
angle = results["aug_config"]["rotate_3d"]
rot_cos = np.cos(angle)
rot_sin = np.sin(angle)
rot_mat = np.array(
[
[rot_cos, -rot_sin, 0, 0],
[rot_sin, rot_cos, 0, 0],
[0, 0, 1, 0],
[0, 0, 0, 1],
]
)
rot_mat_inv = np.linalg.inv(rot_mat)
num_view = len(results["lidar2img"])
for view in range(num_view):
results["lidar2img"][view] = (
results["lidar2img"][view] @ rot_mat_inv
)
if "lidar2global" in results:
results["lidar2global"] = results["lidar2global"] @ rot_mat_inv
if "gt_bboxes_3d" in results:
results["gt_bboxes_3d"] = self.box_rotate(
results["gt_bboxes_3d"], angle
)
return results
@staticmethod
def box_rotate(bbox_3d, angle):
rot_cos = np.cos(angle)
rot_sin = np.sin(angle)
rot_mat_T = np.array(
[[rot_cos, rot_sin, 0], [-rot_sin, rot_cos, 0], [0, 0, 1]]
)
bbox_3d[:, :3] = bbox_3d[:, :3] @ rot_mat_T
bbox_3d[:, 6] += angle
if bbox_3d.shape[-1] > 7:
vel_dims = bbox_3d[:, 7:].shape[-1]
bbox_3d[:, 7:] = bbox_3d[:, 7:] @ rot_mat_T[:vel_dims, :vel_dims]
return bbox_3d
@PIPELINES.register_module()
class PhotoMetricDistortionMultiViewImage:
"""Apply photometric distortion to image sequentially, every transformation
is applied with a probability of 0.5. The position of random contrast is in
second or second to last.
1. random brightness
2. random contrast (mode 0)
3. convert color from BGR to HSV
4. random saturation
5. random hue
6. convert color from HSV to BGR
7. random contrast (mode 1)
8. randomly swap channels
Args:
brightness_delta (int): delta of brightness.
contrast_range (tuple): range of contrast.
saturation_range (tuple): range of saturation.
hue_delta (int): delta of hue.
"""
def __init__(
self,
brightness_delta=32,
contrast_range=(0.5, 1.5),
saturation_range=(0.5, 1.5),
hue_delta=18,
):
self.brightness_delta = brightness_delta
self.contrast_lower, self.contrast_upper = contrast_range
self.saturation_lower, self.saturation_upper = saturation_range
self.hue_delta = hue_delta
def __call__(self, results):
"""Call function to perform photometric distortion on images.
Args:
results (dict): Result dict from loading pipeline.
Returns:
dict: Result dict with images distorted.
"""
imgs = results["img"]
new_imgs = []
for img in imgs:
assert img.dtype == np.float32, (
"PhotoMetricDistortion needs the input image of dtype np.float32,"
' please set "to_float32=True" in "LoadImageFromFile" pipeline'
)
# random brightness
if random.randint(2):
delta = random.uniform(
-self.brightness_delta, self.brightness_delta
)
img += delta
# mode == 0 --> do random contrast first
# mode == 1 --> do random contrast last
mode = random.randint(2)
if mode == 1:
if random.randint(2):
alpha = random.uniform(
self.contrast_lower, self.contrast_upper
)
img *= alpha
# convert color from BGR to HSV
img = mmcv.bgr2hsv(img)
# random saturation
if random.randint(2):
img[..., 1] *= random.uniform(
self.saturation_lower, self.saturation_upper
)
# random hue
if random.randint(2):
img[..., 0] += random.uniform(-self.hue_delta, self.hue_delta)
img[..., 0][img[..., 0] > 360] -= 360
img[..., 0][img[..., 0] < 0] += 360
# convert color from HSV to BGR
img = mmcv.hsv2bgr(img)
# random contrast
if mode == 0:
if random.randint(2):
alpha = random.uniform(
self.contrast_lower, self.contrast_upper
)
img *= alpha
# randomly swap channels
if random.randint(2):
img = img[..., random.permutation(3)]
new_imgs.append(img)
results["img"] = new_imgs
return results
def __repr__(self):
repr_str = self.__class__.__name__
repr_str += f"(\nbrightness_delta={self.brightness_delta},\n"
repr_str += "contrast_range="
repr_str += f"{(self.contrast_lower, self.contrast_upper)},\n"
repr_str += "saturation_range="
repr_str += f"{(self.saturation_lower, self.saturation_upper)},\n"
repr_str += f"hue_delta={self.hue_delta})"
return repr_str