Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
deformable_aggregation_kernel
Commits
7c0c60e3
Commit
7c0c60e3
authored
Feb 27, 2026
by
change3n8
Browse files
init
parents
Changes
52
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
2778 additions
and
0 deletions
+2778
-0
.gitignore
.gitignore
+4
-0
LICENSE
LICENSE
+21
-0
README.md
README.md
+8
-0
demo/forward.py
demo/forward.py
+67
-0
projects/configs/sparse4dv3_temporal_r50_1x8_bs6_256x704.py
projects/configs/sparse4dv3_temporal_r50_1x8_bs6_256x704.py
+457
-0
projects/mmdet3d_plugin/__init__.py
projects/mmdet3d_plugin/__init__.py
+4
-0
projects/mmdet3d_plugin/apis/__init__.py
projects/mmdet3d_plugin/apis/__init__.py
+4
-0
projects/mmdet3d_plugin/apis/mmdet_train.py
projects/mmdet3d_plugin/apis/mmdet_train.py
+219
-0
projects/mmdet3d_plugin/apis/test.py
projects/mmdet3d_plugin/apis/test.py
+171
-0
projects/mmdet3d_plugin/apis/train.py
projects/mmdet3d_plugin/apis/train.py
+62
-0
projects/mmdet3d_plugin/core/box3d.py
projects/mmdet3d_plugin/core/box3d.py
+3
-0
projects/mmdet3d_plugin/core/evaluation/__init__.py
projects/mmdet3d_plugin/core/evaluation/__init__.py
+2
-0
projects/mmdet3d_plugin/core/evaluation/eval_hooks.py
projects/mmdet3d_plugin/core/evaluation/eval_hooks.py
+97
-0
projects/mmdet3d_plugin/datasets/__init__.py
projects/mmdet3d_plugin/datasets/__init__.py
+9
-0
projects/mmdet3d_plugin/datasets/builder.py
projects/mmdet3d_plugin/datasets/builder.py
+192
-0
projects/mmdet3d_plugin/datasets/nuscenes_3d_det_track_dataset.py
.../mmdet3d_plugin/datasets/nuscenes_3d_det_track_dataset.py
+789
-0
projects/mmdet3d_plugin/datasets/pipelines/__init__.py
projects/mmdet3d_plugin/datasets/pipelines/__init__.py
+26
-0
projects/mmdet3d_plugin/datasets/pipelines/augment.py
projects/mmdet3d_plugin/datasets/pipelines/augment.py
+233
-0
projects/mmdet3d_plugin/datasets/pipelines/loading.py
projects/mmdet3d_plugin/datasets/pipelines/loading.py
+188
-0
projects/mmdet3d_plugin/datasets/pipelines/transform.py
projects/mmdet3d_plugin/datasets/pipelines/transform.py
+222
-0
No files found.
.gitignore
0 → 100644
View file @
7c0c60e3
*.pyc
*.npy
*.pth
\ No newline at end of file
LICENSE
0 → 100644
View file @
7c0c60e3
MIT License
Copyright (c) 2024 Horizon Robotics
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
README.md
0 → 100644
View file @
7c0c60e3
# build
cd projects/mmdet3d_plugin/ops/
python3 -m pip install -e . --no-build-isolation
# run
cd demo
python3 forward.py
\ No newline at end of file
demo/forward.py
0 → 100644
View file @
7c0c60e3
import torch
import deformable_aggregation_ext.deformable_aggregation_ext as da

# Smoke-test problem sizes.
bs = 6
cam = 6
num_feat = 89760
c = 256
scale = 4
group = 8
anchor = 1220
pts = 13
H, W = 44, 85  # H*W = 3740, 3740 * 6 * 4 = 89760

# [cam, scale, 2]: every camera/level uses the same H x W map here.
spatial_shape = torch.tensor([[[H, W]] * scale] * cam, dtype=torch.int32)

# Flat start offset of each (camera, level) feature map inside feature_maps.
scale_start_index = torch.zeros((cam, scale), dtype=torch.int32)
feat_area_per_map = H * W  # 3740
for i in range(cam):
    for s in range(scale):
        idx = i * scale + s
        scale_start_index[i, s] = idx * feat_area_per_map

# Verify that the last offset plus one map's area lands exactly at num_feat.
last_offset = scale_start_index[cam - 1, scale - 1]
assert last_offset + feat_area_per_map == num_feat, "Total feature size mismatch!"

# Random inputs for the kernel under test.
feature_maps = torch.rand(bs, num_feat, c).float().cuda()
sampling_location = torch.rand(bs, anchor, pts, cam, 2).float().cuda()
weights = torch.rand(bs, anchor, pts, cam, scale, group).float().cuda()

# Run the optimized kernel and the reference implementation on identical inputs.
out = da.deformable_aggregation_forward(
    feature_maps,
    spatial_shape.cuda(),
    scale_start_index.cuda(),
    sampling_location,
    weights,
)
new_out = da.deformable_aggregation_forward_ref(
    feature_maps,
    spatial_shape.cuda(),
    scale_start_index.cuda(),
    sampling_location,
    weights,
)
# -------- 计算整体误差 --------
def compare_tensors(t1, t2, name="", rtol=1e-4, atol=1e-6):
    """Report how far apart two tensors are.

    Prints the maximum absolute element-wise difference and whether the two
    tensors agree within (rtol, atol), then returns both values.

    Returns:
        tuple: (max_abs_diff: float, is_close: bool)
    """
    abs_diff = torch.abs(t1 - t2)
    max_abs_diff = abs_diff.max().item()
    is_close = torch.allclose(t1, t2, rtol=rtol, atol=atol)
    print(f"{name}: max_abs_diff={max_abs_diff}, allclose={is_close}")
    return max_abs_diff, is_close
# Compare the optimized kernel's output against the reference implementation.
compare_tensors(out, new_out, "OUTPUT")
print("finish")
projects/configs/sparse4dv3_temporal_r50_1x8_bs6_256x704.py
0 → 100644
View file @
7c0c60e3
"""
mAP: 0.4647
mATE: 0.5403
mASE: 0.2623
mAOE: 0.4590
mAVE: 0.2198
mAAE: 0.2059
NDS: 0.5636
Eval time: 176.9s
Per-class results:
Object Class AP ATE ASE AOE AVE AAE
car 0.668 0.357 0.142 0.054 0.184 0.195
truck 0.394 0.528 0.187 0.052 0.163 0.210
bus 0.451 0.681 0.196 0.070 0.383 0.243
trailer 0.185 0.971 0.247 0.634 0.175 0.202
construction_vehicle 0.122 0.879 0.496 1.200 0.136 0.406
pedestrian 0.559 0.517 0.287 0.513 0.282 0.151
motorcycle 0.497 0.462 0.238 0.536 0.293 0.236
bicycle 0.426 0.441 0.257 0.951 0.142 0.004
traffic_cone 0.697 0.275 0.299 nan nan nan
barrier 0.648 0.292 0.275 0.122 nan nan
"""
"""
Per-class results:
AMOTA AMOTP RECALL MOTAR GT MOTA MOTP MT ML FAF TP FP FN IDS FRAG TID LGD
bicycle 0.444 1.169 0.533 0.733 1993 0.389 0.566 53 57 19.3 1059 283 931 3 8 1.60 1.75
bus 0.559 1.175 0.626 0.824 2112 0.515 0.751 42 35 14.8 1321 233 790 1 20 1.13 1.95
car 0.678 0.755 0.733 0.819 58317 0.599 0.470 2053 1073 134.2 42626 7706 15565 126 295 0.76 1.03
motorcy 0.522 1.060 0.609 0.823 1977 0.497 0.564 50 38 15.7 1194 211 773 10 17 1.97 2.17
pedestr 0.548 1.059 0.652 0.791 25423 0.506 0.678 677 467 77.6 16274 3404 8854 295 225 1.33 1.85
trailer 0.136 1.603 0.383 0.403 2425 0.154 0.981 30 79 52.6 926 553 1496 3 13 1.49 2.64
truck 0.454 1.132 0.577 0.691 9650 0.399 0.594 210 214 45.7 5569 1723 4078 3 50 1.35 1.85
Aggregated results:
AMOTA 0.477
AMOTP 1.136
RECALL 0.588
MOTAR 0.726
GT 14556
MOTA 0.437
MOTP 0.658
MT 3115
ML 1963
FAF 51.4
TP 68969
FP 14113
FN 32487
IDS 441
FRAG 628
TID 1.37
LGD 1.89
"""
# ================ base config ===================
plugin = True
plugin_dir = "projects/mmdet3d_plugin/"
dist_params = dict(backend="nccl")
log_level = "INFO"
work_dir = None

# Global batch of 48 split over 8 GPUs -> 6 samples per GPU (matches the
# "1x8_bs6" in the config name).
total_batch_size = 48
num_gpus = 8
batch_size = total_batch_size // num_gpus
# 28130 is presumably the number of nuScenes train samples — TODO confirm.
num_iters_per_epoch = int(28130 // (num_gpus * batch_size))
num_epochs = 100
checkpoint_epoch_interval = 20

# Checkpoint every `checkpoint_epoch_interval` epochs (iter-based runner).
checkpoint_config = dict(
    interval=num_iters_per_epoch * checkpoint_epoch_interval
)
log_config = dict(
    interval=51,
    hooks=[
        dict(type="TextLoggerHook", by_epoch=False),
        dict(type="TensorboardLoggerHook"),
    ],
)
load_from = None
resume_from = None
workflow = [("train", 1)]
# Mixed-precision training with a fixed loss scale.
fp16 = dict(loss_scale=32.0)
input_shape = (704, 256)  # (W, H) of the network input

tracking_test = True
tracking_threshold = 0.2
# ================== model ========================
class_names = [
    "car",
    "truck",
    "construction_vehicle",
    "bus",
    "trailer",
    "barrier",
    "motorcycle",
    "bicycle",
    "pedestrian",
    "traffic_cone",
]
num_classes = len(class_names)
embed_dims = 256
num_groups = 8
num_decoder = 6
num_single_frame_decoder = 1
# Custom CUDA op; mmdet3d_plugin/ops/setup.py needs to be executed
use_deformable_func = True
strides = [4, 8, 16, 32]
num_levels = len(strides)
num_depth_layers = 3
drop_out = 0.1
temporal = True
decouple_attn = True
with_quality_estimation = True

model = dict(
    type="Sparse4D",
    use_grid_mask=True,
    use_deformable_func=use_deformable_func,
    img_backbone=dict(
        type="ResNet",
        depth=50,
        num_stages=4,
        frozen_stages=-1,
        norm_eval=False,
        style="pytorch",
        with_cp=True,
        out_indices=(0, 1, 2, 3),
        norm_cfg=dict(type="BN", requires_grad=True),
        pretrained="ckpt/resnet50-19c8e357.pth",
    ),
    img_neck=dict(
        type="FPN",
        num_outs=num_levels,
        start_level=0,
        out_channels=embed_dims,
        add_extra_convs="on_output",
        relu_before_extra_convs=True,
        in_channels=[256, 512, 1024, 2048],
    ),
    # for auxiliary supervision only
    depth_branch=dict(
        type="DenseDepthNet",
        embed_dims=embed_dims,
        num_depth_layers=num_depth_layers,
        loss_weight=0.2,
    ),
    head=dict(
        type="Sparse4DHead",
        cls_threshold_to_reg=0.05,
        decouple_attn=decouple_attn,
        instance_bank=dict(
            type="InstanceBank",
            num_anchor=900,
            embed_dims=embed_dims,
            anchor="nuscenes_kmeans900.npy",
            anchor_handler=dict(type="SparseBox3DKeyPointsGenerator"),
            # Temporal instances are only kept when temporal mode is on.
            num_temp_instances=600 if temporal else -1,
            confidence_decay=0.6,
            feat_grad=False,
        ),
        anchor_encoder=dict(
            type="SparseBox3DEncoder",
            vel_dims=3,
            embed_dims=[128, 32, 32, 64] if decouple_attn else 256,
            mode="cat" if decouple_attn else "add",
            output_fc=not decouple_attn,
            in_loops=1,
            out_loops=4 if decouple_attn else 2,
        ),
        num_single_frame_decoder=num_single_frame_decoder,
        # Single-frame decoder layers followed by temporal layers; the
        # leading [2:] drops the first two ops of the first layer.
        operation_order=(
            [
                "gnn",
                "norm",
                "deformable",
                "ffn",
                "norm",
                "refine",
            ]
            * num_single_frame_decoder
            + [
                "temp_gnn",
                "gnn",
                "norm",
                "deformable",
                "ffn",
                "norm",
                "refine",
            ]
            * (num_decoder - num_single_frame_decoder)
        )[2:],
        temp_graph_model=dict(
            type="MultiheadAttention",
            embed_dims=embed_dims if not decouple_attn else embed_dims * 2,
            num_heads=num_groups,
            batch_first=True,
            dropout=drop_out,
        )
        if temporal
        else None,
        graph_model=dict(
            type="MultiheadAttention",
            embed_dims=embed_dims if not decouple_attn else embed_dims * 2,
            num_heads=num_groups,
            batch_first=True,
            dropout=drop_out,
        ),
        norm_layer=dict(type="LN", normalized_shape=embed_dims),
        ffn=dict(
            type="AsymmetricFFN",
            in_channels=embed_dims * 2,
            pre_norm=dict(type="LN"),
            embed_dims=embed_dims,
            feedforward_channels=embed_dims * 4,
            num_fcs=2,
            ffn_drop=drop_out,
            act_cfg=dict(type="ReLU", inplace=True),
        ),
        deformable_model=dict(
            type="DeformableFeatureAggregation",
            embed_dims=embed_dims,
            num_groups=num_groups,
            num_levels=num_levels,
            num_cams=6,
            attn_drop=0.15,
            use_deformable_func=use_deformable_func,
            use_camera_embed=True,
            residual_mode="cat",
            kps_generator=dict(
                type="SparseBox3DKeyPointsGenerator",
                num_learnable_pts=6,
                # Fixed keypoints: box center plus +/-0.45 offsets along
                # each of the three box axes.
                fix_scale=[
                    [0, 0, 0],
                    [0.45, 0, 0],
                    [-0.45, 0, 0],
                    [0, 0.45, 0],
                    [0, -0.45, 0],
                    [0, 0, 0.45],
                    [0, 0, -0.45],
                ],
            ),
        ),
        refine_layer=dict(
            type="SparseBox3DRefinementModule",
            embed_dims=embed_dims,
            num_cls=num_classes,
            refine_yaw=True,
            with_quality_estimation=with_quality_estimation,
        ),
        sampler=dict(
            type="SparseBox3DTarget",
            num_dn_groups=5,
            num_temp_dn_groups=3,
            dn_noise_scale=[2.0] * 3 + [0.5] * 7,
            max_dn_gt=32,
            add_neg_dn=True,
            cls_weight=2.0,
            box_weight=0.25,
            reg_weights=[2.0] * 3 + [0.5] * 3 + [0.0] * 4,
            # Per-class override of the regression weights.
            cls_wise_reg_weights={
                class_names.index("traffic_cone"): [
                    2.0,
                    2.0,
                    2.0,
                    1.0,
                    1.0,
                    1.0,
                    0.0,
                    0.0,
                    1.0,
                    1.0,
                ],
            },
        ),
        loss_cls=dict(
            type="FocalLoss",
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=2.0,
        ),
        loss_reg=dict(
            type="SparseBox3DLoss",
            loss_box=dict(type="L1Loss", loss_weight=0.25),
            loss_centerness=dict(type="CrossEntropyLoss", use_sigmoid=True),
            loss_yawness=dict(type="GaussianFocalLoss"),
            cls_allow_reverse=[class_names.index("barrier")],
        ),
        decoder=dict(type="SparseBox3DDecoder"),
        reg_weights=[2.0] * 3 + [1.0] * 7,
    ),
)
# ================== data ========================
dataset_type = "NuScenes3DDetTrackDataset"
data_root = "data/nuscenes/"
# NOTE: the original config assigned anno_root twice in a row; the first
# value ("data/nuscenes_cam/") was dead code. Keep only the effective one.
anno_root = "data/nuscenes_anno_pkls/"
file_client_args = dict(backend="disk")

# ImageNet mean/std (RGB order).
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53],
    std=[58.395, 57.12, 57.375],
    to_rgb=True,
)
# Training pipeline: multi-view image loading, lidar points (for depth
# supervision only), augmentation, normalization and target filtering.
train_pipeline = [
    dict(type="LoadMultiViewImageFromFiles", to_float32=True),
    dict(
        type="LoadPointsFromFile",
        coord_type="LIDAR",
        load_dim=5,
        use_dim=5,
        file_client_args=file_client_args,
    ),
    dict(type="ResizeCropFlipImage"),
    dict(
        type="MultiScaleDepthMapGenerator",
        # Depth maps only at the first `num_depth_layers` FPN strides.
        downsample=strides[:num_depth_layers],
    ),
    dict(type="BBoxRotation"),
    dict(type="PhotoMetricDistortionMultiViewImage"),
    dict(type="NormalizeMultiviewImage", **img_norm_cfg),
    dict(
        type="CircleObjectRangeFilter",
        # Same 55 m radius threshold for every class.
        class_dist_thred=[55] * len(class_names),
    ),
    dict(type="InstanceNameFilter", classes=class_names),
    dict(type="NuScenesSparse4DAdaptor"),
    dict(
        type="Collect",
        keys=[
            "img",
            "timestamp",
            "projection_mat",
            "image_wh",
            "gt_depth",
            "focal",
            "gt_bboxes_3d",
            "gt_labels_3d",
        ],
        meta_keys=["T_global", "T_global_inv", "timestamp", "instance_id"],
    ),
]

# Test pipeline: no lidar, no photometric augmentation, no GT targets.
test_pipeline = [
    dict(type="LoadMultiViewImageFromFiles", to_float32=True),
    dict(type="ResizeCropFlipImage"),
    dict(type="NormalizeMultiviewImage", **img_norm_cfg),
    dict(type="NuScenesSparse4DAdaptor"),
    dict(
        type="Collect",
        keys=[
            "img",
            "timestamp",
            "projection_mat",
            "image_wh",
        ],
        meta_keys=["T_global", "T_global_inv", "timestamp"],
    ),
]
# Camera-only setup.
input_modality = dict(
    use_lidar=False,
    use_camera=True,
    use_radar=False,
    use_map=False,
    use_external=False,
)
# Shared dataset kwargs, expanded into train/val/test below.
data_basic_config = dict(
    type=dataset_type,
    data_root=data_root,
    classes=class_names,
    modality=input_modality,
    version="v1.0-trainval",
)
data_aug_conf = {
    "resize_lim": (0.40, 0.47),
    # input_shape is (W, H); final_dim wants (H, W).
    "final_dim": input_shape[::-1],
    "bot_pct_lim": (0.0, 0.0),
    "rot_lim": (-5.4, 5.4),  # 2D rotation augmentation in degrees
    "H": 900,  # raw nuScenes image height
    "W": 1600,  # raw nuScenes image width
    "rand_flip": True,
    "rot3d_range": [-0.3925, 0.3925],  # global 3D yaw rotation in radians
}
data = dict(
    samples_per_gpu=batch_size,
    workers_per_gpu=batch_size,
    train=dict(
        **data_basic_config,
        ann_file=anno_root + "nuscenes_infos_train.pkl",
        pipeline=train_pipeline,
        test_mode=False,
        data_aug_conf=data_aug_conf,
        with_seq_flag=True,
        sequences_split_num=2,
        keep_consistent_seq_aug=True,
    ),
    val=dict(
        **data_basic_config,
        ann_file=anno_root + "nuscenes_infos_val.pkl",
        pipeline=test_pipeline,
        data_aug_conf=data_aug_conf,
        test_mode=True,
        tracking=tracking_test,
        tracking_threshold=tracking_threshold,
    ),
    test=dict(
        **data_basic_config,
        ann_file=anno_root + "nuscenes_infos_val.pkl",
        pipeline=test_pipeline,
        data_aug_conf=data_aug_conf,
        test_mode=True,
        tracking=tracking_test,
        tracking_threshold=tracking_threshold,
    ),
)
# ================== training ========================
optimizer = dict(
    type="AdamW",
    lr=6e-4,
    weight_decay=0.001,
    paramwise_cfg=dict(
        custom_keys={
            # Train the pretrained backbone with half the base LR.
            "img_backbone": dict(lr_mult=0.5),
        }
    ),
)
optimizer_config = dict(grad_clip=dict(max_norm=25, norm_type=2))
lr_config = dict(
    policy="CosineAnnealing",
    warmup="linear",
    warmup_iters=500,
    warmup_ratio=1.0 / 3,
    min_lr_ratio=1e-3,
)
runner = dict(
    type="IterBasedRunner",
    max_iters=num_iters_per_epoch * num_epochs,
)
# ================== eval ========================
# Minimal pipeline used only when dumping visualizations during evaluation.
vis_pipeline = [
    dict(type="LoadMultiViewImageFromFiles", to_float32=True),
    dict(
        type="Collect",
        keys=["img"],
        meta_keys=["timestamp", "lidar2img"],
    ),
]
evaluation = dict(
    # Evaluate at the same cadence as checkpointing.
    interval=num_iters_per_epoch * checkpoint_epoch_interval,
    pipeline=vis_pipeline,
    # out_dir="./vis",  # for visualization
)
projects/mmdet3d_plugin/__init__.py
0 → 100644
View file @
7c0c60e3
from
.datasets
import
*
from
.models
import
*
from
.apis
import
*
from
.core.evaluation
import
*
projects/mmdet3d_plugin/apis/__init__.py
0 → 100644
View file @
7c0c60e3
from
.train
import
custom_train_model
from
.mmdet_train
import
custom_train_detector
# from .test import custom_multi_gpu_test
projects/mmdet3d_plugin/apis/mmdet_train.py
0 → 100644
View file @
7c0c60e3
# ---------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
# ---------------------------------------------
# Modified by Zhiqi Li
# ---------------------------------------------
import
random
import
warnings
import
numpy
as
np
import
torch
import
torch.distributed
as
dist
from
mmcv.parallel
import
MMDataParallel
,
MMDistributedDataParallel
from
mmcv.runner
import
(
HOOKS
,
DistSamplerSeedHook
,
EpochBasedRunner
,
Fp16OptimizerHook
,
OptimizerHook
,
build_optimizer
,
build_runner
,
get_dist_info
,
)
from
mmcv.utils
import
build_from_cfg
from
mmdet.core
import
EvalHook
from
mmdet.datasets
import
build_dataset
,
replace_ImageToTensor
from
mmdet.utils
import
get_root_logger
import
time
import
os.path
as
osp
from
projects.mmdet3d_plugin.datasets.builder
import
build_dataloader
from
projects.mmdet3d_plugin.core.evaluation.eval_hooks
import
(
CustomDistEvalHook
,
)
from
projects.mmdet3d_plugin.datasets
import
custom_build_dataset
def custom_train_detector(
    model,
    dataset,
    cfg,
    distributed=False,
    validate=False,
    timestamp=None,
    meta=None,
):
    """Train a detector with the plugin's custom dataloader and eval hook.

    Mirrors mmdet's `train_detector`, but builds dataloaders via the plugin's
    `build_dataloader` (sequence-aware sampling) and registers
    `CustomDistEvalHook` for distributed validation.

    Args:
        model: detector to train (moved to GPU and wrapped here).
        dataset: a dataset or list of datasets, one dataloader per entry.
        cfg: mmcv Config with data/optimizer/runner/hook settings.
        distributed (bool): wrap in MMDistributedDataParallel when True.
        validate (bool): register an evaluation hook when True.
        timestamp: run timestamp forwarded to the runner for log naming.
        meta: extra metadata stored on the runner.
    """
    logger = get_root_logger(cfg.log_level)

    # prepare data loaders
    dataset = dataset if isinstance(dataset, (list, tuple)) else [dataset]
    # assert len(dataset)==1s
    if "imgs_per_gpu" in cfg.data:
        logger.warning(
            '"imgs_per_gpu" is deprecated in MMDet V2.0. '
            'Please use "samples_per_gpu" instead'
        )
        if "samples_per_gpu" in cfg.data:
            logger.warning(
                f'Got "imgs_per_gpu"={cfg.data.imgs_per_gpu} and '
                f'"samples_per_gpu"={cfg.data.samples_per_gpu}, "imgs_per_gpu"'
                f"={cfg.data.imgs_per_gpu} is used in this experiments"
            )
        else:
            logger.warning(
                'Automatically set "samples_per_gpu"="imgs_per_gpu"='
                f"{cfg.data.imgs_per_gpu} in this experiments"
            )
            cfg.data.samples_per_gpu = cfg.data.imgs_per_gpu

    # The runner type decides how the custom sampler batches sequences.
    if "runner" in cfg:
        runner_type = cfg.runner["type"]
    else:
        runner_type = "EpochBasedRunner"
    data_loaders = [
        build_dataloader(
            ds,
            cfg.data.samples_per_gpu,
            cfg.data.workers_per_gpu,
            # cfg.gpus will be ignored if distributed
            len(cfg.gpu_ids),
            dist=distributed,
            seed=cfg.seed,
            nonshuffler_sampler=dict(type="DistributedSampler"),
            # dict(type='DistributedSampler'),
            runner_type=runner_type,
        )
        for ds in dataset
    ]

    # put model on gpus
    if distributed:
        find_unused_parameters = cfg.get("find_unused_parameters", False)
        # Sets the `find_unused_parameters` parameter in
        # torch.nn.parallel.DistributedDataParallel
        model = MMDistributedDataParallel(
            model.cuda(),
            device_ids=[torch.cuda.current_device()],
            broadcast_buffers=False,
            find_unused_parameters=find_unused_parameters,
        )
    else:
        model = MMDataParallel(
            model.cuda(cfg.gpu_ids[0]), device_ids=cfg.gpu_ids
        )

    # build runner
    optimizer = build_optimizer(model, cfg.optimizer)

    if "runner" not in cfg:
        cfg.runner = {
            "type": "EpochBasedRunner",
            "max_epochs": cfg.total_epochs,
        }
        warnings.warn(
            "config is now expected to have a `runner` section, "
            "please set `runner` in your config.",
            UserWarning,
        )
    else:
        if "total_epochs" in cfg:
            assert cfg.total_epochs == cfg.runner.max_epochs

    runner = build_runner(
        cfg.runner,
        default_args=dict(
            model=model,
            optimizer=optimizer,
            work_dir=cfg.work_dir,
            logger=logger,
            meta=meta,
        ),
    )

    # an ugly workaround to make .log and .log.json filenames the same
    runner.timestamp = timestamp

    # fp16 setting
    fp16_cfg = cfg.get("fp16", None)
    if fp16_cfg is not None:
        optimizer_config = Fp16OptimizerHook(
            **cfg.optimizer_config, **fp16_cfg, distributed=distributed
        )
    elif distributed and "type" not in cfg.optimizer_config:
        optimizer_config = OptimizerHook(**cfg.optimizer_config)
    else:
        optimizer_config = cfg.optimizer_config

    # register hooks
    runner.register_training_hooks(
        cfg.lr_config,
        optimizer_config,
        cfg.checkpoint_config,
        cfg.log_config,
        cfg.get("momentum_config", None),
    )
    # register profiler hook
    # trace_config = dict(type='tb_trace', dir_name='work_dir')
    # profiler_config = dict(on_trace_ready=trace_config)
    # runner.register_profiler_hook(profiler_config)
    if distributed:
        if isinstance(runner, EpochBasedRunner):
            runner.register_hook(DistSamplerSeedHook())

    # register eval hooks
    if validate:
        # Support batch_size > 1 in validation
        val_samples_per_gpu = cfg.data.val.pop("samples_per_gpu", 1)
        if val_samples_per_gpu > 1:
            # Validation with batch size > 1 is deliberately disabled;
            # the lines below are unreachable by design.
            assert False
            # Replace 'ImageToTensor' to 'DefaultFormatBundle'
            cfg.data.val.pipeline = replace_ImageToTensor(
                cfg.data.val.pipeline
            )
        val_dataset = custom_build_dataset(cfg.data.val, dict(test_mode=True))
        val_dataloader = build_dataloader(
            val_dataset,
            samples_per_gpu=val_samples_per_gpu,
            workers_per_gpu=cfg.data.workers_per_gpu,
            dist=distributed,
            shuffle=False,
            nonshuffler_sampler=dict(type="DistributedSampler"),
        )
        eval_cfg = cfg.get("evaluation", {})
        eval_cfg["by_epoch"] = cfg.runner["type"] != "IterBasedRunner"
        # NOTE(review): the argument order puts "val" BEFORE work_dir; if
        # work_dir is absolute, os.path.join discards the leading "val".
        # Looks like the intended path was <work_dir>/val/<time> — confirm.
        eval_cfg["jsonfile_prefix"] = osp.join(
            "val",
            cfg.work_dir,
            time.ctime().replace(" ", "_").replace(":", "_"),
        )
        eval_hook = CustomDistEvalHook if distributed else EvalHook
        runner.register_hook(eval_hook(val_dataloader, **eval_cfg))

    # user-defined hooks
    if cfg.get("custom_hooks", None):
        custom_hooks = cfg.custom_hooks
        assert isinstance(
            custom_hooks, list
        ), f"custom_hooks expect list type, but got {type(custom_hooks)}"
        for hook_cfg in cfg.custom_hooks:
            assert isinstance(hook_cfg, dict), (
                "Each item in custom_hooks expects dict type, but got "
                f"{type(hook_cfg)}"
            )
            # Copy before popping so the config itself is not mutated.
            hook_cfg = hook_cfg.copy()
            priority = hook_cfg.pop("priority", "NORMAL")
            hook = build_from_cfg(hook_cfg, HOOKS)
            runner.register_hook(hook, priority=priority)

    if cfg.resume_from:
        runner.resume(cfg.resume_from)
    elif cfg.load_from:
        runner.load_checkpoint(cfg.load_from)
    runner.run(data_loaders, cfg.workflow)
projects/mmdet3d_plugin/apis/test.py
0 → 100644
View file @
7c0c60e3
# ---------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
# ---------------------------------------------
# Modified by Zhiqi Li
# ---------------------------------------------
import
os.path
as
osp
import
pickle
import
shutil
import
tempfile
import
time
import
mmcv
import
torch
import
torch.distributed
as
dist
from
mmcv.image
import
tensor2imgs
from
mmcv.runner
import
get_dist_info
from
mmdet.core
import
encode_mask_results
import
mmcv
import
numpy
as
np
import
pycocotools.mask
as
mask_util
def custom_encode_mask_results(mask_results):
    """Encode bitmap mask to RLE code. Semantic Masks only.

    Args:
        mask_results (list | tuple[list]): bitmap mask results.
            In mask scoring rcnn, mask_results is a tuple of (segm_results,
            segm_cls_score).

    Returns:
        list: a single-element list wrapping the RLE-encoded masks.
    """
    # Original computed an unused `num_classes` local and iterated by index;
    # iterate the masks directly instead.
    encoded_mask_results = []
    for cls_segm in mask_results:
        # pycocotools expects a Fortran-ordered uint8 H x W x 1 array.
        encoded_mask_results.append(
            mask_util.encode(
                np.array(cls_segm[:, :, np.newaxis], order="F", dtype="uint8")
            )[0]
        )  # encoded with RLE
    return [encoded_mask_results]
def custom_multi_gpu_test(model, data_loader, tmpdir=None, gpu_collect=False):
    """Test model with multiple gpus.
    This method tests model with multiple gpus and collects the results
    under two different modes: gpu and cpu modes. By setting 'gpu_collect=True'
    it encodes results to gpu tensors and use gpu communication for results
    collection. On cpu mode it saves the results on different gpus to 'tmpdir'
    and collects them by the rank 0 worker.
    Args:
        model (nn.Module): Model to be tested.
        data_loader (nn.Dataloader): Pytorch data loader.
        tmpdir (str): Path of directory to save the temporary results from
            different gpus under cpu mode.
        gpu_collect (bool): Option to use either gpu or cpu to collect results.
    Returns:
        list: The prediction results.
    """
    model.eval()
    bbox_results = []
    mask_results = []
    dataset = data_loader.dataset
    rank, world_size = get_dist_info()
    # Only rank 0 owns the progress bar; it advances it for all ranks.
    if rank == 0:
        prog_bar = mmcv.ProgressBar(len(dataset))
    time.sleep(2)  # This line can prevent deadlock problem in some cases.
    have_mask = False
    for i, data in enumerate(data_loader):
        with torch.no_grad():
            result = model(return_loss=False, rescale=True, **data)
            # encode mask results
            if isinstance(result, dict):
                if "bbox_results" in result.keys():
                    bbox_result = result["bbox_results"]
                    batch_size = len(result["bbox_results"])
                    bbox_results.extend(bbox_result)
                if (
                    "mask_results" in result.keys()
                    and result["mask_results"] is not None
                ):
                    mask_result = custom_encode_mask_results(
                        result["mask_results"]
                    )
                    mask_results.extend(mask_result)
                    have_mask = True
            else:
                # Plain list result: detections only.
                batch_size = len(result)
                bbox_results.extend(result)

        if rank == 0:
            # Advance by the global batch (all ranks processed one batch).
            for _ in range(batch_size * world_size):
                prog_bar.update()

    # collect results from all ranks
    if gpu_collect:
        bbox_results = collect_results_gpu(bbox_results, len(dataset))
        if have_mask:
            mask_results = collect_results_gpu(mask_results, len(dataset))
        else:
            mask_results = None
    else:
        bbox_results = collect_results_cpu(
            bbox_results, len(dataset), tmpdir
        )
        # Masks use a sibling tmp directory so the two dumps don't collide.
        tmpdir = tmpdir + "_mask" if tmpdir is not None else None
        if have_mask:
            mask_results = collect_results_cpu(
                mask_results, len(dataset), tmpdir
            )
        else:
            mask_results = None

    if mask_results is None:
        return bbox_results
    return {"bbox_results": bbox_results, "mask_results": mask_results}
def collect_results_cpu(result_part, size, tmpdir=None):
    """Gather per-rank result lists on rank 0 via pickle files in tmpdir.

    Every rank dumps its partial results to `<tmpdir>/part_<rank>.pkl`;
    rank 0 loads all parts, concatenates them in rank order, truncates to
    `size` entries and removes the directory.

    Returns:
        list | None: the merged results on rank 0, None on other ranks.
    """
    rank, world_size = get_dist_info()
    # create a tmp dir if it is not specified
    if tmpdir is None:
        MAX_LEN = 512
        # 32 is whitespace
        dir_tensor = torch.full(
            (MAX_LEN,), 32, dtype=torch.uint8, device="cuda"
        )
        if rank == 0:
            mmcv.mkdir_or_exist(".dist_test")
            tmpdir = tempfile.mkdtemp(dir=".dist_test")
            # Broadcast the path to the other ranks as a byte tensor.
            tmpdir = torch.tensor(
                bytearray(tmpdir.encode()), dtype=torch.uint8, device="cuda"
            )
            dir_tensor[: len(tmpdir)] = tmpdir
        dist.broadcast(dir_tensor, 0)
        tmpdir = dir_tensor.cpu().numpy().tobytes().decode().rstrip()
    else:
        mmcv.mkdir_or_exist(tmpdir)
    # dump the part result to the dir
    mmcv.dump(result_part, osp.join(tmpdir, f"part_{rank}.pkl"))
    dist.barrier()
    # collect all parts
    if rank != 0:
        return None
    else:
        # load results of all parts from tmp dir
        part_list = []
        for i in range(world_size):
            part_file = osp.join(tmpdir, f"part_{i}.pkl")
            part_list.append(mmcv.load(part_file))
        # sort the results
        ordered_results = []
        """
        bacause we change the sample of the evaluation stage to make sure that
        each gpu will handle continuous sample,
        """
        # for res in zip(*part_list):
        # Parts are concatenated whole (not interleaved) because the custom
        # sampler assigns each GPU a contiguous slice of the dataset.
        for res in part_list:
            ordered_results.extend(list(res))
        # the dataloader may pad some samples
        ordered_results = ordered_results[:size]
        # remove tmp dir
        shutil.rmtree(tmpdir)
        return ordered_results
def collect_results_gpu(result_part, size):
    """Collect per-rank results when `gpu_collect=True` is requested.

    NOTE(review): despite the name this delegates to the CPU/tmpdir
    implementation. The original version dropped the return value, so
    callers (e.g. `custom_multi_gpu_test`) always received None when
    `gpu_collect` was enabled; propagate the collected results instead.
    """
    return collect_results_cpu(result_part, size)
projects/mmdet3d_plugin/apis/train.py
0 → 100644
View file @
7c0c60e3
# ---------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
# ---------------------------------------------
# Modified by Zhiqi Li
# ---------------------------------------------
from
.mmdet_train
import
custom_train_detector
# from mmseg.apis import train_segmentor
from
mmdet.apis
import
train_detector
def custom_train_model(
    model,
    dataset,
    cfg,
    distributed=False,
    validate=False,
    timestamp=None,
    meta=None,
):
    """A function wrapper for launching model training according to cfg.

    Because we need different eval_hook in runner. Should be deprecated in the
    future.
    """
    if cfg.model.type in ["EncoderDecoder3D"]:
        # Segmentation models are not supported by this plugin. The original
        # used a bare `assert False`, which is silently stripped under
        # `python -O`; fail loudly and explain why instead.
        raise NotImplementedError(
            "EncoderDecoder3D training is not supported by this plugin"
        )
    else:
        custom_train_detector(
            model,
            dataset,
            cfg,
            distributed=distributed,
            validate=validate,
            timestamp=timestamp,
            meta=meta,
        )
def train_model(
    model,
    dataset,
    cfg,
    distributed=False,
    validate=False,
    timestamp=None,
    meta=None,
):
    """A function wrapper for launching model training according to cfg.

    Because we need different eval_hook in runner. Should be deprecated in the
    future.
    """
    # Thin pass-through to stock mmdet training.
    launch_kwargs = dict(
        distributed=distributed,
        validate=validate,
        timestamp=timestamp,
        meta=meta,
    )
    train_detector(model, dataset, cfg, **launch_kwargs)
projects/mmdet3d_plugin/core/box3d.py
0 → 100644
View file @
7c0c60e3
# Index layout of the 11-DoF *undecoded* box state vector:
# center (X, Y, Z), size (W, L, H), yaw as sin/cos, velocity (VX, VY, VZ).
(X, Y, Z, W, L, H, SIN_YAW, COS_YAW, VX, VY, VZ) = range(11)

# Centerness and yawness indices in the quality vector.
CNS, YNS = 0, 1

# Yaw index in the *decoded* box representation.
YAW = 6
projects/mmdet3d_plugin/core/evaluation/__init__.py
0 → 100644
View file @
7c0c60e3
from
.eval_hooks
import
CustomDistEvalHook
\ No newline at end of file
projects/mmdet3d_plugin/core/evaluation/eval_hooks.py
0 → 100644
View file @
7c0c60e3
# Note: Considering that MMCV's EvalHook updated its interface in V1.3.16,
# in order to avoid strong version dependency, we did not directly
# inherit EvalHook but BaseDistEvalHook.
import
bisect
import
os.path
as
osp
import
mmcv
import
torch.distributed
as
dist
from
mmcv.runner
import
DistEvalHook
as
BaseDistEvalHook
from
mmcv.runner
import
EvalHook
as
BaseEvalHook
from
torch.nn.modules.batchnorm
import
_BatchNorm
from
mmdet.core.evaluation.eval_hooks
import
DistEvalHook
def _calc_dynamic_intervals(start_interval, dynamic_interval_list):
    """Split (milestone, interval) pairs into two parallel lists.

    The milestone list is prefixed with 0 and the interval list with
    `start_interval`, so index i of one maps to index i of the other.
    """
    assert mmcv.is_list_of(dynamic_interval_list, tuple)
    milestones = [0] + [pair[0] for pair in dynamic_interval_list]
    intervals = [start_interval] + [pair[1] for pair in dynamic_interval_list]
    return milestones, intervals
class CustomDistEvalHook(BaseDistEvalHook):
    """Distributed eval hook with optional dynamically-changing intervals.

    `dynamic_intervals` is a list of (milestone, interval) tuples: once
    training progress passes a milestone, the corresponding interval takes
    effect. Evaluation itself is delegated to `custom_multi_gpu_test`.
    """

    def __init__(self, *args, dynamic_intervals=None, **kwargs):
        super(CustomDistEvalHook, self).__init__(*args, **kwargs)
        self.use_dynamic_intervals = dynamic_intervals is not None
        if self.use_dynamic_intervals:
            (
                self.dynamic_milestones,
                self.dynamic_intervals,
            ) = _calc_dynamic_intervals(self.interval, dynamic_intervals)

    def _decide_interval(self, runner):
        # Pick the interval matching the current epoch/iter progress.
        if self.use_dynamic_intervals:
            progress = runner.epoch if self.by_epoch else runner.iter
            step = bisect.bisect(self.dynamic_milestones, (progress + 1))
            # Dynamically modify the evaluation interval
            self.interval = self.dynamic_intervals[step - 1]

    def before_train_epoch(self, runner):
        """Evaluate the model only at the start of training by epoch."""
        self._decide_interval(runner)
        super().before_train_epoch(runner)

    def before_train_iter(self, runner):
        self._decide_interval(runner)
        super().before_train_iter(runner)

    def _do_evaluate(self, runner):
        """perform evaluation and save ckpt."""
        # Synchronization of BatchNorm's buffer (running_mean
        # and running_var) is not supported in the DDP of pytorch,
        # which may cause the inconsistent performance of models in
        # different ranks, so we broadcast BatchNorm's buffers
        # of rank 0 to other ranks to avoid this.
        if self.broadcast_bn_buffer:
            model = runner.model
            for name, module in model.named_modules():
                if (
                    isinstance(module, _BatchNorm)
                    and module.track_running_stats
                ):
                    dist.broadcast(module.running_var, 0)
                    dist.broadcast(module.running_mean, 0)

        if not self._should_evaluate(runner):
            return

        tmpdir = self.tmpdir
        if tmpdir is None:
            tmpdir = osp.join(runner.work_dir, ".eval_hook")

        # Imported lazily to solve circular import.
        from projects.mmdet3d_plugin.apis.test import (
            custom_multi_gpu_test,
        )

        results = custom_multi_gpu_test(
            runner.model,
            self.dataloader,
            tmpdir=tmpdir,
            gpu_collect=self.gpu_collect,
        )
        # Only rank 0 evaluates and (optionally) saves the best checkpoint.
        if runner.rank == 0:
            print("\n")
            runner.log_buffer.output["eval_iter_num"] = len(self.dataloader)
            key_score = self.evaluate(runner, results)

            if self.save_best:
                self._save_ckpt(runner, key_score)
projects/mmdet3d_plugin/datasets/__init__.py
0 → 100644
View file @
7c0c60e3
from
.nuscenes_3d_det_track_dataset
import
NuScenes3DDetTrackDataset
from
.builder
import
*
from
.pipelines
import
*
from
.samplers
import
*
__all__
=
[
'NuScenes3DDetTrackDataset'
,
"custom_build_dataset"
,
]
projects/mmdet3d_plugin/datasets/builder.py
0 → 100644
View file @
7c0c60e3
import
copy
import
platform
import
random
from
functools
import
partial
import
numpy
as
np
from
mmcv.parallel
import
collate
from
mmcv.runner
import
get_dist_info
from
mmcv.utils
import
Registry
,
build_from_cfg
from
torch.utils.data
import
DataLoader
from
mmdet.datasets.samplers
import
GroupSampler
from
projects.mmdet3d_plugin.datasets.samplers
import
(
GroupInBatchSampler
,
DistributedGroupSampler
,
DistributedSampler
,
build_sampler
)
def build_dataloader(
    dataset,
    samples_per_gpu,
    workers_per_gpu,
    num_gpus=1,
    dist=True,
    shuffle=True,
    seed=None,
    shuffler_sampler=None,
    nonshuffler_sampler=None,
    runner_type="EpochBasedRunner",
    **kwargs
):
    """Build PyTorch DataLoader.

    In distributed training, each GPU/process has a dataloader.
    In non-distributed training, there is only one dataloader for all GPUs.

    Args:
        dataset (Dataset): A PyTorch dataset.
        samples_per_gpu (int): Number of training samples on each GPU, i.e.,
            batch size of each GPU.
        workers_per_gpu (int): How many subprocesses to use for data loading
            for each GPU.
        num_gpus (int): Number of GPUs. Only used in non-distributed training.
        dist (bool): Distributed training/test or not. Default: True.
        shuffle (bool): Whether to shuffle the data at every epoch.
            Default: True.
        seed (int | None): Base seed for per-worker RNG seeding.
        shuffler_sampler (dict | None): Sampler config used when shuffling.
        nonshuffler_sampler (dict | None): Sampler config used otherwise.
        runner_type (str): "IterBasedRunner" switches to GroupInBatchSampler.
        kwargs: any keyword argument to be used to initialize DataLoader

    Returns:
        DataLoader: A PyTorch dataloader.
    """
    rank, world_size = get_dist_info()
    batch_sampler = None
    if runner_type == 'IterBasedRunner':
        # Iteration-based training: the batch sampler owns batching, so the
        # DataLoader itself gets batch_size=1 and no plain sampler.
        print("Use GroupInBatchSampler !!!")
        batch_sampler = GroupInBatchSampler(
            dataset,
            samples_per_gpu,
            world_size,
            rank,
            seed=seed,
        )
        batch_size = 1
        sampler = None
        num_workers = workers_per_gpu
    elif dist:
        # DistributedGroupSampler will definitely shuffle the data to satisfy
        # that images on each GPU are in the same group
        if shuffle:
            print("Use DistributedGroupSampler !!!")
            sampler = build_sampler(
                shuffler_sampler
                if shuffler_sampler is not None
                else dict(type="DistributedGroupSampler"),
                dict(
                    dataset=dataset,
                    samples_per_gpu=samples_per_gpu,
                    num_replicas=world_size,
                    rank=rank,
                    seed=seed,
                ),
            )
        else:
            sampler = build_sampler(
                nonshuffler_sampler
                if nonshuffler_sampler is not None
                else dict(type="DistributedSampler"),
                dict(
                    dataset=dataset,
                    num_replicas=world_size,
                    rank=rank,
                    shuffle=shuffle,
                    seed=seed,
                ),
            )
        batch_size = samples_per_gpu
        num_workers = workers_per_gpu
    else:
        # Non-distributed path: intended only for measuring inference speed.
        # assert False, 'not support in bevformer'
        print("WARNING!!!!, Only can be used for obtain inference speed!!!!")
        sampler = GroupSampler(dataset, samples_per_gpu) if shuffle else None
        batch_size = num_gpus * samples_per_gpu
        num_workers = num_gpus * workers_per_gpu

    # Deterministic per-worker seeding only when an explicit seed is given.
    init_fn = (
        partial(worker_init_fn, num_workers=num_workers, rank=rank, seed=seed)
        if seed is not None
        else None
    )

    data_loader = DataLoader(
        dataset,
        batch_size=batch_size,
        sampler=sampler,
        batch_sampler=batch_sampler,
        num_workers=num_workers,
        collate_fn=partial(collate, samples_per_gpu=samples_per_gpu),
        pin_memory=False,
        worker_init_fn=init_fn,
        **kwargs
    )
    return data_loader
def worker_init_fn(worker_id, num_workers, rank, seed):
    """Seed Python's and NumPy's RNGs deterministically for one worker.

    The seed of each worker equals num_workers * rank + worker_id + seed,
    so every (rank, worker) pair draws an independent, reproducible stream.
    """
    derived_seed = seed + rank * num_workers + worker_id
    for seeder in (np.random.seed, random.seed):
        seeder(derived_seed)
# Copyright (c) OpenMMLab. All rights reserved.
import
platform
from
mmcv.utils
import
Registry
,
build_from_cfg
from
mmdet.datasets
import
DATASETS
from
mmdet.datasets.builder
import
_concat_dataset
if platform.system() != "Windows":
    # DataLoader workers can exhaust the default open-file limit;
    # raise the soft limit to at least 4096 (capped by the hard limit).
    # https://github.com/pytorch/pytorch/issues/973
    import resource

    rlimit = resource.getrlimit(resource.RLIMIT_NOFILE)
    base_soft_limit = rlimit[0]
    hard_limit = rlimit[1]
    soft_limit = min(max(4096, base_soft_limit), hard_limit)
    resource.setrlimit(resource.RLIMIT_NOFILE, (soft_limit, hard_limit))

# Registry for GT-database object samplers (populated elsewhere).
OBJECTSAMPLERS = Registry("Object sampler")
def custom_build_dataset(cfg, default_args=None):
    """Recursively build a dataset (with wrapper support) from a config.

    Supports list/tuple configs (concatenated), the ConcatDataset /
    RepeatDataset / ClassBalancedDataset / CBGSDataset wrappers, multi
    ``ann_file`` configs, and otherwise falls back to the DATASETS registry.

    Args:
        cfg (dict | list | tuple): Dataset config(s).
        default_args (dict | None): Default kwargs passed to the constructor.

    Returns:
        Dataset: The constructed (possibly wrapped) dataset.
    """
    # CBGSDataset lives in mmdet3d, which may not be installed; catch only
    # ImportError so real bugs inside mmdet3d are not silently swallowed
    # (the original used a bare ``except:``).
    try:
        from mmdet3d.datasets.dataset_wrappers import CBGSDataset
    except ImportError:
        CBGSDataset = None
    from mmdet.datasets.dataset_wrappers import (
        ClassBalancedDataset,
        ConcatDataset,
        RepeatDataset,
    )

    if isinstance(cfg, (list, tuple)):
        dataset = ConcatDataset(
            [custom_build_dataset(c, default_args) for c in cfg]
        )
    elif cfg["type"] == "ConcatDataset":
        dataset = ConcatDataset(
            [custom_build_dataset(c, default_args) for c in cfg["datasets"]],
            cfg.get("separate_eval", True),
        )
    elif cfg["type"] == "RepeatDataset":
        dataset = RepeatDataset(
            custom_build_dataset(cfg["dataset"], default_args), cfg["times"]
        )
    elif cfg["type"] == "ClassBalancedDataset":
        dataset = ClassBalancedDataset(
            custom_build_dataset(cfg["dataset"], default_args),
            cfg["oversample_thr"],
        )
    elif cfg["type"] == "CBGSDataset":
        # NOTE: raises TypeError if mmdet3d is unavailable (CBGSDataset is
        # None); that failure is explicit rather than hidden.
        dataset = CBGSDataset(
            custom_build_dataset(cfg["dataset"], default_args)
        )
    elif isinstance(cfg.get("ann_file"), (list, tuple)):
        dataset = _concat_dataset(cfg, default_args)
    else:
        dataset = build_from_cfg(cfg, DATASETS, default_args)
    return dataset
projects/mmdet3d_plugin/datasets/nuscenes_3d_det_track_dataset.py
0 → 100644
View file @
7c0c60e3
import
random
import
math
import
os
from
os
import
path
as
osp
import
cv2
import
tempfile
import
copy
import
numpy
as
np
import
torch
from
torch.utils.data
import
Dataset
import
pyquaternion
from
nuscenes.utils.data_classes
import
Box
as
NuScenesBox
from
nuscenes.eval.detection.config
import
config_factory
as
det_configs
from
nuscenes.eval.common.config
import
config_factory
as
track_configs
import
mmcv
from
mmcv.utils
import
print_log
from
mmdet.datasets
import
DATASETS
from
mmdet.datasets.pipelines
import
Compose
from
.utils
import
(
draw_lidar_bbox3d_on_img
,
draw_lidar_bbox3d_on_bev
,
)
@DATASETS.register_module()
class NuScenes3DDetTrackDataset(Dataset):
    """nuScenes dataset for joint 3D detection and tracking.

    Loads pre-generated info pickles, serves per-frame samples through an
    mmdet pipeline, and evaluates results with the official nuScenes devkit
    (detection protocol and, optionally, the tracking protocol).
    """

    # Fallback attribute name per detection class, used by ``_format_bbox``
    # when the velocity-based attribute heuristic does not apply.
    DefaultAttribute = {
        "car": "vehicle.parked",
        "pedestrian": "pedestrian.moving",
        "trailer": "vehicle.parked",
        "truck": "vehicle.parked",
        "bus": "vehicle.moving",
        "motorcycle": "cycle.without_rider",
        "construction_vehicle": "vehicle.parked",
        "bicycle": "cycle.without_rider",
        "barrier": "",
        "traffic_cone": "",
    }
    # Map from devkit TP-error keys to the abbreviated names used in logs.
    ErrNameMapping = {
        "trans_err": "mATE",
        "scale_err": "mASE",
        "orient_err": "mAOE",
        "vel_err": "mAVE",
        "attr_err": "mAAE",
    }
    # Default class order; may be overridden via the ``classes`` init arg.
    CLASSES = (
        "car",
        "truck",
        "trailer",
        "bus",
        "construction_vehicle",
        "bicycle",
        "motorcycle",
        "pedestrian",
        "traffic_cone",
        "barrier",
    )
    # Color palette used by ``show`` to distinguish track ids / labels.
    ID_COLOR_MAP = [
        (59, 59, 238),
        (0, 255, 0),
        (0, 0, 255),
        (255, 255, 0),
        (0, 255, 255),
        (255, 0, 255),
        (255, 255, 255),
        (0, 127, 255),
        (71, 130, 255),
        (127, 127, 0),
    ]
    def __init__(
        self,
        ann_file,
        pipeline=None,
        data_root=None,
        classes=None,
        load_interval=1,
        with_velocity=True,
        modality=None,
        test_mode=False,
        det3d_eval_version="detection_cvpr_2019",
        track3d_eval_version="tracking_nips_2019",
        version="v1.0-trainval",
        use_valid_flag=False,
        vis_score_threshold=0.25,
        data_aug_conf=None,
        sequences_split_num=1,
        with_seq_flag=False,
        keep_consistent_seq_aug=True,
        tracking=False,
        tracking_threshold=0.2,
    ):
        """Initialize the dataset.

        Args:
            ann_file (str): Path to the info pickle file.
            pipeline (list | None): Data pipeline config (built via Compose).
            data_root (str | None): nuScenes data root (needed for eval).
            classes (tuple | None): Overrides the default CLASSES order.
            load_interval (int): Keep every N-th frame after time-sorting.
            with_velocity (bool): Append (vx, vy) to gt boxes.
            modality (dict | None): Sensor usage flags; lidar-only default.
            test_mode (bool): Disables annotation loading per sample.
            det3d_eval_version / track3d_eval_version (str): Devkit configs.
            version (str): nuScenes split version (overwritten from metadata).
            use_valid_flag (bool): Mask GT by the per-box valid flag instead
                of the lidar-point-count heuristic.
            vis_score_threshold (float): Score cutoff for visualization.
            data_aug_conf (dict | None): Image augmentation ranges.
            sequences_split_num (int | "all"): Sub-splits per scene sequence.
            with_seq_flag (bool): Compute the per-sequence group flag.
            keep_consistent_seq_aug (bool): Keep one aug config per sequence.
            tracking (bool): Also run tracking evaluation.
            tracking_threshold (float): Score cutoff for tracking submission.
        """
        self.version = version
        self.load_interval = load_interval
        self.use_valid_flag = use_valid_flag
        super().__init__()
        self.data_root = data_root
        self.ann_file = ann_file
        self.test_mode = test_mode
        self.modality = modality
        # NOTE(review): constant placeholder; box mode semantics are defined
        # by the consumer — confirm against the pipeline.
        self.box_mode_3d = 0

        if classes is not None:
            self.CLASSES = classes
        self.cat2id = {name: i for i, name in enumerate(self.CLASSES)}
        self.data_infos = self.load_annotations(self.ann_file)

        if pipeline is not None:
            self.pipeline = Compose(pipeline)

        self.with_velocity = with_velocity
        self.det3d_eval_version = det3d_eval_version
        self.det3d_eval_configs = det_configs(self.det3d_eval_version)
        self.track3d_eval_version = track3d_eval_version
        self.track3d_eval_configs = track_configs(self.track3d_eval_version)
        if self.modality is None:
            self.modality = dict(
                use_camera=False,
                use_lidar=True,
                use_radar=False,
                use_map=False,
                use_external=False,
            )
        self.vis_score_threshold = vis_score_threshold

        self.data_aug_conf = data_aug_conf
        self.tracking = tracking
        self.tracking_threshold = tracking_threshold
        self.sequences_split_num = sequences_split_num
        self.keep_consistent_seq_aug = keep_consistent_seq_aug
        # Per-epoch augmentation/id state, managed externally by the sampler.
        self.current_aug = None
        self.last_id = None
        if with_seq_flag:
            self._set_sequence_group_flag()
    def __len__(self):
        """Return the number of keyframe samples in the dataset."""
        return len(self.data_infos)
    def _set_sequence_group_flag(self):
        """
        Set each sequence to be a different group
        """
        res = []

        curr_sequence = 0
        for idx in range(len(self.data_infos)):
            if idx != 0 and len(self.data_infos[idx]["sweeps"]) == 0:
                # Not first frame and # of sweeps is 0 -> new sequence
                curr_sequence += 1
            res.append(curr_sequence)

        # ``flag`` groups frames by scene; samplers rely on it to keep a
        # batch within one sequence.
        self.flag = np.array(res, dtype=np.int64)

        if self.sequences_split_num != 1:
            if self.sequences_split_num == "all":
                # Every frame becomes its own group.
                self.flag = np.array(
                    range(len(self.data_infos)), dtype=np.int64
                )
            else:
                # Split each sequence into ``sequences_split_num`` roughly
                # equal contiguous chunks and relabel them consecutively.
                bin_counts = np.bincount(self.flag)
                new_flags = []
                curr_new_flag = 0
                for curr_flag in range(len(bin_counts)):
                    # Chunk boundaries: 0, ceil(len/k), 2*ceil(len/k), ...,
                    # with the sequence length appended as the final bound.
                    curr_sequence_length = np.array(
                        list(
                            range(
                                0,
                                bin_counts[curr_flag],
                                math.ceil(
                                    bin_counts[curr_flag]
                                    / self.sequences_split_num
                                ),
                            )
                        )
                        + [bin_counts[curr_flag]]
                    )
                    # Consecutive-boundary differences give each chunk size.
                    for sub_seq_idx in (
                        curr_sequence_length[1:] - curr_sequence_length[:-1]
                    ):
                        for _ in range(sub_seq_idx):
                            new_flags.append(curr_new_flag)
                        curr_new_flag += 1

                assert len(new_flags) == len(self.flag)
                assert (
                    len(np.bincount(new_flags))
                    == len(np.bincount(self.flag))
                    * self.sequences_split_num
                )
                self.flag = np.array(new_flags, dtype=np.int64)
    def get_augmentation(self):
        """Sample one image-augmentation config, or None if disabled.

        Training mode draws random resize/crop/flip/rotate parameters from
        ``data_aug_conf``; test mode produces a deterministic center-ish crop
        with no flip or rotation.
        """
        if self.data_aug_conf is None:
            return None
        H, W = self.data_aug_conf["H"], self.data_aug_conf["W"]
        fH, fW = self.data_aug_conf["final_dim"]
        if not self.test_mode:
            # Random scale, then a crop anchored near the bottom of the
            # resized image (bot_pct_lim controls how much bottom is kept).
            resize = np.random.uniform(*self.data_aug_conf["resize_lim"])
            resize_dims = (int(W * resize), int(H * resize))
            newW, newH = resize_dims
            crop_h = (
                int(
                    (1 - np.random.uniform(*self.data_aug_conf["bot_pct_lim"]))
                    * newH
                )
                - fH
            )
            crop_w = int(np.random.uniform(0, max(0, newW - fW)))
            crop = (crop_w, crop_h, crop_w + fW, crop_h + fH)
            flip = False
            if self.data_aug_conf["rand_flip"] and np.random.choice([0, 1]):
                flip = True
            rotate = np.random.uniform(*self.data_aug_conf["rot_lim"])
            rotate_3d = np.random.uniform(*self.data_aug_conf["rot3d_range"])
        else:
            # Deterministic eval-time transform: scale so the final dim fits,
            # center the horizontal crop, use the mean bottom percentage.
            resize = max(fH / H, fW / W)
            resize_dims = (int(W * resize), int(H * resize))
            newW, newH = resize_dims
            crop_h = (
                int((1 - np.mean(self.data_aug_conf["bot_pct_lim"])) * newH)
                - fH
            )
            crop_w = int(max(0, newW - fW) / 2)
            crop = (crop_w, crop_h, crop_w + fW, crop_h + fH)
            flip = False
            rotate = 0
            rotate_3d = 0
        aug_config = {
            "resize": resize,
            "resize_dims": resize_dims,
            "crop": crop,
            "flip": flip,
            "rotate": rotate,
            "rotate_3d": rotate_3d,
        }
        return aug_config
def
__getitem__
(
self
,
idx
):
if
isinstance
(
idx
,
dict
):
aug_config
=
idx
[
"aug_config"
]
idx
=
idx
[
"idx"
]
else
:
aug_config
=
self
.
get_augmentation
()
data
=
self
.
get_data_info
(
idx
)
data
[
"aug_config"
]
=
aug_config
data
=
self
.
pipeline
(
data
)
return
data
def
get_cat_ids
(
self
,
idx
):
info
=
self
.
data_infos
[
idx
]
if
self
.
use_valid_flag
:
mask
=
info
[
"valid_flag"
]
gt_names
=
set
(
info
[
"gt_names"
][
mask
])
else
:
gt_names
=
set
(
info
[
"gt_names"
])
cat_ids
=
[]
for
name
in
gt_names
:
if
name
in
self
.
CLASSES
:
cat_ids
.
append
(
self
.
cat2id
[
name
])
return
cat_ids
def
load_annotations
(
self
,
ann_file
):
data
=
mmcv
.
load
(
ann_file
,
file_format
=
"pkl"
)
data_infos
=
list
(
sorted
(
data
[
"infos"
],
key
=
lambda
e
:
e
[
"timestamp"
]))
data_infos
=
data_infos
[::
self
.
load_interval
]
self
.
metadata
=
data
[
"metadata"
]
self
.
version
=
self
.
metadata
[
"version"
]
print
(
self
.
metadata
)
return
data_infos
    def get_data_info(self, index):
        """Assemble the raw input dict for sample ``index``.

        Includes lidar path, sweeps, pose transforms, and (if cameras are
        enabled) per-camera image paths, lidar-to-image projections, and
        intrinsics. Annotations are merged in unless in test mode.
        """
        info = self.data_infos[index]
        # standard protocol modified from SECOND.Pytorch
        input_dict = dict(
            sample_idx=info["token"],
            pts_filename=info["lidar_path"],
            sweeps=info["sweeps"],
            # info timestamps are in microseconds; convert to seconds.
            timestamp=info["timestamp"] / 1e6,
            lidar2ego_translation=info["lidar2ego_translation"],
            lidar2ego_rotation=info["lidar2ego_rotation"],
            ego2global_translation=info["ego2global_translation"],
            ego2global_rotation=info["ego2global_rotation"],
        )
        # Compose lidar->ego and ego->global into one 4x4 lidar->global.
        lidar2ego = np.eye(4)
        lidar2ego[:3, :3] = pyquaternion.Quaternion(
            info["lidar2ego_rotation"]
        ).rotation_matrix
        lidar2ego[:3, 3] = np.array(info["lidar2ego_translation"])
        ego2global = np.eye(4)
        ego2global[:3, :3] = pyquaternion.Quaternion(
            info["ego2global_rotation"]
        ).rotation_matrix
        ego2global[:3, 3] = np.array(info["ego2global_translation"])
        input_dict["lidar2global"] = ego2global @ lidar2ego

        if self.modality["use_camera"]:
            image_paths = []
            lidar2img_rts = []
            cam_intrinsic = []
            for cam_type, cam_info in info["cams"].items():
                image_paths.append(cam_info["data_path"])
                # obtain lidar to image transformation matrix
                lidar2cam_r = np.linalg.inv(cam_info["sensor2lidar_rotation"])
                lidar2cam_t = (
                    cam_info["sensor2lidar_translation"] @ lidar2cam_r.T
                )
                # NOTE: lidar2cam_rt is built transposed (rotation in the
                # upper-left as R.T, translation in the last ROW); it is
                # un-transposed below when multiplied with viewpad.
                lidar2cam_rt = np.eye(4)
                lidar2cam_rt[:3, :3] = lidar2cam_r.T
                lidar2cam_rt[3, :3] = -lidar2cam_t
                intrinsic = copy.deepcopy(cam_info["cam_intrinsic"])
                cam_intrinsic.append(intrinsic)
                # Pad the 3x3 intrinsic matrix into a 4x4 projection.
                viewpad = np.eye(4)
                viewpad[: intrinsic.shape[0], : intrinsic.shape[1]] = intrinsic
                lidar2img_rt = viewpad @ lidar2cam_rt.T
                lidar2img_rts.append(lidar2img_rt)
            input_dict.update(
                dict(
                    img_filename=image_paths,
                    lidar2img=lidar2img_rts,
                    cam_intrinsic=cam_intrinsic,
                )
            )
        if not self.test_mode:
            annos = self.get_ann_info(index)
            input_dict.update(annos)
        return input_dict
def
get_ann_info
(
self
,
index
):
info
=
self
.
data_infos
[
index
]
if
self
.
use_valid_flag
:
mask
=
info
[
"valid_flag"
]
else
:
mask
=
info
[
"num_lidar_pts"
]
>
0
gt_bboxes_3d
=
info
[
"gt_boxes"
][
mask
]
gt_names_3d
=
info
[
"gt_names"
][
mask
]
gt_labels_3d
=
[]
for
cat
in
gt_names_3d
:
if
cat
in
self
.
CLASSES
:
gt_labels_3d
.
append
(
self
.
CLASSES
.
index
(
cat
))
else
:
gt_labels_3d
.
append
(
-
1
)
gt_labels_3d
=
np
.
array
(
gt_labels_3d
)
if
self
.
with_velocity
:
gt_velocity
=
info
[
"gt_velocity"
][
mask
]
nan_mask
=
np
.
isnan
(
gt_velocity
[:,
0
])
gt_velocity
[
nan_mask
]
=
[
0.0
,
0.0
]
gt_bboxes_3d
=
np
.
concatenate
([
gt_bboxes_3d
,
gt_velocity
],
axis
=-
1
)
anns_results
=
dict
(
gt_bboxes_3d
=
gt_bboxes_3d
,
gt_labels_3d
=
gt_labels_3d
,
gt_names
=
gt_names_3d
,
)
if
"instance_inds"
in
info
:
instance_inds
=
np
.
array
(
info
[
"instance_inds"
],
dtype
=
np
.
int
)[
mask
]
anns_results
[
"instance_inds"
]
=
instance_inds
return
anns_results
    def _format_bbox(self, results, jsonfile_prefix=None, tracking=False):
        """Convert per-sample results into a nuScenes submission JSON.

        Boxes are converted to global coordinates, range-filtered per class,
        assigned attributes via a velocity heuristic, and written to
        ``<jsonfile_prefix>/results_nusc.json``. With ``tracking=True`` the
        tracking submission schema is used instead of the detection one.
        Returns the path of the written JSON file.
        """
        nusc_annos = {}
        mapped_class_names = self.CLASSES

        print("Start to convert detection format...")
        for sample_id, det in enumerate(mmcv.track_iter_progress(results)):
            annos = []
            # Tracking submissions apply a score threshold; detection keeps all.
            boxes = output_to_nusc_box(
                det, threshold=self.tracking_threshold if tracking else None
            )
            sample_token = self.data_infos[sample_id]["token"]
            boxes = lidar_nusc_box_to_global(
                self.data_infos[sample_id],
                boxes,
                mapped_class_names,
                self.det3d_eval_configs,
                self.det3d_eval_version,
            )
            for i, box in enumerate(boxes):
                name = mapped_class_names[box.label]
                # These classes are not part of the tracking challenge.
                if tracking and name in [
                    "barrier",
                    "traffic_cone",
                    "construction_vehicle",
                ]:
                    continue

                # Speed > 0.2 m/s => "moving"-style attributes; otherwise
                # standing/stopped/parked defaults.
                if np.sqrt(box.velocity[0] ** 2 + box.velocity[1] ** 2) > 0.2:
                    if name in [
                        "car",
                        "construction_vehicle",
                        "bus",
                        "truck",
                        "trailer",
                    ]:
                        attr = "vehicle.moving"
                    elif name in ["bicycle", "motorcycle"]:
                        attr = "cycle.with_rider"
                    else:
                        attr = NuScenes3DDetTrackDataset.DefaultAttribute[name]
                else:
                    if name in ["pedestrian"]:
                        attr = "pedestrian.standing"
                    elif name in ["bus"]:
                        attr = "vehicle.stopped"
                    else:
                        attr = NuScenes3DDetTrackDataset.DefaultAttribute[name]

                nusc_anno = dict(
                    sample_token=sample_token,
                    translation=box.center.tolist(),
                    size=box.wlh.tolist(),
                    rotation=box.orientation.elements.tolist(),
                    velocity=box.velocity[:2].tolist(),
                )
                if not tracking:
                    nusc_anno.update(
                        dict(
                            detection_name=name,
                            detection_score=box.score,
                            attribute_name=attr,
                        )
                    )
                else:
                    nusc_anno.update(
                        dict(
                            tracking_name=name,
                            tracking_score=box.score,
                            tracking_id=str(box.token),
                        )
                    )
                annos.append(nusc_anno)
            nusc_annos[sample_token] = annos
        nusc_submissions = {
            "meta": self.modality,
            "results": nusc_annos,
        }
        mmcv.mkdir_or_exist(jsonfile_prefix)
        res_path = osp.join(jsonfile_prefix, "results_nusc.json")
        print("Results writes to", res_path)
        mmcv.dump(nusc_submissions, res_path)
        return res_path
    def _evaluate_single(
        self,
        result_path,
        logger=None,
        result_name="img_bbox",
        tracking=False,
    ):
        """Run the official nuScenes devkit evaluation on one result file.

        Uses the detection evaluator by default, or the tracking evaluator
        when ``tracking=True``. Metrics are read back from the
        ``metrics_summary.json`` the devkit writes next to ``result_path``
        and flattened into a ``"<result_name>_NuScenes/..."`` keyed dict.
        ``logger`` is currently unused.
        """
        from nuscenes import NuScenes

        # The devkit writes its summary into the result file's directory.
        output_dir = osp.join(*osp.split(result_path)[:-1])
        nusc = NuScenes(
            version=self.version, dataroot=self.data_root, verbose=False
        )
        eval_set_map = {
            "v1.0-mini": "mini_val",
            "v1.0-trainval": "val",
        }
        if not tracking:
            from nuscenes.eval.detection.evaluate import NuScenesEval

            nusc_eval = NuScenesEval(
                nusc,
                config=self.det3d_eval_configs,
                result_path=result_path,
                eval_set=eval_set_map[self.version],
                output_dir=output_dir,
                verbose=True,
            )
            nusc_eval.main(render_curves=False)

            # record metrics
            metrics = mmcv.load(osp.join(output_dir, "metrics_summary.json"))
            detail = dict()
            metric_prefix = f"{result_name}_NuScenes"
            for name in self.CLASSES:
                # Per-class AP at each distance threshold.
                for k, v in metrics["label_aps"][name].items():
                    val = float("{:.4f}".format(v))
                    detail[
                        "{}/{}_AP_dist_{}".format(metric_prefix, name, k)
                    ] = val
                # Per-class true-positive error metrics.
                for k, v in metrics["label_tp_errors"][name].items():
                    val = float("{:.4f}".format(v))
                    detail["{}/{}_{}".format(metric_prefix, name, k)] = val
            # Aggregated TP errors (mATE, mASE, ...).
            for k, v in metrics["tp_errors"].items():
                val = float("{:.4f}".format(v))
                detail[
                    "{}/{}".format(metric_prefix, self.ErrNameMapping[k])
                ] = val
            detail["{}/NDS".format(metric_prefix)] = metrics["nd_score"]
            detail["{}/mAP".format(metric_prefix)] = metrics["mean_ap"]
        else:
            from nuscenes.eval.tracking.evaluate import TrackingEval

            nusc_eval = TrackingEval(
                config=self.track3d_eval_configs,
                result_path=result_path,
                eval_set=eval_set_map[self.version],
                output_dir=output_dir,
                verbose=True,
                nusc_version=self.version,
                nusc_dataroot=self.data_root,
            )
            metrics = nusc_eval.main()

            # record metrics (re-read from the summary file written on disk)
            metrics = mmcv.load(osp.join(output_dir, "metrics_summary.json"))
            print(metrics)
            detail = dict()
            metric_prefix = f"{result_name}_NuScenes"
            keys = [
                "amota",
                "amotp",
                "recall",
                "motar",
                "gt",
                "mota",
                "motp",
                "mt",
                "ml",
                "faf",
                "tp",
                "fp",
                "fn",
                "ids",
                "frag",
                "tid",
                "lgd",
            ]
            for key in keys:
                detail["{}/{}".format(metric_prefix, key)] = metrics[key]
        return detail
def
format_results
(
self
,
results
,
jsonfile_prefix
=
None
,
tracking
=
False
):
assert
isinstance
(
results
,
list
),
"results must be a list"
if
jsonfile_prefix
is
None
:
tmp_dir
=
tempfile
.
TemporaryDirectory
()
jsonfile_prefix
=
osp
.
join
(
tmp_dir
.
name
,
"results"
)
else
:
tmp_dir
=
None
if
not
(
"pts_bbox"
in
results
[
0
]
or
"img_bbox"
in
results
[
0
]):
result_files
=
self
.
_format_bbox
(
results
,
jsonfile_prefix
,
tracking
=
tracking
)
else
:
result_files
=
dict
()
for
name
in
results
[
0
]:
print
(
f
"
\n
Formating bboxes of
{
name
}
"
)
results_
=
[
out
[
name
]
for
out
in
results
]
tmp_file_
=
osp
.
join
(
jsonfile_prefix
,
name
)
result_files
.
update
(
{
name
:
self
.
_format_bbox
(
results_
,
tmp_file_
,
tracking
=
tracking
)
}
)
return
result_files
,
tmp_dir
def
evaluate
(
self
,
results
,
metric
=
None
,
logger
=
None
,
jsonfile_prefix
=
None
,
result_names
=
[
"img_bbox"
],
show
=
False
,
out_dir
=
None
,
pipeline
=
None
,
):
for
metric
in
[
"detection"
,
"tracking"
]:
tracking
=
metric
==
"tracking"
if
tracking
and
not
self
.
tracking
:
continue
result_files
,
tmp_dir
=
self
.
format_results
(
results
,
jsonfile_prefix
,
tracking
=
tracking
)
if
isinstance
(
result_files
,
dict
):
results_dict
=
dict
()
for
name
in
result_names
:
ret_dict
=
self
.
_evaluate_single
(
result_files
[
name
],
tracking
=
tracking
)
results_dict
.
update
(
ret_dict
)
elif
isinstance
(
result_files
,
str
):
results_dict
=
self
.
_evaluate_single
(
result_files
,
tracking
=
tracking
)
if
tmp_dir
is
not
None
:
tmp_dir
.
cleanup
()
if
show
or
out_dir
:
self
.
show
(
results
,
save_dir
=
out_dir
,
show
=
show
,
pipeline
=
pipeline
)
return
results_dict
    def show(self, results, save_dir=None, show=False, pipeline=None):
        """Render predictions onto the 6 camera views plus a BEV panel.

        Writes one JPEG per sample and an MJPG video into
        ``<save_dir>/visual``. The ``show`` flag is accepted but not used
        here (no interactive display is opened).
        """
        save_dir = "./" if save_dir is None else save_dir
        save_dir = os.path.join(save_dir, "visual")
        print_log(os.path.abspath(save_dir))
        pipeline = Compose(pipeline)
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)
        fourcc = cv2.VideoWriter_fourcc(*"MJPG")
        videoWriter = None

        for i, result in enumerate(results):
            if "img_bbox" in result.keys():
                result = result["img_bbox"]
            # Re-run the (visualization) pipeline to recover raw images and
            # the lidar-to-image projections for sample i.
            data_info = pipeline(self.get_data_info(i))
            imgs = []

            raw_imgs = data_info["img"]
            lidar2img = data_info["img_metas"].data["lidar2img"]
            # Keep only confident predictions.
            pred_bboxes_3d = result["boxes_3d"][
                result["scores_3d"] > self.vis_score_threshold
            ]
            # Color per box: by track id when tracking, else by class label,
            # else a single fallback color.
            if "instance_ids" in result and self.tracking:
                color = []
                for id in result["instance_ids"].cpu().numpy().tolist():
                    color.append(
                        self.ID_COLOR_MAP[int(id % len(self.ID_COLOR_MAP))]
                    )
            elif "labels_3d" in result:
                color = []
                for id in result["labels_3d"].cpu().numpy().tolist():
                    color.append(self.ID_COLOR_MAP[id])
            else:
                color = (255, 0, 0)

            # ===== draw boxes_3d to images =====
            for j, img_origin in enumerate(raw_imgs):
                img = img_origin.copy()
                if len(pred_bboxes_3d) != 0:
                    img = draw_lidar_bbox3d_on_img(
                        pred_bboxes_3d,
                        img,
                        lidar2img[j],
                        img_metas=None,
                        color=color,
                        thickness=3,
                    )
                imgs.append(img)

            # ===== draw boxes_3d to BEV =====
            # NOTE(review): ``img`` here is the last camera image from the
            # loop above; assumes at least one camera view exists.
            bev = draw_lidar_bbox3d_on_bev(
                pred_bboxes_3d,
                bev_size=img.shape[0] * 2,
                color=color,
            )

            # ===== put text and concat =====
            # Camera names in nuScenes info-dict order (front/rear layout).
            for j, name in enumerate(
                [
                    "front",
                    "front right",
                    "front left",
                    "rear",
                    "rear left",
                    "rear right",
                ]
            ):
                # White label background in the top-left corner.
                imgs[j] = cv2.rectangle(
                    imgs[j],
                    (0, 0),
                    (440, 80),
                    color=(255, 255, 255),
                    thickness=-1,
                )
                w, h = cv2.getTextSize(
                    name, cv2.FONT_HERSHEY_SIMPLEX, 2, 2
                )[0]
                text_x = int(220 - w / 2)
                text_y = int(40 + h / 2)
                imgs[j] = cv2.putText(
                    imgs[j],
                    name,
                    (text_x, text_y),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    2,
                    (0, 0, 0),
                    2,
                    cv2.LINE_AA,
                )
            # 2x3 camera mosaic: front row (left/center/right), rear row.
            image = np.concatenate(
                [
                    np.concatenate([imgs[2], imgs[0], imgs[1]], axis=1),
                    np.concatenate([imgs[5], imgs[3], imgs[4]], axis=1),
                ],
                axis=0,
            )
            image = np.concatenate([image, bev], axis=1)

            # ===== save video =====
            if videoWriter is None:
                # Lazily open the writer once the frame size is known (7 fps).
                videoWriter = cv2.VideoWriter(
                    os.path.join(save_dir, "video.avi"),
                    fourcc,
                    7,
                    image.shape[:2][::-1],
                )
            cv2.imwrite(os.path.join(save_dir, f"{i}.jpg"), image)
            videoWriter.write(image)
        videoWriter.release()
def output_to_nusc_box(detection, threshold=None):
    """Convert one sample's detection dict into a list of NuScenesBox.

    Args:
        detection (dict): Contains "boxes_3d", "scores_3d", "labels_3d",
            and optionally "instance_ids" and "cls_scores".
        threshold (float | None): If given, drop boxes whose score (from
            "cls_scores" when present, else "scores_3d") is below it.

    Returns:
        list[NuScenesBox]: Converted boxes; ``box.token`` carries the
        instance id when "instance_ids" is available.
    """
    box3d = detection["boxes_3d"]
    scores = detection["scores_3d"].numpy()
    labels = detection["labels_3d"].numpy()
    has_ids = "instance_ids" in detection
    if has_ids:
        ids = detection["instance_ids"]  # .numpy()
    if threshold is not None:
        if "cls_scores" in detection:
            mask = detection["cls_scores"].numpy() >= threshold
        else:
            mask = scores >= threshold
        box3d = box3d[mask]
        scores = scores[mask]
        labels = labels[mask]
        if has_ids:
            # BUGFIX: previously ``ids`` was indexed unconditionally here,
            # raising NameError when "instance_ids" was absent but a
            # threshold was supplied.
            ids = ids[mask]

    if hasattr(box3d, "gravity_center"):
        # Structured box object (e.g. LiDARInstance3DBoxes-like).
        box_gravity_center = box3d.gravity_center.numpy()
        box_dims = box3d.dims.numpy()
        # nuScenes expects (w, l, h); swap the first two dims.
        nus_box_dims = box_dims[:, [1, 0, 2]]
        box_yaw = box3d.yaw.numpy()
    else:
        # Plain tensor layout: [x, y, z, dx, dy, dz, yaw, (vx, vy)].
        box3d = box3d.numpy()
        box_gravity_center = box3d[..., :3].copy()
        box_dims = box3d[..., 3:6].copy()
        nus_box_dims = box_dims[..., [1, 0, 2]]
        box_yaw = box3d[..., 6].copy()

    # TODO: check whether this is necessary
    # with dir_offset & dir_limit in the head
    # box_yaw = -box_yaw - np.pi / 2

    box_list = []
    for i in range(len(box3d)):
        quat = pyquaternion.Quaternion(axis=[0, 0, 1], radians=box_yaw[i])
        if hasattr(box3d, "gravity_center"):
            velocity = (*box3d.tensor[i, 7:9], 0.0)
        else:
            velocity = (*box3d[i, 7:9], 0.0)
        box = NuScenesBox(
            box_gravity_center[i],
            nus_box_dims[i],
            quat,
            label=labels[i],
            score=scores[i],
            velocity=velocity,
        )
        if has_ids:
            box.token = ids[i]
        box_list.append(box)
    return box_list
def lidar_nusc_box_to_global(
    info,
    boxes,
    classes,
    eval_configs,
    eval_version="detection_cvpr_2019",
):
    """Transform boxes from the lidar frame to the global frame.

    Boxes whose ego-frame XY distance exceeds the per-class evaluation
    range are dropped (filtering happens in the ego frame, before the
    ego-to-global step). Boxes are mutated in place.

    Args:
        info (dict): Sample info with lidar2ego / ego2global transforms.
        boxes (list[NuScenesBox]): Boxes in the lidar frame.
        classes (list[str]): Class names indexed by ``box.label``.
        eval_configs: Devkit config providing ``class_range``.
        eval_version (str): Kept for interface compatibility (unused).

    Returns:
        list[NuScenesBox]: Surviving boxes in the global frame.
    """
    box_list = []
    # Loop-invariant lookup hoisted out of the per-box loop.
    cls_range_map = eval_configs.class_range
    for box in boxes:
        # Move box to ego vehicle coord system
        box.rotate(pyquaternion.Quaternion(info["lidar2ego_rotation"]))
        box.translate(np.array(info["lidar2ego_translation"]))
        # filter det in ego.
        radius = np.linalg.norm(box.center[:2], 2)
        det_range = cls_range_map[classes[box.label]]
        if radius > det_range:
            continue
        # Move box to global coord system
        box.rotate(pyquaternion.Quaternion(info["ego2global_rotation"]))
        box.translate(np.array(info["ego2global_translation"]))
        box_list.append(box)
    return box_list
projects/mmdet3d_plugin/datasets/pipelines/__init__.py
0 → 100644
View file @
7c0c60e3
from
.transform
import
(
InstanceNameFilter
,
CircleObjectRangeFilter
,
NormalizeMultiviewImage
,
NuScenesSparse4DAdaptor
,
MultiScaleDepthMapGenerator
,
)
from
.augment
import
(
ResizeCropFlipImage
,
BBoxRotation
,
PhotoMetricDistortionMultiViewImage
,
)
from
.loading
import
LoadMultiViewImageFromFiles
,
LoadPointsFromFile
__all__
=
[
"InstanceNameFilter"
,
"ResizeCropFlipImage"
,
"BBoxRotation"
,
"CircleObjectRangeFilter"
,
"MultiScaleDepthMapGenerator"
,
"NormalizeMultiviewImage"
,
"PhotoMetricDistortionMultiViewImage"
,
"NuScenesSparse4DAdaptor"
,
"LoadMultiViewImageFromFiles"
,
"LoadPointsFromFile"
,
]
projects/mmdet3d_plugin/datasets/pipelines/augment.py
0 → 100644
View file @
7c0c60e3
import
torch
import
numpy
as
np
from
numpy
import
random
import
mmcv
from
mmdet.datasets.builder
import
PIPELINES
from
PIL
import
Image
@PIPELINES.register_module()
class ResizeCropFlipImage(object):
    """Apply the sampled resize/crop/flip/rotate augmentation to all views.

    Reads ``results["aug_config"]`` (produced by the dataset) and updates
    the images, the lidar-to-image projections, and (partially) the camera
    intrinsics accordingly.
    """

    def __call__(self, results):
        aug_config = results.get("aug_config")
        if aug_config is None:
            # No augmentation config attached — pass sample through.
            return results
        imgs = results["img"]
        N = len(imgs)
        new_imgs = []
        for i in range(N):
            img, mat = self._img_transform(
                np.uint8(imgs[i]),
                aug_config,
            )
            new_imgs.append(np.array(img).astype(np.float32))
            # Left-compose the 2D image transform onto the 3D->image matrix.
            results["lidar2img"][i] = mat @ results["lidar2img"][i]
            if "cam_intrinsic" in results:
                # Only the resize is folded into the intrinsics here;
                # crop/flip/rotate are carried by lidar2img instead.
                results["cam_intrinsic"][i][:3, :3] *= aug_config["resize"]
                # results["cam_intrinsic"][i][:3, :3] = (
                #     mat[:3, :3] @ results["cam_intrinsic"][i][:3, :3]
                # )

        results["img"] = new_imgs
        results["img_shape"] = [x.shape[:2] for x in new_imgs]
        return results

    def _img_transform(self, img, aug_configs):
        """Resize, crop, flip, rotate one image; return it with the 4x4
        homogeneous matrix describing the equivalent pixel transform."""
        H, W = img.shape[:2]
        resize = aug_configs.get("resize", 1)
        resize_dims = (int(W * resize), int(H * resize))
        crop = aug_configs.get("crop", [0, 0, *resize_dims])
        flip = aug_configs.get("flip", False)
        rotate = aug_configs.get("rotate", 0)

        origin_dtype = img.dtype
        if origin_dtype != np.uint8:
            # PIL needs uint8: min-max scale to [0, 255], undone after.
            min_value = img.min()
            max_vaule = img.max()
            scale = 255 / (max_vaule - min_value)
            img = (img - min_value) * scale
            img = np.uint8(img)
        img = Image.fromarray(img)
        img = img.resize(resize_dims).crop(crop)
        if flip:
            img = img.transpose(method=Image.FLIP_LEFT_RIGHT)
        img = img.rotate(rotate)
        img = np.array(img).astype(np.float32)
        if origin_dtype != np.uint8:
            img = img.astype(np.float32)
            img = img / scale + min_value

        # Build the matching 3x3 pixel transform: scale, then crop shift.
        transform_matrix = np.eye(3)
        transform_matrix[:2, :2] *= resize
        transform_matrix[:2, 2] -= np.array(crop[:2])
        if flip:
            # Horizontal flip about the cropped image's width.
            flip_matrix = np.array(
                [[-1, 0, crop[2] - crop[0]], [0, 1, 0], [0, 0, 1]]
            )
            transform_matrix = flip_matrix @ transform_matrix
        rotate = rotate / 180 * np.pi
        rot_matrix = np.array(
            [
                [np.cos(rotate), np.sin(rotate), 0],
                [-np.sin(rotate), np.cos(rotate), 0],
                [0, 0, 1],
            ]
        )
        # Rotate about the cropped image center (matches PIL's rotate).
        rot_center = np.array([crop[2] - crop[0], crop[3] - crop[1]]) / 2
        rot_matrix[:2, 2] = -rot_matrix[:2, :2] @ rot_center + rot_center
        transform_matrix = rot_matrix @ transform_matrix
        # Embed the 3x3 pixel transform into a 4x4 homogeneous matrix.
        extend_matrix = np.eye(4)
        extend_matrix[:3, :3] = transform_matrix
        return img, extend_matrix
@PIPELINES.register_module()
class BBoxRotation(object):
    """Rotate the whole 3D scene (boxes + transforms) about the Z axis.

    The rotation angle comes from ``results["aug_config"]["rotate_3d"]``
    (radians). Projection matrices are right-multiplied by the inverse
    rotation so that rotated lidar points still project correctly.
    """

    def __call__(self, results):
        angle = results["aug_config"]["rotate_3d"]
        rot_cos = np.cos(angle)
        rot_sin = np.sin(angle)
        # 4x4 homogeneous Z-axis rotation.
        rot_mat = np.array(
            [
                [rot_cos, -rot_sin, 0, 0],
                [rot_sin, rot_cos, 0, 0],
                [0, 0, 1, 0],
                [0, 0, 0, 1],
            ]
        )
        rot_mat_inv = np.linalg.inv(rot_mat)

        num_view = len(results["lidar2img"])
        for view in range(num_view):
            results["lidar2img"][view] = (
                results["lidar2img"][view] @ rot_mat_inv
            )
        if "lidar2global" in results:
            results["lidar2global"] = results["lidar2global"] @ rot_mat_inv
        if "gt_bboxes_3d" in results:
            results["gt_bboxes_3d"] = self.box_rotate(
                results["gt_bboxes_3d"], angle
            )
        return results

    @staticmethod
    def box_rotate(bbox_3d, angle):
        """Rotate boxes [x, y, z, ..., yaw, (vx, vy)] in place by ``angle``."""
        rot_cos = np.cos(angle)
        rot_sin = np.sin(angle)
        # Transposed rotation: row-vector centers are multiplied from the right.
        rot_mat_T = np.array(
            [[rot_cos, rot_sin, 0], [-rot_sin, rot_cos, 0], [0, 0, 1]]
        )
        bbox_3d[:, :3] = bbox_3d[:, :3] @ rot_mat_T
        bbox_3d[:, 6] += angle
        if bbox_3d.shape[-1] > 7:
            # Rotate the (vx, vy) velocity components with the XY sub-matrix.
            vel_dims = bbox_3d[:, 7:].shape[-1]
            bbox_3d[:, 7:] = (
                bbox_3d[:, 7:] @ rot_mat_T[:vel_dims, :vel_dims]
            )
        return bbox_3d
@PIPELINES.register_module()
class PhotoMetricDistortionMultiViewImage:
    """Apply photometric distortion to image sequentially, every transformation
    is applied with a probability of 0.5. The position of random contrast is in
    second or second to last.
    1. random brightness
    2. random contrast (mode 0)
    3. convert color from BGR to HSV
    4. random saturation
    5. random hue
    6. convert color from HSV to BGR
    7. random contrast (mode 1)
    8. randomly swap channels
    Args:
        brightness_delta (int): delta of brightness.
        contrast_range (tuple): range of contrast.
        saturation_range (tuple): range of saturation.
        hue_delta (int): delta of hue.
    """

    # NOTE(review): the module-level ``random`` used below appears to be
    # numpy.random (one-argument ``randint`` and ``permutation`` do not exist
    # in the stdlib ``random`` module) — confirm against the file's imports.

    def __init__(
        self,
        brightness_delta=32,
        contrast_range=(0.5, 1.5),
        saturation_range=(0.5, 1.5),
        hue_delta=18,
    ):
        self.brightness_delta = brightness_delta
        self.contrast_lower, self.contrast_upper = contrast_range
        self.saturation_lower, self.saturation_upper = saturation_range
        self.hue_delta = hue_delta

    def __call__(self, results):
        """Call function to perform photometric distortion on images.
        Args:
            results (dict): Result dict from loading pipeline.
        Returns:
            dict: Result dict with images distorted.
        """
        imgs = results["img"]
        new_imgs = []
        for img in imgs:
            assert img.dtype == np.float32, (
                "PhotoMetricDistortion needs the input image of dtype np.float32,"
                ' please set "to_float32=True" in "LoadImageFromFile" pipeline'
            )
            # random brightness
            if random.randint(2):
                delta = random.uniform(
                    -self.brightness_delta, self.brightness_delta
                )
                # In-place add: this mutates the array held in results["img"].
                img += delta
            # mode == 0 --> do random contrast first
            # mode == 1 --> do random contrast last
            mode = random.randint(2)
            if mode == 1:
                if random.randint(2):
                    alpha = random.uniform(
                        self.contrast_lower, self.contrast_upper
                    )
                    img *= alpha
            # convert color from BGR to HSV
            img = mmcv.bgr2hsv(img)
            # random saturation (HSV channel 1)
            if random.randint(2):
                img[..., 1] *= random.uniform(
                    self.saturation_lower, self.saturation_upper
                )
            # random hue (HSV channel 0); keep hue within [0, 360]
            if random.randint(2):
                img[..., 0] += random.uniform(-self.hue_delta, self.hue_delta)
                img[..., 0][img[..., 0] > 360] -= 360
                img[..., 0][img[..., 0] < 0] += 360
            # convert color from HSV to BGR
            img = mmcv.hsv2bgr(img)
            # random contrast
            if mode == 0:
                if random.randint(2):
                    alpha = random.uniform(
                        self.contrast_lower, self.contrast_upper
                    )
                    img *= alpha
            # randomly swap channels
            if random.randint(2):
                img = img[..., random.permutation(3)]
            new_imgs.append(img)
        results["img"] = new_imgs
        return results

    def __repr__(self):
        repr_str = self.__class__.__name__
        repr_str += f"(\nbrightness_delta={self.brightness_delta},\n"
        repr_str += "contrast_range="
        repr_str += f"{(self.contrast_lower, self.contrast_upper)},\n"
        repr_str += "saturation_range="
        repr_str += f"{(self.saturation_lower, self.saturation_upper)},\n"
        repr_str += f"hue_delta={self.hue_delta})"
        return repr_str
projects/mmdet3d_plugin/datasets/pipelines/loading.py
0 → 100644
View file @
7c0c60e3
import
numpy
as
np
import
mmcv
from
mmdet.datasets.builder
import
PIPELINES
@PIPELINES.register_module()
class LoadMultiViewImageFromFiles(object):
    """Load multi channel images from a list of separate channel files.
    Expects results['img_filename'] to be a list of filenames.
    Args:
        to_float32 (bool, optional): Whether to convert the img to float32.
            Defaults to False.
        color_type (str, optional): Color type of the file.
            Defaults to 'unchanged'.
    """

    def __init__(self, to_float32=False, color_type="unchanged"):
        self.to_float32 = to_float32
        self.color_type = color_type

    def __call__(self, results):
        """Load every view listed in ``results["img_filename"]``.

        Args:
            results (dict): Result dict containing multi-view image filenames.
        Returns:
            dict: The result dict with added keys: ``filename``, ``img``
            (list of per-view arrays), ``img_shape``, ``ori_shape``,
            ``pad_shape``, ``scale_factor`` and ``img_norm_cfg``.
        """
        filenames = results["img_filename"]
        # Stack all views into one array of shape (h, w, c, num_views).
        stacked = np.stack(
            [mmcv.imread(fname, self.color_type) for fname in filenames],
            axis=-1,
        )
        if self.to_float32:
            stacked = stacked.astype(np.float32)
        results["filename"] = filenames
        # Unravel to a per-view list, see `DefaultFormatBundle` in
        # formatting.py, which transposes each image separately and then
        # stacks them back into one array.
        results["img"] = [
            stacked[..., view] for view in range(stacked.shape[-1])
        ]
        results["img_shape"] = stacked.shape
        results["ori_shape"] = stacked.shape
        # Initial values for the default meta keys.
        results["pad_shape"] = stacked.shape
        results["scale_factor"] = 1.0
        num_channels = 1 if stacked.ndim < 3 else stacked.shape[2]
        results["img_norm_cfg"] = dict(
            mean=np.zeros(num_channels, dtype=np.float32),
            std=np.ones(num_channels, dtype=np.float32),
            to_rgb=False,
        )
        return results

    def __repr__(self):
        """str: Return a string that describes the module."""
        repr_str = self.__class__.__name__
        repr_str += f"(to_float32={self.to_float32}, "
        repr_str += f"color_type='{self.color_type}')"
        return repr_str
@PIPELINES.register_module()
class LoadPointsFromFile(object):
    """Load Points From File.
    Load points from file.
    Args:
        coord_type (str): The type of coordinates of points cloud.
            Available options includes:
            - 'LIDAR': Points in LiDAR coordinates.
            - 'DEPTH': Points in depth coordinates, usually for indoor dataset.
            - 'CAMERA': Points in camera coordinates.
        load_dim (int, optional): The dimension of the loaded points.
            Defaults to 6.
        use_dim (list[int], optional): Which dimensions of the points to use.
            Defaults to [0, 1, 2]. For KITTI dataset, set use_dim=4
            or use_dim=[0, 1, 2, 3] to use the intensity dimension.
        shift_height (bool, optional): Whether to use shifted height.
            Defaults to False.
        use_color (bool, optional): Whether to use color features.
            Defaults to False.
        file_client_args (dict, optional): Config dict of file clients,
            refer to
            https://github.com/open-mmlab/mmcv/blob/master/mmcv/fileio/file_client.py
            for more details. Defaults to dict(backend='disk').
    """

    def __init__(
        self,
        coord_type,
        load_dim=6,
        use_dim=[0, 1, 2],
        shift_height=False,
        use_color=False,
        file_client_args=dict(backend="disk"),
    ):
        self.shift_height = shift_height
        self.use_color = use_color
        # An integer `use_dim` selects the first N dimensions.
        if isinstance(use_dim, int):
            use_dim = list(range(use_dim))
        assert (
            max(use_dim) < load_dim
        ), f"Expect all used dimensions < {load_dim}, got {use_dim}"
        assert coord_type in ["CAMERA", "LIDAR", "DEPTH"]

        self.coord_type = coord_type
        self.load_dim = load_dim
        self.use_dim = use_dim
        self.file_client_args = file_client_args.copy()
        # Lazily constructed on first load.
        self.file_client = None

    def _load_points(self, pts_filename):
        """Read raw float32 point data for ``pts_filename``.

        Uses the configured mmcv file client; on ``ConnectionError`` falls
        back to loading directly from local disk (``.npy`` or raw binary).
        """
        if self.file_client is None:
            self.file_client = mmcv.FileClient(**self.file_client_args)
        try:
            raw = self.file_client.get(pts_filename)
            loaded = np.frombuffer(raw, dtype=np.float32)
        except ConnectionError:
            mmcv.check_file_exist(pts_filename)
            if pts_filename.endswith(".npy"):
                loaded = np.load(pts_filename)
            else:
                loaded = np.fromfile(pts_filename, dtype=np.float32)
        return loaded

    def __call__(self, results):
        """Load the point cloud referenced by ``results["pts_filename"]``.

        Args:
            results (dict): Result dict containing point clouds data.
        Returns:
            dict: The result dict with the loaded ``points`` array added.
        """
        pts_filename = results["pts_filename"]
        points = self._load_points(pts_filename).reshape(-1, self.load_dim)
        points = points[:, self.use_dim]
        attribute_dims = None

        if self.shift_height:
            # NOTE(review): 0.99 here is the percentile *rank* given to
            # np.percentile (the 0.99th percentile, i.e. near the minimum z),
            # matching upstream mmdet3d — confirm this is intentional.
            floor_height = np.percentile(points[:, 2], 0.99)
            height = points[:, 2] - floor_height
            # Insert the shifted height as a new column after x, y, z.
            points = np.concatenate(
                [points[:, :3], np.expand_dims(height, 1), points[:, 3:]], 1
            )
            attribute_dims = dict(height=3)

        if self.use_color:
            assert len(self.use_dim) >= 6
            if attribute_dims is None:
                attribute_dims = dict()
            # Colors are assumed to occupy the last three columns.
            attribute_dims.update(
                dict(
                    color=[
                        points.shape[1] - 3,
                        points.shape[1] - 2,
                        points.shape[1] - 1,
                    ]
                )
            )

        results["points"] = points
        return results
projects/mmdet3d_plugin/datasets/pipelines/transform.py
0 → 100644
View file @
7c0c60e3
import
numpy
as
np
import
mmcv
from
mmcv.parallel
import
DataContainer
as
DC
from
mmdet.datasets.builder
import
PIPELINES
from
mmdet.datasets.pipelines
import
to_tensor
@PIPELINES.register_module()
class MultiScaleDepthMapGenerator(object):
    """Project lidar points into every camera view and rasterize them into
    sparse per-view depth maps, one map per configured downsample factor.

    Adds ``input_dict["gt_depth"]``: a list (one entry per downsample level)
    of stacked per-view depth maps; pixels with no projected point hold -1.
    """

    def __init__(self, downsample=1, max_depth=60):
        # Accept a single factor or a list of factors.
        if not isinstance(downsample, (list, tuple)):
            downsample = [downsample]
        self.downsample = downsample
        # Depth values are clipped to [0.1, max_depth].
        self.max_depth = max_depth

    def __call__(self, input_dict):
        # (N, 3, 1) column vectors so the 3x3 projection can matmul them.
        points = input_dict["points"][..., :3, None]
        gt_depth = []
        for i, lidar2img in enumerate(input_dict["lidar2img"]):
            H, W = input_dict["img_shape"][i][:2]

            # Apply rotation/scale then translation of the 3x4 projection.
            pts_2d = (
                np.squeeze(lidar2img[:3, :3] @ points, axis=-1)
                + lidar2img[:3, 3]
            )
            # Perspective divide: pixel coords = homogeneous xy / depth.
            pts_2d[:, :2] /= pts_2d[:, 2:3]
            U = np.round(pts_2d[:, 0]).astype(np.int32)
            V = np.round(pts_2d[:, 1]).astype(np.int32)
            depths = pts_2d[:, 2]
            # Keep points inside the image and in front of the camera.
            mask = np.logical_and.reduce(
                [
                    V >= 0,
                    V < H,
                    U >= 0,
                    U < W,
                    depths >= 0.1,
                    # depths <= self.max_depth,
                ]
            )
            V, U, depths = V[mask], U[mask], depths[mask]
            # Sort far-to-near: when several points land on the same pixel,
            # the last fancy-index assignment wins, so the nearest depth is
            # the one kept in the map.
            sort_idx = np.argsort(depths)[::-1]
            V, U, depths = V[sort_idx], U[sort_idx], depths[sort_idx]
            depths = np.clip(depths, 0.1, self.max_depth)
            for j, downsample in enumerate(self.downsample):
                # Lazily create one list of per-view maps per level.
                if len(gt_depth) < j + 1:
                    gt_depth.append([])
                h, w = (int(H / downsample), int(W / downsample))
                u = np.floor(U / downsample).astype(np.int32)
                v = np.floor(V / downsample).astype(np.int32)
                # -1 marks pixels with no lidar return.
                depth_map = np.ones([h, w], dtype=np.float32) * -1
                depth_map[v, u] = depths
                gt_depth[j].append(depth_map)
        # Stack per-view maps: one (num_views, h, w) array per level.
        input_dict["gt_depth"] = [np.stack(x) for x in gt_depth]
        return input_dict
@PIPELINES.register_module()
class NuScenesSparse4DAdaptor(object):
    """Adapt a nuScenes pipeline dict to the tensor layout Sparse4D expects.

    Stacks projection matrices, derives image sizes and focal lengths,
    normalizes ground-truth yaw to [-pi, pi), wraps labels/boxes/images in
    ``DataContainer``s and transposes images to CHW.
    """

    def __init__(self):
        # BUGFIX: this method was previously misspelled ``__init`` and was
        # therefore never invoked as the initializer (it is a no-op, so no
        # behavior changes — the typo is fixed for correctness/clarity).
        pass

    def __call__(self, input_dict):
        """Convert the loaded sample in place and return it.

        Args:
            input_dict (dict): pipeline results with at least ``lidar2img``,
                ``img_shape``, ``lidar2global`` and ``img``.
        Returns:
            dict: the same dict with Sparse4D-specific keys added/converted.
        """
        input_dict["projection_mat"] = np.float32(
            np.stack(input_dict["lidar2img"])
        )
        # img_shape rows are (h, w, ...); reverse the first two to get (w, h).
        input_dict["image_wh"] = np.ascontiguousarray(
            np.array(input_dict["img_shape"], dtype=np.float32)[:, :2][:, ::-1]
        )
        input_dict["T_global_inv"] = np.linalg.inv(input_dict["lidar2global"])
        input_dict["T_global"] = input_dict["lidar2global"]
        if "cam_intrinsic" in input_dict:
            input_dict["cam_intrinsic"] = np.float32(
                np.stack(input_dict["cam_intrinsic"])
            )
            # Use fx (element [0, 0] of each intrinsic matrix) as the focal.
            input_dict["focal"] = input_dict["cam_intrinsic"][..., 0, 0]
            # input_dict["focal"] = np.sqrt(
            #     np.abs(np.linalg.det(input_dict["cam_intrinsic"][:, :2, :2]))
            # )
        if "instance_inds" in input_dict:
            input_dict["instance_id"] = input_dict["instance_inds"]
        if "gt_bboxes_3d" in input_dict:
            # Wrap yaw (column 6) into [-pi, pi).
            input_dict["gt_bboxes_3d"][:, 6] = self.limit_period(
                input_dict["gt_bboxes_3d"][:, 6], offset=0.5, period=2 * np.pi
            )
            input_dict["gt_bboxes_3d"] = DC(
                to_tensor(input_dict["gt_bboxes_3d"]).float()
            )
        if "gt_labels_3d" in input_dict:
            input_dict["gt_labels_3d"] = DC(
                to_tensor(input_dict["gt_labels_3d"]).long()
            )
        # HWC -> CHW per view, then stack to (num_views, C, H, W).
        imgs = [img.transpose(2, 0, 1) for img in input_dict["img"]]
        imgs = np.ascontiguousarray(np.stack(imgs, axis=0))
        input_dict["img"] = DC(to_tensor(imgs), stack=True)
        return input_dict

    def limit_period(
        self, val: np.ndarray, offset: float = 0.5, period: float = np.pi
    ) -> np.ndarray:
        """Wrap ``val`` into [-offset * period, (1 - offset) * period)."""
        limited_val = val - np.floor(val / period + offset) * period
        return limited_val
@PIPELINES.register_module()
class InstanceNameFilter(object):
    """Filter GT objects by their names.
    Args:
        classes (list[str]): List of class names to be kept for training.
    """

    def __init__(self, classes):
        self.classes = classes
        # Valid label ids are 0..len(classes)-1.
        self.labels = list(range(len(self.classes)))

    def __call__(self, input_dict):
        """Keep only ground-truth instances whose label is a valid class id.

        Args:
            input_dict (dict): Result dict from loading pipeline.
        Returns:
            dict: Results after filtering; 'gt_bboxes_3d', 'gt_labels_3d'
            (and 'instance_inds' when present) are updated in place.
        """
        gt_labels_3d = input_dict["gt_labels_3d"]
        keep = np.array(
            [label in self.labels for label in gt_labels_3d],
            dtype=np.bool_,
        )
        input_dict["gt_bboxes_3d"] = input_dict["gt_bboxes_3d"][keep]
        input_dict["gt_labels_3d"] = input_dict["gt_labels_3d"][keep]
        if "instance_inds" in input_dict:
            input_dict["instance_inds"] = input_dict["instance_inds"][keep]
        return input_dict

    def __repr__(self):
        """str: Return a string that describes the module."""
        repr_str = self.__class__.__name__
        repr_str += f"(classes={self.classes})"
        return repr_str
@PIPELINES.register_module()
class CircleObjectRangeFilter(object):
    """Filter ground-truth boxes by a per-class radial (BEV) distance limit.

    A box with label ``i`` is kept only when the Euclidean distance of its
    center from the origin in the x-y plane is <= ``class_dist_thred[i]``.
    Boxes whose label has no entry in the threshold list are dropped.

    Args:
        class_dist_thred (list[float] | None): per-class distance thresholds.
            ``None`` (the default) selects the built-in 10-class list
            ``[52.5]*5 + [31.5] + [42]*3 + [31.5]``.
    """

    def __init__(self, class_dist_thred=None):
        # BUGFIX: the threshold list was a mutable default argument, shared
        # across all instances; use a None sentinel instead (same default
        # values, backward-compatible signature).
        if class_dist_thred is None:
            class_dist_thred = [52.5] * 5 + [31.5] + [42] * 3 + [31.5]
        self.class_dist_thred = class_dist_thred

    def __call__(self, input_dict):
        """Filter 'gt_bboxes_3d' / 'gt_labels_3d' (and 'instance_inds')."""
        gt_bboxes_3d = input_dict["gt_bboxes_3d"]
        gt_labels_3d = input_dict["gt_labels_3d"]
        # BEV distance of each box center (columns 0, 1) from the origin.
        dist = np.sqrt(np.sum(gt_bboxes_3d[:, :2] ** 2, axis=-1))
        mask = np.array([False] * len(dist))
        for label_idx, dist_thred in enumerate(self.class_dist_thred):
            mask = np.logical_or(
                mask,
                np.logical_and(gt_labels_3d == label_idx, dist <= dist_thred),
            )
        gt_bboxes_3d = gt_bboxes_3d[mask]
        gt_labels_3d = gt_labels_3d[mask]

        input_dict["gt_bboxes_3d"] = gt_bboxes_3d
        input_dict["gt_labels_3d"] = gt_labels_3d
        if "instance_inds" in input_dict:
            input_dict["instance_inds"] = input_dict["instance_inds"][mask]
        return input_dict

    def __repr__(self):
        """str: Return a string that describes the module."""
        repr_str = self.__class__.__name__
        repr_str += f"(class_dist_thred={self.class_dist_thred})"
        return repr_str
@PIPELINES.register_module()
class NormalizeMultiviewImage(object):
    """Normalize the image.
    Added key is "img_norm_cfg".
    Args:
        mean (sequence): Mean values of 3 channels.
        std (sequence): Std values of 3 channels.
        to_rgb (bool): Whether to convert the image from BGR to RGB,
            default is true.
    """

    def __init__(self, mean, std, to_rgb=True):
        self.mean = np.array(mean, dtype=np.float32)
        self.std = np.array(std, dtype=np.float32)
        self.to_rgb = to_rgb

    def __call__(self, results):
        """Normalize every view with the configured mean/std.

        Args:
            results (dict): Result dict from loading pipeline.
        Returns:
            dict: Normalized results; an 'img_norm_cfg' key is added into
            the result dict.
        """
        normalized = []
        for view in results["img"]:
            normalized.append(
                mmcv.imnormalize(view, self.mean, self.std, self.to_rgb)
            )
        results["img"] = normalized
        results["img_norm_cfg"] = dict(
            mean=self.mean, std=self.std, to_rgb=self.to_rgb
        )
        return results

    def __repr__(self):
        repr_str = self.__class__.__name__
        repr_str += f"(mean={self.mean}, std={self.std}, to_rgb={self.to_rgb})"
        return repr_str
Prev
1
2
3
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment