Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
mmdetection3d
Commits
f63a62b8
Commit
f63a62b8
authored
Jul 19, 2022
by
zhangshilong
Committed by
ChaimZhu
Jul 20, 2022
Browse files
[Rrfactor]Imvotenet
parent
edb6b369
Changes
20
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
798 additions
and
722 deletions
+798
-722
configs/_base_/datasets/sunrgbd-3d-10class.py
configs/_base_/datasets/sunrgbd-3d-10class.py
+35
-40
configs/_base_/models/imvotenet_image.py
configs/_base_/models/imvotenet_image.py
+15
-4
configs/imvotenet/imvotenet_faster_rcnn_r50_fpn_2x4_sunrgbd-3d-10class.py
...t/imvotenet_faster_rcnn_r50_fpn_2x4_sunrgbd-3d-10class.py
+55
-40
configs/imvotenet/imvotenet_stage2_16x8_sunrgbd-3d-10class.py
...igs/imvotenet/imvotenet_stage2_16x8_sunrgbd-3d-10class.py
+34
-74
mmdet3d/datasets/det3d_dataset.py
mmdet3d/datasets/det3d_dataset.py
+13
-10
mmdet3d/datasets/kitti_dataset.py
mmdet3d/datasets/kitti_dataset.py
+2
-1
mmdet3d/datasets/pipelines/formating.py
mmdet3d/datasets/pipelines/formating.py
+43
-6
mmdet3d/datasets/pipelines/loading.py
mmdet3d/datasets/pipelines/loading.py
+50
-28
mmdet3d/datasets/sunrgbd_dataset.py
mmdet3d/datasets/sunrgbd_dataset.py
+53
-7
mmdet3d/metrics/indoor_metric.py
mmdet3d/metrics/indoor_metric.py
+92
-1
mmdet3d/models/dense_heads/vote_head.py
mmdet3d/models/dense_heads/vote_head.py
+8
-7
mmdet3d/models/detectors/base.py
mmdet3d/models/detectors/base.py
+11
-3
mmdet3d/models/detectors/imvotenet.py
mmdet3d/models/detectors/imvotenet.py
+198
-480
mmdet3d/models/fusion_layers/coord_transform.py
mmdet3d/models/fusion_layers/coord_transform.py
+2
-2
mmdet3d/models/fusion_layers/vote_fusion.py
mmdet3d/models/fusion_layers/vote_fusion.py
+0
-2
tests/data/sunrgbd/sunrgbd_infos.pkl
tests/data/sunrgbd/sunrgbd_infos.pkl
+0
-0
tests/test_models/test_detectors/test_h3d.py
tests/test_models/test_detectors/test_h3d.py
+51
-0
tests/test_models/test_detectors/test_imvotenet.py
tests/test_models/test_detectors/test_imvotenet.py
+88
-0
tests/utils/model_utils.py
tests/utils/model_utils.py
+26
-2
tools/data_converter/update_infos_to_v2.py
tools/data_converter/update_infos_to_v2.py
+22
-15
No files found.
configs/_base_/datasets/sunrgbd-3d-10class.py
View file @
f63a62b8
...
@@ -2,6 +2,9 @@ dataset_type = 'SUNRGBDDataset'
...
@@ -2,6 +2,9 @@ dataset_type = 'SUNRGBDDataset'
data_root
=
'data/sunrgbd/'
data_root
=
'data/sunrgbd/'
class_names
=
(
'bed'
,
'table'
,
'sofa'
,
'chair'
,
'toilet'
,
'desk'
,
'dresser'
,
class_names
=
(
'bed'
,
'table'
,
'sofa'
,
'chair'
,
'toilet'
,
'desk'
,
'dresser'
,
'night_stand'
,
'bookshelf'
,
'bathtub'
)
'night_stand'
,
'bookshelf'
,
'bathtub'
)
metainfo
=
dict
(
CLASSES
=
class_names
)
train_pipeline
=
[
train_pipeline
=
[
dict
(
dict
(
type
=
'LoadPointsFromFile'
,
type
=
'LoadPointsFromFile'
,
...
@@ -21,8 +24,9 @@ train_pipeline = [
...
@@ -21,8 +24,9 @@ train_pipeline = [
scale_ratio_range
=
[
0.85
,
1.15
],
scale_ratio_range
=
[
0.85
,
1.15
],
shift_height
=
True
),
shift_height
=
True
),
dict
(
type
=
'PointSample'
,
num_points
=
20000
),
dict
(
type
=
'PointSample'
,
num_points
=
20000
),
dict
(
type
=
'DefaultFormatBundle3D'
,
class_names
=
class_names
),
dict
(
dict
(
type
=
'Collect3D'
,
keys
=
[
'points'
,
'gt_bboxes_3d'
,
'gt_labels_3d'
])
type
=
'Pack3DDetInputs'
,
keys
=
[
'points'
,
'gt_bboxes_3d'
,
'gt_labels_3d'
])
]
]
test_pipeline
=
[
test_pipeline
=
[
dict
(
dict
(
...
@@ -47,61 +51,52 @@ test_pipeline = [
...
@@ -47,61 +51,52 @@ test_pipeline = [
sync_2d
=
False
,
sync_2d
=
False
,
flip_ratio_bev_horizontal
=
0.5
,
flip_ratio_bev_horizontal
=
0.5
,
),
),
dict
(
type
=
'PointSample'
,
num_points
=
20000
),
dict
(
type
=
'PointSample'
,
num_points
=
20000
)
dict
(
]),
type
=
'DefaultFormatBundle3D'
,
dict
(
type
=
'Pack3DDetInputs'
,
keys
=
[
'points'
])
class_names
=
class_names
,
with_label
=
False
),
dict
(
type
=
'Collect3D'
,
keys
=
[
'points'
])
])
]
# construct a pipeline for data and gt loading in show function
# please keep its loading function consistent with test_pipeline (e.g. client)
eval_pipeline
=
[
dict
(
type
=
'LoadPointsFromFile'
,
coord_type
=
'DEPTH'
,
shift_height
=
False
,
load_dim
=
6
,
use_dim
=
[
0
,
1
,
2
]),
dict
(
type
=
'DefaultFormatBundle3D'
,
class_names
=
class_names
,
with_label
=
False
),
dict
(
type
=
'Collect3D'
,
keys
=
[
'points'
])
]
]
data
=
dict
(
train_dataloader
=
dict
(
samples_per_gpu
=
16
,
batch_size
=
16
,
workers_per_gpu
=
4
,
num_workers
=
4
,
train
=
dict
(
sampler
=
dict
(
type
=
'DefaultSampler'
,
shuffle
=
True
),
dataset
=
dict
(
type
=
'RepeatDataset'
,
type
=
'RepeatDataset'
,
times
=
5
,
times
=
5
,
dataset
=
dict
(
dataset
=
dict
(
type
=
dataset_type
,
type
=
dataset_type
,
data_root
=
data_root
,
data_root
=
data_root
,
ann_file
=
data_root
+
'sunrgbd_infos_train.pkl'
,
ann_file
=
'sunrgbd_infos_train.pkl'
,
pipeline
=
train_pipeline
,
pipeline
=
train_pipeline
,
classes
=
class_names
,
filter_empty_gt
=
False
,
filter_empty_gt
=
False
,
metainfo
=
metainfo
,
# we use box_type_3d='LiDAR' in kitti and nuscenes dataset
# we use box_type_3d='LiDAR' in kitti and nuscenes dataset
# and box_type_3d='Depth' in sunrgbd and scannet dataset.
# and box_type_3d='Depth' in sunrgbd and scannet dataset.
box_type_3d
=
'Depth'
)),
box_type_3d
=
'Depth'
)))
val
=
dict
(
val_dataloader
=
dict
(
batch_size
=
1
,
num_workers
=
1
,
sampler
=
dict
(
type
=
'DefaultSampler'
,
shuffle
=
False
),
dataset
=
dict
(
type
=
dataset_type
,
type
=
dataset_type
,
data_root
=
data_root
,
data_root
=
data_root
,
ann_file
=
data_root
+
'sunrgbd_infos_val.pkl'
,
ann_file
=
'sunrgbd_infos_val.pkl'
,
pipeline
=
test_pipeline
,
pipeline
=
test_pipeline
,
classes
=
class_names
,
metainfo
=
metainfo
,
test_mode
=
True
,
test_mode
=
True
,
box_type_3d
=
'Depth'
),
box_type_3d
=
'Depth'
))
test
=
dict
(
test_dataloader
=
dict
(
batch_size
=
1
,
num_workers
=
1
,
sampler
=
dict
(
type
=
'DefaultSampler'
,
shuffle
=
False
),
dataset
=
dict
(
type
=
dataset_type
,
type
=
dataset_type
,
data_root
=
data_root
,
data_root
=
data_root
,
ann_file
=
data_root
+
'sunrgbd_infos_val.pkl'
,
ann_file
=
'sunrgbd_infos_val.pkl'
,
pipeline
=
test_pipeline
,
pipeline
=
test_pipeline
,
classes
=
class_names
,
metainfo
=
metainfo
,
test_mode
=
True
,
test_mode
=
True
,
box_type_3d
=
'Depth'
))
box_type_3d
=
'Depth'
))
val_evaluator
=
dict
(
type
=
'IndoorMetric'
)
evaluat
ion
=
dict
(
pipeline
=
eval_pipeline
)
test_
evaluat
or
=
val_evaluator
configs/_base_/models/imvotenet_image.py
View file @
f63a62b8
model
=
dict
(
model
=
dict
(
type
=
'ImVoteNet'
,
type
=
'ImVoteNet'
,
data_preprocessor
=
dict
(
type
=
'Det3DDataPreprocessor'
,
# use caffe img_norm
mean
=
[
103.530
,
116.280
,
123.675
],
std
=
[
1.0
,
1.0
,
1.0
],
bgr_to_rgb
=
False
,
pad_size_divisor
=
32
),
img_backbone
=
dict
(
img_backbone
=
dict
(
type
=
'ResNet'
,
type
=
'
mmdet.
ResNet'
,
depth
=
50
,
depth
=
50
,
num_stages
=
4
,
num_stages
=
4
,
out_indices
=
(
0
,
1
,
2
,
3
),
out_indices
=
(
0
,
1
,
2
,
3
),
...
@@ -10,11 +17,12 @@ model = dict(
...
@@ -10,11 +17,12 @@ model = dict(
norm_eval
=
True
,
norm_eval
=
True
,
style
=
'caffe'
),
style
=
'caffe'
),
img_neck
=
dict
(
img_neck
=
dict
(
type
=
'FPN'
,
type
=
'
mmdet.
FPN'
,
in_channels
=
[
256
,
512
,
1024
,
2048
],
in_channels
=
[
256
,
512
,
1024
,
2048
],
out_channels
=
256
,
out_channels
=
256
,
num_outs
=
5
),
num_outs
=
5
),
img_rpn_head
=
dict
(
img_rpn_head
=
dict
(
_scope_
=
'mmdet'
,
type
=
'RPNHead'
,
type
=
'RPNHead'
,
in_channels
=
256
,
in_channels
=
256
,
feat_channels
=
256
,
feat_channels
=
256
,
...
@@ -31,6 +39,7 @@ model = dict(
...
@@ -31,6 +39,7 @@ model = dict(
type
=
'CrossEntropyLoss'
,
use_sigmoid
=
True
,
loss_weight
=
1.0
),
type
=
'CrossEntropyLoss'
,
use_sigmoid
=
True
,
loss_weight
=
1.0
),
loss_bbox
=
dict
(
type
=
'L1Loss'
,
loss_weight
=
1.0
)),
loss_bbox
=
dict
(
type
=
'L1Loss'
,
loss_weight
=
1.0
)),
img_roi_head
=
dict
(
img_roi_head
=
dict
(
_scope_
=
'mmdet'
,
type
=
'StandardRoIHead'
,
type
=
'StandardRoIHead'
,
bbox_roi_extractor
=
dict
(
bbox_roi_extractor
=
dict
(
type
=
'SingleRoIExtractor'
,
type
=
'SingleRoIExtractor'
,
...
@@ -56,6 +65,7 @@ model = dict(
...
@@ -56,6 +65,7 @@ model = dict(
train_cfg
=
dict
(
train_cfg
=
dict
(
img_rpn
=
dict
(
img_rpn
=
dict
(
assigner
=
dict
(
assigner
=
dict
(
_scope_
=
'mmdet'
,
type
=
'MaxIoUAssigner'
,
type
=
'MaxIoUAssigner'
,
pos_iou_thr
=
0.7
,
pos_iou_thr
=
0.7
,
neg_iou_thr
=
0.3
,
neg_iou_thr
=
0.3
,
...
@@ -63,7 +73,7 @@ model = dict(
...
@@ -63,7 +73,7 @@ model = dict(
match_low_quality
=
True
,
match_low_quality
=
True
,
ignore_iof_thr
=-
1
),
ignore_iof_thr
=-
1
),
sampler
=
dict
(
sampler
=
dict
(
type
=
'RandomSampler'
,
type
=
'
mmdet.
RandomSampler'
,
num
=
256
,
num
=
256
,
pos_fraction
=
0.5
,
pos_fraction
=
0.5
,
neg_pos_ub
=-
1
,
neg_pos_ub
=-
1
,
...
@@ -80,6 +90,7 @@ model = dict(
...
@@ -80,6 +90,7 @@ model = dict(
min_bbox_size
=
0
),
min_bbox_size
=
0
),
img_rcnn
=
dict
(
img_rcnn
=
dict
(
assigner
=
dict
(
assigner
=
dict
(
_scope_
=
'mmdet'
,
type
=
'MaxIoUAssigner'
,
type
=
'MaxIoUAssigner'
,
pos_iou_thr
=
0.5
,
pos_iou_thr
=
0.5
,
neg_iou_thr
=
0.5
,
neg_iou_thr
=
0.5
,
...
@@ -87,7 +98,7 @@ model = dict(
...
@@ -87,7 +98,7 @@ model = dict(
match_low_quality
=
False
,
match_low_quality
=
False
,
ignore_iof_thr
=-
1
),
ignore_iof_thr
=-
1
),
sampler
=
dict
(
sampler
=
dict
(
type
=
'RandomSampler'
,
type
=
'
mmdet.
RandomSampler'
,
num
=
512
,
num
=
512
,
pos_fraction
=
0.25
,
pos_fraction
=
0.25
,
neg_pos_ub
=-
1
,
neg_pos_ub
=-
1
,
...
...
configs/imvotenet/imvotenet_faster_rcnn_r50_fpn_2x4_sunrgbd-3d-10class.py
View file @
f63a62b8
...
@@ -3,56 +3,71 @@ _base_ = [
...
@@ -3,56 +3,71 @@ _base_ = [
'../_base_/models/imvotenet_image.py'
'../_base_/models/imvotenet_image.py'
]
]
# use caffe img_norm
img_norm_cfg
=
dict
(
mean
=
[
103.530
,
116.280
,
123.675
],
std
=
[
1.0
,
1.0
,
1.0
],
to_rgb
=
False
)
train_pipeline
=
[
train_pipeline
=
[
dict
(
type
=
'LoadImageFromFile'
),
dict
(
type
=
'LoadImageFromFile'
),
dict
(
type
=
'LoadAnnotations'
,
with_bbox
=
True
),
dict
(
dict
(
type
=
'Resize'
,
type
=
'LoadAnnotations3D'
,
img_scale
=
[(
1333
,
480
),
(
1333
,
504
),
(
1333
,
528
),
(
1333
,
552
),
with_bbox
=
True
,
with_label
=
True
,
with_bbox_3d
=
False
,
with_label_3d
=
False
),
dict
(
type
=
'RandomChoiceResize'
,
scales
=
[(
1333
,
480
),
(
1333
,
504
),
(
1333
,
528
),
(
1333
,
552
),
(
1333
,
576
),
(
1333
,
600
)],
(
1333
,
576
),
(
1333
,
600
)],
multiscale_mode
=
'value'
,
keep_ratio
=
True
),
keep_ratio
=
True
),
dict
(
type
=
'RandomFlip'
,
flip_ratio
=
0.5
),
dict
(
type
=
'RandomFlip'
,
prob
=
0.5
),
dict
(
type
=
'Normalize'
,
**
img_norm_cfg
),
dict
(
dict
(
type
=
'Pad'
,
size_divisor
=
32
),
type
=
'Pack3DDetInputs'
,
keys
=
[
'img'
,
'gt_bboxes'
,
'gt_bboxes_labels'
]),
dict
(
type
=
'DefaultFormatBundle'
),
dict
(
type
=
'Collect'
,
keys
=
[
'img'
,
'gt_bboxes'
,
'gt_labels'
]),
]
]
test_pipeline
=
[
test_pipeline
=
[
dict
(
type
=
'LoadImageFromFile'
),
dict
(
type
=
'LoadImageFromFile'
),
# online evaluation
dict
(
type
=
'LoadAnnotations3D'
,
with_bbox
=
True
,
with_label
=
True
,
with_bbox_3d
=
False
,
with_label_3d
=
False
),
dict
(
type
=
'Resize'
,
scale
=
(
1333
,
600
),
keep_ratio
=
True
),
dict
(
dict
(
type
=
'MultiScaleFlipAug'
,
type
=
'Pack3DDetInputs'
,
img_scale
=
(
1333
,
600
),
keys
=
([
'img'
]),
flip
=
False
,
meta_keys
=
(
'img_id'
,
'img_path'
,
'ori_shape'
,
'img_shape'
,
transforms
=
[
'scale_factor'
))
dict
(
type
=
'Resize'
,
keep_ratio
=
True
),
dict
(
type
=
'RandomFlip'
),
dict
(
type
=
'Normalize'
,
**
img_norm_cfg
),
dict
(
type
=
'Pad'
,
size_divisor
=
32
),
dict
(
type
=
'ImageToTensor'
,
keys
=
[
'img'
]),
dict
(
type
=
'Collect'
,
keys
=
[
'img'
]),
])
]
]
train_dataloader
=
dict
(
batch_size
=
2
,
num_workers
=
2
,
sampler
=
dict
(
type
=
'DefaultSampler'
,
shuffle
=
True
),
dataset
=
dict
(
type
=
'RepeatDataset'
,
times
=
1
,
dataset
=
dict
(
pipeline
=
train_pipeline
)))
data
=
dict
(
val_dataloader
=
dict
(
dataset
=
dict
(
pipeline
=
test_pipeline
))
samples_per_gpu
=
2
,
test_dataloader
=
dict
(
dataset
=
dict
(
pipeline
=
test_pipeline
))
workers_per_gpu
=
2
,
train
=
dict
(
times
=
1
,
dataset
=
dict
(
pipeline
=
train_pipeline
)),
train_cfg
=
dict
(
type
=
'EpochBasedTrainLoop'
,
max_epochs
=
8
,
val_interval
=
1
)
val
=
dict
(
pipeline
=
test_pipeline
),
val_cfg
=
dict
(
type
=
'ValLoop'
)
test
=
dict
(
pipeline
=
test_pipeline
))
test_cfg
=
dict
(
type
=
'TestLoop'
)
# learning rate
param_scheduler
=
[
dict
(
type
=
'LinearLR'
,
start_factor
=
0.001
,
by_epoch
=
False
,
begin
=
0
,
end
=
500
),
dict
(
type
=
'MultiStepLR'
,
begin
=
0
,
end
=
8
,
by_epoch
=
True
,
milestones
=
[
6
],
gamma
=
0.1
)
]
val_evaluator
=
dict
(
type
=
'Indoor2DMetric'
)
optimizer
=
dict
(
type
=
'SGD'
,
lr
=
0.01
,
momentum
=
0.9
,
weight_decay
=
0.0001
)
# optimizer
optimizer_config
=
dict
(
grad_clip
=
None
)
optim_wrapper
=
dict
(
lr_config
=
dict
(
type
=
'OptimWrapper'
,
policy
=
'step'
,
optimizer
=
dict
(
type
=
'SGD'
,
lr
=
0.01
,
momentum
=
0.9
,
weight_decay
=
0.0001
))
warmup
=
'linear'
,
warmup_iters
=
500
,
warmup_ratio
=
0.001
,
step
=
[
6
])
runner
=
dict
(
type
=
'EpochBasedRunner'
,
max_epochs
=
8
)
load_from
=
'http://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_caffe_fpn_mstrain-poly_3x_coco/mask_rcnn_r50_caffe_fpn_mstrain-poly_3x_coco_bbox_mAP-0.408__segm_mAP-0.37_20200504_163245-42aa3d00.pth'
# noqa
load_from
=
'http://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_caffe_fpn_mstrain-poly_3x_coco/mask_rcnn_r50_caffe_fpn_mstrain-poly_3x_coco_bbox_mAP-0.408__segm_mAP-0.37_20200504_163245-42aa3d00.pth'
# noqa
configs/imvotenet/imvotenet_stage2_16x8_sunrgbd-3d-10class.py
View file @
f63a62b8
...
@@ -7,10 +7,6 @@ _base_ = [
...
@@ -7,10 +7,6 @@ _base_ = [
class_names
=
(
'bed'
,
'table'
,
'sofa'
,
'chair'
,
'toilet'
,
'desk'
,
'dresser'
,
class_names
=
(
'bed'
,
'table'
,
'sofa'
,
'chair'
,
'toilet'
,
'desk'
,
'dresser'
,
'night_stand'
,
'bookshelf'
,
'bathtub'
)
'night_stand'
,
'bookshelf'
,
'bathtub'
)
# use caffe img_norm
img_norm_cfg
=
dict
(
mean
=
[
103.530
,
116.280
,
123.675
],
std
=
[
1.0
,
1.0
,
1.0
],
to_rgb
=
False
)
model
=
dict
(
model
=
dict
(
pts_backbone
=
dict
(
pts_backbone
=
dict
(
type
=
'PointNet2SASSG'
,
type
=
'PointNet2SASSG'
,
...
@@ -48,10 +44,8 @@ model = dict(
...
@@ -48,10 +44,8 @@ model = dict(
[
0.76584
,
1.398258
,
0.472728
]]),
[
0.76584
,
1.398258
,
0.472728
]]),
pred_layer_cfg
=
dict
(
pred_layer_cfg
=
dict
(
in_channels
=
128
,
shared_conv_channels
=
(
128
,
128
),
bias
=
True
),
in_channels
=
128
,
shared_conv_channels
=
(
128
,
128
),
bias
=
True
),
conv_cfg
=
dict
(
type
=
'Conv1d'
),
norm_cfg
=
dict
(
type
=
'BN1d'
),
objectness_loss
=
dict
(
objectness_loss
=
dict
(
type
=
'CrossEntropyLoss'
,
type
=
'
mmdet.
CrossEntropyLoss'
,
class_weight
=
[
0.2
,
0.8
],
class_weight
=
[
0.2
,
0.8
],
reduction
=
'sum'
,
reduction
=
'sum'
,
loss_weight
=
5.0
),
loss_weight
=
5.0
),
...
@@ -62,15 +56,23 @@ model = dict(
...
@@ -62,15 +56,23 @@ model = dict(
loss_src_weight
=
10.0
,
loss_src_weight
=
10.0
,
loss_dst_weight
=
10.0
),
loss_dst_weight
=
10.0
),
dir_class_loss
=
dict
(
dir_class_loss
=
dict
(
type
=
'CrossEntropyLoss'
,
reduction
=
'sum'
,
loss_weight
=
1.0
),
type
=
'mmdet.CrossEntropyLoss'
,
reduction
=
'sum'
,
loss_weight
=
1.0
),
dir_res_loss
=
dict
(
dir_res_loss
=
dict
(
type
=
'SmoothL1Loss'
,
reduction
=
'sum'
,
loss_weight
=
10.0
),
type
=
'
mmdet.
SmoothL1Loss'
,
reduction
=
'sum'
,
loss_weight
=
10.0
),
size_class_loss
=
dict
(
size_class_loss
=
dict
(
type
=
'CrossEntropyLoss'
,
reduction
=
'sum'
,
loss_weight
=
1.0
),
type
=
'mmdet.CrossEntropyLoss'
,
reduction
=
'sum'
,
loss_weight
=
1.0
),
size_res_loss
=
dict
(
size_res_loss
=
dict
(
type
=
'SmoothL1Loss'
,
reduction
=
'sum'
,
loss_weight
=
10.0
/
3.0
),
type
=
'mmdet.SmoothL1Loss'
,
reduction
=
'sum'
,
loss_weight
=
10.0
/
3.0
),
semantic_loss
=
dict
(
semantic_loss
=
dict
(
type
=
'CrossEntropyLoss'
,
reduction
=
'sum'
,
loss_weight
=
1.0
)),
type
=
'mmdet.CrossEntropyLoss'
,
reduction
=
'sum'
,
loss_weight
=
1.0
)),
joint
=
dict
(
joint
=
dict
(
vote_module_cfg
=
dict
(
vote_module_cfg
=
dict
(
in_channels
=
512
,
in_channels
=
512
,
...
@@ -154,11 +156,11 @@ model = dict(
...
@@ -154,11 +156,11 @@ model = dict(
# model training and testing settings
# model training and testing settings
train_cfg
=
dict
(
train_cfg
=
dict
(
pts
=
dict
(
pts
=
dict
(
pos_distance_thr
=
0.3
,
neg_distance_thr
=
0.6
,
sample_mod
=
'vote'
)),
pos_distance_thr
=
0.3
,
neg_distance_thr
=
0.6
,
sample_mod
e
=
'vote'
)),
test_cfg
=
dict
(
test_cfg
=
dict
(
img_rcnn
=
dict
(
score_thr
=
0.1
),
img_rcnn
=
dict
(
score_thr
=
0.1
),
pts
=
dict
(
pts
=
dict
(
sample_mod
=
'seed'
,
sample_mod
e
=
'seed'
,
nms_thr
=
0.25
,
nms_thr
=
0.25
,
score_thr
=
0.05
,
score_thr
=
0.05
,
per_class_proposal
=
True
)))
per_class_proposal
=
True
)))
...
@@ -171,12 +173,13 @@ train_pipeline = [
...
@@ -171,12 +173,13 @@ train_pipeline = [
load_dim
=
6
,
load_dim
=
6
,
use_dim
=
[
0
,
1
,
2
]),
use_dim
=
[
0
,
1
,
2
]),
dict
(
type
=
'LoadImageFromFile'
),
dict
(
type
=
'LoadImageFromFile'
),
dict
(
type
=
'LoadAnnotations3D'
),
dict
(
dict
(
type
=
'LoadAnnotations'
,
with_bbox
=
True
),
type
=
'LoadAnnotations3D'
,
dict
(
type
=
'Resize'
,
img_scale
=
(
1333
,
600
),
keep_ratio
=
True
),
with_bbox
=
True
,
dict
(
type
=
'RandomFlip'
,
flip_ratio
=
0.0
),
with_label
=
True
,
dict
(
type
=
'Normalize'
,
**
img_norm_cfg
),
with_bbox_3d
=
True
,
dict
(
type
=
'Pad'
,
size_divisor
=
32
),
with_label_3d
=
True
),
dict
(
type
=
'Resize'
,
scale
=
(
1333
,
600
),
keep_ratio
=
True
),
dict
(
dict
(
type
=
'RandomFlip3D'
,
type
=
'RandomFlip3D'
,
sync_2d
=
False
,
sync_2d
=
False
,
...
@@ -188,15 +191,13 @@ train_pipeline = [
...
@@ -188,15 +191,13 @@ train_pipeline = [
scale_ratio_range
=
[
0.85
,
1.15
],
scale_ratio_range
=
[
0.85
,
1.15
],
shift_height
=
True
),
shift_height
=
True
),
dict
(
type
=
'PointSample'
,
num_points
=
20000
),
dict
(
type
=
'PointSample'
,
num_points
=
20000
),
dict
(
type
=
'DefaultFormatBundle3D'
,
class_names
=
class_names
),
dict
(
dict
(
type
=
'
Collect3D
'
,
type
=
'
Pack3DDetInputs
'
,
keys
=
[
keys
=
(
[
'img'
,
'gt_bboxes'
,
'gt_labels'
,
'points'
,
'gt_bboxes_3d'
,
'img'
,
'gt_bboxes'
,
'gt_
bboxes_
labels'
,
'points'
,
'gt_bboxes_3d'
,
'gt_labels_3d'
'gt_labels_3d'
])
])
)
]
]
test_pipeline
=
[
test_pipeline
=
[
dict
(
type
=
'LoadImageFromFile'
),
dict
(
type
=
'LoadImageFromFile'
),
dict
(
dict
(
...
@@ -205,56 +206,15 @@ test_pipeline = [
...
@@ -205,56 +206,15 @@ test_pipeline = [
shift_height
=
True
,
shift_height
=
True
,
load_dim
=
6
,
load_dim
=
6
,
use_dim
=
[
0
,
1
,
2
]),
use_dim
=
[
0
,
1
,
2
]),
dict
(
dict
(
type
=
'Resize'
,
scale
=
(
1333
,
600
),
keep_ratio
=
True
),
type
=
'MultiScaleFlipAug3D'
,
img_scale
=
(
1333
,
600
),
pts_scale_ratio
=
1
,
flip
=
False
,
transforms
=
[
dict
(
type
=
'Resize'
,
keep_ratio
=
True
),
dict
(
type
=
'RandomFlip'
,
flip_ratio
=
0.0
),
dict
(
type
=
'Normalize'
,
**
img_norm_cfg
),
dict
(
type
=
'Pad'
,
size_divisor
=
32
),
dict
(
type
=
'GlobalRotScaleTrans'
,
rot_range
=
[
0
,
0
],
scale_ratio_range
=
[
1.
,
1.
],
translation_std
=
[
0
,
0
,
0
]),
dict
(
type
=
'RandomFlip3D'
,
sync_2d
=
False
,
flip_ratio_bev_horizontal
=
0.5
,
),
dict
(
type
=
'PointSample'
,
num_points
=
20000
),
dict
(
type
=
'PointSample'
,
num_points
=
20000
),
dict
(
dict
(
type
=
'Pack3DDetInputs'
,
keys
=
[
'img'
,
'points'
])
type
=
'DefaultFormatBundle3D'
,
class_names
=
class_names
,
with_label
=
False
),
dict
(
type
=
'Collect3D'
,
keys
=
[
'img'
,
'points'
])
]),
]
# construct a pipeline for data and gt loading in show function
# please keep its loading function consistent with test_pipeline (e.g. client)
eval_pipeline
=
[
dict
(
type
=
'LoadImageFromFile'
),
dict
(
type
=
'LoadPointsFromFile'
,
coord_type
=
'DEPTH'
,
shift_height
=
False
,
load_dim
=
6
,
use_dim
=
[
0
,
1
,
2
]),
dict
(
type
=
'DefaultFormatBundle3D'
,
class_names
=
class_names
,
with_label
=
False
),
dict
(
type
=
'Collect3D'
,
keys
=
[
'img'
,
'points'
])
]
]
data
=
dict
(
train_dataloader
=
dict
(
dataset
=
dict
(
dataset
=
dict
(
pipeline
=
train_pipeline
)))
train
=
dict
(
dataset
=
dict
(
pipeline
=
train_pipeline
)),
val
=
dict
(
pipeline
=
test_pipeline
),
val_dataloader
=
dict
(
dataset
=
dict
(
pipeline
=
test_pipeline
))
test
=
dict
(
pipeline
=
test_pipeline
))
test_dataloader
=
dict
(
dataset
=
dict
(
pipeline
=
test_pipeline
))
evaluation
=
dict
(
pipeline
=
eval_pipeline
)
# may also use your own pre-trained image branch
# may also use your own pre-trained image branch
load_from
=
'https://download.openmmlab.com/mmdetection3d/v0.1.0_models/imvotenet/imvotenet_faster_rcnn_r50_fpn_2x4_sunrgbd-3d-10class/imvotenet_faster_rcnn_r50_fpn_2x4_sunrgbd-3d-10class_20210323_173222-cad62aeb.pth'
# noqa
load_from
=
'https://download.openmmlab.com/mmdetection3d/v0.1.0_models/imvotenet/imvotenet_faster_rcnn_r50_fpn_2x4_sunrgbd-3d-10class/imvotenet_faster_rcnn_r50_fpn_2x4_sunrgbd-3d-10class_20210323_173222-cad62aeb.pth'
# noqa
mmdet3d/datasets/det3d_dataset.py
View file @
f63a62b8
...
@@ -178,10 +178,11 @@ class Det3DDataset(BaseDataset):
...
@@ -178,10 +178,11 @@ class Det3DDataset(BaseDataset):
dict | None: Processed `ann_info`
dict | None: Processed `ann_info`
"""
"""
# add s or gt prefix for most keys after concat
# add s or gt prefix for most keys after concat
# we only process 3d annotations here, the corresponding
# 2d annotation process is in the `LoadAnnotations3D`
# in `pipelines`
name_mapping
=
{
name_mapping
=
{
'bbox_label'
:
'gt_labels'
,
'bbox_label_3d'
:
'gt_labels_3d'
,
'bbox_label_3d'
:
'gt_labels_3d'
,
'bbox'
:
'gt_bboxes'
,
'bbox_3d'
:
'gt_bboxes_3d'
,
'bbox_3d'
:
'gt_bboxes_3d'
,
'depth'
:
'depths'
,
'depth'
:
'depths'
,
'center_2d'
:
'centers_2d'
,
'center_2d'
:
'centers_2d'
,
...
@@ -196,6 +197,7 @@ class Det3DDataset(BaseDataset):
...
@@ -196,6 +197,7 @@ class Det3DDataset(BaseDataset):
keys
=
list
(
instances
[
0
].
keys
())
keys
=
list
(
instances
[
0
].
keys
())
ann_info
=
dict
()
ann_info
=
dict
()
for
ann_name
in
keys
:
for
ann_name
in
keys
:
if
ann_name
in
name_mapping
:
temp_anns
=
[
item
[
ann_name
]
for
item
in
instances
]
temp_anns
=
[
item
[
ann_name
]
for
item
in
instances
]
# map the original dataset label to training label
# map the original dataset label to training label
if
'label'
in
ann_name
:
if
'label'
in
ann_name
:
...
@@ -203,9 +205,10 @@ class Det3DDataset(BaseDataset):
...
@@ -203,9 +205,10 @@ class Det3DDataset(BaseDataset):
self
.
label_mapping
[
item
]
for
item
in
temp_anns
self
.
label_mapping
[
item
]
for
item
in
temp_anns
]
]
temp_anns
=
np
.
array
(
temp_anns
)
temp_anns
=
np
.
array
(
temp_anns
)
if
ann_name
in
name_mapping
:
ann_name
=
name_mapping
[
ann_name
]
ann_name
=
name_mapping
[
ann_name
]
ann_info
[
ann_name
]
=
temp_anns
ann_info
[
ann_name
]
=
temp_anns
ann_info
[
'instances'
]
=
info
[
'instances'
]
return
ann_info
return
ann_info
def
parse_data_info
(
self
,
info
:
dict
)
->
dict
:
def
parse_data_info
(
self
,
info
:
dict
)
->
dict
:
...
...
mmdet3d/datasets/kitti_dataset.py
View file @
f63a62b8
...
@@ -87,7 +87,8 @@ class KittiDataset(Det3DDataset):
...
@@ -87,7 +87,8 @@ class KittiDataset(Det3DDataset):
if
'plane'
in
info
:
if
'plane'
in
info
:
# convert ground plane to velodyne coordinates
# convert ground plane to velodyne coordinates
plane
=
np
.
array
(
info
[
'plane'
])
plane
=
np
.
array
(
info
[
'plane'
])
lidar2cam
=
np
.
array
(
info
[
'images'
][
'CAM2'
][
'lidar2cam'
])
lidar2cam
=
np
.
array
(
info
[
'images'
][
'CAM2'
][
'lidar2cam'
],
dtype
=
np
.
float32
)
reverse
=
np
.
linalg
.
inv
(
lidar2cam
)
reverse
=
np
.
linalg
.
inv
(
lidar2cam
)
(
plane_norm_cam
,
plane_off_cam
)
=
(
plane
[:
3
],
(
plane_norm_cam
,
plane_off_cam
)
=
(
plane
[:
3
],
...
...
mmdet3d/datasets/pipelines/formating.py
View file @
f63a62b8
# Copyright (c) OpenMMLab. All rights reserved.
# Copyright (c) OpenMMLab. All rights reserved.
from
typing
import
List
,
Union
from
typing
import
List
,
Sequence
,
Union
import
mmcv
import
numpy
as
np
import
numpy
as
np
import
torch
from
mmcv
import
BaseTransform
from
mmcv
import
BaseTransform
from
mmcv.transforms
import
to_tensor
from
mmengine
import
InstanceData
from
mmengine
import
InstanceData
from
numpy
import
dtype
from
mmdet3d.core
import
Det3DDataSample
,
PointData
from
mmdet3d.core
import
Det3DDataSample
,
PointData
from
mmdet3d.core.bbox
import
BaseInstance3DBoxes
from
mmdet3d.core.bbox
import
BaseInstance3DBoxes
...
@@ -12,6 +14,38 @@ from mmdet3d.core.points import BasePoints
...
@@ -12,6 +14,38 @@ from mmdet3d.core.points import BasePoints
from
mmdet3d.registry
import
TRANSFORMS
from
mmdet3d.registry
import
TRANSFORMS
def to_tensor(
    data: Union[torch.Tensor, np.ndarray, Sequence, int, float]
) -> torch.Tensor:
    """Convert objects of various python types to :obj:`torch.Tensor`.

    Supported types are: :class:`numpy.ndarray`, :class:`torch.Tensor`,
    :class:`Sequence`, :class:`int` and :class:`float`.

    Args:
        data (torch.Tensor | numpy.ndarray | Sequence | int | float): Data to
            be converted.

    Returns:
        torch.Tensor: the converted data. A tensor input is returned as is;
        a float64 array is cast to float32 before conversion.

    Raises:
        TypeError: If ``data`` is a ``str`` or any other unsupported type.
    """
    if isinstance(data, torch.Tensor):
        return data
    if isinstance(data, np.ndarray):
        # Compare dtypes by value (``==``), not identity: ``is`` only works
        # as long as numpy hands back the very same dtype object.
        if data.dtype == np.float64:
            data = data.astype(np.float32)
        return torch.from_numpy(data)
    # Strings are sequences too, but they are not convertible to a tensor.
    if isinstance(data, Sequence) and not isinstance(data, str):
        return torch.tensor(data)
    if isinstance(data, int):
        return torch.LongTensor([data])
    if isinstance(data, float):
        return torch.FloatTensor([data])
    raise TypeError(f'type {type(data)} cannot be converted to tensor.')
@
TRANSFORMS
.
register_module
()
@
TRANSFORMS
.
register_module
()
class
Pack3DDetInputs
(
BaseTransform
):
class
Pack3DDetInputs
(
BaseTransform
):
INPUTS_KEYS
=
[
'points'
,
'img'
]
INPUTS_KEYS
=
[
'points'
,
'img'
]
...
@@ -20,7 +54,7 @@ class Pack3DDetInputs(BaseTransform):
...
@@ -20,7 +54,7 @@ class Pack3DDetInputs(BaseTransform):
]
]
INSTANCEDATA_2D_KEYS
=
[
INSTANCEDATA_2D_KEYS
=
[
'gt_bboxes'
,
'gt_bboxes'
,
'gt_labels'
,
'gt_
bboxes_
labels'
,
]
]
SEG_KEYS
=
[
SEG_KEYS
=
[
...
@@ -121,8 +155,8 @@ class Pack3DDetInputs(BaseTransform):
...
@@ -121,8 +155,8 @@ class Pack3DDetInputs(BaseTransform):
for
key
in
[
for
key
in
[
'proposals'
,
'gt_bboxes'
,
'gt_bboxes_ignore'
,
'gt_labels'
,
'proposals'
,
'gt_bboxes'
,
'gt_bboxes_ignore'
,
'gt_labels'
,
'gt_labels
_3d
'
,
'attr_labels'
,
'pts_instance_mask'
,
'gt_
bboxes_
labels'
,
'attr_labels'
,
'pts_instance_mask'
,
'pts_semantic_mask'
,
'centers_2d'
,
'depths'
'pts_semantic_mask'
,
'centers_2d'
,
'depths'
,
'gt_labels_3d'
]:
]:
if
key
not
in
results
:
if
key
not
in
results
:
continue
continue
...
@@ -159,6 +193,9 @@ class Pack3DDetInputs(BaseTransform):
...
@@ -159,6 +193,9 @@ class Pack3DDetInputs(BaseTransform):
elif
key
in
self
.
INSTANCEDATA_3D_KEYS
:
elif
key
in
self
.
INSTANCEDATA_3D_KEYS
:
gt_instances_3d
[
self
.
_remove_prefix
(
key
)]
=
results
[
key
]
gt_instances_3d
[
self
.
_remove_prefix
(
key
)]
=
results
[
key
]
elif
key
in
self
.
INSTANCEDATA_2D_KEYS
:
elif
key
in
self
.
INSTANCEDATA_2D_KEYS
:
if
key
==
'gt_bboxes_labels'
:
gt_instances
[
'labels'
]
=
results
[
key
]
else
:
gt_instances
[
self
.
_remove_prefix
(
key
)]
=
results
[
key
]
gt_instances
[
self
.
_remove_prefix
(
key
)]
=
results
[
key
]
elif
key
in
self
.
SEG_KEYS
:
elif
key
in
self
.
SEG_KEYS
:
gt_pts_seg
[
self
.
_remove_prefix
(
key
)]
=
results
[
key
]
gt_pts_seg
[
self
.
_remove_prefix
(
key
)]
=
results
[
key
]
...
...
mmdet3d/datasets/pipelines/loading.py
View file @
f63a62b8
...
@@ -632,34 +632,6 @@ class LoadAnnotations3D(LoadAnnotations):
...
@@ -632,34 +632,6 @@ class LoadAnnotations3D(LoadAnnotations):
self
.
with_seg_3d
=
with_seg_3d
self
.
with_seg_3d
=
with_seg_3d
self
.
seg_3d_dtype
=
seg_3d_dtype
self
.
seg_3d_dtype
=
seg_3d_dtype
def
_load_bboxes
(
self
,
results
:
dict
)
->
None
:
"""Private function to load bounding box annotations.
Rewrite '_load_bboxes` since mmdet3d uses 'parse_anno_info' in
datasets.
Args:
results (dict): Result dict from :obj:`mmdet3d.CustomDataset`.
Returns:
dict: The dict contains loaded bounding box annotations.
"""
results
[
'gt_bboxes'
]
=
results
[
'ann_info'
][
'gt_bboxes'
]
def
_load_labels
(
self
,
results
:
dict
)
->
None
:
"""Private function to load label annotations.
Rewrite '_load_bboxes` since mmdet3d uses 'parse_anno_info' in
datasets.
Args:
results (dict): Result dict from :obj:`mmdet3d.CustomDataset`.
Returns:
dict: The dict contains loaded label annotations.
"""
results
[
'gt_labels'
]
=
results
[
'ann_info'
][
'gt_labels'
]
def
_load_bboxes_3d
(
self
,
results
:
dict
)
->
dict
:
def
_load_bboxes_3d
(
self
,
results
:
dict
)
->
dict
:
"""Private function to move the 3D bounding box annotation from
"""Private function to move the 3D bounding box annotation from
`ann_info` field to the root of `results`.
`ann_info` field to the root of `results`.
...
@@ -769,6 +741,56 @@ class LoadAnnotations3D(LoadAnnotations):
...
@@ -769,6 +741,56 @@ class LoadAnnotations3D(LoadAnnotations):
results
[
'eval_ann_info'
][
'pts_semantic_mask'
]
=
pts_semantic_mask
results
[
'eval_ann_info'
][
'pts_semantic_mask'
]
=
pts_semantic_mask
return
results
return
results
def _load_bboxes(self, results: dict) -> None:
    """Private function to load 2D bounding box annotations.

    Per the original note, the only difference from the parent
    implementation is that the processing of ``ignore_flag`` is removed.

    The boxes are read from ``instance['bbox']`` of every entry in
    ``results['instances']`` and stored in ``results['gt_bboxes']`` as a
    float32 array of shape (N, 4). When ``self.denorm_bbox`` is set, the
    even columns are multiplied by the image width and the odd columns by
    the image height taken from ``results['img_shape']``.

    Args:
        results (dict): Result dict from :obj:``mmcv.BaseDataset``.
            It is modified in place.
    """
    boxes = [instance['bbox'] for instance in results['instances']]
    # ``reshape((-1, 4))`` already yields an empty (0, 4) array when there
    # are no instances, so no separate empty branch is needed.
    gt_bboxes = np.array(boxes, dtype=np.float32).reshape((-1, 4))

    if self.denorm_bbox and gt_bboxes.shape[0] != 0:
        # Only the first two entries are used so that an ``img_shape`` of
        # the form (h, w, c) is accepted as well as (h, w).
        h, w = results['img_shape'][:2]
        gt_bboxes[:, 0::2] *= w
        gt_bboxes[:, 1::2] *= h

    results['gt_bboxes'] = gt_bboxes
    if 'eval_ann_info' in results:
        # Same array object is shared with the evaluation annotations.
        results['eval_ann_info']['gt_bboxes'] = gt_bboxes
def _load_labels(self, results: dict) -> None:
    """Private function to load 2D label annotations.

    The labels are read from ``instance['bbox_label']`` of every entry in
    ``results['instances']`` and stored in ``results['gt_bboxes_labels']``
    as an int64 array of shape (N, ).

    Args:
        results (dict): Result dict from :obj:``mmcv.BaseDataset``.
            It is modified in place.
    """
    labels = [instance['bbox_label'] for instance in results['instances']]
    # An empty list already converts to an int64 array of shape (0, ).
    gt_bboxes_labels = np.array(labels, dtype=np.int64)

    results['gt_bboxes_labels'] = gt_bboxes_labels
    if 'eval_ann_info' in results:
        # Same array object is shared with the evaluation annotations.
        results['eval_ann_info']['gt_bboxes_labels'] = gt_bboxes_labels
def
transform
(
self
,
results
:
dict
)
->
dict
:
def
transform
(
self
,
results
:
dict
)
->
dict
:
"""Function to load multiple types annotations.
"""Function to load multiple types annotations.
...
...
mmdet3d/datasets/sunrgbd_dataset.py
View file @
f63a62b8
# Copyright (c) OpenMMLab. All rights reserved.
# Copyright (c) OpenMMLab. All rights reserved.
import
os.path
as
osp
from
typing
import
Callable
,
List
,
Optional
,
Union
from
typing
import
Callable
,
List
,
Optional
,
Union
import
numpy
as
np
import
numpy
as
np
...
@@ -22,13 +24,15 @@ class SUNRGBDDataset(Det3DDataset):
...
@@ -22,13 +24,15 @@ class SUNRGBDDataset(Det3DDataset):
ann_file (str): Path of annotation file.
ann_file (str): Path of annotation file.
metainfo (dict, optional): Meta information for dataset, such as class
metainfo (dict, optional): Meta information for dataset, such as class
information. Defaults to None.
information. Defaults to None.
data_prefix (dict
, optional
): Prefix for data. Defaults to
data_prefix (dict): Prefix for data. Defaults to
`dict(pts='points',img='sunrgbd_trainval')`.
`dict(pts='points',img='sunrgbd_trainval')`.
pipeline (list[dict], optional): Pipeline used for data processing.
pipeline (list[dict], optional): Pipeline used for data processing.
Defaults to None.
Defaults to None.
modality (dict, optional): Modality to specify the sensor data used
modality (dict, optional): Modality to specify the sensor data used
as input. Defaults to `dict(use_camera=True, use_lidar=True)`.
as input. Defaults to `dict(use_camera=True, use_lidar=True)`.
box_type_3d (str, optional): Type of 3D box of this dataset.
default_cam_key (str): The default camera name adopted.
Defaults to "CAM0".
box_type_3d (str): Type of 3D box of this dataset.
Based on the `box_type_3d`, the dataset will encapsulate the box
Based on the `box_type_3d`, the dataset will encapsulate the box
to its original format then converted them to `box_type_3d`.
to its original format then converted them to `box_type_3d`.
Defaults to 'Depth' in this dataset. Available options includes
Defaults to 'Depth' in this dataset. Available options includes
...
@@ -36,9 +40,9 @@ class SUNRGBDDataset(Det3DDataset):
...
@@ -36,9 +40,9 @@ class SUNRGBDDataset(Det3DDataset):
- 'LiDAR': Box in LiDAR coordinates.
- 'LiDAR': Box in LiDAR coordinates.
- 'Depth': Box in depth coordinates, usually for indoor dataset.
- 'Depth': Box in depth coordinates, usually for indoor dataset.
- 'Camera': Box in camera coordinates.
- 'Camera': Box in camera coordinates.
filter_empty_gt (bool
, optional
): Whether to filter empty GT.
filter_empty_gt (bool): Whether to filter empty GT.
Defaults to True.
Defaults to True.
test_mode (bool
, optional
): Whether the dataset is in test mode.
test_mode (bool): Whether the dataset is in test mode.
Defaults to False.
Defaults to False.
"""
"""
METAINFO
=
{
METAINFO
=
{
...
@@ -51,8 +55,9 @@ class SUNRGBDDataset(Det3DDataset):
...
@@ -51,8 +55,9 @@ class SUNRGBDDataset(Det3DDataset):
ann_file
:
str
,
ann_file
:
str
,
metainfo
:
Optional
[
dict
]
=
None
,
metainfo
:
Optional
[
dict
]
=
None
,
data_prefix
:
dict
=
dict
(
data_prefix
:
dict
=
dict
(
pts
=
'points'
,
img
=
'sunrgbd_trainval'
),
pts
=
'points'
,
img
=
'sunrgbd_trainval
/image
'
),
pipeline
:
List
[
Union
[
dict
,
Callable
]]
=
[],
pipeline
:
List
[
Union
[
dict
,
Callable
]]
=
[],
default_cam_key
:
str
=
'CAM0'
,
modality
=
dict
(
use_camera
=
True
,
use_lidar
=
True
),
modality
=
dict
(
use_camera
=
True
,
use_lidar
=
True
),
box_type_3d
:
str
=
'Depth'
,
box_type_3d
:
str
=
'Depth'
,
filter_empty_gt
:
bool
=
True
,
filter_empty_gt
:
bool
=
True
,
...
@@ -64,6 +69,7 @@ class SUNRGBDDataset(Det3DDataset):
...
@@ -64,6 +69,7 @@ class SUNRGBDDataset(Det3DDataset):
metainfo
=
metainfo
,
metainfo
=
metainfo
,
data_prefix
=
data_prefix
,
data_prefix
=
data_prefix
,
pipeline
=
pipeline
,
pipeline
=
pipeline
,
default_cam_key
=
default_cam_key
,
modality
=
modality
,
modality
=
modality
,
box_type_3d
=
box_type_3d
,
box_type_3d
=
box_type_3d
,
filter_empty_gt
=
filter_empty_gt
,
filter_empty_gt
=
filter_empty_gt
,
...
@@ -73,6 +79,47 @@ class SUNRGBDDataset(Det3DDataset):
...
@@ -73,6 +79,47 @@ class SUNRGBDDataset(Det3DDataset):
'use_lidar'
in
self
.
modality
'use_lidar'
in
self
.
modality
assert
self
.
modality
[
'use_camera'
]
or
self
.
modality
[
'use_lidar'
]
assert
self
.
modality
[
'use_camera'
]
or
self
.
modality
[
'use_lidar'
]
def parse_data_info(self, info: dict) -> dict:
    """Process the raw data info.

    Joins the configured ``data_prefix`` entries onto the point cloud path
    and onto every image path of the enabled modalities. When a default
    camera key is set, that camera's ``img_path`` and ``depth2img`` are
    copied to the top level of ``info``. The parsed annotations are
    attached as ``ann_info`` outside test mode, or as ``eval_ann_info`` in
    test mode when ``load_eval_anns`` is set.

    Args:
        info (dict): Raw info dict. It is modified in place.

    Returns:
        dict: The same ``info`` object with prefixed paths and, depending
        on the mode, ``ann_info`` or ``eval_ann_info``.
    """
    modality = self.modality
    prefixes = self.data_prefix

    if modality['use_lidar']:
        lidar_info = info['lidar_points']
        lidar_info['lidar_path'] = osp.join(
            prefixes.get('pts', ''), lidar_info['lidar_path'])

    if modality['use_camera']:
        img_root = prefixes.get('img', '')
        for img_info in info['images'].values():
            if 'img_path' in img_info:
                img_info['img_path'] = osp.join(img_root,
                                                img_info['img_path'])

        cam_key = self.default_cam_key
        if cam_key is not None:
            default_cam = info['images'][cam_key]
            info['img_path'] = default_cam['img_path']
            info['depth2img'] = np.array(
                default_cam['depth2img'], dtype=np.float32)

    if not self.test_mode:
        # used in training
        info['ann_info'] = self.parse_ann_info(info)
    elif self.load_eval_anns:
        info['eval_ann_info'] = self.parse_ann_info(info)

    return info
def
parse_ann_info
(
self
,
info
:
dict
)
->
dict
:
def
parse_ann_info
(
self
,
info
:
dict
)
->
dict
:
"""Process the `instances` in data info to `ann_info`
"""Process the `instances` in data info to `ann_info`
...
@@ -83,12 +130,11 @@ class SUNRGBDDataset(Det3DDataset):
...
@@ -83,12 +130,11 @@ class SUNRGBDDataset(Det3DDataset):
dict: Processed `ann_info`
dict: Processed `ann_info`
"""
"""
ann_info
=
super
().
parse_ann_info
(
info
)
ann_info
=
super
().
parse_ann_info
(
info
)
#
empty gt
#
process data without any annotations
if
ann_info
is
None
:
if
ann_info
is
None
:
ann_info
=
dict
()
ann_info
=
dict
()
ann_info
[
'gt_bboxes_3d'
]
=
np
.
zeros
((
0
,
6
),
dtype
=
np
.
float32
)
ann_info
[
'gt_bboxes_3d'
]
=
np
.
zeros
((
0
,
6
),
dtype
=
np
.
float32
)
ann_info
[
'gt_labels_3d'
]
=
np
.
zeros
((
0
,
),
dtype
=
np
.
int64
)
ann_info
[
'gt_labels_3d'
]
=
np
.
zeros
((
0
,
),
dtype
=
np
.
int64
)
# to target box structure
# to target box structure
ann_info
[
'gt_bboxes_3d'
]
=
DepthInstance3DBoxes
(
ann_info
[
'gt_bboxes_3d'
]
=
DepthInstance3DBoxes
(
ann_info
[
'gt_bboxes_3d'
],
ann_info
[
'gt_bboxes_3d'
],
...
...
mmdet3d/metrics/indoor_metric.py
View file @
f63a62b8
# Copyright (c) OpenMMLab. All rights reserved.
# Copyright (c) OpenMMLab. All rights reserved.
from
collections
import
OrderedDict
from
typing
import
Dict
,
List
,
Optional
,
Sequence
from
typing
import
Dict
,
List
,
Optional
,
Sequence
import
numpy
as
np
from
mmengine.evaluator
import
BaseMetric
from
mmengine.evaluator
import
BaseMetric
from
mmengine.logging
import
MMLogger
from
mmengine.logging
import
MMLogger
from
mmdet3d.core
import
get_box_type
,
indoor_eval
from
mmdet3d.core
import
get_box_type
,
indoor_eval
from
mmdet3d.registry
import
METRICS
from
mmdet3d.registry
import
METRICS
from
mmdet.core
import
eval_map
@
METRICS
.
register_module
()
@
METRICS
.
register_module
()
class
IndoorMetric
(
BaseMetric
):
class
IndoorMetric
(
BaseMetric
):
"""
Kitti
evaluation metric.
"""
Indoor scene
evaluation metric.
Args:
Args:
iou_thr (list[float]): List of iou threshold when calculate the
iou_thr (list[float]): List of iou threshold when calculate the
...
@@ -90,3 +93,91 @@ class IndoorMetric(BaseMetric):
...
@@ -90,3 +93,91 @@ class IndoorMetric(BaseMetric):
box_mode_3d
=
box_mode_3d
)
box_mode_3d
=
box_mode_3d
)
return
ret_dict
return
ret_dict
@METRICS.register_module()
class Indoor2DMetric(BaseMetric):
    """Indoor 2D predictions evaluation metric.

    Args:
        iou_thr (list[float] | float): IoU threshold(s) used when
            calculating the metric. A single number is treated as a
            one-element list. Defaults to [0.5].
        collect_device (str, optional): Device name used for collecting
            results from different ranks during distributed training.
            Must be 'cpu' or 'gpu'. Defaults to 'cpu'.
        prefix (str): The prefix that will be added in the metric
            names to disambiguate homonymous metrics of different evaluators.
            If prefix is not provided in the argument, self.default_prefix
            will be used instead. Default: None
    """

    def __init__(self,
                 iou_thr: List[float] = [0.5],
                 collect_device: str = 'cpu',
                 prefix: Optional[str] = None,
                 **kwargs):
        super().__init__(prefix=prefix, collect_device=collect_device)
        # Copy sequences so an instance never aliases the shared default
        # list; a bare number is kept and wrapped in ``compute_metrics``.
        if isinstance(iou_thr, (int, float)):
            self.iou_thr = iou_thr
        else:
            self.iou_thr = list(iou_thr)

    def process(self, data_batch: Sequence[dict],
                predictions: Sequence[dict]) -> None:
        """Process one batch of data samples and predictions.

        For every sample, one ``(ann, dets)`` tuple is appended to
        ``self.results``: ``ann`` holds the ground-truth ``labels`` and
        ``bboxes``, and ``dets`` is a per-class list of arrays made of the
        predicted boxes with their score appended as the last column.

        Args:
            data_batch (Sequence[dict]): A batch of data
                from the dataloader.
            predictions (Sequence[dict]): A batch of outputs from
                the model.
        """
        batch_eval_anns = [
            item['data_sample']['eval_ann_info'] for item in data_batch
        ]
        num_classes = len(self.dataset_meta['CLASSES'])
        for eval_ann, pred_dict in zip(batch_eval_anns, predictions):
            pred = pred_dict['pred_instances']
            ann = dict(
                labels=eval_ann['gt_bboxes_labels'],
                bboxes=eval_ann['gt_bboxes'])

            pred_bboxes = pred['bboxes'].cpu().numpy()
            pred_scores = pred['scores'].cpu().numpy()
            pred_labels = pred['labels'].cpu().numpy()

            # Group detections per class, one array per class index.
            dets = []
            for label in range(num_classes):
                index = np.where(pred_labels == label)[0]
                pred_bbox_scores = np.hstack(
                    [pred_bboxes[index], pred_scores[index].reshape((-1, 1))])
                dets.append(pred_bbox_scores)

            self.results.append((ann, dets))

    def compute_metrics(self, results: list) -> Dict[str, float]:
        """Compute the metrics from processed results.

        Args:
            results (list): The processed results of each batch.

        Returns:
            Dict[str, float]: The computed metrics. The keys are the names of
            the metrics (``'mAP_<iou_thr>'``), and the values are
            corresponding results.
        """
        logger: MMLogger = MMLogger.get_current_instance()
        annotations, preds = zip(*results)
        eval_results = OrderedDict()

        # A single threshold must be wrapped before iterating; the previous
        # ``(self.iou_thr)`` was just a parenthesised float, not a tuple.
        iou_thr_2d = self.iou_thr
        if isinstance(iou_thr_2d, (int, float)):
            iou_thr_2d = [iou_thr_2d]

        for iou_thr_2d_single in iou_thr_2d:
            mean_ap, _ = eval_map(
                preds,
                annotations,
                scale_ranges=None,
                iou_thr=iou_thr_2d_single,
                dataset=self.dataset_meta['CLASSES'],
                logger=logger)
            eval_results['mAP_' + str(iou_thr_2d_single)] = mean_ap
        return eval_results
mmdet3d/models/dense_heads/vote_head.py
View file @
f63a62b8
...
@@ -754,15 +754,16 @@ class VoteHead(BaseModule):
...
@@ -754,15 +754,16 @@ class VoteHead(BaseModule):
batch_size
=
bbox3d
.
shape
[
0
]
batch_size
=
bbox3d
.
shape
[
0
]
results_list
=
list
()
results_list
=
list
()
if
use_nms
:
if
use_nms
:
for
b
in
range
(
batch_size
):
for
b
atch_index
in
range
(
batch_size
):
temp_results
=
InstanceData
()
temp_results
=
InstanceData
()
bbox_selected
,
score_selected
,
labels
=
\
bbox_selected
,
score_selected
,
labels
=
\
self
.
multiclass_nms_single
(
obj_scores
[
b
],
self
.
multiclass_nms_single
(
sem_scores
[
b
],
obj_scores
[
batch_index
],
bbox3d
[
b
],
sem_scores
[
batch_index
],
stack_points
[
b
,
...,
:
3
],
bbox3d
[
batch_index
],
batch_input_metas
[
b
])
stack_points
[
batch_index
,
...,
:
3
],
bbox
=
batch_input_metas
[
b
][
'box_type_3d'
](
batch_input_metas
[
batch_index
])
bbox
=
batch_input_metas
[
batch_index
][
'box_type_3d'
](
bbox_selected
,
bbox_selected
,
box_dim
=
bbox_selected
.
shape
[
-
1
],
box_dim
=
bbox_selected
.
shape
[
-
1
],
with_yaw
=
self
.
bbox_coder
.
with_rot
)
with_yaw
=
self
.
bbox_coder
.
with_rot
)
...
...
mmdet3d/models/detectors/base.py
View file @
f63a62b8
# Copyright (c) OpenMMLab. All rights reserved.
# Copyright (c) OpenMMLab. All rights reserved.
from
typing
import
List
,
Union
from
typing
import
List
,
Optional
,
Union
from
mmengine
import
InstanceData
from
mmengine
import
InstanceData
...
@@ -91,8 +91,8 @@ class Base3DDetector(BaseDetector):
...
@@ -91,8 +91,8 @@ class Base3DDetector(BaseDetector):
def
convert_to_datasample
(
def
convert_to_datasample
(
self
,
self
,
results_list_3d
:
InstanceList
,
results_list_3d
:
Optional
[
InstanceList
]
=
None
,
results_list_2d
:
InstanceList
=
None
,
results_list_2d
:
Optional
[
InstanceList
]
=
None
,
)
->
SampleList
:
)
->
SampleList
:
"""Convert results list to `Det3DDataSample`.
"""Convert results list to `Det3DDataSample`.
...
@@ -128,10 +128,18 @@ class Base3DDetector(BaseDetector):
...
@@ -128,10 +128,18 @@ class Base3DDetector(BaseDetector):
"""
"""
data_sample_list
=
[]
data_sample_list
=
[]
assert
(
results_list_2d
is
not
None
)
or
\
(
results_list_3d
is
not
None
),
\
'please pass at least one type of results_list'
if
results_list_2d
is
None
:
if
results_list_2d
is
None
:
results_list_2d
=
[
results_list_2d
=
[
InstanceData
()
for
_
in
range
(
len
(
results_list_3d
))
InstanceData
()
for
_
in
range
(
len
(
results_list_3d
))
]
]
if
results_list_3d
is
None
:
results_list_3d
=
[
InstanceData
()
for
_
in
range
(
len
(
results_list_2d
))
]
for
i
in
range
(
len
(
results_list_3d
)):
for
i
in
range
(
len
(
results_list_3d
)):
result
=
Det3DDataSample
()
result
=
Det3DDataSample
()
result
.
pred_instances_3d
=
results_list_3d
[
i
]
result
.
pred_instances_3d
=
results_list_3d
[
i
]
...
...
mmdet3d/models/detectors/imvotenet.py
View file @
f63a62b8
This diff is collapsed.
Click to expand it.
mmdet3d/models/fusion_layers/coord_transform.py
View file @
f63a62b8
...
@@ -105,8 +105,8 @@ def extract_2d_info(img_meta, tensor):
...
@@ -105,8 +105,8 @@ def extract_2d_info(img_meta, tensor):
"""
"""
img_shape
=
img_meta
[
'img_shape'
]
img_shape
=
img_meta
[
'img_shape'
]
ori_shape
=
img_meta
[
'ori_shape'
]
ori_shape
=
img_meta
[
'ori_shape'
]
img_h
,
img_w
,
_
=
img_shape
img_h
,
img_w
=
img_shape
ori_h
,
ori_w
,
_
=
ori_shape
ori_h
,
ori_w
=
ori_shape
img_scale_factor
=
(
img_scale_factor
=
(
tensor
.
new_tensor
(
img_meta
[
'scale_factor'
][:
2
])
tensor
.
new_tensor
(
img_meta
[
'scale_factor'
][:
2
])
...
...
mmdet3d/models/fusion_layers/vote_fusion.py
View file @
f63a62b8
...
@@ -45,8 +45,6 @@ class VoteFusion(nn.Module):
...
@@ -45,8 +45,6 @@ class VoteFusion(nn.Module):
seed_num
=
seed_3d_depth
.
shape
[
0
]
seed_num
=
seed_3d_depth
.
shape
[
0
]
img_shape
=
img_meta
[
'img_shape'
]
img_shape
=
img_meta
[
'img_shape'
]
img_h
,
img_w
,
_
=
img_shape
# first reverse the data transformations
# first reverse the data transformations
xyz_depth
=
apply_3d_transformation
(
xyz_depth
=
apply_3d_transformation
(
seed_3d_depth
,
'DEPTH'
,
img_meta
,
reverse
=
True
)
seed_3d_depth
,
'DEPTH'
,
img_meta
,
reverse
=
True
)
...
...
tests/data/sunrgbd/sunrgbd_infos.pkl
View file @
f63a62b8
No preview for this file type
tests/test_models/test_detectors/test_h3d.py
0 → 100644
View file @
f63a62b8
import
unittest
import
torch
from
mmengine
import
DefaultScope
from
mmdet3d.registry
import
MODELS
from
tests.utils.model_utils
import
(
_create_detector_inputs
,
_get_detector_cfg
,
_setup_seed
)
class TestH3D(unittest.TestCase):
    """Smoke test for building the H3DNet detector and running it."""

    def test_h3dnet(self):
        """Build H3DNet from its ScanNet config, then predict and loss."""
        import mmdet3d.models

        assert hasattr(mmdet3d.models, 'H3DNet')
        DefaultScope.get_instance('test_H3DNet', scope_name='mmdet3d')
        _setup_seed(0)
        detector_cfg = _get_detector_cfg(
            'h3dnet/h3dnet_3x8_scannet-3d-18class.py')
        detector = MODELS.build(detector_cfg)

        num_gt_instance = 5
        sample = _create_detector_inputs(
            num_gt_instance=num_gt_instance,
            points_feat_dim=4,
            bboxes_3d_type='depth',
            with_pts_semantic_mask=True,
            with_pts_instance_mask=True)
        data = [sample]

        # NOTE(review): the CUDA guard is assumed to cover the forward
        # passes and assertions below; the original indentation was lost,
        # so confirm against the upstream file.
        if not torch.cuda.is_available():
            return
        detector = detector.cuda()

        # test simple_test
        with torch.no_grad():
            batch_inputs, data_samples = detector.data_preprocessor(
                data, True)
            results = detector.forward(
                batch_inputs, data_samples, mode='predict')
        self.assertEqual(len(results), len(data))
        for field in ('bboxes_3d', 'scores_3d', 'labels_3d'):
            self.assertIn(field, results[0].pred_instances_3d)

        # save the memory
        with torch.no_grad():
            losses = detector.forward(
                batch_inputs, data_samples, mode='loss')
        for loss_name in ('vote_loss', 'objectness_loss', 'center_loss'):
            self.assertGreater(losses[loss_name], 0)
tests/test_models/test_detectors/test_imvotenet.py
0 → 100644
View file @
f63a62b8
import
unittest
import
torch
from
mmengine
import
DefaultScope
from
mmdet3d.registry
import
MODELS
from
tests.utils.model_utils
import
(
_create_detector_inputs
,
_get_detector_cfg
,
_setup_seed
)
class TestImvoteNet(unittest.TestCase):
    """Smoke tests for the image-only and the fused ImVoteNet detectors."""

    def _build_detector(self, instance_name, cfg_path):
        """Register the default scope, seed the RNGs and build the model."""
        import mmdet3d.models

        assert hasattr(mmdet3d.models, 'ImVoteNet')
        DefaultScope.get_instance(instance_name, scope_name='mmdet3d')
        _setup_seed(0)
        return MODELS.build(_get_detector_cfg(cfg_path))

    def test_imvotenet_only_img(self):
        """Run predict and loss with the image-only (stage 1) config."""
        detector = self._build_detector(
            'test_imvotenet_img',
            'imvotenet/imvotenet_faster_rcnn_r50_fpn_2x4_sunrgbd-3d-10class.py'
        )
        data = [
            _create_detector_inputs(
                with_points=False, with_img=True, img_size=128)
        ]

        # NOTE(review): the CUDA guard is assumed to cover the forward
        # passes and assertions below; the original indentation was lost,
        # so confirm against the upstream file.
        if not torch.cuda.is_available():
            return
        detector = detector.cuda()

        # test simple_test
        with torch.no_grad():
            batch_inputs, data_samples = detector.data_preprocessor(
                data, True)
            results = detector.forward(
                batch_inputs, data_samples, mode='predict')
        self.assertEqual(len(results), len(data))
        for field in ('bboxes', 'scores', 'labels'):
            self.assertIn(field, results[0].pred_instances)

        # save the memory
        with torch.no_grad():
            losses = detector.forward(
                batch_inputs, data_samples, mode='loss')
        self.assertGreater(sum(losses['loss_rpn_cls']), 0)
        self.assertGreater(losses['loss_cls'], 0)
        self.assertGreater(losses['loss_bbox'], 0)

    def test_imvotenet(self):
        """Run predict and loss with the points + image (stage 2) config."""
        detector = self._build_detector(
            'test_imvotenet',
            'imvotenet/imvotenet_stage2_16x8_sunrgbd-3d-10class.py')
        data = [
            _create_detector_inputs(
                with_points=True,
                with_img=True,
                img_size=128,
                bboxes_3d_type='depth')
        ]

        # NOTE(review): same assumption about the extent of the CUDA guard
        # as in ``test_imvotenet_only_img``; confirm against upstream.
        if not torch.cuda.is_available():
            return
        detector = detector.cuda()

        # test simple_test
        with torch.no_grad():
            batch_inputs, data_samples = detector.data_preprocessor(
                data, True)
            results = detector.forward(
                batch_inputs, data_samples, mode='predict')
        self.assertEqual(len(results), len(data))
        for field in ('bboxes_3d', 'scores_3d', 'labels_3d'):
            self.assertIn(field, results[0].pred_instances_3d)

        # save the memory
        with torch.no_grad():
            losses = detector.forward(
                batch_inputs, data_samples, mode='loss')
        for loss_name in ('vote_loss', 'objectness_loss', 'semantic_loss'):
            self.assertGreater(losses[loss_name], 0)
tests/utils/model_utils.py
View file @
f63a62b8
...
@@ -75,6 +75,7 @@ def _get_detector_cfg(fname):
...
@@ -75,6 +75,7 @@ def _get_detector_cfg(fname):
def
_create_detector_inputs
(
seed
=
0
,
def
_create_detector_inputs
(
seed
=
0
,
with_points
=
True
,
with_points
=
True
,
with_img
=
False
,
with_img
=
False
,
img_size
=
10
,
num_gt_instance
=
20
,
num_gt_instance
=
20
,
num_points
=
10
,
num_points
=
10
,
points_feat_dim
=
4
,
points_feat_dim
=
4
,
...
@@ -90,23 +91,46 @@ def _create_detector_inputs(seed=0,
...
@@ -90,23 +91,46 @@ def _create_detector_inputs(seed=0,
'depth'
:
DepthInstance3DBoxes
,
'depth'
:
DepthInstance3DBoxes
,
'cam'
:
CameraInstance3DBoxes
'cam'
:
CameraInstance3DBoxes
}
}
meta_info
=
dict
()
meta_info
[
'depth2img'
]
=
np
.
array
(
[[
5.23289349e+02
,
3.68831943e+02
,
6.10469439e+01
],
[
1.09560138e+02
,
1.97404735e+02
,
-
5.47377738e+02
],
[
1.25930002e-02
,
9.92229998e-01
,
-
1.23769999e-01
]])
meta_info
[
'lidar2img'
]
=
np
.
array
(
[[
5.23289349e+02
,
3.68831943e+02
,
6.10469439e+01
],
[
1.09560138e+02
,
1.97404735e+02
,
-
5.47377738e+02
],
[
1.25930002e-02
,
9.92229998e-01
,
-
1.23769999e-01
]])
if
with_points
:
if
with_points
:
points
=
torch
.
rand
([
num_points
,
points_feat_dim
])
points
=
torch
.
rand
([
num_points
,
points_feat_dim
])
else
:
else
:
points
=
None
points
=
None
if
with_img
:
if
with_img
:
img
=
torch
.
rand
(
3
,
10
,
10
)
img
=
torch
.
rand
(
3
,
img_size
,
img_size
)
meta_info
[
'img_shape'
]
=
(
img_size
,
img_size
)
meta_info
[
'ori_shape'
]
=
(
img_size
,
img_size
)
meta_info
[
'scale_factor'
]
=
np
.
array
([
1.
,
1.
])
else
:
else
:
img
=
None
img
=
None
inputs_dict
=
dict
(
img
=
img
,
points
=
points
)
inputs_dict
=
dict
(
img
=
img
,
points
=
points
)
gt_instance_3d
=
InstanceData
()
gt_instance_3d
=
InstanceData
()
gt_instance_3d
.
bboxes_3d
=
bbox_3d_class
[
bboxes_3d_type
](
gt_instance_3d
.
bboxes_3d
=
bbox_3d_class
[
bboxes_3d_type
](
torch
.
rand
([
num_gt_instance
,
gt_bboxes_dim
]),
box_dim
=
gt_bboxes_dim
)
torch
.
rand
([
num_gt_instance
,
gt_bboxes_dim
]),
box_dim
=
gt_bboxes_dim
)
gt_instance_3d
.
labels_3d
=
torch
.
randint
(
0
,
num_classes
,
[
num_gt_instance
])
gt_instance_3d
.
labels_3d
=
torch
.
randint
(
0
,
num_classes
,
[
num_gt_instance
])
data_sample
=
Det3DDataSample
(
data_sample
=
Det3DDataSample
(
metainfo
=
dict
(
box_type_3d
=
bbox_3d_class
[
bboxes_3d_type
]))
metainfo
=
dict
(
box_type_3d
=
bbox_3d_class
[
bboxes_3d_type
]))
data_sample
.
set_metainfo
(
meta_info
)
data_sample
.
gt_instances_3d
=
gt_instance_3d
data_sample
.
gt_instances_3d
=
gt_instance_3d
gt_instance
=
InstanceData
()
gt_instance
.
labels
=
torch
.
randint
(
0
,
num_classes
,
[
num_gt_instance
])
gt_instance
.
bboxes
=
torch
.
rand
(
num_gt_instance
,
4
)
gt_instance
.
bboxes
[:,
2
:]
=
gt_instance
.
bboxes
[:,
:
2
]
+
gt_instance
.
bboxes
[:,
2
:]
data_sample
.
gt_instances
=
gt_instance
data_sample
.
gt_pts_seg
=
PointData
()
data_sample
.
gt_pts_seg
=
PointData
()
if
with_pts_instance_mask
:
if
with_pts_instance_mask
:
pts_instance_mask
=
torch
.
randint
(
0
,
num_gt_instance
,
[
num_points
])
pts_instance_mask
=
torch
.
randint
(
0
,
num_gt_instance
,
[
num_points
])
...
...
tools/data_converter/update_infos_to_v2.py
View file @
f63a62b8
...
@@ -652,6 +652,9 @@ def update_sunrgbd_infos(pkl_path, out_dir):
...
@@ -652,6 +652,9 @@ def update_sunrgbd_infos(pkl_path, out_dir):
temp_data_info
[
'images'
][
'CAM0'
][
'width'
]
=
w
temp_data_info
[
'images'
][
'CAM0'
][
'width'
]
=
w
anns
=
ori_info_dict
[
'annos'
]
anns
=
ori_info_dict
[
'annos'
]
if
anns
[
'gt_num'
]
==
0
:
instance_list
=
[]
else
:
num_instances
=
len
(
anns
[
'name'
])
num_instances
=
len
(
anns
[
'name'
])
ignore_class_name
=
set
()
ignore_class_name
=
set
()
instance_list
=
[]
instance_list
=
[]
...
@@ -659,12 +662,16 @@ def update_sunrgbd_infos(pkl_path, out_dir):
...
@@ -659,12 +662,16 @@ def update_sunrgbd_infos(pkl_path, out_dir):
empty_instance
=
get_empty_instance
()
empty_instance
=
get_empty_instance
()
empty_instance
[
'bbox_3d'
]
=
anns
[
'gt_boxes_upright_depth'
][
empty_instance
[
'bbox_3d'
]
=
anns
[
'gt_boxes_upright_depth'
][
instance_id
].
tolist
()
instance_id
].
tolist
()
empty_instance
[
'bbox'
]
=
anns
[
'bbox'
][
instance_id
].
tolist
()
if
anns
[
'name'
][
instance_id
]
in
METAINFO
[
'CLASSES'
]:
if
anns
[
'name'
][
instance_id
]
in
METAINFO
[
'CLASSES'
]:
empty_instance
[
'bbox_label_3d'
]
=
METAINFO
[
'CLASSES'
].
index
(
empty_instance
[
'bbox_label_3d'
]
=
METAINFO
[
anns
[
'name'
][
instance_id
])
'CLASSES'
].
index
(
anns
[
'name'
][
instance_id
])
empty_instance
[
'bbox_label'
]
=
empty_instance
[
'bbox_label_3d'
]
else
:
else
:
ignore_class_name
.
add
(
anns
[
'name'
][
instance_id
])
ignore_class_name
.
add
(
anns
[
'name'
][
instance_id
])
empty_instance
[
'bbox_label_3d'
]
=
-
1
empty_instance
[
'bbox_label_3d'
]
=
-
1
empty_instance
[
'bbox_label'
]
=
-
1
empty_instance
=
clear_instance_unused_keys
(
empty_instance
)
empty_instance
=
clear_instance_unused_keys
(
empty_instance
)
instance_list
.
append
(
empty_instance
)
instance_list
.
append
(
empty_instance
)
temp_data_info
[
'instances'
]
=
instance_list
temp_data_info
[
'instances'
]
=
instance_list
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment