lishj6 / BEVFomer — Commits

Commit 4cd43886, authored Sep 01, 2025 by lishj6
init
parent a9a1fe81
Changes: 207
Showing 20 changed files with 3572 additions and 0 deletions (+3572, -0)
projects/mmdet3d_plugin/bevformer/runner/epoch_based_runner.py  +97 -0
projects/mmdet3d_plugin/core/bbox/assigners/__init__.py  +3 -0
projects/mmdet3d_plugin/core/bbox/assigners/hungarian_assigner_3d.py  +136 -0
projects/mmdet3d_plugin/core/bbox/coders/__init__.py  +3 -0
projects/mmdet3d_plugin/core/bbox/coders/nms_free_coder.py  +122 -0
projects/mmdet3d_plugin/core/bbox/match_costs/__init__.py  +4 -0
projects/mmdet3d_plugin/core/bbox/match_costs/match_cost.py  +90 -0
projects/mmdet3d_plugin/core/bbox/util.py  +53 -0
projects/mmdet3d_plugin/core/evaluation/__init__.py  +1 -0
projects/mmdet3d_plugin/core/evaluation/eval_hooks.py  +91 -0
projects/mmdet3d_plugin/core/evaluation/kitti2waymo.py  +251 -0
projects/mmdet3d_plugin/datasets/__init__.py  +8 -0
projects/mmdet3d_plugin/datasets/builder.py  +166 -0
projects/mmdet3d_plugin/datasets/nuscenes_dataset.py  +240 -0
projects/mmdet3d_plugin/datasets/nuscenes_dataset_v2.py  +303 -0
projects/mmdet3d_plugin/datasets/nuscenes_mono_dataset.py  +777 -0
projects/mmdet3d_plugin/datasets/nuscnes_eval.py  +751 -0
projects/mmdet3d_plugin/datasets/pipelines/__init__.py  +13 -0
projects/mmdet3d_plugin/datasets/pipelines/augmentation.py  +369 -0
projects/mmdet3d_plugin/datasets/pipelines/dd3d_mapper.py  +94 -0
projects/mmdet3d_plugin/bevformer/runner/epoch_based_runner.py (new file, mode 100644)
# Copyright (c) OpenMMLab. All rights reserved.
# ---------------------------------------------
# Modified by Zhiqi Li
# ---------------------------------------------
import os.path as osp

import torch
import mmcv
from mmcv.runner.base_runner import BaseRunner
from mmcv.runner.epoch_based_runner import EpochBasedRunner
from mmcv.runner.builder import RUNNERS
from mmcv.runner.checkpoint import save_checkpoint
from mmcv.runner.utils import get_host_info
from pprint import pprint
from mmcv.parallel.data_container import DataContainer


@RUNNERS.register_module()
class EpochBasedRunner_video(EpochBasedRunner):
    '''
    # basic logic
    input_sequence = [a, b, c]  # given a sequence of samples
    prev_bev = None
    for each in input_sequence[:-1]:
        prev_bev = eval_model(each, prev_bev)  # inference only
    model(input_sequence[-1], prev_bev)  # train on the last sample
    '''

    def __init__(self,
                 model,
                 eval_model=None,
                 batch_processor=None,
                 optimizer=None,
                 work_dir=None,
                 logger=None,
                 meta=None,
                 keys=['gt_bboxes_3d', 'gt_labels_3d', 'img'],
                 max_iters=None,
                 max_epochs=None):
        super().__init__(model, batch_processor, optimizer, work_dir, logger,
                         meta, max_iters, max_epochs)
        keys.append('img_metas')
        self.keys = keys
        self.eval_model = eval_model
        self.eval_model.eval()

    def run_iter(self, data_batch, train_mode, **kwargs):
        if self.batch_processor is not None:
            assert False
            # outputs = self.batch_processor(
            #     self.model, data_batch, train_mode=train_mode, **kwargs)
        elif train_mode:
            num_samples = data_batch['img'].data[0].size(1)
            data_list = []
            prev_bev = None
            # split the batched temporal sequence into per-frame samples
            for i in range(num_samples):
                data = {}
                for key in self.keys:
                    if key not in ['img_metas', 'img', 'points']:
                        data[key] = data_batch[key]
                    else:
                        if key == 'img':
                            data['img'] = DataContainer(
                                data=[data_batch['img'].data[0][:, i]],
                                cpu_only=data_batch['img'].cpu_only,
                                stack=True)
                        elif key == 'img_metas':
                            data['img_metas'] = DataContainer(
                                data=[[each[i] for each in
                                       data_batch['img_metas'].data[0]]],
                                cpu_only=data_batch['img_metas'].cpu_only)
                        else:
                            assert False
                data_list.append(data)
            with torch.no_grad():
                # inference-only pass over all frames except the last,
                # carrying the BEV features forward
                for i in range(num_samples - 1):
                    if data_list[i]['img_metas'].data[0][0]['prev_bev_exists']:
                        data_list[i]['prev_bev'] = DataContainer(
                            data=[prev_bev], cpu_only=False)
                    prev_bev = self.eval_model.val_step(
                        data_list[i], self.optimizer, **kwargs)
            # train on the last frame only
            if data_list[-1]['img_metas'].data[0][0]['prev_bev_exists']:
                data_list[-1]['prev_bev'] = DataContainer(
                    data=[prev_bev], cpu_only=False)
            outputs = self.model.train_step(data_list[-1], self.optimizer,
                                            **kwargs)
        else:
            assert False
            # outputs = self.model.val_step(data_batch, self.optimizer, **kwargs)

        if not isinstance(outputs, dict):
            raise TypeError('"batch_processor()" or "model.train_step()" '
                            'and "model.val_step()" must return a dict')
        if 'log_vars' in outputs:
            self.log_buffer.update(outputs['log_vars'], outputs['num_samples'])
        self.outputs = outputs
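The docstring's rollout can be exercised on its own. Below is a minimal, self-contained sketch of that train-on-last-frame pattern, with stand-in eval_model/model callables and random tensors in place of the real BEVFormer modules and data (all names here are hypothetical, not part of this commit):

# Hypothetical sketch of the runner's temporal logic; eval_model/model are
# toy stand-ins, not the BEVFormer modules from this commit.
import torch

def eval_model(sample, prev_bev):
    # inference only: fuse the previous BEV (if any) with the current frame
    with torch.no_grad():
        bev = sample.mean(dim=0, keepdim=True)
        return bev if prev_bev is None else 0.5 * (bev + prev_bev)

def model(sample, prev_bev):
    # training step: only this call would receive gradients in the runner
    feat = sample.mean(dim=0, keepdim=True)
    loss = ((feat - prev_bev) ** 2).mean() if prev_bev is not None else feat.pow(2).mean()
    return {'loss': loss, 'num_samples': 1}

input_sequence = [torch.randn(6, 256) for _ in range(3)]  # e.g. 3 frames of 6-camera features
prev_bev = None
for each in input_sequence[:-1]:
    prev_bev = eval_model(each, prev_bev)   # roll the BEV state forward without gradients
outputs = model(input_sequence[-1], prev_bev)  # only the last frame contributes to the loss
print(outputs['loss'])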
projects/mmdet3d_plugin/core/bbox/assigners/__init__.py (new file, mode 100644)
from .hungarian_assigner_3d import HungarianAssigner3D

__all__ = ['HungarianAssigner3D']
projects/mmdet3d_plugin/core/bbox/assigners/hungarian_assigner_3d.py (new file, mode 100644)
import torch

from mmdet.core.bbox.builder import BBOX_ASSIGNERS
from mmdet.core.bbox.assigners import AssignResult
from mmdet.core.bbox.assigners import BaseAssigner
from mmdet.core.bbox.match_costs import build_match_cost
from mmdet.models.utils.transformer import inverse_sigmoid
from projects.mmdet3d_plugin.core.bbox.util import normalize_bbox

try:
    from scipy.optimize import linear_sum_assignment
except ImportError:
    linear_sum_assignment = None


@BBOX_ASSIGNERS.register_module()
class HungarianAssigner3D(BaseAssigner):
    """Computes one-to-one matching between predictions and ground truth.

    This class computes an assignment between the targets and the predictions
    based on the costs. The costs are a weighted sum of three components:
    classification cost, regression L1 cost and regression IoU cost. The
    targets don't include the no-object class, so generally there are more
    predictions than targets. After the one-to-one matching, the unmatched
    predictions are treated as background. Thus each query prediction will be
    assigned `0` or a positive integer indicating the ground truth index:

    - 0: negative sample, no assigned gt
    - positive integer: positive sample, index (1-based) of assigned gt

    Args:
        cls_cost (dict, optional): Config of the classification match cost.
            Default: dict(type='ClassificationCost', weight=1.).
        reg_cost (dict, optional): Config of the regression L1 match cost.
            Default: dict(type='BBoxL1Cost', weight=1.0).
        iou_cost (dict, optional): Config of the regression IoU match cost.
            Default: dict(type='IoUCost', weight=0.0).
        pc_range (list[float], optional): Range of the point cloud, used to
            normalize ground-truth boxes. Default: None.
    """

    def __init__(self,
                 cls_cost=dict(type='ClassificationCost', weight=1.),
                 reg_cost=dict(type='BBoxL1Cost', weight=1.0),
                 iou_cost=dict(type='IoUCost', weight=0.0),
                 pc_range=None):
        self.cls_cost = build_match_cost(cls_cost)
        self.reg_cost = build_match_cost(reg_cost)
        self.iou_cost = build_match_cost(iou_cost)
        self.pc_range = pc_range

    def assign(self,
               bbox_pred,
               cls_pred,
               gt_bboxes,
               gt_labels,
               gt_bboxes_ignore=None,
               eps=1e-7):
        """Computes one-to-one matching based on the weighted costs.

        This method assigns each query prediction to a ground truth or
        background. `assigned_gt_inds` of -1 means "don't care", 0 means
        negative sample, and a positive number is the (1-based) index of the
        assigned gt.

        The assignment is done in the following steps; the order matters:

        1. assign every prediction to -1
        2. compute the weighted costs
        3. do Hungarian matching on CPU based on the costs
        4. assign all to 0 (background) first, then for each matched pair
           between predictions and gts, treat this prediction as foreground
           and assign the corresponding gt index (plus 1) to it.

        Args:
            bbox_pred (Tensor): Predicted boxes in normalized 3D format
                (see `normalize_bbox`). Shape [num_query, code_size].
            cls_pred (Tensor): Predicted classification logits, shape
                [num_query, num_class].
            gt_bboxes (Tensor): Ground truth boxes with unnormalized
                coordinates. Shape [num_gt, box_dim].
            gt_labels (Tensor): Labels of `gt_bboxes`, shape (num_gt,).
            gt_bboxes_ignore (Tensor, optional): Ground truth bboxes that are
                labelled as `ignored`. Default None.
            eps (int | float, optional): A value added to the denominator for
                numerical stability. Default 1e-7.

        Returns:
            :obj:`AssignResult`: The assigned result.
        """
        assert gt_bboxes_ignore is None, \
            'Only case when gt_bboxes_ignore is None is supported.'
        num_gts, num_bboxes = gt_bboxes.size(0), bbox_pred.size(0)

        # 1. assign -1 by default
        assigned_gt_inds = bbox_pred.new_full((num_bboxes,), -1,
                                              dtype=torch.long)
        assigned_labels = bbox_pred.new_full((num_bboxes,), -1,
                                             dtype=torch.long)
        if num_gts == 0 or num_bboxes == 0:
            # No ground truth or boxes, return empty assignment
            if num_gts == 0:
                # No ground truth, assign all to background
                assigned_gt_inds[:] = 0
            return AssignResult(
                num_gts, assigned_gt_inds, None, labels=assigned_labels)

        # 2. compute the weighted costs
        # classification and bbox cost
        cls_cost = self.cls_cost(cls_pred, gt_labels)
        # regression L1 cost
        normalized_gt_bboxes = normalize_bbox(gt_bboxes, self.pc_range)
        reg_cost = self.reg_cost(bbox_pred[:, :8], normalized_gt_bboxes[:, :8])
        # weighted sum of the two costs
        cost = cls_cost + reg_cost

        # 3. do Hungarian matching on CPU using linear_sum_assignment
        cost = cost.detach().cpu()
        if linear_sum_assignment is None:
            raise ImportError('Please run "pip install scipy" '
                              'to install scipy first.')
        matched_row_inds, matched_col_inds = linear_sum_assignment(cost)
        matched_row_inds = torch.from_numpy(matched_row_inds).to(
            bbox_pred.device)
        matched_col_inds = torch.from_numpy(matched_col_inds).to(
            bbox_pred.device)

        # 4. assign backgrounds and foregrounds
        # assign all indices to background first
        assigned_gt_inds[:] = 0
        # assign foregrounds based on matching results
        assigned_gt_inds[matched_row_inds] = matched_col_inds + 1
        assigned_labels[matched_row_inds] = gt_labels[matched_col_inds]
        return AssignResult(
            num_gts, assigned_gt_inds, None, labels=assigned_labels)
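For a concrete feel of steps 2-4, here is the same scipy matching run on a made-up 3-prediction by 2-ground-truth cost matrix (the numbers are hypothetical, only the mechanics mirror the assigner):

# Minimal illustration of the assigner's matching steps (hypothetical costs).
import torch
from scipy.optimize import linear_sum_assignment

cost = torch.tensor([[0.9, 0.1],   # 3 predictions x 2 ground truths
                     [0.4, 0.6],
                     [0.2, 0.8]])
rows, cols = linear_sum_assignment(cost.numpy())
assigned = torch.zeros(3, dtype=torch.long)                    # 0 = background
assigned[torch.from_numpy(rows)] = torch.from_numpy(cols) + 1  # 1-based gt index
print(assigned.tolist())  # [2, 0, 1]: pred 0 -> gt 1, pred 2 -> gt 0, pred 1 stays background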
projects/mmdet3d_plugin/core/bbox/coders/__init__.py (new file, mode 100644)
from .nms_free_coder import NMSFreeCoder

__all__ = ['NMSFreeCoder']
projects/mmdet3d_plugin/core/bbox/coders/nms_free_coder.py (new file, mode 100644)
import torch

from mmdet.core.bbox import BaseBBoxCoder
from mmdet.core.bbox.builder import BBOX_CODERS
from projects.mmdet3d_plugin.core.bbox.util import denormalize_bbox
import numpy as np


@BBOX_CODERS.register_module()
class NMSFreeCoder(BaseBBoxCoder):
    """Bbox coder for the NMS-free detector.

    Args:
        pc_range (list[float]): Range of the point cloud.
        post_center_range (list[float]): Limit of box centers.
            Default: None.
        max_num (int): Max number of boxes to be kept. Default: 100.
        score_threshold (float): Threshold to filter boxes based on score.
            Default: None.
        num_classes (int): Number of classes. Default: 10.
    """

    def __init__(self,
                 pc_range,
                 voxel_size=None,
                 post_center_range=None,
                 max_num=100,
                 score_threshold=None,
                 num_classes=10):
        self.pc_range = pc_range
        self.voxel_size = voxel_size
        self.post_center_range = post_center_range
        self.max_num = max_num
        self.score_threshold = score_threshold
        self.num_classes = num_classes

    def encode(self):
        pass

    def decode_single(self, cls_scores, bbox_preds):
        """Decode bboxes.

        Args:
            cls_scores (Tensor): Outputs from the classification head,
                shape [num_query, cls_out_channels]. Note that
                cls_out_channels should include the background class.
            bbox_preds (Tensor): Outputs from the regression head with
                normalized coordinate format
                (cx, cy, w, l, cz, h, rot_sine, rot_cosine, vx, vy).
                Shape [num_query, 9].
        Returns:
            dict: Decoded boxes, scores and labels.
        """
        max_num = self.max_num

        cls_scores = cls_scores.sigmoid()
        # flattened topk: each index encodes both the query and the class
        scores, indexs = cls_scores.view(-1).topk(max_num)
        labels = indexs % self.num_classes
        bbox_index = indexs // self.num_classes
        bbox_preds = bbox_preds[bbox_index]

        final_box_preds = denormalize_bbox(bbox_preds, self.pc_range)
        final_scores = scores
        final_preds = labels

        # use score threshold; if nothing survives, relax it gradually
        if self.score_threshold is not None:
            thresh_mask = final_scores > self.score_threshold
            tmp_score = self.score_threshold
            while thresh_mask.sum() == 0:
                tmp_score *= 0.9
                if tmp_score < 0.01:
                    thresh_mask = final_scores > -1
                    break
                thresh_mask = final_scores >= tmp_score

        if self.post_center_range is not None:
            self.post_center_range = torch.tensor(
                self.post_center_range, device=scores.device)
            mask = (final_box_preds[..., :3] >=
                    self.post_center_range[:3]).all(1)
            mask &= (final_box_preds[..., :3] <=
                     self.post_center_range[3:]).all(1)

            if self.score_threshold:
                mask &= thresh_mask

            boxes3d = final_box_preds[mask]
            scores = final_scores[mask]
            labels = final_preds[mask]
            predictions_dict = {
                'bboxes': boxes3d,
                'scores': scores,
                'labels': labels
            }
        else:
            raise NotImplementedError(
                'Need to reorganize output as a batch, only '
                'support post_center_range is not None for now!')
        return predictions_dict

    def decode(self, preds_dicts):
        """Decode bboxes.

        Args:
            preds_dicts (dict): Contains

                - all_cls_scores (Tensor): Outputs from the classification
                  head, shape [nb_dec, bs, num_query, cls_out_channels].
                - all_bbox_preds (Tensor): Sigmoid outputs from the regression
                  head with normalized coordinate format
                  (cx, cy, w, l, cz, h, rot_sine, rot_cosine, vx, vy).
                  Shape [nb_dec, bs, num_query, 9].
        Returns:
            list[dict]: Decoded boxes.
        """
        all_cls_scores = preds_dicts['all_cls_scores'][-1]
        all_bbox_preds = preds_dicts['all_bbox_preds'][-1]

        batch_size = all_cls_scores.size()[0]
        predictions_list = []
        for i in range(batch_size):
            predictions_list.append(
                self.decode_single(all_cls_scores[i], all_bbox_preds[i]))
        return predictions_list
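The flattened topk in decode_single recovers both the class and the source query from a single index; a tiny check of that modulo/integer-division arithmetic with made-up scores:

# Tiny check of the flattened-topk trick used in decode_single (made-up scores).
import torch

num_classes = 3
cls_scores = torch.tensor([[0.1, 0.8, 0.1],    # query 0
                           [0.7, 0.2, 0.1],    # query 1
                           [0.2, 0.2, 0.9]])   # query 2
scores, indexs = cls_scores.view(-1).topk(2)
labels = indexs % num_classes        # class of each kept entry
bbox_index = indexs // num_classes   # which query it came from
print(labels.tolist(), bbox_index.tolist())  # [2, 1] and [2, 0]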
projects/mmdet3d_plugin/core/bbox/match_costs/__init__.py (new file, mode 100644)
from mmdet.core.bbox.match_costs import build_match_cost
from .match_cost import BBox3DL1Cost, SmoothL1Cost

__all__ = ['build_match_cost', 'BBox3DL1Cost', 'SmoothL1Cost']
projects/mmdet3d_plugin/core/bbox/match_costs/match_cost.py (new file, mode 100644)
import torch
import mmcv

from mmdet.core.bbox.match_costs.builder import MATCH_COST


@MATCH_COST.register_module()
class BBox3DL1Cost(object):
    """BBox3DL1Cost.

    Args:
        weight (int | float, optional): loss weight
    """

    def __init__(self, weight=1.):
        self.weight = weight

    def __call__(self, bbox_pred, gt_bboxes):
        """
        Args:
            bbox_pred (Tensor): Predicted boxes in normalized 3D format
                (cx, cy, w, l, cz, h, rot_sine, rot_cosine, [vx, vy]).
                Shape [num_query, code_dim].
            gt_bboxes (Tensor): Ground truth boxes in the same normalized
                format. Shape [num_gt, code_dim].
        Returns:
            torch.Tensor: bbox_cost value with weight,
                shape [num_query, num_gt].
        """
        bbox_cost = torch.cdist(bbox_pred, gt_bboxes, p=1)
        return bbox_cost * self.weight


@mmcv.jit(derivate=True, coderize=True)
# @weighted_loss
def smooth_l1_loss(pred, target, beta=1.0):
    """Smooth L1 loss.

    Args:
        pred (torch.Tensor): The prediction.
        target (torch.Tensor): The learning target of the prediction.
        beta (float, optional): The threshold in the piecewise function.
            Defaults to 1.0.
    Returns:
        torch.Tensor: Calculated loss
    """
    assert beta > 0
    if target.numel() == 0:
        return pred.sum() * 0
    # assert pred.size() == target.size()
    diff = torch.abs(pred - target)
    loss = torch.where(diff < beta, 0.5 * diff * diff / beta,
                       diff - 0.5 * beta)
    return loss.sum(-1)


@MATCH_COST.register_module()
class SmoothL1Cost(object):
    """SmoothL1Cost.

    Args:
        weight (int | float, optional): loss weight
    """

    def __init__(self, weight=1.):
        self.weight = weight

    def __call__(self, input, target):
        """
        Args:
            input (Tensor): Predicted features, shape [num_query, C].
            target (Tensor): Ground truth features, shape [num_gt, C].
        Returns:
            torch.Tensor: cost value with weight, shape [num_query, num_gt].
        """
        N1, C = input.shape
        N2, C = target.shape
        # broadcast to [num_query, num_gt, C] and reduce over C
        input = input.contiguous().view(N1, C)[:, None, :]
        target = target.contiguous().view(N2, C)[None, :, :]
        cost = smooth_l1_loss(input, target)
        return cost * self.weight
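BBox3DL1Cost is just a pairwise L1 distance table between every prediction and every ground truth; a toy check with hypothetical 2-D "boxes":

# BBox3DL1Cost reduces to a pairwise L1 distance; hypothetical 2-D toy values.
import torch

bbox_pred = torch.tensor([[0.0, 0.0], [1.0, 1.0]])   # 2 predictions
gt_bboxes = torch.tensor([[0.5, 0.5]])               # 1 ground truth
cost = torch.cdist(bbox_pred, gt_bboxes, p=1)        # shape [num_query, num_gt]
print(cost)  # tensor([[1.], [1.]]) -- |0-0.5|+|0-0.5| and |1-0.5|+|1-0.5|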
projects/mmdet3d_plugin/core/bbox/util.py (new file, mode 100644)
import torch


def normalize_bbox(bboxes, pc_range):
    cx = bboxes[..., 0:1]
    cy = bboxes[..., 1:2]
    cz = bboxes[..., 2:3]
    w = bboxes[..., 3:4].log()
    l = bboxes[..., 4:5].log()
    h = bboxes[..., 5:6].log()
    rot = bboxes[..., 6:7]
    if bboxes.size(-1) > 7:
        vx = bboxes[..., 7:8]
        vy = bboxes[..., 8:9]
        normalized_bboxes = torch.cat(
            (cx, cy, w, l, cz, h, rot.sin(), rot.cos(), vx, vy), dim=-1)
    else:
        normalized_bboxes = torch.cat(
            (cx, cy, w, l, cz, h, rot.sin(), rot.cos()), dim=-1)
    return normalized_bboxes


def denormalize_bbox(normalized_bboxes, pc_range):
    # rotation
    rot_sine = normalized_bboxes[..., 6:7]
    rot_cosine = normalized_bboxes[..., 7:8]
    rot = torch.atan2(rot_sine, rot_cosine)

    # center in the bev
    cx = normalized_bboxes[..., 0:1]
    cy = normalized_bboxes[..., 1:2]
    cz = normalized_bboxes[..., 4:5]

    # size
    w = normalized_bboxes[..., 2:3]
    l = normalized_bboxes[..., 3:4]
    h = normalized_bboxes[..., 5:6]
    w = w.exp()
    l = l.exp()
    h = h.exp()

    if normalized_bboxes.size(-1) > 8:
        # velocity
        vx = normalized_bboxes[:, 8:9]
        vy = normalized_bboxes[:, 9:10]
        denormalized_bboxes = torch.cat(
            [cx, cy, cz, w, l, h, rot, vx, vy], dim=-1)
    else:
        denormalized_bboxes = torch.cat(
            [cx, cy, cz, w, l, h, rot], dim=-1)
    return denormalized_bboxes
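A round-trip sanity check for this pair (the box values are made up; assumes the plugin package is importable). Note that both functions ignore their pc_range argument, and that normalize_bbox moves sizes through log() and the yaw through sin/cos, which denormalize_bbox undoes with exp() and atan2:

# Round-trip check for normalize_bbox/denormalize_bbox (made-up box values).
import torch
from projects.mmdet3d_plugin.core.bbox.util import normalize_bbox, denormalize_bbox

box = torch.tensor([[10.0, -5.0, 1.0, 2.0, 4.5, 1.8, 0.3, 2.0, 0.5]])
# layout: cx, cy, cz, w, l, h, rot, vx, vy
norm = normalize_bbox(box, pc_range=None)      # 10 dims: sizes logged, rot -> (sin, cos)
rec = denormalize_bbox(norm, pc_range=None)    # back to the same 9-dim layout
print(torch.allclose(rec, box, atol=1e-6))     # True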
projects/mmdet3d_plugin/core/evaluation/__init__.py (new file, mode 100644)
from .eval_hooks import CustomDistEvalHook
projects/mmdet3d_plugin/core/evaluation/eval_hooks.py (new file, mode 100644)
# Note: Considering that MMCV's EvalHook updated its interface in V1.3.16,
# in order to avoid strong version dependency, we did not directly
# inherit EvalHook but BaseDistEvalHook.
import bisect
import os.path as osp

import mmcv
import torch.distributed as dist
from mmcv.runner import DistEvalHook as BaseDistEvalHook
from mmcv.runner import EvalHook as BaseEvalHook
from torch.nn.modules.batchnorm import _BatchNorm
from mmdet.core.evaluation.eval_hooks import DistEvalHook


def _calc_dynamic_intervals(start_interval, dynamic_interval_list):
    assert mmcv.is_list_of(dynamic_interval_list, tuple)

    dynamic_milestones = [0]
    dynamic_milestones.extend(
        [dynamic_interval[0] for dynamic_interval in dynamic_interval_list])
    dynamic_intervals = [start_interval]
    dynamic_intervals.extend(
        [dynamic_interval[1] for dynamic_interval in dynamic_interval_list])
    return dynamic_milestones, dynamic_intervals


class CustomDistEvalHook(BaseDistEvalHook):

    def __init__(self, *args, dynamic_intervals=None, **kwargs):
        super(CustomDistEvalHook, self).__init__(*args, **kwargs)
        self.use_dynamic_intervals = dynamic_intervals is not None
        if self.use_dynamic_intervals:
            self.dynamic_milestones, self.dynamic_intervals = \
                _calc_dynamic_intervals(self.interval, dynamic_intervals)

    def _decide_interval(self, runner):
        if self.use_dynamic_intervals:
            progress = runner.epoch if self.by_epoch else runner.iter
            step = bisect.bisect(self.dynamic_milestones, (progress + 1))
            # Dynamically modify the evaluation interval
            self.interval = self.dynamic_intervals[step - 1]

    def before_train_epoch(self, runner):
        """Evaluate the model only at the start of training by epoch."""
        self._decide_interval(runner)
        super().before_train_epoch(runner)

    def before_train_iter(self, runner):
        self._decide_interval(runner)
        super().before_train_iter(runner)

    def _do_evaluate(self, runner):
        """Perform evaluation and save checkpoint."""
        # Synchronization of BatchNorm's buffer (running_mean
        # and running_var) is not supported in the DDP of pytorch,
        # which may cause the inconsistent performance of models in
        # different ranks, so we broadcast BatchNorm's buffers
        # of rank 0 to other ranks to avoid this.
        if self.broadcast_bn_buffer:
            model = runner.model
            for name, module in model.named_modules():
                if isinstance(module,
                              _BatchNorm) and module.track_running_stats:
                    dist.broadcast(module.running_var, 0)
                    dist.broadcast(module.running_mean, 0)

        if not self._should_evaluate(runner):
            return

        tmpdir = self.tmpdir
        if tmpdir is None:
            tmpdir = osp.join(runner.work_dir, '.eval_hook')

        # imported here to avoid a circular import
        from projects.mmdet3d_plugin.bevformer.apis.test import custom_multi_gpu_test
        results = custom_multi_gpu_test(
            runner.model,
            self.dataloader,
            tmpdir=tmpdir,
            gpu_collect=self.gpu_collect)
        if runner.rank == 0:
            print('\n')
            runner.log_buffer.output['eval_iter_num'] = len(self.dataloader)
            key_score = self.evaluate(runner, results)

            if self.save_best:
                self._save_ckpt(runner, key_score)
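How the milestone/interval lists resolve at runtime can be seen by replaying _decide_interval's bisect by hand, with a hypothetical schedule (evaluate every 12 epochs, then every 4 from epoch 20, then every epoch from 23):

# Replaying the dynamic-interval lookup with a hypothetical schedule.
import bisect

start_interval = 12
dynamic_interval_list = [(20, 4), (23, 1)]    # (milestone, new interval)
milestones = [0] + [m for m, _ in dynamic_interval_list]          # [0, 20, 23]
intervals = [start_interval] + [i for _, i in dynamic_interval_list]  # [12, 4, 1]

for epoch in (0, 18, 19, 21, 22):
    step = bisect.bisect(milestones, epoch + 1)
    print(epoch, '->', intervals[step - 1])   # 12, 12, 4, 4, 1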
projects/mmdet3d_plugin/core/evaluation/kitti2waymo.py (new file, mode 100644)
# Copyright (c) OpenMMLab. All rights reserved.
r"""Adapted from `Waymo to KITTI converter
<https://github.com/caizhongang/waymo_kitti_converter>`_.
"""

try:
    from waymo_open_dataset import dataset_pb2 as open_dataset
    import mmcv
    import numpy as np
    import tensorflow as tf
    from glob import glob
    from os.path import join
    from waymo_open_dataset import label_pb2
    from waymo_open_dataset.protos import metrics_pb2
except ImportError:
    # pass
    raise ImportError(
        'Please run "pip install waymo-open-dataset-tf-2-1-0==1.2.0" '
        'to install the official devkit first.')


class KITTI2Waymo(object):
    """KITTI predictions to Waymo converter.

    This class serves as the converter to change predictions from KITTI to
    Waymo format.

    Args:
        kitti_result_files (list[dict]): Predictions in KITTI format.
        waymo_tfrecords_dir (str): Directory to load waymo raw data.
        waymo_results_save_dir (str): Directory to save converted predictions
            in waymo format (.bin files).
        waymo_results_final_path (str): Path to save combined
            predictions in waymo format (.bin file), like 'a/b/c.bin'.
        prefix (str): Prefix of filename. In general, 0 for training, 1 for
            validation and 2 for testing.
        workers (str): Number of parallel processes.
    """

    def __init__(self,
                 kitti_result_files,
                 waymo_tfrecords_dir,
                 waymo_results_save_dir,
                 waymo_results_final_path,
                 prefix,
                 workers=64):
        self.kitti_result_files = kitti_result_files
        self.waymo_tfrecords_dir = waymo_tfrecords_dir
        self.waymo_results_save_dir = waymo_results_save_dir
        self.waymo_results_final_path = waymo_results_final_path
        self.prefix = prefix
        self.workers = int(workers)
        self.name2idx = {}
        for idx, result in enumerate(kitti_result_files):
            if len(result['sample_idx']) > 0:
                self.name2idx[str(result['sample_idx'][0])] = idx

        # turn on eager execution for older tensorflow versions
        if int(tf.__version__.split('.')[0]) < 2:
            tf.enable_eager_execution()

        self.k2w_cls_map = {
            'Car': label_pb2.Label.TYPE_VEHICLE,
            'Pedestrian': label_pb2.Label.TYPE_PEDESTRIAN,
            'Sign': label_pb2.Label.TYPE_SIGN,
            'Cyclist': label_pb2.Label.TYPE_CYCLIST,
        }

        self.T_ref_to_front_cam = np.array([[0.0, 0.0, 1.0, 0.0],
                                            [-1.0, 0.0, 0.0, 0.0],
                                            [0.0, -1.0, 0.0, 0.0],
                                            [0.0, 0.0, 0.0, 1.0]])

        self.get_file_names()
        self.create_folder()

    def get_file_names(self):
        """Get file names of waymo raw data."""
        self.waymo_tfrecord_pathnames = sorted(
            glob(join(self.waymo_tfrecords_dir, '*.tfrecord')))
        print(len(self.waymo_tfrecord_pathnames), 'tfrecords found.')

    def create_folder(self):
        """Create folder for data conversion."""
        mmcv.mkdir_or_exist(self.waymo_results_save_dir)

    def parse_objects(self, kitti_result, T_k2w, context_name,
                      frame_timestamp_micros):
        """Parse one prediction with several instances in kitti format and
        convert them to `Object` proto.

        Args:
            kitti_result (dict): Predictions in kitti format.

                - name (np.ndarray): Class labels of predictions.
                - dimensions (np.ndarray): Height, width, length of boxes.
                - location (np.ndarray): Bottom center of boxes (x, y, z).
                - rotation_y (np.ndarray): Orientation of boxes.
                - score (np.ndarray): Scores of predictions.
            T_k2w (np.ndarray): Transformation matrix from kitti to waymo.
            context_name (str): Context name of the frame.
            frame_timestamp_micros (int): Frame timestamp.

        Returns:
            :obj:`Object`: Predictions in waymo dataset Object proto.
        """

        def parse_one_object(instance_idx):
            """Parse one instance in kitti format and convert it to `Object`
            proto.

            Args:
                instance_idx (int): Index of the instance to be converted.

            Returns:
                :obj:`Object`: Predicted instance in waymo dataset
                    Object proto.
            """
            cls = kitti_result['name'][instance_idx]
            length = round(kitti_result['dimensions'][instance_idx, 0], 4)
            height = round(kitti_result['dimensions'][instance_idx, 1], 4)
            width = round(kitti_result['dimensions'][instance_idx, 2], 4)
            x = round(kitti_result['location'][instance_idx, 0], 4)
            y = round(kitti_result['location'][instance_idx, 1], 4)
            z = round(kitti_result['location'][instance_idx, 2], 4)
            rotation_y = round(kitti_result['rotation_y'][instance_idx], 4)
            score = round(kitti_result['score'][instance_idx], 4)

            # y: downwards; move box origin from bottom center (kitti) to
            # true center (waymo)
            y -= height / 2
            # frame transformation: kitti -> waymo
            x, y, z = self.transform(T_k2w, x, y, z)

            # different conventions
            heading = -(rotation_y + np.pi / 2)
            while heading < -np.pi:
                heading += 2 * np.pi
            while heading > np.pi:
                heading -= 2 * np.pi

            box = label_pb2.Label.Box()
            box.center_x = x
            box.center_y = y
            box.center_z = z
            box.length = length
            box.width = width
            box.height = height
            box.heading = heading

            o = metrics_pb2.Object()
            o.object.box.CopyFrom(box)
            o.object.type = self.k2w_cls_map[cls]
            o.score = score
            o.context_name = context_name
            o.frame_timestamp_micros = frame_timestamp_micros

            return o

        objects = metrics_pb2.Objects()
        for instance_idx in range(len(kitti_result['name'])):
            o = parse_one_object(instance_idx)
            objects.objects.append(o)
        return objects

    def convert_one(self, file_idx):
        """Convert action for single file.

        Args:
            file_idx (int): Index of the file to be converted.
        """
        file_pathname = self.waymo_tfrecord_pathnames[file_idx]
        file_data = tf.data.TFRecordDataset(file_pathname, compression_type='')

        for frame_num, frame_data in enumerate(file_data):
            frame = open_dataset.Frame()
            frame.ParseFromString(bytearray(frame_data.numpy()))
            filename = f'{self.prefix}{file_idx:03d}{frame_num:03d}'

            for camera in frame.context.camera_calibrations:
                # FRONT = 1, see dataset.proto for details
                if camera.name == 1:
                    T_front_cam_to_vehicle = np.array(
                        camera.extrinsic.transform).reshape(4, 4)

            T_k2w = T_front_cam_to_vehicle @ self.T_ref_to_front_cam

            context_name = frame.context.name
            frame_timestamp_micros = frame.timestamp_micros

            if filename in self.name2idx:
                kitti_result = \
                    self.kitti_result_files[self.name2idx[filename]]
                objects = self.parse_objects(kitti_result, T_k2w, context_name,
                                             frame_timestamp_micros)
            else:
                print(filename, 'not found.(bevformer)')
                objects = metrics_pb2.Objects()

            with open(join(self.waymo_results_save_dir, f'{filename}.bin'),
                      'wb') as f:
                f.write(objects.SerializeToString())

    def convert(self):
        """Convert action."""
        print('Start converting ...')
        mmcv.track_parallel_progress(self.convert_one, range(len(self)),
                                     self.workers)
        print('\nFinished ...')

        # combine all files into one .bin
        pathnames = sorted(glob(join(self.waymo_results_save_dir, '*.bin')))
        combined = self.combine(pathnames)

        with open(self.waymo_results_final_path, 'wb') as f:
            f.write(combined.SerializeToString())

    def __len__(self):
        """Length of the filename list."""
        return len(self.waymo_tfrecord_pathnames)

    def transform(self, T, x, y, z):
        """Transform the coordinates with matrix T.

        Args:
            T (np.ndarray): Transformation matrix.
            x (float): Coordinate in x axis.
            y (float): Coordinate in y axis.
            z (float): Coordinate in z axis.

        Returns:
            list: Coordinates after transformation.
        """
        pt_bef = np.array([x, y, z, 1.0]).reshape(4, 1)
        pt_aft = np.matmul(T, pt_bef)
        return pt_aft[:3].flatten().tolist()

    def combine(self, pathnames):
        """Combine predictions in waymo format for each sample together.

        Args:
            pathnames (str): Paths to save predictions.

        Returns:
            :obj:`Objects`: Combined predictions in Objects proto.
        """
        combined = metrics_pb2.Objects()

        for pathname in pathnames:
            objects = metrics_pb2.Objects()
            with open(pathname, 'rb') as f:
                objects.ParseFromString(f.read())
            for o in objects.objects:
                combined.objects.append(o)

        return combined
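The heading conversion in parse_one_object (negate, shift by pi/2, wrap to (-pi, pi]) is easy to check in isolation with a made-up KITTI yaw:

# The KITTI -> Waymo heading convention, checked in isolation (made-up angle).
import numpy as np

rotation_y = 1.0                      # KITTI yaw around the camera y-axis
heading = -(rotation_y + np.pi / 2)   # Waymo yaw, then wrapped to (-pi, pi]
while heading < -np.pi:
    heading += 2 * np.pi
while heading > np.pi:
    heading -= 2 * np.pi
print(round(heading, 4))  # -2.5708, already inside the wrap range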
projects/mmdet3d_plugin/datasets/__init__.py (new file, mode 100644)
from .nuscenes_dataset import CustomNuScenesDataset
from .nuscenes_dataset_v2 import CustomNuScenesDatasetV2
from .builder import custom_build_dataset

__all__ = [
    'CustomNuScenesDataset',
    'CustomNuScenesDatasetV2',
]
projects/mmdet3d_plugin/datasets/builder.py (new file, mode 100644)
# Copyright (c) OpenMMLab. All rights reserved.
import copy
import platform
import random
from functools import partial

import numpy as np
from mmcv.parallel import collate
from mmcv.runner import get_dist_info
from mmcv.utils import Registry, build_from_cfg
from torch.utils.data import DataLoader

from mmdet.datasets.samplers import GroupSampler
from projects.mmdet3d_plugin.datasets.samplers.group_sampler import DistributedGroupSampler
from projects.mmdet3d_plugin.datasets.samplers.distributed_sampler import DistributedSampler
from projects.mmdet3d_plugin.datasets.samplers.sampler import build_sampler
# import torch


def build_dataloader(dataset,
                     samples_per_gpu,
                     workers_per_gpu,
                     num_gpus=1,
                     dist=True,
                     shuffle=True,
                     seed=None,
                     shuffler_sampler=None,
                     nonshuffler_sampler=None,
                     **kwargs):
    """Build PyTorch DataLoader.

    In distributed training, each GPU/process has a dataloader.
    In non-distributed training, there is only one dataloader for all GPUs.

    Args:
        dataset (Dataset): A PyTorch dataset.
        samples_per_gpu (int): Number of training samples on each GPU, i.e.,
            batch size of each GPU.
        workers_per_gpu (int): How many subprocesses to use for data loading
            for each GPU.
        num_gpus (int): Number of GPUs. Only used in non-distributed training.
        dist (bool): Distributed training/test or not. Default: True.
        shuffle (bool): Whether to shuffle the data at every epoch.
            Default: True.
        kwargs: any keyword argument to be used to initialize DataLoader

    Returns:
        DataLoader: A PyTorch dataloader.
    """
    rank, world_size = get_dist_info()
    if dist:
        # DistributedGroupSampler will definitely shuffle the data to satisfy
        # that images on each GPU are in the same group
        if shuffle:
            sampler = build_sampler(
                shuffler_sampler if shuffler_sampler is not None else
                dict(type='DistributedGroupSampler'),
                dict(
                    dataset=dataset,
                    samples_per_gpu=samples_per_gpu,
                    num_replicas=world_size,
                    rank=rank,
                    seed=seed)
            )
        else:
            sampler = build_sampler(
                nonshuffler_sampler if nonshuffler_sampler is not None else
                dict(type='DistributedSampler'),
                dict(
                    dataset=dataset,
                    num_replicas=world_size,
                    rank=rank,
                    shuffle=shuffle,
                    seed=seed)
            )
        batch_size = samples_per_gpu
        num_workers = workers_per_gpu
    else:
        # assert False, 'not support in bevformer'
        print('WARNING!!!!, Only can be used for obtain inference speed!!!!')
        sampler = GroupSampler(dataset, samples_per_gpu) if shuffle else None
        batch_size = num_gpus * samples_per_gpu
        num_workers = num_gpus * workers_per_gpu

    init_fn = partial(
        worker_init_fn, num_workers=num_workers, rank=rank,
        seed=seed) if seed is not None else None

    data_loader = DataLoader(
        dataset,
        batch_size=batch_size,
        sampler=sampler,
        num_workers=num_workers,
        collate_fn=partial(collate, samples_per_gpu=samples_per_gpu),
        pin_memory=False,
        worker_init_fn=init_fn,
        persistent_workers=(num_workers > 0),
        **kwargs)
    # if to_channels_last:
    #     original_collate_fn = data_loader.collate_fn
    #     def channels_last_collate(batch):
    #         data = original_collate_fn(batch)
    #         print("===============================channels_last=================================================")
    #         if isinstance(data, dict):
    #             if 'img' in data and isinstance(data['img'], torch.Tensor):
    #                 data['img'] = data['img'].contiguous(memory_format=torch.channels_last)
    #         elif isinstance(data, list):
    #             for item in data:
    #                 if 'img' in item and isinstance(item['img'], torch.Tensor):
    #                     item['img'] = item['img'].contiguous(memory_format=torch.channels_last)
    #         return data
    #     data_loader.collate_fn = channels_last_collate
    return data_loader


def worker_init_fn(worker_id, num_workers, rank, seed):
    # The seed of each worker equals to
    # num_worker * rank + worker_id + user_seed
    worker_seed = num_workers * rank + worker_id + seed
    np.random.seed(worker_seed)
    random.seed(worker_seed)


# Copyright (c) OpenMMLab. All rights reserved.
import platform
from mmcv.utils import Registry, build_from_cfg

from mmdet.datasets import DATASETS
from mmdet.datasets.builder import _concat_dataset

if platform.system() != 'Windows':
    # https://github.com/pytorch/pytorch/issues/973
    import resource
    rlimit = resource.getrlimit(resource.RLIMIT_NOFILE)
    base_soft_limit = rlimit[0]
    hard_limit = rlimit[1]
    soft_limit = min(max(4096, base_soft_limit), hard_limit)
    resource.setrlimit(resource.RLIMIT_NOFILE, (soft_limit, hard_limit))

OBJECTSAMPLERS = Registry('Object sampler')


def custom_build_dataset(cfg, default_args=None):
    from mmdet3d.datasets.dataset_wrappers import CBGSDataset
    from mmdet.datasets.dataset_wrappers import (ClassBalancedDataset,
                                                 ConcatDataset, RepeatDataset)
    if isinstance(cfg, (list, tuple)):
        dataset = ConcatDataset(
            [custom_build_dataset(c, default_args) for c in cfg])
    elif cfg['type'] == 'ConcatDataset':
        dataset = ConcatDataset(
            [custom_build_dataset(c, default_args) for c in cfg['datasets']],
            cfg.get('separate_eval', True))
    elif cfg['type'] == 'RepeatDataset':
        dataset = RepeatDataset(
            custom_build_dataset(cfg['dataset'], default_args), cfg['times'])
    elif cfg['type'] == 'ClassBalancedDataset':
        dataset = ClassBalancedDataset(
            custom_build_dataset(cfg['dataset'], default_args),
            cfg['oversample_thr'])
    elif cfg['type'] == 'CBGSDataset':
        dataset = CBGSDataset(
            custom_build_dataset(cfg['dataset'], default_args))
    elif isinstance(cfg.get('ann_file'), (list, tuple)):
        dataset = _concat_dataset(cfg, default_args)
    else:
        dataset = build_from_cfg(cfg, DATASETS, default_args)

    return dataset
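The per-worker seeding in worker_init_fn gives every dataloader worker a distinct, reproducible seed across ranks; replaying the formula for a hypothetical 2-rank, 4-worker setup shows no collisions:

# Replaying the worker_init_fn seed formula (hypothetical rank/worker counts).
num_workers, user_seed = 4, 2023
for rank in range(2):
    for worker_id in range(num_workers):
        print(rank, worker_id, num_workers * rank + worker_id + user_seed)
# rank 0 -> seeds 2023..2026, rank 1 -> seeds 2027..2030: all distinct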
projects/mmdet3d_plugin/datasets/nuscenes_dataset.py (new file, mode 100644)
import copy
import random
from os import path as osp

import mmcv
import numpy as np
import torch
from mmdet.datasets import DATASETS
from mmdet3d.datasets import NuScenesDataset
from nuscenes.eval.common.utils import quaternion_yaw, Quaternion
from .nuscnes_eval import NuScenesEval_custom
from projects.mmdet3d_plugin.models.utils.visual import save_tensor
from mmcv.parallel import DataContainer as DC


@DATASETS.register_module()
class CustomNuScenesDataset(NuScenesDataset):
    r"""NuScenes Dataset.

    This dataset only adds camera intrinsics and extrinsics to the results.
    """

    def __init__(self, queue_length=4, bev_size=(200, 200),
                 overlap_test=False, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.queue_length = queue_length
        self.overlap_test = overlap_test
        self.bev_size = bev_size

    def prepare_train_data(self, index):
        """
        Training data preparation.

        Args:
            index (int): Index for accessing the target data.
        Returns:
            dict: Training data dict of the corresponding index.
        """
        queue = []
        # sample a few preceding frames, keep them in temporal order
        index_list = list(range(index - self.queue_length, index))
        random.shuffle(index_list)
        index_list = sorted(index_list[1:])
        index_list.append(index)
        for i in index_list:
            i = max(0, i)
            input_dict = self.get_data_info(i)
            if input_dict is None:
                return None
            self.pre_pipeline(input_dict)
            example = self.pipeline(input_dict)
            if self.filter_empty_gt and \
                    (example is None or
                     ~(example['gt_labels_3d']._data != -1).any()):
                return None
            queue.append(example)
        return self.union2one(queue)

    def union2one(self, queue):
        imgs_list = [each['img'].data for each in queue]
        metas_map = {}
        prev_scene_token = None
        prev_pos = None
        prev_angle = None
        for i, each in enumerate(queue):
            metas_map[i] = each['img_metas'].data
            if metas_map[i]['scene_token'] != prev_scene_token:
                # first frame of a scene: no previous BEV, zero the deltas
                metas_map[i]['prev_bev_exists'] = False
                prev_scene_token = metas_map[i]['scene_token']
                prev_pos = copy.deepcopy(metas_map[i]['can_bus'][:3])
                prev_angle = copy.deepcopy(metas_map[i]['can_bus'][-1])
                metas_map[i]['can_bus'][:3] = 0
                metas_map[i]['can_bus'][-1] = 0
            else:
                # later frames: store ego position/angle deltas w.r.t. the
                # previous frame
                metas_map[i]['prev_bev_exists'] = True
                tmp_pos = copy.deepcopy(metas_map[i]['can_bus'][:3])
                tmp_angle = copy.deepcopy(metas_map[i]['can_bus'][-1])
                metas_map[i]['can_bus'][:3] -= prev_pos
                metas_map[i]['can_bus'][-1] -= prev_angle
                prev_pos = copy.deepcopy(tmp_pos)
                prev_angle = copy.deepcopy(tmp_angle)
        queue[-1]['img'] = DC(torch.stack(imgs_list), cpu_only=False,
                              stack=True)
        queue[-1]['img_metas'] = DC(metas_map, cpu_only=True)
        queue = queue[-1]
        return queue

    def get_data_info(self, index):
        """Get data info according to the given index.

        Args:
            index (int): Index of the sample data to get.
        Returns:
            dict: Data information that will be passed to the data
                preprocessing pipelines. It includes the following keys:

                - sample_idx (str): Sample index.
                - pts_filename (str): Filename of point clouds.
                - sweeps (list[dict]): Infos of sweeps.
                - timestamp (float): Sample timestamp.
                - img_filename (str, optional): Image filename.
                - lidar2img (list[np.ndarray], optional): Transformations
                  from lidar to different cameras.
                - ann_info (dict): Annotation info.
        """
        info = self.data_infos[index]
        # standard protocol modified from SECOND.Pytorch
        input_dict = dict(
            sample_idx=info['token'],
            pts_filename=info['lidar_path'],
            sweeps=info['sweeps'],
            ego2global_translation=info['ego2global_translation'],
            ego2global_rotation=info['ego2global_rotation'],
            prev_idx=info['prev'],
            next_idx=info['next'],
            scene_token=info['scene_token'],
            can_bus=info['can_bus'],
            frame_idx=info['frame_idx'],
            timestamp=info['timestamp'] / 1e6,
        )

        if self.modality['use_camera']:
            image_paths = []
            lidar2img_rts = []
            lidar2cam_rts = []
            cam_intrinsics = []
            for cam_type, cam_info in info['cams'].items():
                image_paths.append(cam_info['data_path'])
                # obtain lidar to image transformation matrix
                lidar2cam_r = np.linalg.inv(cam_info['sensor2lidar_rotation'])
                lidar2cam_t = cam_info[
                    'sensor2lidar_translation'] @ lidar2cam_r.T
                lidar2cam_rt = np.eye(4)
                lidar2cam_rt[:3, :3] = lidar2cam_r.T
                lidar2cam_rt[3, :3] = -lidar2cam_t
                intrinsic = cam_info['cam_intrinsic']
                viewpad = np.eye(4)
                viewpad[:intrinsic.shape[0], :intrinsic.shape[1]] = intrinsic
                lidar2img_rt = (viewpad @ lidar2cam_rt.T)
                lidar2img_rts.append(lidar2img_rt)

                cam_intrinsics.append(viewpad)
                lidar2cam_rts.append(lidar2cam_rt.T)

            input_dict.update(
                dict(
                    img_filename=image_paths,
                    lidar2img=lidar2img_rts,
                    cam_intrinsic=cam_intrinsics,
                    lidar2cam=lidar2cam_rts,
                ))

        if not self.test_mode:
            annos = self.get_ann_info(index)
            input_dict['ann_info'] = annos

        rotation = Quaternion(input_dict['ego2global_rotation'])
        translation = input_dict['ego2global_translation']
        can_bus = input_dict['can_bus']
        can_bus[:3] = translation
        can_bus[3:7] = rotation
        patch_angle = quaternion_yaw(rotation) / np.pi * 180
        if patch_angle < 0:
            patch_angle += 360
        can_bus[-2] = patch_angle / 180 * np.pi
        can_bus[-1] = patch_angle

        return input_dict

    def __getitem__(self, idx):
        """Get item from infos according to the given index.

        Returns:
            dict: Data dictionary of the corresponding index.
        """
        if self.test_mode:
            return self.prepare_test_data(idx)
        while True:
            data = self.prepare_train_data(idx)
            if data is None:
                idx = self._rand_another(idx)
                continue
            return data

    def _evaluate_single(self,
                         result_path,
                         logger=None,
                         metric='bbox',
                         result_name='pts_bbox'):
        """Evaluation for a single model in nuScenes protocol.

        Args:
            result_path (str): Path of the result file.
            logger (logging.Logger | str | None): Logger used for printing
                related information during evaluation. Default: None.
            metric (str): Metric name used for evaluation. Default: 'bbox'.
            result_name (str): Result name in the metric prefix.
                Default: 'pts_bbox'.

        Returns:
            dict: Dictionary of evaluation details.
        """
        from nuscenes import NuScenes
        self.nusc = NuScenes(version=self.version, dataroot=self.data_root,
                             verbose=True)

        output_dir = osp.join(*osp.split(result_path)[:-1])

        eval_set_map = {
            'v1.0-mini': 'mini_val',
            'v1.0-trainval': 'val',
        }
        self.nusc_eval = NuScenesEval_custom(
            self.nusc,
            config=self.eval_detection_configs,
            result_path=result_path,
            eval_set=eval_set_map[self.version],
            output_dir=output_dir,
            verbose=True,
            overlap_test=self.overlap_test,
            data_infos=self.data_infos
        )
        self.nusc_eval.main(plot_examples=0, render_curves=False)
        # record metrics
        metrics = mmcv.load(osp.join(output_dir, 'metrics_summary.json'))
        detail = dict()
        metric_prefix = f'{result_name}_NuScenes'
        for name in self.CLASSES:
            for k, v in metrics['label_aps'][name].items():
                val = float('{:.4f}'.format(v))
                detail['{}/{}_AP_dist_{}'.format(metric_prefix, name, k)] = val
            for k, v in metrics['label_tp_errors'][name].items():
                val = float('{:.4f}'.format(v))
                detail['{}/{}_{}'.format(metric_prefix, name, k)] = val
            for k, v in metrics['tp_errors'].items():
                val = float('{:.4f}'.format(v))
                detail['{}/{}'.format(metric_prefix,
                                      self.ErrNameMapping[k])] = val

        detail['{}/NDS'.format(metric_prefix)] = metrics['nd_score']
        detail['{}/mAP'.format(metric_prefix)] = metrics['mean_ap']
        return detail
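The lidar2img chain built in get_data_info (viewpad @ lidar2cam_rt.T) can be sanity-checked with a deliberately trivial calibration (identity rotation and made-up intrinsics, not real nuScenes values): a lidar point on the optical axis should land on the principal point.

# Sketch of the lidar2img construction above, with a toy calibration.
import numpy as np

sensor2lidar_rotation = np.eye(3)                  # camera axes == lidar axes (toy case)
sensor2lidar_translation = np.array([0.0, 0.0, -1.0])
intrinsic = np.array([[500.0, 0.0, 320.0],
                      [0.0, 500.0, 240.0],
                      [0.0, 0.0, 1.0]])

lidar2cam_r = np.linalg.inv(sensor2lidar_rotation)
lidar2cam_t = sensor2lidar_translation @ lidar2cam_r.T
lidar2cam_rt = np.eye(4)
lidar2cam_rt[:3, :3] = lidar2cam_r.T
lidar2cam_rt[3, :3] = -lidar2cam_t
viewpad = np.eye(4)
viewpad[:3, :3] = intrinsic
lidar2img = viewpad @ lidar2cam_rt.T

pt = np.array([0.0, 0.0, 10.0, 1.0])               # a lidar point 10 m out on the axis
u, v, w, _ = lidar2img @ pt
print(u / w, v / w)                                # 320.0 240.0 -- the principal point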
projects/mmdet3d_plugin/datasets/nuscenes_dataset_v2.py (new file, mode 100644)
import copy
from collections import defaultdict, OrderedDict
from os import path as osp

import mmcv
import numpy as np
import torch
from mmdet.datasets import DATASETS
from mmdet3d.datasets import NuScenesDataset
from nuscenes.eval.common.utils import quaternion_yaw, Quaternion
from .nuscnes_eval import NuScenesEval_custom
from mmcv.parallel import DataContainer as DC
from projects.mmdet3d_plugin.dd3d.datasets.nuscenes import NuscenesDataset as DD3DNuscenesDataset


@DATASETS.register_module()
class CustomNuScenesDatasetV2(NuScenesDataset):

    def __init__(self, frames=(), mono_cfg=None, overlap_test=False,
                 *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.frames = frames
        self.queue_length = len(frames)
        self.overlap_test = overlap_test
        self.mono_cfg = mono_cfg
        if not self.test_mode and mono_cfg is not None:
            self.mono_dataset = DD3DNuscenesDataset(**mono_cfg)

    def prepare_test_data(self, index):
        """Prepare data for testing.

        Args:
            index (int): Index for accessing the target data.
        Returns:
            dict: Testing data dict of the corresponding index.
        """
        data_queue = OrderedDict()
        input_dict = self.get_data_info(index)
        cur_scene_token = input_dict['scene_token']
        self.pre_pipeline(input_dict)
        example = self.pipeline(input_dict)
        data_queue[0] = example
        for frame_idx in self.frames:
            chosen_idx = index + frame_idx
            if frame_idx == 0 or chosen_idx < 0 or \
                    chosen_idx >= len(self.data_infos):
                continue
            info = self.data_infos[chosen_idx]
            input_dict = self.prepare_input_dict(info)
            if input_dict['scene_token'] == cur_scene_token:
                self.pre_pipeline(input_dict)
                example = self.pipeline(input_dict)
                data_queue[frame_idx] = example
        data_queue = OrderedDict(sorted(data_queue.items()))

        # regroup per test-time-augmentation view, then merge each queue
        ret = defaultdict(list)
        for i in range(len(data_queue[0]['img'])):
            single_aug_data_queue = {}
            for t in data_queue.keys():
                single_example = {}
                for key, value in data_queue[t].items():
                    single_example[key] = value[i]
                single_aug_data_queue[t] = single_example
            single_aug_data_queue = OrderedDict(
                sorted(single_aug_data_queue.items()))
            single_aug_sample = self.union2one(single_aug_data_queue)

            for key, value in single_aug_sample.items():
                ret[key].append(value)
        return ret

    def prepare_train_data(self, index):
        """
        Training data preparation.

        Args:
            index (int): Index for accessing the target data.
        Returns:
            dict: Training data dict of the corresponding index.
        """
        data_queue = OrderedDict()
        input_dict = self.get_data_info(index)
        if input_dict is None:
            return None
        cur_scene_token = input_dict['scene_token']
        # cur_frame_idx = input_dict['frame_idx']
        ann_info = copy.deepcopy(input_dict['ann_info'])
        self.pre_pipeline(input_dict)
        example = self.pipeline(input_dict)
        if self.filter_empty_gt and \
                (example is None or
                 ~(example['gt_labels_3d']._data != -1).any()):
            return None
        data_queue[0] = example
        aug_param = copy.deepcopy(example['aug_param']) \
            if 'aug_param' in example else {}
        # frame_idx_to_idx = self.scene_to_frame_idx_to_idx[cur_scene_token]
        for frame_idx in self.frames:
            chosen_idx = index + frame_idx
            if frame_idx == 0 or chosen_idx < 0 or \
                    chosen_idx >= len(self.data_infos):
                continue
            info = self.data_infos[chosen_idx]
            input_dict = self.prepare_input_dict(info)
            if input_dict['scene_token'] == cur_scene_token:
                # only for the pipeline; these annotations should never be used
                input_dict['ann_info'] = copy.deepcopy(ann_info)
                self.pre_pipeline(input_dict)
                input_dict['aug_param'] = copy.deepcopy(aug_param)
                example = self.pipeline(input_dict)
                data_queue[frame_idx] = example
        data_queue = OrderedDict(sorted(data_queue.items()))
        return self.union2one(data_queue)

    def union2one(self, queue: dict):
        """
        Convert a sample queue into one single sample.
        """
        imgs_list = [each['img'].data for each in queue.values()]

        lidar2ego = np.eye(4, dtype=np.float32)
        lidar2ego[:3, :3] = Quaternion(
            queue[0]['lidar2ego_rotation']).rotation_matrix
        lidar2ego[:3, 3] = queue[0]['lidar2ego_translation']

        egocurr2global = np.eye(4, dtype=np.float32)
        egocurr2global[:3, :3] = Quaternion(
            queue[0]['ego2global_rotation']).rotation_matrix
        egocurr2global[:3, 3] = queue[0]['ego2global_translation']

        metas_map = {}
        for i, each in queue.items():
            metas_map[i] = each['img_metas'].data
            metas_map[i]['timestamp'] = each['timestamp']
            if 'aug_param' in each:
                metas_map[i]['aug_param'] = each['aug_param']
            if i == 0:
                metas_map[i]['lidaradj2lidarcurr'] = None
            else:
                # express the adjacent frame's lidar in the current lidar frame
                egoadj2global = np.eye(4, dtype=np.float32)
                egoadj2global[:3, :3] = Quaternion(
                    each['ego2global_rotation']).rotation_matrix
                egoadj2global[:3, 3] = each['ego2global_translation']

                lidaradj2lidarcurr = np.linalg.inv(lidar2ego) \
                    @ np.linalg.inv(egocurr2global) \
                    @ egoadj2global @ lidar2ego
                metas_map[i]['lidaradj2lidarcurr'] = lidaradj2lidarcurr
                for i_cam in range(len(metas_map[i]['lidar2img'])):
                    metas_map[i]['lidar2img'][i_cam] = \
                        metas_map[i]['lidar2img'][i_cam] \
                        @ np.linalg.inv(lidaradj2lidarcurr)
        queue[0]['img'] = DC(torch.stack(imgs_list), cpu_only=False,
                             stack=True)
        queue[0]['img_metas'] = DC(metas_map, cpu_only=True)
        queue = queue[0]
        return queue

    def prepare_input_dict(self, info):
        # standard protocol modified from SECOND.Pytorch
        input_dict = dict(
            sample_idx=info['token'],
            pts_filename=info['lidar_path'],
            sweeps=info['sweeps'],
            ego2global_translation=info['ego2global_translation'],
            ego2global_rotation=info['ego2global_rotation'],
            lidar2ego_translation=info['lidar2ego_translation'],
            lidar2ego_rotation=info['lidar2ego_rotation'],
            prev=info['prev'],
            next=info['next'],
            scene_token=info['scene_token'],
            frame_idx=info['frame_idx'],
            timestamp=info['timestamp'] / 1e6,
        )

        if self.modality['use_camera']:
            image_paths = []
            lidar2img_rts = []
            lidar2cam_rts = []
            cam_intrinsics = []
            for cam_type, cam_info in info['cams'].items():
                image_paths.append(cam_info['data_path'])
                # obtain lidar to image transformation matrix
                lidar2cam_r = np.linalg.inv(cam_info['sensor2lidar_rotation'])
                lidar2cam_t = cam_info[
                    'sensor2lidar_translation'] @ lidar2cam_r.T
                lidar2cam_rt = np.eye(4)
                lidar2cam_rt[:3, :3] = lidar2cam_r.T
                lidar2cam_rt[3, :3] = -lidar2cam_t
                intrinsic = cam_info['cam_intrinsic']
                viewpad = np.eye(4)
                viewpad[:intrinsic.shape[0], :intrinsic.shape[1]] = intrinsic
                lidar2img_rt = (viewpad @ lidar2cam_rt.T)
                lidar2img_rts.append(lidar2img_rt)

                cam_intrinsics.append(viewpad)
                lidar2cam_rts.append(lidar2cam_rt.T)

            input_dict.update(
                dict(
                    img_filename=image_paths,
                    lidar2img=lidar2img_rts,
                    cam2img=cam_intrinsics,
                    lidar2cam=lidar2cam_rts,
                ))
        return input_dict

    def filter_crowd_annotations(self, data_dict):
        for ann in data_dict["annotations"]:
            if ann.get("iscrowd", 0) == 0:
                return True
        return False

    def get_data_info(self, index):
        info = self.data_infos[index]
        input_dict = self.prepare_input_dict(info)
        if not self.test_mode:
            annos = self.get_ann_info(index)
            input_dict['ann_info'] = annos

        if not self.test_mode and self.mono_cfg is not None:
            if input_dict is None:
                return None
            info = self.data_infos[index]
            img_ids = []
            for cam_type, cam_info in info['cams'].items():
                img_ids.append(cam_info['sample_data_token'])

            mono_input_dict = []
            mono_ann_index = []
            for i, img_id in enumerate(img_ids):
                tmp_dict = self.mono_dataset.getitem_by_datumtoken(img_id)
                if tmp_dict is not None:
                    if self.filter_crowd_annotations(tmp_dict):
                        mono_input_dict.append(tmp_dict)
                        mono_ann_index.append(i)

            # filter empty annotations
            if len(mono_ann_index) == 0:
                return None

            mono_ann_index = DC(mono_ann_index, cpu_only=True)
            input_dict['mono_input_dict'] = mono_input_dict
            input_dict['mono_ann_idx'] = mono_ann_index
        return input_dict

    def __getitem__(self, idx):
        """Get item from infos according to the given index.

        Returns:
            dict: Data dictionary of the corresponding index.
        """
        if self.test_mode:
            return self.prepare_test_data(idx)
        while True:
            data = self.prepare_train_data(idx)
            if data is None:
                idx = self._rand_another(idx)
                continue
            return data

    def _evaluate_single(self,
                         result_path,
                         logger=None,
                         metric='bbox',
                         result_name='pts_bbox'):
        """Evaluation for a single model in nuScenes protocol.

        Args:
            result_path (str): Path of the result file.
            logger (logging.Logger | str | None): Logger used for printing
                related information during evaluation. Default: None.
            metric (str): Metric name used for evaluation. Default: 'bbox'.
            result_name (str): Result name in the metric prefix.
                Default: 'pts_bbox'.

        Returns:
            dict: Dictionary of evaluation details.
        """
        from nuscenes import NuScenes
        self.nusc = NuScenes(version=self.version, dataroot=self.data_root,
                             verbose=True)

        output_dir = osp.join(*osp.split(result_path)[:-1])

        eval_set_map = {
            'v1.0-mini': 'mini_val',
            'v1.0-trainval': 'val',
        }
        self.nusc_eval = NuScenesEval_custom(
            self.nusc,
            config=self.eval_detection_configs,
            result_path=result_path,
            eval_set=eval_set_map[self.version],
            output_dir=output_dir,
            verbose=True,
            overlap_test=self.overlap_test,
            data_infos=self.data_infos
        )
        self.nusc_eval.main(plot_examples=0, render_curves=False)
        # record metrics
        metrics = mmcv.load(osp.join(output_dir, 'metrics_summary.json'))
        detail = dict()
        metric_prefix = f'{result_name}_NuScenes'
        for name in self.CLASSES:
            for k, v in metrics['label_aps'][name].items():
                val = float('{:.4f}'.format(v))
                detail['{}/{}_AP_dist_{}'.format(metric_prefix, name, k)] = val
            for k, v in metrics['label_tp_errors'][name].items():
                val = float('{:.4f}'.format(v))
                detail['{}/{}_{}'.format(metric_prefix, name, k)] = val
            for k, v in metrics['tp_errors'].items():
                val = float('{:.4f}'.format(v))
                detail['{}/{}'.format(metric_prefix,
                                      self.ErrNameMapping[k])] = val

        detail['{}/NDS'.format(metric_prefix)] = metrics['nd_score']
        detail['{}/mAP'.format(metric_prefix)] = metrics['mean_ap']
        return detail
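The ego-pose chain in union2one maps points from an adjacent frame's lidar into the current frame's lidar. With identity lidar2ego and a hypothetical 2 m forward motion between the frames, the relative transform should come out as a 2 m shift backwards:

# Checking the lidaradj2lidarcurr chain with toy poses (identity lidar2ego).
import numpy as np

lidar2ego = np.eye(4, dtype=np.float32)

egocurr2global = np.eye(4, dtype=np.float32)
egocurr2global[:3, 3] = [102.0, 0.0, 0.0]      # current ego pose in the global frame
egoadj2global = np.eye(4, dtype=np.float32)
egoadj2global[:3, 3] = [100.0, 0.0, 0.0]       # adjacent (past) ego pose, 2 m behind

lidaradj2lidarcurr = np.linalg.inv(lidar2ego) @ np.linalg.inv(egocurr2global) \
    @ egoadj2global @ lidar2ego
print(lidaradj2lidarcurr[:3, 3])  # [-2. 0. 0.]: past-frame points slide backwards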
projects/mmdet3d_plugin/datasets/nuscenes_mono_dataset.py (new file, mode 100644)
# Copyright (c) OpenMMLab. All rights reserved.
import
copy
import
mmcv
import
numpy
as
np
import
pyquaternion
import
tempfile
import
torch
import
warnings
from
nuscenes.utils.data_classes
import
Box
as
NuScenesBox
from
os
import
path
as
osp
from
mmdet3d.core
import
bbox3d2result
,
box3d_multiclass_nms
,
xywhr2xyxyr
from
mmdet.datasets
import
DATASETS
,
CocoDataset
from
mmdet3d.core
import
show_multi_modality_result
from
mmdet3d.core.bbox
import
CameraInstance3DBoxes
,
get_box_type
from
mmdet3d.datasets.pipelines
import
Compose
from
mmdet3d.datasets.utils
import
extract_result_dict
,
get_loading_pipeline
@DATASETS.register_module()
class CustomNuScenesMonoDataset(CocoDataset):
    r"""Monocular 3D detection on NuScenes Dataset.

    This class serves as the API for experiments on the NuScenes Dataset.
    Please refer to `NuScenes Dataset <https://www.nuscenes.org/download>`_
    for data downloading.

    Args:
        ann_file (str): Path of annotation file.
        data_root (str): Path of dataset root.
        load_interval (int, optional): Interval of loading the dataset. It is
            used to uniformly sample the dataset. Defaults to 1.
        with_velocity (bool, optional): Whether to include velocity prediction
            in the experiments. Defaults to True.
        modality (dict, optional): Modality to specify the sensor data used
            as input. Defaults to None.
        box_type_3d (str, optional): Type of 3D box of this dataset.
            Based on the `box_type_3d`, the dataset will encapsulate the box
            in its original format and then convert it to `box_type_3d`.
            Defaults to 'Camera' in this class. Available options include:

            - 'LiDAR': Box in LiDAR coordinates.
            - 'Depth': Box in depth coordinates, usually for indoor datasets.
            - 'Camera': Box in camera coordinates.
        eval_version (str, optional): Configuration version of evaluation.
            Defaults to 'detection_cvpr_2019'.
        use_valid_flag (bool): Whether to use the `use_valid_flag` key in the
            info file as a mask to filter gt_boxes and gt_names.
            Defaults to False.
        version (str, optional): Dataset version. Defaults to 'v1.0-trainval'.
    """
    CLASSES = ('car', 'truck', 'trailer', 'bus', 'construction_vehicle',
               'bicycle', 'motorcycle', 'pedestrian', 'traffic_cone',
               'barrier')
    DefaultAttribute = {
        'car': 'vehicle.parked',
        'pedestrian': 'pedestrian.moving',
        'trailer': 'vehicle.parked',
        'truck': 'vehicle.parked',
        'bus': 'vehicle.moving',
        'motorcycle': 'cycle.without_rider',
        'construction_vehicle': 'vehicle.parked',
        'bicycle': 'cycle.without_rider',
        'barrier': '',
        'traffic_cone': '',
    }
    # https://github.com/nutonomy/nuscenes-devkit/blob/57889ff20678577025326cfc24e57424a829be0a/python-sdk/nuscenes/eval/detection/evaluate.py#L222 # noqa
    ErrNameMapping = {
        'trans_err': 'mATE',
        'scale_err': 'mASE',
        'orient_err': 'mAOE',
        'vel_err': 'mAVE',
        'attr_err': 'mAAE',
    }
    def __init__(self,
                 data_root,
                 load_interval=1,
                 with_velocity=True,
                 modality=None,
                 box_type_3d='Camera',
                 eval_version='detection_cvpr_2019',
                 use_valid_flag=False,
                 overlap_test=False,
                 version='v1.0-trainval',
                 **kwargs):
        super().__init__(**kwargs)
        # overlap_test = True
        self.data_root = data_root
        self.overlap_test = overlap_test
        self.load_interval = load_interval
        self.with_velocity = with_velocity
        self.modality = modality
        self.box_type_3d, self.box_mode_3d = get_box_type(box_type_3d)
        self.eval_version = eval_version
        self.use_valid_flag = use_valid_flag
        self.bbox_code_size = 9
        self.version = version
        if self.eval_version is not None:
            from nuscenes.eval.detection.config import config_factory
            self.eval_detection_configs = config_factory(self.eval_version)
        if self.modality is None:
            self.modality = dict(
                use_camera=True,
                use_lidar=False,
                use_radar=False,
                use_map=False,
                use_external=False)
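    # A minimal instantiation sketch (paths and the ann_file name are
    # hypothetical; ann_file/pipeline are CocoDataset kwargs passed through
    # **kwargs):
    #
    #   dataset = CustomNuScenesMonoDataset(
    #       data_root='data/nuscenes/',
    #       ann_file='data/nuscenes/nuscenes_infos_val_mono3d.coco.json',
    #       version='v1.0-trainval',
    #       box_type_3d='Camera')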
    def pre_pipeline(self, results):
        """Initialization before data preparation.

        Args:
            results (dict): Dict before data preprocessing.

                - img_fields (list): Image fields.
                - bbox3d_fields (list): 3D bounding boxes fields.
                - pts_mask_fields (list): Mask fields of points.
                - pts_seg_fields (list): Mask fields of point segments.
                - bbox_fields (list): Fields of bounding boxes.
                - mask_fields (list): Fields of masks.
                - seg_fields (list): Segment fields.
                - box_type_3d (str): 3D box type.
                - box_mode_3d (str): 3D box mode.
        """
        results['img_prefix'] = ''  # self.img_prefix
        results['seg_prefix'] = self.seg_prefix
        results['proposal_file'] = self.proposal_file
        results['img_fields'] = []
        results['bbox3d_fields'] = []
        results['pts_mask_fields'] = []
        results['pts_seg_fields'] = []
        results['bbox_fields'] = []
        results['mask_fields'] = []
        results['seg_fields'] = []
        results['box_type_3d'] = self.box_type_3d
        results['box_mode_3d'] = self.box_mode_3d
    def _parse_ann_info(self, img_info, ann_info):
        """Parse bbox annotation.

        Args:
            img_info (list[dict]): Image info.
            ann_info (list[dict]): Annotation info of an image.

        Returns:
            dict: A dict containing the following keys: bboxes, labels,
                gt_bboxes_3d, gt_labels_3d, attr_labels, centers2d,
                depths, bboxes_ignore, masks, seg_map
        """
        gt_bboxes = []
        gt_labels = []
        attr_labels = []
        gt_bboxes_ignore = []
        gt_masks_ann = []
        gt_bboxes_cam3d = []
        centers2d = []
        depths = []
        for i, ann in enumerate(ann_info):
            if ann.get('ignore', False):
                continue
            x1, y1, w, h = ann['bbox']
            inter_w = max(0, min(x1 + w, img_info['width']) - max(x1, 0))
            inter_h = max(0, min(y1 + h, img_info['height']) - max(y1, 0))
            if inter_w * inter_h == 0:
                continue
            if ann['area'] <= 0 or w < 1 or h < 1:
                continue
            if ann['category_id'] not in self.cat_ids:
                continue
            bbox = [x1, y1, x1 + w, y1 + h]
            if ann.get('iscrowd', False):
                gt_bboxes_ignore.append(bbox)
            else:
                gt_bboxes.append(bbox)
                gt_labels.append(self.cat2label[ann['category_id']])
                attr_labels.append(ann['attribute_id'])
                gt_masks_ann.append(ann.get('segmentation', None))

                # 3D annotations in camera coordinates
                bbox_cam3d = np.array(ann['bbox_cam3d']).reshape(1, -1)
                velo_cam3d = np.array(ann['velo_cam3d']).reshape(1, 2)
                nan_mask = np.isnan(velo_cam3d[:, 0])
                velo_cam3d[nan_mask] = [0.0, 0.0]
                bbox_cam3d = np.concatenate([bbox_cam3d, velo_cam3d],
                                            axis=-1)
                gt_bboxes_cam3d.append(bbox_cam3d.squeeze())

                # 2.5D annotations in camera coordinates
                center2d = ann['center2d'][:2]
                depth = ann['center2d'][2]
                centers2d.append(center2d)
                depths.append(depth)

        if gt_bboxes:
            gt_bboxes = np.array(gt_bboxes, dtype=np.float32)
            gt_labels = np.array(gt_labels, dtype=np.int64)
            attr_labels = np.array(attr_labels, dtype=np.int64)
        else:
            gt_bboxes = np.zeros((0, 4), dtype=np.float32)
            gt_labels = np.array([], dtype=np.int64)
            attr_labels = np.array([], dtype=np.int64)

        if gt_bboxes_cam3d:
            gt_bboxes_cam3d = np.array(gt_bboxes_cam3d, dtype=np.float32)
            centers2d = np.array(centers2d, dtype=np.float32)
            depths = np.array(depths, dtype=np.float32)
        else:
            gt_bboxes_cam3d = np.zeros((0, self.bbox_code_size),
                                       dtype=np.float32)
            centers2d = np.zeros((0, 2), dtype=np.float32)
            depths = np.zeros((0, ), dtype=np.float32)

        gt_bboxes_cam3d = CameraInstance3DBoxes(
            gt_bboxes_cam3d,
            box_dim=gt_bboxes_cam3d.shape[-1],
            origin=(0.5, 0.5, 0.5))
        gt_labels_3d = copy.deepcopy(gt_labels)

        if gt_bboxes_ignore:
            gt_bboxes_ignore = np.array(gt_bboxes_ignore, dtype=np.float32)
        else:
            gt_bboxes_ignore = np.zeros((0, 4), dtype=np.float32)

        seg_map = img_info['filename'].replace('jpg', 'png')

        ann = dict(
            bboxes=gt_bboxes,
            labels=gt_labels,
            gt_bboxes_3d=gt_bboxes_cam3d,
            gt_labels_3d=gt_labels_3d,
            attr_labels=attr_labels,
            centers2d=centers2d,
            depths=depths,
            bboxes_ignore=gt_bboxes_ignore,
            masks=gt_masks_ann,
            seg_map=seg_map)

        return ann
    def get_attr_name(self, attr_idx, label_name):
        """Get attribute from predicted index.

        This is a workaround to predict attribute when the predicted velocity
        is not reliable. We map the predicted attribute index to the one in
        the attribute set. If it is consistent with the category, we keep it;
        otherwise, we fall back to the default attribute.

        Args:
            attr_idx (int): Attribute index.
            label_name (str): Predicted category name.

        Returns:
            str: Predicted attribute name.
        """
        # TODO: Simplify the variable name
        AttrMapping_rev2 = [
            'cycle.with_rider', 'cycle.without_rider', 'pedestrian.moving',
            'pedestrian.standing', 'pedestrian.sitting_lying_down',
            'vehicle.moving', 'vehicle.parked', 'vehicle.stopped', 'None'
        ]
        if label_name in ('car', 'bus', 'truck', 'trailer',
                          'construction_vehicle'):
            if AttrMapping_rev2[attr_idx] in ('vehicle.moving',
                                              'vehicle.parked',
                                              'vehicle.stopped'):
                return AttrMapping_rev2[attr_idx]
            else:
                return CustomNuScenesMonoDataset.DefaultAttribute[label_name]
        elif label_name == 'pedestrian':
            if AttrMapping_rev2[attr_idx] in (
                    'pedestrian.moving', 'pedestrian.standing',
                    'pedestrian.sitting_lying_down'):
                return AttrMapping_rev2[attr_idx]
            else:
                return CustomNuScenesMonoDataset.DefaultAttribute[label_name]
        elif label_name in ('bicycle', 'motorcycle'):
            if AttrMapping_rev2[attr_idx] in ('cycle.with_rider',
                                              'cycle.without_rider'):
                return AttrMapping_rev2[attr_idx]
            else:
                return CustomNuScenesMonoDataset.DefaultAttribute[label_name]
        else:
            return CustomNuScenesMonoDataset.DefaultAttribute[label_name]
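    # Example of the fallback behavior (values illustrative): an attribute
    # index that is inconsistent with the predicted category falls back to
    # the class default.
    #
    #   self.get_attr_name(0, 'car')  -> 'vehicle.parked'
    #       (index 0 is 'cycle.with_rider', not a vehicle attribute)
    #   self.get_attr_name(5, 'car')  -> 'vehicle.moving'  (consistent)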
    def _format_bbox(self, results, jsonfile_prefix=None):
        """Convert the results to the standard format.

        Args:
            results (list[dict]): Testing results of the dataset.
            jsonfile_prefix (str): The prefix of the output jsonfile.
                You can specify the output directory/filename by
                modifying the jsonfile_prefix. Default: None.

        Returns:
            str: Path of the output json file.
        """
        nusc_annos = {}
        mapped_class_names = self.CLASSES

        print('Start to convert detection format...')

        CAM_NUM = 6

        for sample_id, det in enumerate(mmcv.track_iter_progress(results)):

            if sample_id % CAM_NUM == 0:
                boxes_per_frame = []
                attrs_per_frame = []

            # need to merge results from images of the same sample
            annos = []
            boxes, attrs = output_to_nusc_box(det)
            sample_token = self.data_infos[sample_id]['token']
            boxes, attrs = cam_nusc_box_to_global(
                self.data_infos[sample_id], boxes, attrs,
                mapped_class_names, self.eval_detection_configs,
                self.eval_version)

            boxes_per_frame.extend(boxes)
            attrs_per_frame.extend(attrs)
            # Remove redundant predictions caused by the overlap of images
            if (sample_id + 1) % CAM_NUM != 0:
                continue
            boxes = global_nusc_box_to_cam(
                self.data_infos[sample_id + 1 - CAM_NUM], boxes_per_frame,
                mapped_class_names, self.eval_detection_configs,
                self.eval_version)
            cam_boxes3d, scores, labels = nusc_box_to_cam_box3d(boxes)
            # box nms 3d over the 6 images of a frame
            # TODO: move this global setting into config
            nms_cfg = dict(
                use_rotate_nms=True,
                nms_across_levels=False,
                nms_pre=4096,
                nms_thr=0.05,
                score_thr=0.01,
                min_bbox_size=0,
                max_per_frame=500)
            from mmcv import Config
            nms_cfg = Config(nms_cfg)
            cam_boxes3d_for_nms = xywhr2xyxyr(cam_boxes3d.bev)
            boxes3d = cam_boxes3d.tensor
            # generate attr scores from attr labels
            attrs = labels.new_tensor([attr for attr in attrs_per_frame])
            boxes3d, scores, labels, attrs = box3d_multiclass_nms(
                boxes3d,
                cam_boxes3d_for_nms,
                scores,
                nms_cfg.score_thr,
                nms_cfg.max_per_frame,
                nms_cfg,
                mlvl_attr_scores=attrs)
            cam_boxes3d = CameraInstance3DBoxes(boxes3d, box_dim=9)
            det = bbox3d2result(cam_boxes3d, scores, labels, attrs)
            boxes, attrs = output_to_nusc_box(det)
            boxes, attrs = cam_nusc_box_to_global(
                self.data_infos[sample_id + 1 - CAM_NUM], boxes, attrs,
                mapped_class_names, self.eval_detection_configs,
                self.eval_version)

            for i, box in enumerate(boxes):
                name = mapped_class_names[box.label]
                attr = self.get_attr_name(attrs[i], name)
                nusc_anno = dict(
                    sample_token=sample_token,
                    translation=box.center.tolist(),
                    size=box.wlh.tolist(),
                    rotation=box.orientation.elements.tolist(),
                    velocity=box.velocity[:2].tolist(),
                    detection_name=name,
                    detection_score=box.score,
                    attribute_name=attr)
                annos.append(nusc_anno)
            # results of the other views of the same frame should be
            # concatenated
            if sample_token in nusc_annos:
                nusc_annos[sample_token].extend(annos)
            else:
                nusc_annos[sample_token] = annos

        nusc_submissions = {
            'meta': self.modality,
            'results': nusc_annos,
        }

        mmcv.mkdir_or_exist(jsonfile_prefix)
        res_path = osp.join(jsonfile_prefix, 'results_nusc.json')
        print('Results written to', res_path)
        mmcv.dump(nusc_submissions, res_path)
        return res_path
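    # The resulting results_nusc.json follows the nuScenes submission
    # format; a minimal sketch of its structure (field values illustrative):
    #
    #   {
    #     "meta": {"use_camera": true, "use_lidar": false, ...},
    #     "results": {
    #       "<sample_token>": [
    #         {"sample_token": "...", "translation": [x, y, z],
    #          "size": [w, l, h], "rotation": [w, x, y, z],
    #          "velocity": [vx, vy], "detection_name": "car",
    #          "detection_score": 0.9, "attribute_name": "vehicle.moving"},
    #         ...
    #       ]
    #     }
    #   }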
    def _evaluate_single(self,
                         result_path,
                         logger=None,
                         metric='bbox',
                         result_name='img_bbox'):
        """Evaluation for a single model in nuScenes protocol.

        Args:
            result_path (str): Path of the result file.
            logger (logging.Logger | str | None): Logger used for printing
                related information during evaluation. Default: None.
            metric (str): Metric name used for evaluation. Default: 'bbox'.
            result_name (str): Result name in the metric prefix.
                Default: 'img_bbox'.

        Returns:
            dict: Dictionary of evaluation details.
        """
        from nuscenes import NuScenes
        from .nuscnes_eval import NuScenesEval_custom

        output_dir = osp.join(*osp.split(result_path)[:-1])
        self.nusc = NuScenes(
            version=self.version, dataroot=self.data_root, verbose=False)
        eval_set_map = {
            'v1.0-mini': 'mini_val',
            'v1.0-trainval': 'val',
        }
        self.nusc_eval = NuScenesEval_custom(
            self.nusc,
            config=self.eval_detection_configs,
            result_path=result_path,
            eval_set=eval_set_map[self.version],
            output_dir=output_dir,
            verbose=True,
            overlap_test=self.overlap_test,
            data_infos=self.data_infos)
        self.nusc_eval.main(render_curves=True)
        # record metrics
        metrics = mmcv.load(osp.join(output_dir, 'metrics_summary.json'))
        detail = dict()
        metric_prefix = f'{result_name}_NuScenes'
        for name in self.CLASSES:
            for k, v in metrics['label_aps'][name].items():
                val = float('{:.4f}'.format(v))
                detail['{}/{}_AP_dist_{}'.format(metric_prefix, name,
                                                 k)] = val
            for k, v in metrics['label_tp_errors'][name].items():
                val = float('{:.4f}'.format(v))
                detail['{}/{}_{}'.format(metric_prefix, name, k)] = val

        for k, v in metrics['tp_errors'].items():
            val = float('{:.4f}'.format(v))
            detail['{}/{}'.format(metric_prefix,
                                  self.ErrNameMapping[k])] = val

        detail['{}/NDS'.format(metric_prefix)] = metrics['nd_score']
        detail['{}/mAP'.format(metric_prefix)] = metrics['mean_ap']
        return detail
    def format_results(self, results, jsonfile_prefix=None, **kwargs):
        """Format the results to json (standard format for COCO evaluation).

        Args:
            results (list[tuple | numpy.ndarray]): Testing results of the
                dataset.
            jsonfile_prefix (str | None): The prefix of json files. It
                includes the file path and the prefix of filename, e.g.,
                "a/b/prefix". If not specified, a temp file will be created.
                Default: None.

        Returns:
            tuple: (result_files, tmp_dir), result_files is a dict containing
                the json filepaths, tmp_dir is the temporal directory created
                for saving json files when jsonfile_prefix is not specified.
        """
        assert isinstance(results, list), 'results must be a list'
        assert len(results) == len(self), (
            'The length of results is not equal to the dataset len: {} != {}'.
            format(len(results), len(self)))

        if jsonfile_prefix is None:
            tmp_dir = tempfile.TemporaryDirectory()
            jsonfile_prefix = osp.join(tmp_dir.name, 'results')
        else:
            tmp_dir = None

        # Currently the output prediction results could be in two formats:
        # 1. list of dict('boxes_3d': ..., 'scores_3d': ..., 'labels_3d': ...)
        # 2. list of dict('pts_bbox' or 'img_bbox':
        #     dict('boxes_3d': ..., 'scores_3d': ..., 'labels_3d': ...))
        # This is a workaround to enable evaluation of both formats on
        # nuScenes; refer to
        # https://github.com/open-mmlab/mmdetection3d/issues/449
        if not ('pts_bbox' in results[0] or 'img_bbox' in results[0]):
            result_files = self._format_bbox(results, jsonfile_prefix)
        else:
            # should take the inner dict out of the 'pts_bbox' or
            # 'img_bbox' dict
            result_files = dict()
            for name in results[0]:
                # do not evaluate 2D predictions on nuScenes
                if '2d' in name:
                    continue
                print(f'\nFormatting bboxes of {name}')
                results_ = [out[name] for out in results]
                tmp_file_ = osp.join(jsonfile_prefix, name)
                result_files.update(
                    {name: self._format_bbox(results_, tmp_file_)})
        return result_files, tmp_dir
    def evaluate(self,
                 results,
                 metric='bbox',
                 logger=None,
                 jsonfile_prefix=None,
                 result_names=['img_bbox'],
                 show=False,
                 out_dir=None,
                 pipeline=None):
        """Evaluation in nuScenes protocol.

        Args:
            results (list[dict]): Testing results of the dataset.
            metric (str | list[str]): Metrics to be evaluated.
            logger (logging.Logger | str | None): Logger used for printing
                related information during evaluation. Default: None.
            jsonfile_prefix (str | None): The prefix of json files. It
                includes the file path and the prefix of filename, e.g.,
                "a/b/prefix". If not specified, a temp file will be created.
                Default: None.
            show (bool): Whether to visualize. Default: False.
            out_dir (str): Path to save the visualization results.
                Default: None.
            pipeline (list[dict], optional): raw data loading for showing.
                Default: None.

        Returns:
            dict[str, float]: Results of each evaluation metric.
        """
        result_files, tmp_dir = self.format_results(results, jsonfile_prefix)

        if isinstance(result_files, dict):
            results_dict = dict()
            for name in result_names:
                print('Evaluating bboxes of {}'.format(name))
                ret_dict = self._evaluate_single(result_files[name])
                results_dict.update(ret_dict)
        elif isinstance(result_files, str):
            results_dict = self._evaluate_single(result_files)

        if tmp_dir is not None:
            tmp_dir.cleanup()

        if show:
            self.show(results, out_dir, pipeline=pipeline)
        return results_dict
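    # Typical call sequence in a test script (a sketch; `outputs` coming
    # from mmdet's single_gpu_test is an assumption here):
    #
    #   outputs = single_gpu_test(model, data_loader)
    #   metrics = dataset.evaluate(outputs,
    #                              jsonfile_prefix='work_dirs/results')
    #   print(metrics['img_bbox_NuScenes/NDS'])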
    def _extract_data(self, index, pipeline, key, load_annos=False):
        """Load data using the input pipeline and extract data according to
        the given key.

        Args:
            index (int): Index for accessing the target data.
            pipeline (:obj:`Compose`): Composed data loading pipeline.
            key (str | list[str]): One single or a list of data keys.
            load_annos (bool): Whether to load data annotations.
                If True, need to set self.test_mode as False before loading.

        Returns:
            np.ndarray | torch.Tensor | list[np.ndarray | torch.Tensor]:
                A single or a list of loaded data.
        """
        assert pipeline is not None, 'data loading pipeline is not provided'
        img_info = self.data_infos[index]
        input_dict = dict(img_info=img_info)

        if load_annos:
            ann_info = self.get_ann_info(index)
            input_dict.update(dict(ann_info=ann_info))

        self.pre_pipeline(input_dict)
        example = pipeline(input_dict)

        # extract data items according to keys
        if isinstance(key, str):
            data = extract_result_dict(example, key)
        else:
            data = [extract_result_dict(example, k) for k in key]

        return data
    def _get_pipeline(self, pipeline):
        """Get the data loading pipeline in the self.show/evaluate functions.

        Args:
            pipeline (list[dict] | None): Input pipeline. If None is given,
                get from self.pipeline.
        """
        if pipeline is None:
            if not hasattr(self, 'pipeline') or self.pipeline is None:
                warnings.warn(
                    'Use default pipeline for data loading, this may cause '
                    'errors when data is on ceph')
                return self._build_default_pipeline()
            loading_pipeline = get_loading_pipeline(self.pipeline.transforms)
            return Compose(loading_pipeline)
        return Compose(pipeline)
    def _build_default_pipeline(self):
        """Build the default pipeline for this dataset."""
        pipeline = [
            dict(type='LoadImageFromFileMono3D'),
            dict(
                type='DefaultFormatBundle3D',
                class_names=self.CLASSES,
                with_label=False),
            dict(type='Collect3D', keys=['img'])
        ]
        return Compose(pipeline)
    def show(self, results, out_dir, show=True, pipeline=None):
        """Results visualization.

        Args:
            results (list[dict]): List of bounding box results.
            out_dir (str): Output directory of the visualization results.
            show (bool): Whether to visualize the results online.
            pipeline (list[dict], optional): raw data loading for showing.
                Default: None.
        """
        assert out_dir is not None, 'Expect out_dir, got none.'
        pipeline = self._get_pipeline(pipeline)
        for i, result in enumerate(results):
            if 'img_bbox' in result.keys():
                result = result['img_bbox']
            data_info = self.data_infos[i]
            img_path = data_info['file_name']
            file_name = osp.split(img_path)[-1].split('.')[0]
            img, img_metas = self._extract_data(i, pipeline,
                                                ['img', 'img_metas'])
            # need to transpose the channel to the first dim
            img = img.numpy().transpose(1, 2, 0)
            gt_bboxes = self.get_ann_info(i)['gt_bboxes_3d']
            pred_bboxes = result['boxes_3d']
            show_multi_modality_result(
                img,
                gt_bboxes,
                pred_bboxes,
                img_metas['cam2img'],
                out_dir,
                file_name,
                box_mode='camera',
                show=show)
def output_to_nusc_box(detection):
    """Convert the output to the box class used by nuScenes.

    Args:
        detection (dict): Detection results.

            - boxes_3d (:obj:`BaseInstance3DBoxes`): Detection bboxes.
            - scores_3d (torch.Tensor): Detection scores.
            - labels_3d (torch.Tensor): Predicted box labels.
            - attrs_3d (torch.Tensor, optional): Predicted attributes.

    Returns:
        list[:obj:`NuScenesBox`]: List of standard NuScenesBoxes.
    """
    box3d = detection['boxes_3d']
    scores = detection['scores_3d'].numpy()
    labels = detection['labels_3d'].numpy()
    attrs = None
    if 'attrs_3d' in detection:
        attrs = detection['attrs_3d'].numpy()

    box_gravity_center = box3d.gravity_center.numpy()
    box_dims = box3d.dims.numpy()
    box_yaw = box3d.yaw.numpy()

    # convert the dims/rotation to the NuScenesBox convention
    box_dims[:, [0, 1, 2]] = box_dims[:, [2, 0, 1]]
    box_yaw = -box_yaw

    box_list = []
    for i in range(len(box3d)):
        q1 = pyquaternion.Quaternion(axis=[0, 0, 1], radians=box_yaw[i])
        q2 = pyquaternion.Quaternion(axis=[1, 0, 0], radians=np.pi / 2)
        quat = q2 * q1
        velocity = (box3d.tensor[i, 7], 0.0, box3d.tensor[i, 8])
        box = NuScenesBox(
            box_gravity_center[i],
            box_dims[i],
            quat,
            label=labels[i],
            score=scores[i],
            velocity=velocity)
        box_list.append(box)
    return box_list, attrs
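# A quick sanity sketch for the conversion above (illustrative): the dims
# permutation and yaw negation move mmdet3d's camera-box convention into
# the NuScenesBox one, so boxes, scores and labels stay one-to-one.
#
#   det = dict(boxes_3d=cam_boxes, scores_3d=scores, labels_3d=labels)
#   nusc_boxes, attrs = output_to_nusc_box(det)
#   assert len(nusc_boxes) == len(cam_boxes)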
def cam_nusc_box_to_global(info,
                           boxes,
                           attrs,
                           classes,
                           eval_configs,
                           eval_version='detection_cvpr_2019'):
    """Convert the boxes from camera to global coordinates.

    Args:
        info (dict): Info for a specific sample data, including the
            calibration information.
        boxes (list[:obj:`NuScenesBox`]): List of predicted NuScenesBoxes.
        attrs (list): List of predicted attributes.
        classes (list[str]): Mapped classes in the evaluation.
        eval_configs (object): Evaluation configuration object.
        eval_version (str): Evaluation version.
            Default: 'detection_cvpr_2019'

    Returns:
        tuple: Lists of standard NuScenesBoxes in the global coordinate
            frame and their attributes.
    """
    box_list = []
    attr_list = []
    for (box, attr) in zip(boxes, attrs):
        # Move box to the ego vehicle coordinate system
        box.rotate(pyquaternion.Quaternion(info['cam2ego_rotation']))
        box.translate(np.array(info['cam2ego_translation']))
        # filter detections outside the class-specific range in the ego frame
        cls_range_map = eval_configs.class_range
        radius = np.linalg.norm(box.center[:2], 2)
        det_range = cls_range_map[classes[box.label]]
        if radius > det_range:
            continue
        # Move box to the global coordinate system
        box.rotate(pyquaternion.Quaternion(info['ego2global_rotation']))
        box.translate(np.array(info['ego2global_translation']))
        box_list.append(box)
        attr_list.append(attr)
    return box_list, attr_list
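# global_nusc_box_to_cam below applies the inverse chain (global -> ego ->
# camera), so the two functions are round-trip consistent for boxes that
# survive the range filter; a sketch of the pairing (`info` is a data_infos
# entry carrying the calibration fields used above):
#
#   g_boxes, g_attrs = cam_nusc_box_to_global(info, boxes, attrs,
#                                             classes, cfg)
#   c_boxes = global_nusc_box_to_cam(info, g_boxes, classes, cfg)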
def global_nusc_box_to_cam(info,
                           boxes,
                           classes,
                           eval_configs,
                           eval_version='detection_cvpr_2019'):
    """Convert the boxes from global to camera coordinates.

    Args:
        info (dict): Info for a specific sample data, including the
            calibration information.
        boxes (list[:obj:`NuScenesBox`]): List of predicted NuScenesBoxes.
        classes (list[str]): Mapped classes in the evaluation.
        eval_configs (object): Evaluation configuration object.
        eval_version (str): Evaluation version.
            Default: 'detection_cvpr_2019'

    Returns:
        list: List of standard NuScenesBoxes in the camera coordinate frame.
    """
    box_list = []
    for box in boxes:
        # Move box to the ego vehicle coordinate system
        box.translate(-np.array(info['ego2global_translation']))
        box.rotate(
            pyquaternion.Quaternion(info['ego2global_rotation']).inverse)
        # filter detections outside the class-specific range in the ego frame
        cls_range_map = eval_configs.class_range
        radius = np.linalg.norm(box.center[:2], 2)
        det_range = cls_range_map[classes[box.label]]
        if radius > det_range:
            continue
        # Move box to the camera coordinate system
        box.translate(-np.array(info['cam2ego_translation']))
        box.rotate(pyquaternion.Quaternion(info['cam2ego_rotation']).inverse)
        box_list.append(box)
    return box_list
def nusc_box_to_cam_box3d(boxes):
    """Convert boxes from :obj:`NuScenesBox` to :obj:`CameraInstance3DBoxes`.

    Args:
        boxes (list[:obj:`NuScenesBox`]): List of predicted NuScenesBoxes.

    Returns:
        tuple (:obj:`CameraInstance3DBoxes` | torch.Tensor | torch.Tensor):
            Converted 3D bounding boxes, scores and labels.
    """
    locs = torch.Tensor([b.center for b in boxes]).view(-1, 3)
    dims = torch.Tensor([b.wlh for b in boxes]).view(-1, 3)
    rots = torch.Tensor([b.orientation.yaw_pitch_roll[0]
                         for b in boxes]).view(-1, 1)
    velocity = torch.Tensor([b.velocity[:2] for b in boxes]).view(-1, 2)

    # convert the NuScenesBox convention to the camera-box convention
    dims[:, [0, 1, 2]] = dims[:, [1, 2, 0]]
    rots = -rots

    boxes_3d = torch.cat([locs, dims, rots, velocity], dim=1).cuda()
    cam_boxes3d = CameraInstance3DBoxes(
        boxes_3d, box_dim=9, origin=(0.5, 0.5, 0.5))
    scores = torch.Tensor([b.score for b in boxes]).cuda()
    labels = torch.LongTensor([b.label for b in boxes]).cuda()
    nms_scores = scores.new_zeros(scores.shape[0], 10 + 1)
    indices = labels.new_tensor(list(range(scores.shape[0])))
    nms_scores[indices, labels] = scores
    return cam_boxes3d, nms_scores, labels
\ No newline at end of file
projects/mmdet3d_plugin/datasets/nuscnes_eval.py
0 → 100644
import argparse
import copy
import json
import os
import random
import time
from typing import Any, Dict, Tuple

import cv2
import numpy as np
import pycocotools.mask as mask_util
import torch
import tqdm
from matplotlib import pyplot as plt
from pyquaternion import Quaternion
from torchvision.transforms.functional import rotate

from nuscenes import NuScenes
from nuscenes.eval.common.config import config_factory
from nuscenes.eval.common.data_classes import EvalBoxes
# NOTE: load_gt is deliberately not imported from the devkit; it is
# redefined below to attach visibility and frame index to each box.
from nuscenes.eval.common.loaders import (add_center_dist,
                                          filter_eval_boxes, load_prediction)
from nuscenes.eval.common.render import setup_axis
from nuscenes.eval.common.utils import boxes_to_sensor, quaternion_yaw
from nuscenes.eval.detection.algo import accumulate, calc_ap, calc_tp
from nuscenes.eval.detection.constants import (DETECTION_COLORS,
                                               DETECTION_NAMES,
                                               PRETTY_DETECTION_NAMES,
                                               PRETTY_TP_METRICS, TP_METRICS,
                                               TP_METRICS_UNITS)
from nuscenes.eval.detection.data_classes import (DetectionBox,
                                                  DetectionConfig,
                                                  DetectionMetricData,
                                                  DetectionMetricDataList,
                                                  DetectionMetrics)
from nuscenes.eval.detection.evaluate import NuScenesEval
from nuscenes.eval.detection.render import (class_pr_curve, dist_pr_curve,
                                            summary_plot, visualize_sample)
from nuscenes.eval.detection.utils import category_to_detection_name
from nuscenes.eval.tracking.data_classes import TrackingBox
from nuscenes.utils.data_classes import Box, LidarPointCloud
from nuscenes.utils.geometry_utils import (BoxVisibility, box_in_image,
                                           points_in_box, transform_matrix,
                                           view_points)
from nuscenes.utils.splits import create_splits_scenes

from mmdet3d.core.bbox.iou_calculators import BboxOverlaps3D
# from projects.mmdet3d_plugin.models.utils.visual import save_tensor

Axis = Any
def class_tp_curve(md_list: DetectionMetricDataList,
                   metrics: DetectionMetrics,
                   detection_name: str,
                   min_recall: float,
                   dist_th_tp: float,
                   savepath: str = None,
                   ax: Axis = None) -> None:
    """
    Plot the true-positive error curves for the specified class.
    :param md_list: DetectionMetricDataList instance.
    :param metrics: DetectionMetrics instance.
    :param detection_name: The detection class to plot.
    :param min_recall: Minimum recall value.
    :param dist_th_tp: The distance threshold used to determine matches.
    :param savepath: If given, saves the rendering here instead of
        displaying.
    :param ax: Axes onto which to render.
    """
    # Get metric data for the given detection class with the tp distance
    # threshold.
    md = md_list[(detection_name, dist_th_tp)]
    min_recall_ind = round(100 * min_recall)
    if min_recall_ind <= md.max_recall_ind:
        # For traffic_cone and barrier only a subset of the metrics are
        # plotted.
        rel_metrics = [
            m for m in TP_METRICS
            if not np.isnan(metrics.get_label_tp(detection_name, m))
        ]
        ylimit = max([
            max(getattr(md, metric)[min_recall_ind:md.max_recall_ind + 1])
            for metric in rel_metrics
        ]) * 1.1
    else:
        ylimit = 1.0

    # Prepare axis.
    if ax is None:
        ax = setup_axis(
            title=PRETTY_DETECTION_NAMES[detection_name],
            xlabel='Recall',
            ylabel='Error',
            xlim=1,
            min_recall=min_recall)
    ax.set_ylim(0, ylimit)

    # Plot the recall vs. error curve for each tp metric.
    for metric in TP_METRICS:
        tp = metrics.get_label_tp(detection_name, metric)

        # Plot only if we have valid data.
        if tp is not np.nan and min_recall_ind <= md.max_recall_ind:
            recall, error = md.recall[:md.max_recall_ind + 1], \
                getattr(md, metric)[:md.max_recall_ind + 1]
        else:
            recall, error = [], []

        # Change the legend based on the tp value.
        if tp is np.nan:
            label = '{}: n/a'.format(PRETTY_TP_METRICS[metric])
        elif min_recall_ind > md.max_recall_ind:
            label = '{}: nan'.format(PRETTY_TP_METRICS[metric])
        else:
            label = '{}: {:.2f} ({})'.format(PRETTY_TP_METRICS[metric], tp,
                                             TP_METRICS_UNITS[metric])
        if metric == 'trans_err':
            label += f' ({md.max_recall_ind})'  # add recall
            print(f'Recall: {detection_name}: {md.max_recall_ind / 100}')
        ax.plot(recall, error, label=label)
    ax.axvline(x=md.max_recall, linestyle='-.', color=(0, 0, 0, 0.3))
    ax.legend(loc='best')

    if savepath is not None:
        plt.savefig(savepath)
        plt.close()
class DetectionBox_modified(DetectionBox):

    def __init__(self, *args, token=None, visibility=None, index=None,
                 **kwargs):
        """Extend DetectionBox with the annotation token, the visibility
        level and the frame index."""
        super().__init__(*args, **kwargs)
        self.token = token
        self.visibility = visibility
        self.index = index

    def serialize(self) -> dict:
        """Serialize instance into a json-friendly format."""
        return {
            'token': self.token,
            'sample_token': self.sample_token,
            'translation': self.translation,
            'size': self.size,
            'rotation': self.rotation,
            'velocity': self.velocity,
            'ego_translation': self.ego_translation,
            'num_pts': self.num_pts,
            'detection_name': self.detection_name,
            'detection_score': self.detection_score,
            'attribute_name': self.attribute_name,
            'visibility': self.visibility,
            'index': self.index
        }

    @classmethod
    def deserialize(cls, content: dict):
        """Initialize from serialized content."""
        return cls(
            token=content['token'],
            sample_token=content['sample_token'],
            translation=tuple(content['translation']),
            size=tuple(content['size']),
            rotation=tuple(content['rotation']),
            velocity=tuple(content['velocity']),
            ego_translation=(0.0, 0.0, 0.0)
            if 'ego_translation' not in content
            else tuple(content['ego_translation']),
            num_pts=-1 if 'num_pts' not in content
            else int(content['num_pts']),
            detection_name=content['detection_name'],
            detection_score=-1.0 if 'detection_score' not in content
            else float(content['detection_score']),
            attribute_name=content['attribute_name'],
            visibility=content['visibility'],
            index=content['index'],
        )
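# Round-trip sketch for the extended box (field values are illustrative):
#
#   content = box.serialize()  # plain, json-friendly dict
#   box2 = DetectionBox_modified.deserialize(content)
#   assert box2.token == box.token and box2.visibility == box.visibility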
def center_in_image(box,
                    intrinsic: np.ndarray,
                    imsize: Tuple[int, int],
                    vis_level: int = BoxVisibility.ANY) -> bool:
    """
    Check if the center of a box is visible inside an image, without
    accounting for occlusions.
    :param box: The box to be checked.
    :param intrinsic: <float: 3, 3>. Intrinsic camera matrix.
    :param imsize: (width, height).
    :param vis_level: One of the enumerations of <BoxVisibility>.
    :return: True if the visibility condition is satisfied.
    """
    center_3d = box.center.reshape(3, 1)
    center_img = view_points(center_3d, intrinsic, normalize=True)[:2, :]

    visible = np.logical_and(center_img[0, :] > 0,
                             center_img[0, :] < imsize[0])
    visible = np.logical_and(visible, center_img[1, :] < imsize[1])
    visible = np.logical_and(visible, center_img[1, :] > 0)
    visible = np.logical_and(visible, center_3d[2, :] > 1)

    # True if the center is at least 0.1 meter in front of the camera.
    in_front = center_3d[2, :] > 0.1

    if vis_level == BoxVisibility.ALL:
        return all(visible) and all(in_front)
    elif vis_level == BoxVisibility.ANY:
        return any(visible) and all(in_front)
    elif vis_level == BoxVisibility.NONE:
        return True
    else:
        raise ValueError('vis_level: {} not valid'.format(vis_level))
def exist_corners_in_image_but_not_all(
        box,
        intrinsic: np.ndarray,
        imsize: Tuple[int, int],
        vis_level: int = BoxVisibility.ANY) -> bool:
    """
    Check if some, but not all, corners of a box are visible in the image.
    :param box: The box to be checked.
    :param intrinsic: <float: 3, 3>. Intrinsic camera matrix.
    :param imsize: (width, height).
    :param vis_level: One of the enumerations of <BoxVisibility>.
    :return: True if the visibility condition is satisfied.
    """
    corners_3d = box.corners()
    corners_img = view_points(corners_3d, intrinsic, normalize=True)[:2, :]

    visible = np.logical_and(corners_img[0, :] > 0,
                             corners_img[0, :] < imsize[0])
    visible = np.logical_and(visible, corners_img[1, :] < imsize[1])
    visible = np.logical_and(visible, corners_img[1, :] > 0)
    visible = np.logical_and(visible, corners_3d[2, :] > 1)

    # True if a corner is at least 0.1 meter in front of the camera.
    in_front = corners_3d[2, :] > 0.1

    return any(visible) and not all(visible) and all(in_front)
def load_gt(nusc: NuScenes, eval_split: str, box_cls,
            verbose: bool = False) -> EvalBoxes:
    """
    Loads ground truth boxes from the database.
    :param nusc: A NuScenes instance.
    :param eval_split: The evaluation split for which we load GT boxes.
    :param box_cls: Type of box to load, e.g. DetectionBox or TrackingBox.
    :param verbose: Whether to print messages to stdout.
    :return: The GT boxes.
    """
    # Init.
    if box_cls == DetectionBox_modified:
        attribute_map = {a['token']: a['name'] for a in nusc.attribute}

    if verbose:
        print('Loading annotations for {} split from nuScenes version: {}'.
              format(eval_split, nusc.version))
    # Read out all sample_tokens in the DB.
    sample_tokens_all = [s['token'] for s in nusc.sample]
    assert len(sample_tokens_all) > 0, 'Error: Database has no samples!'

    # Only keep samples from this split.
    splits = create_splits_scenes()

    # Check compatibility of the split with nusc.version.
    version = nusc.version
    if eval_split in {'train', 'val', 'train_detect', 'train_track'}:
        assert version.endswith('trainval'), \
            'Error: Requested split {} which is not compatible with ' \
            'NuScenes version {}'.format(eval_split, version)
    elif eval_split in {'mini_train', 'mini_val'}:
        assert version.endswith('mini'), \
            'Error: Requested split {} which is not compatible with ' \
            'NuScenes version {}'.format(eval_split, version)
    elif eval_split == 'test':
        assert version.endswith('test'), \
            'Error: Requested split {} which is not compatible with ' \
            'NuScenes version {}'.format(eval_split, version)
    else:
        raise ValueError(
            'Error: Requested split {} which this function cannot map to '
            'the correct NuScenes version.'.format(eval_split))

    if eval_split == 'test':
        # Check that you aren't trying to cheat :).
        assert len(nusc.sample_annotation) > 0, \
            'Error: You are trying to evaluate on the test set but you do ' \
            'not have the annotations!'

    # Map each sample token to its temporal index within its scene.
    index_map = {}
    for scene in nusc.scene:
        first_sample_token = scene['first_sample_token']
        sample = nusc.get('sample', first_sample_token)
        index_map[first_sample_token] = 1
        index = 2
        while sample['next'] != '':
            sample = nusc.get('sample', sample['next'])
            index_map[sample['token']] = index
            index += 1

    sample_tokens = []
    for sample_token in sample_tokens_all:
        scene_token = nusc.get('sample', sample_token)['scene_token']
        scene_record = nusc.get('scene', scene_token)
        if scene_record['name'] in splits[eval_split]:
            sample_tokens.append(sample_token)

    all_annotations = EvalBoxes()

    # Load annotations and filter predictions and annotations.
    tracking_id_set = set()
    for sample_token in tqdm.tqdm(sample_tokens, leave=verbose):
        sample = nusc.get('sample', sample_token)
        sample_annotation_tokens = sample['anns']
        sample_boxes = []
        for sample_annotation_token in sample_annotation_tokens:
            sample_annotation = nusc.get('sample_annotation',
                                         sample_annotation_token)
            if box_cls == DetectionBox_modified:
                # Get the label name used in the detection task and filter
                # out unused labels.
                detection_name = category_to_detection_name(
                    sample_annotation['category_name'])
                if detection_name is None:
                    continue
                # Get attribute_name.
                attr_tokens = sample_annotation['attribute_tokens']
                attr_count = len(attr_tokens)
                if attr_count == 0:
                    attribute_name = ''
                elif attr_count == 1:
                    attribute_name = attribute_map[attr_tokens[0]]
                else:
                    raise Exception('Error: GT annotations must not have '
                                    'more than one attribute!')
                sample_boxes.append(
                    box_cls(
                        token=sample_annotation_token,
                        sample_token=sample_token,
                        translation=sample_annotation['translation'],
                        size=sample_annotation['size'],
                        rotation=sample_annotation['rotation'],
                        velocity=nusc.box_velocity(
                            sample_annotation['token'])[:2],
                        num_pts=sample_annotation['num_lidar_pts'] +
                        sample_annotation['num_radar_pts'],
                        detection_name=detection_name,
                        detection_score=-1.0,  # GT samples have no score.
                        attribute_name=attribute_name,
                        visibility=sample_annotation['visibility_token'],
                        index=index_map[sample_token]))
            elif box_cls == TrackingBox:
                assert False  # tracking boxes are not supported here
            else:
                raise NotImplementedError(
                    'Error: Invalid box_cls %s!' % box_cls)

        all_annotations.add_boxes(sample_token, sample_boxes)

    if verbose:
        print('Loaded ground truth annotations for {} samples.'.format(
            len(all_annotations.sample_tokens)))

    return all_annotations
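# Usage sketch (assumes a local nuScenes installation at data/nuscenes):
#
#   nusc = NuScenes(version='v1.0-mini', dataroot='data/nuscenes')
#   gt_boxes = load_gt(nusc, 'mini_val', DetectionBox_modified,
#                      verbose=True)
#   # each box now also carries .token, .visibility and its frame .index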
def filter_eval_boxes_by_id(nusc: NuScenes,
                            eval_boxes: EvalBoxes,
                            id=None,
                            verbose: bool = False) -> EvalBoxes:
    """
    Keep only the boxes whose annotation token is in the given set.
    :param nusc: An instance of the NuScenes class.
    :param eval_boxes: An instance of the EvalBoxes class.
    :param id: The annotation-token set used to keep boxes.
    :param verbose: Whether to print to stdout.
    """
    # Accumulators for the number of filtered boxes.
    total, anns_filter = 0, 0
    for ind, sample_token in enumerate(eval_boxes.sample_tokens):
        # Filter on annotation tokens.
        total += len(eval_boxes[sample_token])
        filtered_boxes = []
        for box in eval_boxes[sample_token]:
            if box.token in id:
                filtered_boxes.append(box)
        anns_filter += len(filtered_boxes)
        eval_boxes.boxes[sample_token] = filtered_boxes

    if verbose:
        print('=> Original number of boxes: %d' % total)
        print('=> After anns based filtering: %d' % anns_filter)

    return eval_boxes
def filter_eval_boxes_by_visibility(ori_eval_boxes: EvalBoxes,
                                    visibility=None,
                                    verbose: bool = False) -> EvalBoxes:
    """
    Keep only the boxes with the given visibility token.
    :param ori_eval_boxes: An instance of the EvalBoxes class.
    :param visibility: The visibility token used to keep boxes.
    :param verbose: Whether to print to stdout.
    """
    # Accumulators for the number of filtered boxes.
    eval_boxes = copy.deepcopy(ori_eval_boxes)
    total, anns_filter = 0, 0
    for ind, sample_token in enumerate(eval_boxes.sample_tokens):
        # Filter on visibility.
        total += len(eval_boxes[sample_token])
        filtered_boxes = []
        for box in eval_boxes[sample_token]:
            if box.visibility == visibility:
                filtered_boxes.append(box)
        anns_filter += len(filtered_boxes)
        eval_boxes.boxes[sample_token] = filtered_boxes

    if verbose:
        print('=> Original number of boxes: %d' % total)
        print('=> After visibility based filtering: %d' % anns_filter)

    return eval_boxes
def filter_by_sample_token(ori_eval_boxes, valid_sample_tokens=[],
                           verbose=False):
    """Keep only the boxes belonging to the given sample tokens."""
    eval_boxes = copy.deepcopy(ori_eval_boxes)
    for sample_token in eval_boxes.sample_tokens:
        if sample_token not in valid_sample_tokens:
            eval_boxes.boxes.pop(sample_token)
    return eval_boxes
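# Example: keep only the first frame of every scene (a sketch, using the
# index_map built in NuScenesEval_custom below):
#
#   first_tokens = [t for t, idx in index_map.items() if idx == 1]
#   first_frame_boxes = filter_by_sample_token(all_boxes, first_tokens)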
def filter_eval_boxes_by_overlap(nusc: NuScenes,
                                 eval_boxes: EvalBoxes,
                                 verbose: bool = False) -> EvalBoxes:
    """
    Keep only the boxes whose center is visible in more than one camera,
    i.e. boxes in the overlap regions of adjacent camera images.
    :param nusc: An instance of the NuScenes class.
    :param eval_boxes: An instance of the EvalBoxes class.
    :param verbose: Whether to print to stdout.
    """
    # Accumulators for the number of filtered boxes.
    cams = [
        'CAM_FRONT', 'CAM_FRONT_RIGHT', 'CAM_BACK_RIGHT', 'CAM_BACK',
        'CAM_BACK_LEFT', 'CAM_FRONT_LEFT'
    ]
    total, anns_filter = 0, 0
    for ind, sample_token in enumerate(eval_boxes.sample_tokens):
        total += len(eval_boxes[sample_token])
        sample_record = nusc.get('sample', sample_token)
        filtered_boxes = []
        for box in eval_boxes[sample_token]:
            count = 0
            for cam in cams:
                # adapted from the nuScenes devkit
                sample_data_token = sample_record['data'][cam]
                sd_record = nusc.get('sample_data', sample_data_token)
                cs_record = nusc.get('calibrated_sensor',
                                     sd_record['calibrated_sensor_token'])
                sensor_record = nusc.get('sensor',
                                         cs_record['sensor_token'])
                pose_record = nusc.get('ego_pose',
                                       sd_record['ego_pose_token'])
                cam_intrinsic = np.array(cs_record['camera_intrinsic'])
                imsize = (sd_record['width'], sd_record['height'])
                new_box = Box(
                    box.translation,
                    box.size,
                    Quaternion(box.rotation),
                    name=box.detection_name,
                    token='')

                # Move box to the ego vehicle coordinate system.
                new_box.translate(-np.array(pose_record['translation']))
                new_box.rotate(Quaternion(pose_record['rotation']).inverse)

                # Move box to the sensor coordinate system.
                new_box.translate(-np.array(cs_record['translation']))
                new_box.rotate(Quaternion(cs_record['rotation']).inverse)

                if center_in_image(new_box, cam_intrinsic, imsize,
                                   vis_level=BoxVisibility.ANY):
                    count += 1
                # if exist_corners_in_image_but_not_all(
                #         new_box, cam_intrinsic, imsize,
                #         vis_level=BoxVisibility.ANY):
                #     count += 1

            if count > 1:
                with open('center_overlap.txt', 'a') as f:
                    try:
                        f.write(box.token + '\n')
                    except Exception:
                        pass
                filtered_boxes.append(box)
        anns_filter += len(filtered_boxes)
        eval_boxes.boxes[sample_token] = filtered_boxes

    verbose = True
    if verbose:
        print('=> Original number of boxes: %d' % total)
        print('=> After overlap based filtering: %d' % anns_filter)

    return eval_boxes
class NuScenesEval_custom(NuScenesEval):
    """Custom nuScenes detection evaluation with optional overlap and
    visibility filtering of the ground truth and predictions."""

    def __init__(self,
                 nusc: NuScenes,
                 config: DetectionConfig,
                 result_path: str,
                 eval_set: str,
                 output_dir: str = None,
                 verbose: bool = True,
                 overlap_test=False,
                 eval_mask=False,
                 data_infos=None):
        """
        Initialize a DetectionEval object.
        :param nusc: A NuScenes object.
        :param config: A DetectionConfig object.
        :param result_path: Path of the nuScenes JSON result file.
        :param eval_set: The dataset split to evaluate on, e.g. train,
            val or test.
        :param output_dir: Folder to save plots and results to.
        :param verbose: Whether to print to stdout.
        """
        self.nusc = nusc
        self.result_path = result_path
        self.eval_set = eval_set
        self.output_dir = output_dir
        self.verbose = verbose
        self.cfg = config
        self.overlap_test = overlap_test
        self.eval_mask = eval_mask
        self.data_infos = data_infos
        # Check that the result file exists.
        assert os.path.exists(result_path), \
            'Error: The result file does not exist!'

        # Make dirs.
        self.plot_dir = os.path.join(self.output_dir, 'plots')
        if not os.path.isdir(self.output_dir):
            os.makedirs(self.output_dir)
        if not os.path.isdir(self.plot_dir):
            os.makedirs(self.plot_dir)

        # Load data.
        if verbose:
            print('Initializing nuScenes detection evaluation')
        self.pred_boxes, self.meta = load_prediction(
            self.result_path, self.cfg.max_boxes_per_sample, DetectionBox,
            verbose=verbose)
        self.gt_boxes = load_gt(self.nusc, self.eval_set,
                                DetectionBox_modified, verbose=verbose)

        assert set(self.pred_boxes.sample_tokens) == \
            set(self.gt_boxes.sample_tokens), \
            "Samples in split don't match samples in predictions."

        # Add center distances.
        self.pred_boxes = add_center_dist(nusc, self.pred_boxes)
        self.gt_boxes = add_center_dist(nusc, self.gt_boxes)

        # Filter boxes (distance, points per box, etc.).
        if verbose:
            print('Filtering predictions')
        self.pred_boxes = filter_eval_boxes(
            nusc, self.pred_boxes, self.cfg.class_range, verbose=verbose)
        if verbose:
            print('Filtering ground truth annotations')
        self.gt_boxes = filter_eval_boxes(
            nusc, self.gt_boxes, self.cfg.class_range, verbose=verbose)

        if self.overlap_test:
            self.pred_boxes = filter_eval_boxes_by_overlap(
                self.nusc, self.pred_boxes)
            self.gt_boxes = filter_eval_boxes_by_overlap(
                self.nusc, self.gt_boxes, verbose=True)

        self.all_gt = copy.deepcopy(self.gt_boxes)
        self.all_preds = copy.deepcopy(self.pred_boxes)
        self.sample_tokens = self.gt_boxes.sample_tokens

        # Map each sample token to its temporal index within its scene.
        self.index_map = {}
        for scene in nusc.scene:
            first_sample_token = scene['first_sample_token']
            sample = nusc.get('sample', first_sample_token)
            self.index_map[first_sample_token] = 1
            index = 2
            while sample['next'] != '':
                sample = nusc.get('sample', sample['next'])
                self.index_map[sample['token']] = index
                index += 1
    def update_gt(self, type_='vis', visibility='1', index=1):
        if type_ == 'vis':
            self.visibility_test = True
            if self.visibility_test:
                # nuScenes visibility tokens:
                # '1': v0-40,   visibility of the whole object is 0-40%
                # '2': v40-60,  visibility of the whole object is 40-60%
                # '3': v60-80,  visibility of the whole object is 60-80%
                # '4': v80-100, visibility of the whole object is 80-100%
                self.gt_boxes = filter_eval_boxes_by_visibility(
                    self.all_gt, visibility, verbose=True)
        elif type_ == 'ord':
            valid_tokens = [
                key for (key, value) in self.index_map.items()
                if value == index
            ]
            self.gt_boxes = filter_by_sample_token(self.all_gt, valid_tokens)
            self.pred_boxes = filter_by_sample_token(self.all_preds,
                                                     valid_tokens)
        self.sample_tokens = self.gt_boxes.sample_tokens
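    # Usage sketch: evaluate on progressively more visible ground truth
    # (visibility tokens '1'..'4' as documented above; `evaluator` is a
    # NuScenesEval_custom instance):
    #
    #   evaluator.update_gt(type_='vis', visibility='4')  # 80-100% visible
    #   metrics, md_list = evaluator.evaluate()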
    def evaluate(self) -> Tuple[DetectionMetrics, DetectionMetricDataList]:
        """
        Performs the actual evaluation.
        :return: A tuple of high-level and raw metric data.
        """
        start_time = time.time()

        # -----------------------------------
        # Step 1: Accumulate metric data for all classes and distance
        # thresholds.
        # -----------------------------------
        if self.verbose:
            print('Accumulating metric data...')
        metric_data_list = DetectionMetricDataList()

        for class_name in self.cfg.class_names:
            for dist_th in self.cfg.dist_ths:
                md = accumulate(self.gt_boxes, self.pred_boxes, class_name,
                                self.cfg.dist_fcn_callable, dist_th)
                metric_data_list.set(class_name, dist_th, md)

        # -----------------------------------
        # Step 2: Calculate metrics from the data.
        # -----------------------------------
        if self.verbose:
            print('Calculating metrics...')
        metrics = DetectionMetrics(self.cfg)
        for class_name in self.cfg.class_names:
            # Compute APs.
            for dist_th in self.cfg.dist_ths:
                metric_data = metric_data_list[(class_name, dist_th)]
                ap = calc_ap(metric_data, self.cfg.min_recall,
                             self.cfg.min_precision)
                metrics.add_label_ap(class_name, dist_th, ap)

            # Compute TP metrics.
            for metric_name in TP_METRICS:
                metric_data = metric_data_list[(class_name,
                                                self.cfg.dist_th_tp)]
                if class_name in ['traffic_cone'] and metric_name in \
                        ['attr_err', 'vel_err', 'orient_err']:
                    tp = np.nan
                elif class_name in ['barrier'] and metric_name in \
                        ['attr_err', 'vel_err']:
                    tp = np.nan
                else:
                    tp = calc_tp(metric_data, self.cfg.min_recall,
                                 metric_name)
                metrics.add_label_tp(class_name, metric_name, tp)

        # Compute evaluation time.
        metrics.add_runtime(time.time() - start_time)

        return metrics, metric_data_list
    def render(self, metrics: DetectionMetrics,
               md_list: DetectionMetricDataList) -> None:
        """
        Renders various PR and TP curves.
        :param metrics: DetectionMetrics instance.
        :param md_list: DetectionMetricDataList instance.
        """
        if self.verbose:
            print('Rendering PR and TP curves')

        def savepath(name):
            return os.path.join(self.plot_dir, name + '.pdf')

        summary_plot(
            md_list,
            metrics,
            min_precision=self.cfg.min_precision,
            min_recall=self.cfg.min_recall,
            dist_th_tp=self.cfg.dist_th_tp,
            savepath=savepath('summary'))

        for detection_name in self.cfg.class_names:
            class_pr_curve(
                md_list,
                metrics,
                detection_name,
                self.cfg.min_precision,
                self.cfg.min_recall,
                savepath=savepath(detection_name + '_pr'))
            class_tp_curve(
                md_list,
                metrics,
                detection_name,
                self.cfg.min_recall,
                self.cfg.dist_th_tp,
                savepath=savepath(detection_name + '_tp'))

        for dist_th in self.cfg.dist_ths:
            dist_pr_curve(
                md_list,
                metrics,
                dist_th,
                self.cfg.min_precision,
                self.cfg.min_recall,
                savepath=savepath('dist_pr_' + str(dist_th)))
if __name__ == "__main__":

    # Settings.
    parser = argparse.ArgumentParser(
        description='Evaluate nuScenes detection results.',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('result_path', type=str,
                        help='The submission as a JSON file.')
    parser.add_argument('--output_dir', type=str,
                        default='~/nuscenes-metrics',
                        help='Folder to store result metrics, graphs and '
                             'example visualizations.')
    parser.add_argument('--eval_set', type=str, default='val',
                        help='Which dataset split to evaluate on, train, '
                             'val or test.')
    parser.add_argument('--dataroot', type=str, default='data/nuscenes',
                        help='Default nuScenes data directory.')
    parser.add_argument('--version', type=str, default='v1.0-trainval',
                        help='Which version of the nuScenes dataset to '
                             'evaluate on, e.g. v1.0-trainval.')
    parser.add_argument('--config_path', type=str, default='',
                        help='Path to the configuration file. '
                             'If no path given, the CVPR 2019 configuration '
                             'will be used.')
    parser.add_argument('--plot_examples', type=int, default=0,
                        help='How many example visualizations to write to '
                             'disk.')
    parser.add_argument('--render_curves', type=int, default=1,
                        help='Whether to render PR and TP curves to disk.')
    parser.add_argument('--verbose', type=int, default=1,
                        help='Whether to print to stdout.')
    args = parser.parse_args()

    result_path_ = os.path.expanduser(args.result_path)
    output_dir_ = os.path.expanduser(args.output_dir)
    eval_set_ = args.eval_set
    dataroot_ = args.dataroot
    version_ = args.version
    config_path = args.config_path
    plot_examples_ = args.plot_examples
    render_curves_ = bool(args.render_curves)
    verbose_ = bool(args.verbose)

    if config_path == '':
        cfg_ = config_factory('detection_cvpr_2019')
    else:
        with open(config_path, 'r') as _f:
            cfg_ = DetectionConfig.deserialize(json.load(_f))

    nusc_ = NuScenes(version=version_, verbose=verbose_, dataroot=dataroot_)
    nusc_eval = NuScenesEval_custom(
        nusc_,
        config=cfg_,
        result_path=result_path_,
        eval_set=eval_set_,
        output_dir=output_dir_,
        verbose=verbose_)
    for vis in ['1', '2', '3', '4']:
        nusc_eval.update_gt(type_='vis', visibility=vis)
        print(f'================ {vis} ===============')
        nusc_eval.main(plot_examples=plot_examples_,
                       render_curves=render_curves_)
    # for index in range(1, 41):
    #     nusc_eval.update_gt(type_='ord', index=index)
projects/mmdet3d_plugin/datasets/pipelines/__init__.py
0 → 100644
from .transform_3d import (PadMultiViewImage, NormalizeMultiviewImage,
                           PhotoMetricDistortionMultiViewImage,
                           CustomCollect3D, RandomScaleImageMultiViewImage)
from .formating import CustomDefaultFormatBundle3D
from .augmentation import (CropResizeFlipImage, GlobalRotScaleTransImage)
from .dd3d_mapper import DD3DMapper

__all__ = [
    'PadMultiViewImage', 'NormalizeMultiviewImage',
    'PhotoMetricDistortionMultiViewImage', 'CustomDefaultFormatBundle3D',
    'CustomCollect3D', 'RandomScaleImageMultiViewImage',
    'CropResizeFlipImage', 'GlobalRotScaleTransImage', 'DD3DMapper',
]
\ No newline at end of file
projects/mmdet3d_plugin/datasets/pipelines/augmentation.py
0 → 100644
import random

import mmcv
import numpy as np
import torch
from mmdet.datasets.builder import PIPELINES
from PIL import Image


@PIPELINES.register_module()
class CropResizeFlipImage(object):
    """Fixed crop, then random resize and flip of the image.

    Note that the flip requires the features to be flipped in the network
    as well.

    Example configuration::

        ida_aug_conf = {
            # stride of 32 based on 640; pick one of the resize lists:
            # (0.9, 1.1): [576, 608, 640, 672, 704]
            # (0.8, 1.2): [512, 544, 576, 608, 640, 672, 704, 736, 768]
            # (0.7, 1.3): [448, 480, 512, 544, 576, 608, 640, 672, 704,
            #              736, 768, 800, 832]
            "resize": [576, 608, 640, 672, 704],
            "crop": (0, 260, 1600, 900),
            "H": 900,
            "W": 1600,
            "rand_flip": True,
        }

    Args:
        data_aug_conf (dict, optional): Augmentation configuration.
        training (bool): Whether the transform runs in training mode.
        debug (bool): Whether to dump debug images with projected LiDAR
            points before and after augmentation.
    """

    def __init__(self, data_aug_conf=None, training=True, debug=False):
        self.data_aug_conf = data_aug_conf
        self.training = training
        self.debug = debug

    def __call__(self, results):
        """Crop, resize and flip the multi-view images and update the
        camera intrinsics accordingly.

        Args:
            results (dict): Result dict from the loading pipeline.

        Returns:
            dict: Updated result dict.
        """
        if 'aug_param' not in results.keys():
            results['aug_param'] = {}
        imgs = results["img"]
        N = len(imgs)
        new_imgs = []
        resize, resize_dims, crop, flip = self._sample_augmentation(results)

        if self.debug:
            # unique id per image
            from uuid import uuid4
            uid = uuid4()
            # lidar is RFU in nuScenes
            lidar_pts = np.array([
                [10, 30, -2, 1], [-10, 30, -2, 1],
                [5, 15, -2, 1], [-5, 15, -2, 1],
                [30, 0, -2, 1], [-30, 0, -2, 1],
                [10, -30, -2, 1], [-10, -30, -2, 1],
            ], dtype=np.float32).T

        for i in range(N):
            img = Image.fromarray(np.uint8(imgs[i]))
            if self.debug:
                pts_to_img_pre_aug = results['lidar2img'][i] @ lidar_pts
                # divide by the depth component of the homogeneous vector
                pts_to_img_pre_aug = \
                    pts_to_img_pre_aug / pts_to_img_pre_aug[2:3, :]
                img_copy = Image.fromarray(np.uint8(imgs[i]))
                for j in range(pts_to_img_pre_aug.shape[1]):
                    x, y = (int(pts_to_img_pre_aug[0, j]),
                            int(pts_to_img_pre_aug[1, j]))
                    if (0 < x < img_copy.width) and \
                            (0 < y < img_copy.height):
                        # draw a 3x3 red/green/blue marker per projection
                        for dx, color in ((-1, (255, 0, 0)),
                                          (0, (0, 255, 0)),
                                          (1, (0, 0, 255))):
                            for dy in (-1, 0, 1):
                                img_copy.putpixel((x + dx, y + dy), color)
                img_copy.save(f'pre_aug_{uid}_{i}.png')

            # augmentation (resize, crop, horizontal flip)
            # resize, resize_dims, crop, flip, rotate = \
            #     self._sample_augmentation()
            # (different views use different augs in BEVDet)
            img, ida_mat = self._img_transform(
                img,
                resize=resize,
                resize_dims=resize_dims,
                crop=crop,
                flip=flip,
            )
            new_imgs.append(np.array(img).astype(np.float32))
            results['cam2img'][i][:3, :3] = np.matmul(
                ida_mat, results['cam2img'][i][:3, :3])

            if self.debug:
                pts_to_img_post_aug = np.matmul(
                    results['cam2img'][i],
                    results['lidar2cam'][i]) @ lidar_pts
                # divide by the depth component of the homogeneous vector
                pts_to_img_post_aug = \
                    pts_to_img_post_aug / pts_to_img_post_aug[2:3, :]
                for j in range(pts_to_img_post_aug.shape[1]):
                    x, y = (int(pts_to_img_post_aug[0, j]),
                            int(pts_to_img_post_aug[1, j]))
                    if (0 < x < img.width) and (0 < y < img.height):
                        for dx, color in ((-1, (255, 0, 0)),
                                          (0, (0, 255, 0)),
                                          (1, (0, 0, 255))):
                            for dy in (-1, 0, 1):
                                img.putpixel((x + dx, y + dy), color)
                img.save(f'post_aug_{uid}_{i}.png')

            if 'mono_ann_idx' in results.keys():
                # apply the transform to the dd3d intrinsics
                if i in results['mono_ann_idx'].data:
                    mono_index = results['mono_ann_idx'].data.index(i)
                    intrinsics = \
                        results['mono_input_dict'][mono_index]['intrinsics']
                    if torch.is_tensor(intrinsics):
                        intrinsics = intrinsics.numpy().reshape(
                            3, 3).astype(np.float32)
                    elif isinstance(intrinsics, np.ndarray):
                        intrinsics = intrinsics.reshape(3, 3).astype(
                            np.float32)
                    else:
                        intrinsics = np.array(
                            intrinsics, dtype=np.float32).reshape(3, 3)
                    results['mono_input_dict'][mono_index]['intrinsics'] = \
                        np.matmul(ida_mat, intrinsics)
                    results['mono_input_dict'][mono_index]['height'] = \
                        img.size[1]
                    results['mono_input_dict'][mono_index]['width'] = \
                        img.size[0]
                    # apply the transform to the dd3d boxes
                    annotations = \
                        results['mono_input_dict'][mono_index]['annotations']
                    for ann in annotations:
                        # bbox_mode = BoxMode.XYXY_ABS
                        box = self._box_transform(
                            ann['bbox'], resize, crop, flip, img.size[0])[0]
                        box = box.clip(min=0)
                        box = np.minimum(box, list(img.size + img.size))
                        ann["bbox"] = box

        results["img"] = new_imgs
        results['lidar2img'] = [
            np.matmul(results['cam2img'][i], results['lidar2cam'][i])
            for i in range(len(results['lidar2cam']))
        ]

        return results
    def _box_transform(self, box, resize, crop, flip, img_width):
        # Expand the XYXY box to its four corners, apply the crop offset and
        # the resize factor, then re-take the min/max to get the new box.
        # Note: `flip` and `img_width` are accepted but unused here.
        box = np.array([box])
        idxs = np.array([(0, 1), (2, 1), (0, 3), (2, 3)]).flatten()
        coords = np.asarray(box).reshape(-1, 4)[:, idxs].reshape(-1, 2)
        # crop
        coords[:, 0] -= crop[0]
        coords[:, 1] -= crop[1]
        # resize
        coords[:, 0] = coords[:, 0] * resize
        coords[:, 1] = coords[:, 1] * resize
        coords = coords.reshape((-1, 4, 2))
        minxy = coords.min(axis=1)
        maxxy = coords.max(axis=1)
        trans_box = np.concatenate((minxy, maxxy), axis=1)
        return trans_box
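A tiny worked example of the corner expansion above, with made-up box, crop, and resize values (not part of the file), to show the arithmetic:

import numpy as np

box = [100., 300., 200., 400.]   # XYXY, made-up values
crop = (0, 260, 1600, 900)       # crop box as used by this class
resize = 0.5                     # made-up scale factor

# Expand to four corners, shift by the crop origin, scale, re-take min/max.
corners = np.asarray([box]).reshape(-1, 4)[:, [0, 1, 2, 1, 0, 3, 2, 3]].reshape(-1, 2)
corners = (corners - np.array(crop[:2])) * resize
corners = corners.reshape(-1, 4, 2)
print(np.concatenate((corners.min(1), corners.max(1)), axis=1))
# -> [[ 50.  20. 100.  70.]]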
    def _img_transform(self, img, resize, resize_dims, crop, flip):
        ida_rot = np.eye(2)
        ida_tran = np.zeros(2)
        # adjust image
        img = img.crop(crop)
        img = img.resize(resize_dims)
        if flip:
            img = img.transpose(method=Image.FLIP_LEFT_RIGHT)
        # post-homography transformation (the flip is intentionally not
        # folded into ida_mat; see the class docstring)
        ida_rot *= resize
        ida_tran -= np.array(crop[:2]) * resize
        ida_mat = np.eye(3)
        ida_mat[:2, :2] = ida_rot
        ida_mat[:2, 2] = ida_tran
        return img, ida_mat
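As a sanity check on _img_transform, a self-contained sketch (not part of the file) verifying that ida_mat reproduces the crop-then-resize pixel mapping; the crop box and sample point are arbitrary:

import numpy as np

crop = (0, 260, 1600, 900)    # (x1, y1, x2, y2)
resize = 640 / (900 - 260)    # target height / cropped height

# Build ida_mat the same way _img_transform does (flip omitted, as above).
ida_mat = np.eye(3)
ida_mat[:2, :2] *= resize
ida_mat[:2, 2] = -np.array(crop[:2]) * resize

p = np.array([800.0, 500.0, 1.0])                  # a pixel in the original image
expected = (p[:2] - np.array(crop[:2])) * resize   # crop, then resize
assert np.allclose((ida_mat @ p)[:2], expected)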
    def _sample_augmentation(self, results):
        # Reuse previously sampled parameters so repeated applications on the
        # same sample share one augmentation.
        if 'CropResizeFlipImage_param' in results['aug_param'].keys():
            return results['aug_param']['CropResizeFlipImage_param']
        crop = self.data_aug_conf["crop"]
        if self.training:
            resized_h = random.choice(self.data_aug_conf["reisze"])
            resized_w = resized_h / (crop[3] - crop[1]) * (crop[2] - crop[0])
            resize = resized_h / (crop[3] - crop[1])
            resize_dims = (int(resized_w), int(resized_h))
            flip = False
            if self.data_aug_conf["rand_flip"] and np.random.choice([0, 1]):
                flip = True
        else:
            resized_h = random.choice(self.data_aug_conf["reisze"])
            assert len(self.data_aug_conf["reisze"]) == 1
            resized_w = resized_h / (crop[3] - crop[1]) * (crop[2] - crop[0])
            resize = resized_h / (crop[3] - crop[1])
            resize_dims = (int(resized_w), int(resized_h))
            flip = False
        results['aug_param']['CropResizeFlipImage_param'] = (resize, resize_dims, crop, flip)
        return resize, resize_dims, crop, flip
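A minimal usage sketch of the class above (assuming it is importable in scope), with a single fake camera; the matrix values are placeholders and the dict keys mirror what __call__ reads:

import numpy as np

K = np.eye(4)
K[:3, :3] = np.array([[1000., 0., 800.],   # placeholder intrinsics
                      [0., 1000., 450.],
                      [0., 0., 1.]])
results = dict(
    img=[np.zeros((900, 1600, 3), dtype=np.float32)],  # one 1600x900 view
    cam2img=[K.copy()],
    lidar2cam=[np.eye(4)],
    lidar2img=[K.copy()],
)
aug = CropResizeFlipImage(
    data_aug_conf=dict(reisze=[640], crop=(0, 260, 1600, 900), rand_flip=False),
    training=False)
out = aug(results)
print(out['img'][0].shape)         # (640, 1600, 3): cropped to 640 rows, no rescale
print(out['cam2img'][0][:3, :3])   # intrinsics shifted by the crop offset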
@PIPELINES.register_module()
class GlobalRotScaleTransImage(object):
    """Randomly rotate, scale and flip the scene in BEV, updating the
    projection matrices and ground-truth boxes accordingly.

    Args:
        rot_range (list): Rotation range in degrees (converted to radians
            internally).
        scale_ratio_range (list): Scale ratio range.
        translation_std (list): Std of the translation noise (translation is
            not yet supported; see the TODO in __call__).
        reverse_angle (bool): Whether to negate the angle when rotating the
            ground-truth boxes.
        training (bool): Whether the transform runs in training mode.
        flip_dx_ratio (float): Probability of flipping along x.
        flip_dy_ratio (float): Probability of flipping along y.
        only_gt (bool): If True, only transform the ground truth, not the
            projection matrices.
    """

    def __init__(
        self,
        rot_range=[-0.3925, 0.3925],
        scale_ratio_range=[0.95, 1.05],
        translation_std=[0, 0, 0],
        reverse_angle=False,
        training=True,
        flip_dx_ratio=0.5,
        flip_dy_ratio=0.5,
        only_gt=False,
    ):
        self.rot_range = rot_range
        self.scale_ratio_range = scale_ratio_range
        self.translation_std = translation_std
        self.reverse_angle = reverse_angle
        self.training = training
        self.flip_dx_ratio = flip_dx_ratio
        self.flip_dy_ratio = flip_dy_ratio
        self.only_gt = only_gt
    def __call__(self, results):
        """Apply the sampled rotation, scaling and flips to the projection
        matrices and ground-truth boxes.

        Args:
            results (dict): Result dict from the loading pipeline.
        Returns:
            dict: Updated result dict.
        """
        if 'aug_param' not in results.keys():
            results['aug_param'] = {}
        rot_angle, scale_ratio, flip_dx, flip_dy, _, _ = self._sample_augmentation(results)

        # random rotation
        if not self.only_gt:
            self.rotate_bev_along_z(results, rot_angle)
        if self.reverse_angle:
            rot_angle *= -1
        results["gt_bboxes_3d"].rotate(np.array(rot_angle))

        # random scaling
        if not self.only_gt:
            self.scale_xyz(results, scale_ratio)
        results["gt_bboxes_3d"].scale(scale_ratio)

        # random flips
        if flip_dx:
            if not self.only_gt:
                self.flip_along_x(results)
            results["gt_bboxes_3d"].flip(bev_direction='vertical')
        if flip_dy:
            if not self.only_gt:
                self.flip_along_y(results)
            results["gt_bboxes_3d"].flip(bev_direction='horizontal')

        # TODO: support translation
        return results
    def _sample_augmentation(self, results):
        if 'GlobalRotScaleTransImage_param' in results['aug_param'].keys():
            return results['aug_param']['GlobalRotScaleTransImage_param']
        else:
            rot_angle = np.random.uniform(*self.rot_range) / 180 * np.pi
            scale_ratio = np.random.uniform(*self.scale_ratio_range)
            flip_dx = np.random.uniform() < self.flip_dx_ratio
            flip_dy = np.random.uniform() < self.flip_dy_ratio

            # generate bda_mat: the inverse of the composed BEV augmentation
            rot_sin = torch.sin(torch.tensor(rot_angle))
            rot_cos = torch.cos(torch.tensor(rot_angle))
            rot_mat = torch.Tensor([[rot_cos, -rot_sin, 0],
                                    [rot_sin, rot_cos, 0],
                                    [0, 0, 1]])
            scale_mat = torch.Tensor([[scale_ratio, 0, 0],
                                      [0, scale_ratio, 0],
                                      [0, 0, scale_ratio]])
            flip_mat = torch.Tensor([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
            if flip_dx:
                flip_mat = flip_mat @ torch.Tensor([[-1, 0, 0], [0, 1, 0], [0, 0, 1]])
            if flip_dy:
                flip_mat = flip_mat @ torch.Tensor([[1, 0, 0], [0, -1, 0], [0, 0, 1]])
            bda_mat = flip_mat @ (scale_mat @ rot_mat)
            bda_mat = torch.inverse(bda_mat)
            results['aug_param']['GlobalRotScaleTransImage_param'] = (
                rot_angle, scale_ratio, flip_dx, flip_dy, bda_mat, self.only_gt)
            return rot_angle, scale_ratio, flip_dx, flip_dy, bda_mat, self.only_gt
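For intuition, a small self-contained check (not part of the file) that bda_mat, as composed above, maps an augmented BEV point back to its original coordinates; the angle, scale and flip values are arbitrary:

import math
import torch

rot_angle, scale_ratio, flip_dx = 0.1, 1.02, True  # arbitrary sample
c, s = math.cos(rot_angle), math.sin(rot_angle)
rot_mat = torch.Tensor([[c, -s, 0], [s, c, 0], [0, 0, 1]])
scale_mat = torch.eye(3) * scale_ratio
flip_mat = torch.Tensor([[-1, 0, 0], [0, 1, 0], [0, 0, 1]]) if flip_dx else torch.eye(3)

bda_mat = torch.inverse(flip_mat @ (scale_mat @ rot_mat))
p = torch.tensor([3.0, -2.0, 0.5])
p_aug = flip_mat @ (scale_mat @ (rot_mat @ p))
assert torch.allclose(bda_mat @ p_aug, p)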
    def rotate_bev_along_z(self, results, angle):
        rot_cos = np.cos(angle)
        rot_sin = np.sin(angle)
        rot_mat = np.array([[rot_cos, -rot_sin, 0, 0],
                            [rot_sin, rot_cos, 0, 0],
                            [0, 0, 1, 0],
                            [0, 0, 0, 1]])
        rot_mat_inv = np.linalg.inv(rot_mat)
        # Right-multiplying by the inverse rotation keeps projections of the
        # rotated lidar points consistent with the (unchanged) images.
        num_view = len(results["lidar2img"])
        for view in range(num_view):
            results["lidar2img"][view] = np.matmul(results["lidar2img"][view], rot_mat_inv)
            results['lidar2cam'][view] = np.matmul(results['lidar2cam'][view], rot_mat_inv)
        return
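The update rule above can be verified directly: rotating points by R while right-multiplying the projection by R^{-1} leaves the projected result unchanged. A standalone check with a random stand-in matrix:

import numpy as np

angle = 0.2
c, s = np.cos(angle), np.sin(angle)
R = np.array([[c, -s, 0, 0], [s, c, 0, 0], [0, 0, 1, 0], [0, 0, 0, 1]])

lidar2img = np.random.rand(4, 4)        # stand-in projection matrix
p = np.array([10.0, 5.0, -1.0, 1.0])    # homogeneous lidar point

assert np.allclose((lidar2img @ np.linalg.inv(R)) @ (R @ p), lidar2img @ p)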
    def scale_xyz(self, results, scale_ratio):
        scale_mat = np.array([
            [scale_ratio, 0, 0, 0],
            [0, scale_ratio, 0, 0],
            [0, 0, scale_ratio, 0],
            [0, 0, 0, 1],
        ])
        scale_mat_inv = np.linalg.inv(scale_mat)
        num_view = len(results["lidar2img"])
        for view in range(num_view):
            results["lidar2img"][view] = np.matmul(results["lidar2img"][view], scale_mat_inv)
            results['lidar2cam'][view] = np.matmul(results['lidar2cam'][view], scale_mat_inv)
        return
    def flip_along_x(self, results):
        flip_mat = np.array([
            [-1, 0, 0, 0],
            [0, 1, 0, 0],
            [0, 0, 1, 0],
            [0, 0, 0, 1],
        ]).astype(np.float32)
        flip_mat_inv = np.linalg.inv(flip_mat)
        num_view = len(results["lidar2img"])
        for view in range(num_view):
            results["lidar2img"][view] = np.matmul(results["lidar2img"][view], flip_mat_inv)
            results['lidar2cam'][view] = np.matmul(results['lidar2cam'][view], flip_mat_inv)
        return
    def flip_along_y(self, results):
        flip_mat = np.array([
            [1, 0, 0, 0],
            [0, -1, 0, 0],
            [0, 0, 1, 0],
            [0, 0, 0, 1],
        ]).astype(np.float32)
        flip_mat_inv = np.linalg.inv(flip_mat)
        num_view = len(results["lidar2img"])
        for view in range(num_view):
            results["lidar2img"][view] = np.matmul(results["lidar2img"][view], flip_mat_inv)
            results['lidar2cam'][view] = np.matmul(results['lidar2cam'][view], flip_mat_inv)
        return
projects/mmdet3d_plugin/datasets/pipelines/dd3d_mapper.py
0 → 100644
View file @
4cd43886
import copy

import numpy as np
import torch
from mmcv.parallel.data_container import DataContainer as DC
from mmdet.datasets.builder import PIPELINES

from projects.mmdet3d_plugin.dd3d.datasets.transform_utils import annotations_to_instances
from projects.mmdet3d_plugin.dd3d.structures.pose import Pose
from projects.mmdet3d_plugin.dd3d.utils.tasks import TaskManager


@PIPELINES.register_module()
class DD3DMapper:
    def __init__(
        self,
        is_train: bool = True,
        tasks=dict(box2d_on=True, box3d_on=True),
    ):
        self.is_train = is_train
        self.task_manager = TaskManager(**tasks)
    def __call__(self, results):
        if results['mono_input_dict'] is None:
            return results
        mono_input_dict = []
        for dataset_dict in results['mono_input_dict']:
            dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by the code below
            image_shape = results['img'].data.shape[-2:]
            intrinsics = None
            if "intrinsics" in dataset_dict:
                intrinsics = dataset_dict['intrinsics']
                if not torch.is_tensor(intrinsics):
                    intrinsics = np.reshape(intrinsics, (3, 3)).astype(np.float32)
                    intrinsics = torch.as_tensor(intrinsics)
                # NOTE: intrinsics = transforms.apply_intrinsics(intrinsics)
                dataset_dict["intrinsics"] = intrinsics
                dataset_dict["inv_intrinsics"] = torch.linalg.inv(dataset_dict['intrinsics'])
            if "pose" in dataset_dict:
                pose = Pose(wxyz=np.float32(dataset_dict["pose"]["wxyz"]),
                            tvec=np.float32(dataset_dict["pose"]["tvec"]))
                dataset_dict["pose"] = pose
                # NOTE: no transforms affect the global pose.
            if "extrinsics" in dataset_dict:
                extrinsics = Pose(
                    wxyz=np.float32(dataset_dict["extrinsics"]["wxyz"]),
                    tvec=np.float32(dataset_dict["extrinsics"]["tvec"])
                )
                dataset_dict["extrinsics"] = extrinsics
            if not self.task_manager.has_detection_task:
                dataset_dict.pop("annotations", None)
            if "annotations" in dataset_dict:
                for anno in dataset_dict["annotations"]:
                    if not self.task_manager.has_detection_task:
                        anno.pop("bbox", None)
                        anno.pop("bbox_mode", None)
                    if not self.task_manager.box3d_on:
                        anno.pop("bbox3d", None)
                annos = [anno for anno in dataset_dict["annotations"] if anno.get("iscrowd", 0) == 0]
                if annos and 'bbox3d' in annos[0]:
                    # Remove boxes with a negative z-value for the center.
                    annos = [anno for anno in annos if anno['bbox3d'][6] > 0]
                instances = annotations_to_instances(
                    annos,
                    image_shape,  # TODO: the effect of the shape?
                    intrinsics=intrinsics.numpy(),
                )
                if self.is_train:
                    # instances = d2_utils.filter_empty_instances(instances)
                    m = instances.gt_boxes.nonempty(threshold=1e-5)
                    instances = instances[m]
                    annos = [anno for tmp_m, anno in zip(m, annos) if tmp_m]
                dataset_dict["instances"] = instances
                dataset_dict['annotations'] = annos
            mono_input_dict.append(dataset_dict)
        # TODO: drop batches that have no annotations?
        box_num = 0
        for dataset_dict in mono_input_dict:
            box_num += dataset_dict["instances"].gt_boxes.tensor.shape[0]
        if box_num == 0:
            return None
        mono_input_dict = DC(mono_input_dict, cpu_only=True)
        results['mono_input_dict'] = mono_input_dict
        return results
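For orientation, a hypothetical placement of DD3DMapper in a pipeline config. Note that __call__ returns None when no valid boxes remain (box_num == 0), so the surrounding dataset/collate logic is expected to filter such samples; the other entries here are illustrative.

train_pipeline = [
    # ... image loading and augmentation transforms ...
    dict(type='DD3DMapper', is_train=True,
         tasks=dict(box2d_on=True, box3d_on=True)),
    # ... formatting / collection transforms ...
]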