Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
TS-MODELS-OPT
training
Autonomous-Driving-models
Commits
007f2e68
Commit
007f2e68
authored
Apr 08, 2026
by
雍大凯
Browse files
将子模块转换为普通目录
parent
19472568
Changes
192
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
3010 additions
and
0 deletions
+3010
-0
docker-hub/BEVFormer/BEVFormer/projects/mmdet3d_plugin/datasets/samplers/__init__.py
...mer/projects/mmdet3d_plugin/datasets/samplers/__init__.py
+4
-0
docker-hub/BEVFormer/BEVFormer/projects/mmdet3d_plugin/datasets/samplers/distributed_sampler.py
...s/mmdet3d_plugin/datasets/samplers/distributed_sampler.py
+41
-0
docker-hub/BEVFormer/BEVFormer/projects/mmdet3d_plugin/datasets/samplers/group_sampler.py
...rojects/mmdet3d_plugin/datasets/samplers/group_sampler.py
+110
-0
docker-hub/BEVFormer/BEVFormer/projects/mmdet3d_plugin/datasets/samplers/sampler.py
...rmer/projects/mmdet3d_plugin/datasets/samplers/sampler.py
+7
-0
docker-hub/BEVFormer/BEVFormer/projects/mmdet3d_plugin/dd3d/__init__.py
...Former/BEVFormer/projects/mmdet3d_plugin/dd3d/__init__.py
+1
-0
docker-hub/BEVFormer/BEVFormer/projects/mmdet3d_plugin/dd3d/datasets/__init__.py
...VFormer/projects/mmdet3d_plugin/dd3d/datasets/__init__.py
+0
-0
docker-hub/BEVFormer/BEVFormer/projects/mmdet3d_plugin/dd3d/datasets/nuscenes.py
...VFormer/projects/mmdet3d_plugin/dd3d/datasets/nuscenes.py
+360
-0
docker-hub/BEVFormer/BEVFormer/projects/mmdet3d_plugin/dd3d/datasets/transform_utils.py
.../projects/mmdet3d_plugin/dd3d/datasets/transform_utils.py
+136
-0
docker-hub/BEVFormer/BEVFormer/projects/mmdet3d_plugin/dd3d/layers/iou_loss.py
...BEVFormer/projects/mmdet3d_plugin/dd3d/layers/iou_loss.py
+71
-0
docker-hub/BEVFormer/BEVFormer/projects/mmdet3d_plugin/dd3d/layers/normalization.py
...rmer/projects/mmdet3d_plugin/dd3d/layers/normalization.py
+40
-0
docker-hub/BEVFormer/BEVFormer/projects/mmdet3d_plugin/dd3d/layers/smooth_l1_loss.py
...mer/projects/mmdet3d_plugin/dd3d/layers/smooth_l1_loss.py
+80
-0
docker-hub/BEVFormer/BEVFormer/projects/mmdet3d_plugin/dd3d/modeling/__init__.py
...VFormer/projects/mmdet3d_plugin/dd3d/modeling/__init__.py
+1
-0
docker-hub/BEVFormer/BEVFormer/projects/mmdet3d_plugin/dd3d/modeling/core.py
...r/BEVFormer/projects/mmdet3d_plugin/dd3d/modeling/core.py
+217
-0
docker-hub/BEVFormer/BEVFormer/projects/mmdet3d_plugin/dd3d/modeling/disentangled_box3d_loss.py
...s/mmdet3d_plugin/dd3d/modeling/disentangled_box3d_loss.py
+46
-0
docker-hub/BEVFormer/BEVFormer/projects/mmdet3d_plugin/dd3d/modeling/fcos2d.py
...BEVFormer/projects/mmdet3d_plugin/dd3d/modeling/fcos2d.py
+382
-0
docker-hub/BEVFormer/BEVFormer/projects/mmdet3d_plugin/dd3d/modeling/fcos3d.py
...BEVFormer/projects/mmdet3d_plugin/dd3d/modeling/fcos3d.py
+427
-0
docker-hub/BEVFormer/BEVFormer/projects/mmdet3d_plugin/dd3d/modeling/nuscenes_dd3d.py
...er/projects/mmdet3d_plugin/dd3d/modeling/nuscenes_dd3d.py
+522
-0
docker-hub/BEVFormer/BEVFormer/projects/mmdet3d_plugin/dd3d/modeling/prepare_targets.py
.../projects/mmdet3d_plugin/dd3d/modeling/prepare_targets.py
+242
-0
docker-hub/BEVFormer/BEVFormer/projects/mmdet3d_plugin/dd3d/structures/__init__.py
...ormer/projects/mmdet3d_plugin/dd3d/structures/__init__.py
+2
-0
docker-hub/BEVFormer/BEVFormer/projects/mmdet3d_plugin/dd3d/structures/boxes3d.py
...Former/projects/mmdet3d_plugin/dd3d/structures/boxes3d.py
+321
-0
No files found.
docker-hub/BEVFormer/BEVFormer/projects/mmdet3d_plugin/datasets/samplers/__init__.py
0 → 100644
View file @
007f2e68
from
.group_sampler
import
DistributedGroupSampler
from
.distributed_sampler
import
DistributedSampler
from
.sampler
import
SAMPLER
,
build_sampler
docker-hub/BEVFormer/BEVFormer/projects/mmdet3d_plugin/datasets/samplers/distributed_sampler.py
0 → 100644
View file @
007f2e68
import
math
import
torch
from
torch.utils.data
import
DistributedSampler
as
_DistributedSampler
from
.sampler
import
SAMPLER
@SAMPLER.register_module()
class DistributedSampler(_DistributedSampler):
    """Non-shuffling distributed sampler that gives each rank a contiguous slice.

    Unlike the commented-out strided selection
    (``indices[rank:total_size:num_replicas]``), every rank receives the
    consecutive range ``[rank * per_replica, (rank + 1) * per_replica)`` of the
    padded index list.

    Args:
        dataset: Dataset to sample from (forwarded to the torch base class).
        num_replicas (int, optional): Forwarded to the torch base class.
        rank (int, optional): Forwarded to the torch base class.
        shuffle (bool): Stored by the base class. Iterating with
            ``shuffle=True`` is not supported and raises ``AssertionError``.
        seed (int, optional): Stored on the instance; ``None`` is replaced by 0.
            It is not read by ``__iter__``.
    """

    def __init__(self,
                 dataset=None,
                 num_replicas=None,
                 rank=None,
                 shuffle=True,
                 seed=0):
        super().__init__(
            dataset, num_replicas=num_replicas, rank=rank, shuffle=shuffle)
        # for the compatibility from PyTorch 1.3+
        self.seed = seed if seed is not None else 0

    def __iter__(self):
        """Return an iterator over this rank's contiguous block of indices.

        Raises:
            AssertionError: If the sampler was built with ``shuffle=True``, or
                if the padded/sliced index lists do not have the sizes computed
                by the base class.
        """
        if self.shuffle:
            # Raised explicitly rather than via `assert False`: a bare assert is
            # stripped under `python -O`, after which `indices` would be unbound
            # and the failure would surface as a confusing UnboundLocalError.
            raise AssertionError(
                'DistributedSampler does not support shuffle=True; '
                'construct it with shuffle=False.')

        indices = torch.arange(len(self.dataset)).tolist()
        if not indices:
            # Nothing to pad or slice; also avoids dividing by zero below.
            return iter(indices)

        # add extra samples to make it evenly divisible
        # in case that indices is shorter than half of total_size
        indices = (indices *
                   math.ceil(self.total_size / len(indices)))[:self.total_size]
        assert len(indices) == self.total_size

        # subsample: one contiguous chunk per rank
        per_replicas = self.total_size // self.num_replicas
        indices = indices[self.rank * per_replicas:
                          (self.rank + 1) * per_replicas]
        assert len(indices) == self.num_samples

        return iter(indices)
docker-hub/BEVFormer/BEVFormer/projects/mmdet3d_plugin/datasets/samplers/group_sampler.py
0 → 100644
View file @
007f2e68
# Copyright (c) OpenMMLab. All rights reserved.
import
math
import
numpy
as
np
import
torch
from
mmcv.runner
import
get_dist_info
from
torch.utils.data
import
Sampler
from
.sampler
import
SAMPLER
import
random
from
IPython
import
embed
@SAMPLER.register_module()
class DistributedGroupSampler(Sampler):
    """Group-aware sampler for distributed training.

    Samples are grouped by ``dataset.flag``. Each group is shuffled, padded by
    repetition to a multiple of ``samples_per_gpu * num_replicas``, and the
    concatenated list is then shuffled in chunks of ``samples_per_gpu`` so that
    every chunk holds samples of a single group. Each rank finally takes one
    contiguous slice of ``num_samples`` indices.

    .. note::
        Dataset is assumed to be of constant size.

    Arguments:
        dataset: Dataset used for sampling; must expose a ``flag`` attribute
            accepted by ``np.bincount``.
        samples_per_gpu (int): Chunk size used for padding and chunk shuffling.
        num_replicas (optional): Number of processes participating in
            distributed training. Defaults to the value from ``get_dist_info``.
        rank (optional): Rank of the current process within num_replicas.
            Defaults to the value from ``get_dist_info``.
        seed (int, optional): Added to the epoch to seed the shuffling
            generator. ``None`` is treated as 0. Default: 0.
    """

    def __init__(self,
                 dataset,
                 samples_per_gpu=1,
                 num_replicas=None,
                 rank=None,
                 seed=0):
        default_rank, default_world_size = get_dist_info()

        self.dataset = dataset
        self.samples_per_gpu = samples_per_gpu
        self.num_replicas = (
            default_world_size if num_replicas is None else num_replicas)
        self.rank = default_rank if rank is None else rank
        self.epoch = 0
        self.seed = 0 if seed is None else seed

        assert hasattr(self.dataset, 'flag')
        self.flag = self.dataset.flag
        self.group_sizes = np.bincount(self.flag)

        # Per-rank sample count: every group is rounded up to a whole number
        # of `samples_per_gpu` chunks per replica.
        self.num_samples = sum(
            int(math.ceil(
                size * 1.0 / self.samples_per_gpu / self.num_replicas))
            * self.samples_per_gpu
            for size in self.group_sizes)
        self.total_size = self.num_samples * self.num_replicas

    def __iter__(self):
        # deterministically shuffle based on epoch
        rng = torch.Generator()
        rng.manual_seed(self.epoch + self.seed)

        chunk = self.samples_per_gpu
        padded = []
        for group_id, size in enumerate(self.group_sizes):
            if size <= 0:
                continue
            members = np.where(self.flag == group_id)[0]
            assert len(members) == size
            # add .numpy() to avoid bug when selecting indice in parrots.
            # TODO: check whether torch.randperm() can be replaced by
            # numpy.random.permutation().
            order = torch.randperm(int(size), generator=rng).numpy()
            members = members[list(order)].tolist()

            wanted = int(
                math.ceil(size * 1.0 / chunk / self.num_replicas)
            ) * chunk * self.num_replicas
            # pad by repeating the shuffled group, then a partial repeat
            repeats, remainder = divmod(wanted - len(members), int(size))
            padded.extend(members * (repeats + 1) + members[:remainder])

        assert len(padded) == self.total_size

        # Shuffle whole chunks so a chunk never mixes groups.
        chunk_order = torch.randperm(len(padded) // chunk, generator=rng)
        shuffled = []
        for chunk_id in chunk_order.tolist():
            start = chunk_id * chunk
            shuffled.extend(padded[start:start + chunk])

        # subsample: contiguous slice for this rank
        offset = self.num_samples * self.rank
        mine = shuffled[offset:offset + self.num_samples]
        assert len(mine) == self.num_samples

        return iter(mine)

    def __len__(self):
        return self.num_samples

    def set_epoch(self, epoch):
        """Record the epoch used (together with ``seed``) to seed shuffling."""
        self.epoch = epoch
docker-hub/BEVFormer/BEVFormer/projects/mmdet3d_plugin/datasets/samplers/sampler.py
0 → 100644
View file @
007f2e68
from
mmcv.utils.registry
import
Registry
,
build_from_cfg
# Registry named 'sampler'; `build_sampler` below looks sampler types up in it.
SAMPLER = Registry('sampler')
def build_sampler(cfg, default_args=None):
    """Build a sampler from ``cfg`` using the ``SAMPLER`` registry.

    Args:
        cfg: Config passed straight to ``build_from_cfg``.
        default_args: Passed straight to ``build_from_cfg``. Now optional, so
            callers that have no defaults need not pass a placeholder.

    Returns:
        Whatever ``build_from_cfg`` constructs for ``cfg``.
    """
    return build_from_cfg(cfg, SAMPLER, default_args)
docker-hub/BEVFormer/BEVFormer/projects/mmdet3d_plugin/dd3d/__init__.py
0 → 100644
View file @
007f2e68
from
.modeling
import
*
\ No newline at end of file
docker-hub/BEVFormer/BEVFormer/projects/mmdet3d_plugin/dd3d/datasets/__init__.py
0 → 100644
View file @
007f2e68
docker-hub/BEVFormer/BEVFormer/projects/mmdet3d_plugin/dd3d/datasets/nuscenes.py
0 → 100644
View file @
007f2e68
# Copyright 2021 Toyota Research Institute. All rights reserved.
#import functools
from
collections
import
OrderedDict
import
numpy
as
np
import
seaborn
as
sns
from
torch.utils.data
import
Dataset
from
tqdm
import
tqdm
#from detectron2.data import MetadataCatalog
from
detectron2.structures.boxes
import
BoxMode
from
nuscenes.eval.detection.utils
import
category_to_detection_name
from
nuscenes.nuscenes
import
NuScenes
from
nuscenes.utils.splits
import
create_splits_scenes
#from tridet.data import collect_dataset_dicts
from
projects.mmdet3d_plugin.dd3d.structures.boxes3d
import
GenericBoxes3D
from
projects.mmdet3d_plugin.dd3d.structures.pose
import
Pose
from
projects.mmdet3d_plugin.dd3d.utils.geometry
import
project_points3d
from
projects.mmdet3d_plugin.dd3d.utils.visualization
import
float_to_uint8_color
# https://github.com/nutonomy/nuscenes-devkit/blob/9b209638ef3dee6d0cdc5ac700c493747f5b35fe/python-sdk/nuscenes/utils/splits.py#L189
# - train/val/test: The standard splits of the nuScenes dataset (700/150/150 scenes).
# - mini_train/mini_val: Train and val splits of the mini subset used for visualization and debugging (8/2 scenes).
# - train_detect/train_track: Two halves of the train split used for separating the training sets of detector and
# tracker if required
# Dataset name -> nuScenes version string handed to the `NuScenes` constructor.
DATASET_NAME_TO_VERSION = {
    **dict.fromkeys(
        ("nusc_train", "nusc_val", "nusc_val-subsample-8", "nusc_trainval"),
        "v1.0-trainval",
    ),
    "nusc_test": "v1.0-test",
    **dict.fromkeys(("nusc_mini_train", "nusc_mini_val"), "v1.0-mini"),
}

# Default set of datum (camera) names kept by `NuscenesDataset`.
CAMERA_NAMES = (
    'CAM_FRONT_LEFT',
    'CAM_FRONT',
    'CAM_FRONT_RIGHT',
    'CAM_BACK_LEFT',
    'CAM_BACK',
    'CAM_BACK_RIGHT',
)

# Attribute name -> id. Ids restart at 0 within each attribute family.
ATTRIBUTE_IDS = {
    **{
        attr: idx
        for idx, attr in enumerate(
            ('vehicle.moving', 'vehicle.parked', 'vehicle.stopped'))
    },
    **{
        attr: idx
        for idx, attr in enumerate(
            ('pedestrian.moving', 'pedestrian.standing',
             'pedestrian.sitting_lying_down'))
    },
    **{
        attr: idx
        for idx, attr in enumerate(
            ('cycle.with_rider', 'cycle.without_rider'))
    },
}

# Detection category name -> contiguous class id (insertion order = id order).
CATEGORY_IDS = OrderedDict(
    (category, idx)
    for idx, category in enumerate((
        'barrier',
        'bicycle',
        'bus',
        'car',
        'construction_vehicle',
        'motorcycle',
        'pedestrian',
        'traffic_cone',
        'trailer',
        'truck',
    ))
)

# Ten-colour "bright" seaborn palette, converted by `float_to_uint8_color`.
COLORS = list(
    map(float_to_uint8_color, sns.color_palette("bright", n_colors=10)))

# Category name -> colour, picked from COLORS by palette index.
COLORMAP = OrderedDict(
    (category, COLORS[palette_idx])
    for category, palette_idx in (
        ('barrier', 8),  # yellow
        ('bicycle', 0),  # blue
        ('bus', 6),  # pink
        ('car', 2),  # green
        ('construction_vehicle', 7),  # gray
        ('motorcycle', 4),  # purple
        ('pedestrian', 1),  # orange
        ('traffic_cone', 3),  # red
        ('trailer', 9),  # skyblue
        ('truck', 5),  # brown
    )
)

# Also used as the "no attribute" id assigned to annotations without one.
MAX_NUM_ATTRIBUTES = 3
def
_compute_iou
(
box1
,
box2
):
"""
Parameters
----------
box1, box2:
(x1, y1, x2, y2)
"""
xx1
=
max
(
box1
[
0
],
box2
[
0
])
yy1
=
max
(
box1
[
1
],
box2
[
1
])
xx2
=
min
(
box1
[
2
],
box2
[
2
])
yy2
=
min
(
box1
[
3
],
box2
[
3
])
if
xx1
>=
xx2
or
yy1
>=
yy2
:
return
0.
inter
=
(
xx2
-
xx1
)
*
(
yy2
-
yy1
)
a1
=
(
box1
[
2
]
-
box1
[
0
])
*
(
box1
[
3
]
-
box1
[
1
])
a2
=
(
box2
[
2
]
-
box2
[
0
])
*
(
box2
[
3
]
-
box2
[
1
])
return
inter
/
(
a1
+
a2
-
inter
)
class NuscenesDataset(Dataset):
    """Per-image nuScenes dataset that yields detectron2-style dicts.

    Every item corresponds to one datum (one entry of a sample's ``data``
    mapping whose name is in ``datum_names``) and is returned as an
    ``OrderedDict`` with image metadata, intrinsics, poses, ego speed and a
    list of instance annotations.

    Args:
        name (str): Key of ``DATASET_NAME_TO_VERSION`` selecting the split.
        data_root (str): Passed to ``NuScenes`` as ``dataroot``.
        datum_names: Datum names to keep from each sample.
        min_num_lidar_points (int): Annotations whose lidar + radar point count
            is below this are dropped.
        min_box_visibility (float): Annotations whose projected 2D box has an
            IoU with its image-clipped version below this are dropped.
        **unused: Accepted and ignored.
    """

    def __init__(self,
                 name,
                 data_root,
                 datum_names=CAMERA_NAMES,
                 min_num_lidar_points=3,
                 min_box_visibility=0.2,
                 **unused):
        self.data_root = data_root
        assert name in DATASET_NAME_TO_VERSION
        version = DATASET_NAME_TO_VERSION[name]
        self.nusc = NuScenes(version=version, dataroot=data_root, verbose=True)

        self.datum_names = datum_names
        self.min_num_lidar_points = min_num_lidar_points
        self.min_box_visibility = min_box_visibility

        self.dataset_item_info = self._build_dataset_item_info(name)

        # Index instance tokens to their IDs
        self._instance_token_to_id = self._index_instance_tokens()

        # Construct the mapping from datum_token (image id) to index
        print("Generating the mapping from image id to idx...")
        self.datumtoken2idx = {
            item[0]: idx for idx, item in enumerate(self.dataset_item_info)
        }
        print("Done.")

    def _build_dataset_item_info(self, name):
        """List ``(datum_token, sample_token, scene_name, sample_idx, datum_name)``.

        Scenes are walked sample by sample through each sample's ``'next'``
        token. For names ending in ``'subsample-8'`` only samples whose index
        is a multiple of 8 contribute items.
        """
        scenes_in_split = self._get_split_scenes(name)
        subsample = name.endswith('subsample-8')

        dataset_items = []
        for _, scene_token in tqdm(scenes_in_split):
            scene = self.nusc.get('scene', scene_token)
            sample_token = scene['first_sample_token']
            for sample_idx in range(scene['nbr_samples']):
                current_token = sample_token
                sample = self.nusc.get('sample', current_token)
                # Advance the cursor *before* any skip. Skipping first would
                # leave the cursor on the same sample, so later indices would
                # be paired with the wrong (earlier) samples.
                sample_token = sample['next']

                if subsample and sample_idx % 8 > 0:
                    # Sample-level subsampling.
                    continue

                for datum_name, datum_token in sample['data'].items():
                    if datum_name not in self.datum_names:
                        continue
                    dataset_items.append(
                        (datum_token, current_token, scene['name'],
                         sample_idx, datum_name))
        return dataset_items

    def _get_split_scenes(self, name):
        """Return ``[(scene_name, scene_token), ...]`` for the named split."""
        scenes_in_splits = create_splits_scenes()
        if name == "nusc_trainval":
            scenes = scenes_in_splits["train"] + scenes_in_splits["val"]
        elif name == "nusc_val-subsample-8":
            scenes = scenes_in_splits["val"]
        else:
            assert name.startswith('nusc_'), f"Invalid dataset name: {name}"
            split = name[5:]
            assert split in scenes_in_splits, f"Invalid dataset: {split}"
            scenes = scenes_in_splits[split]

        # Mapping from scene name to token.
        name_to_token = {
            scene['name']: scene['token'] for scene in self.nusc.scene
        }
        return [(scene_name, name_to_token[scene_name])
                for scene_name in scenes]

    def __len__(self):
        return len(self.dataset_item_info)

    def _build_id(self, scene_name, sample_idx, datum_name):
        """Return ``(image_id, sample_id)`` strings for one datum."""
        sample_id = f"{scene_name}_{sample_idx:03d}"
        image_id = f"{sample_id}_{datum_name}"
        return image_id, sample_id

    def _index_instance_tokens(self):
        """Index instance tokens for uniquely identifying instances across samples"""
        instance_token_to_id = {}
        for record in self.nusc.sample_annotation:
            instance_token = record['instance_token']
            if instance_token not in instance_token_to_id:
                # Ids are assigned densely in first-seen order.
                instance_token_to_id[instance_token] = len(
                    instance_token_to_id)
        return instance_token_to_id

    def get_instance_annotations(self, annotation_list, K, image_shape,
                                 pose_WS):
        """Convert box objects of one image into annotation dicts.

        Args:
            annotation_list: Objects exposing ``token``, ``orientation``,
                ``center`` and ``wlh`` (as returned alongside the filename and
                intrinsics by ``NuScenes.get_sample_data``).
            K: Intrinsics passed to ``project_points3d``.
            image_shape: ``(height, width)`` used to clip the projected box.
            pose_WS: Unused; kept for interface compatibility.

        Returns:
            list[OrderedDict]: One dict per kept annotation with keys
            ``category_id``, ``bbox3d``, ``bbox``, ``bbox_mode``, ``track_id``,
            ``attribute_id`` and ``speed``.
        """
        annotations = []
        for _ann in annotation_list:
            ann = self.nusc.get('sample_annotation', _ann.token)
            if (ann['num_lidar_pts'] + ann['num_radar_pts'] <
                    self.min_num_lidar_points):
                continue
            annotation = OrderedDict()

            # --------
            # Category
            # --------
            category = category_to_detection_name(ann['category_name'])
            if category is None:
                continue
            annotation['category_id'] = CATEGORY_IDS[category]

            # ------
            # 3D box
            # ------
            # NOTE: ann['rotation'], ann['translation'] is in global frame,
            # whereas `_ann` is the box returned with the sample data
            # (presumably already in the sensor frame -- confirm against the
            # nuScenes devkit).
            bbox3d = GenericBoxes3D(_ann.orientation, _ann.center, _ann.wlh)
            annotation['bbox3d'] = bbox3d.vectorize().tolist()[0]

            # --------------------------------------
            # 2D box -- project 8 corners of 3D bbox
            # --------------------------------------
            corners = project_points3d(
                bbox3d.corners.cpu().numpy().squeeze(0), K)
            left, top = corners[:, 0].min(), corners[:, 1].min()
            right, bottom = corners[:, 0].max(), corners[:, 1].max()

            x1 = max(0, left)
            y1 = max(0, top)
            x2 = min(image_shape[1], right)
            y2 = min(image_shape[0], bottom)

            # IoU between the full projected box and its in-image part acts
            # as a visibility score.
            iou = _compute_iou([left, top, right, bottom], [x1, y1, x2, y2])
            if iou < self.min_box_visibility:
                continue

            annotation['bbox'] = [x1, y1, x2, y2]
            annotation['bbox_mode'] = BoxMode.XYXY_ABS

            # --------
            # Track ID
            # --------
            annotation['track_id'] = self._instance_token_to_id[
                ann['instance_token']]

            # ---------
            # Attribute
            # ---------
            attr_tokens = ann['attribute_tokens']
            assert len(attr_tokens) < 2  # NOTE: Allow only single attribute.
            # By default, MAX_NUM_ATTRIBUTES -- this is to be ignored in loss
            # compute.
            attribute_id = MAX_NUM_ATTRIBUTES
            if attr_tokens:
                attribute = self.nusc.get('attribute', attr_tokens[0])['name']
                attribute_id = ATTRIBUTE_IDS[attribute]
            annotation['attribute_id'] = attribute_id

            # -----
            # Speed
            # -----
            vel_global = self.nusc.box_velocity(ann['token'])
            speed = np.linalg.norm(vel_global)  # NOTE: This can be NaN.
            annotation['speed'] = speed

            annotations.append(annotation)

        return annotations

    def _get_ego_velocity(self, current, max_time_diff=1.5):
        """Velocity of ego-vehicle in m/s.

        Uses the ego poses of the previous and/or next ``sample_data`` record
        (falling back to ``current`` on the missing side) and divides the
        translation difference by the timestamp difference (timestamps are
        scaled by 1e-6).

        Returns:
            np.ndarray: 3-vector; all-NaN when there is neither a previous nor
            a next record, or when the time gap exceeds ``max_time_diff``
            (doubled when both neighbours are used).
        """
        has_prev = current['prev'] != ''
        has_next = current['next'] != ''

        # Cannot estimate velocity for a single annotation.
        if not has_prev and not has_next:
            return np.array([np.nan, np.nan, np.nan])

        if has_prev:
            first = self.nusc.get('sample_data', current['prev'])
        else:
            first = current

        if has_next:
            last = self.nusc.get('sample_data', current['next'])
        else:
            last = current

        pos_first = self.nusc.get('ego_pose',
                                  first['ego_pose_token'])['translation']
        pos_last = self.nusc.get('ego_pose',
                                 last['ego_pose_token'])['translation']
        pos_diff = np.float32(pos_last) - np.float32(pos_first)

        time_last = 1e-6 * last['timestamp']
        time_first = 1e-6 * first['timestamp']
        time_diff = time_last - time_first

        if has_next and has_prev:
            # If doing centered difference, allow for up to double the
            # max_time_diff.
            max_time_diff *= 2

        if time_diff > max_time_diff:
            # If time_diff is too big, don't return an estimate.
            return np.array([np.nan, np.nan, np.nan])
        else:
            return pos_diff / time_diff

    def _make_d2_dict(self, datum_token, datum, sample_token, image_id,
                      sample_id):
        """Assemble the output dict shared by both item accessors."""
        filename, _annotations, K = self.nusc.get_sample_data(datum_token)
        height, width = datum['height'], datum['width']
        d2_dict = OrderedDict(
            file_name=filename,
            height=height,
            width=width,
            image_id=image_id,
            sample_id=sample_id,
            sample_token=sample_token)

        # Intrinsics
        d2_dict['intrinsics'] = list(K.flatten())

        # Get pose of the sensor (S) from vehicle (V) frame
        _pose_VS = self.nusc.get('calibrated_sensor',
                                 datum['calibrated_sensor_token'])
        pose_VS = Pose(
            wxyz=np.float64(_pose_VS['rotation']),
            tvec=np.float64(_pose_VS['translation']))

        # Get ego-pose of the vehicle (V) from global/world (W) frame
        _pose_WV = self.nusc.get('ego_pose', datum['ego_pose_token'])
        pose_WV = Pose(
            wxyz=np.float64(_pose_WV['rotation']),
            tvec=np.float64(_pose_WV['translation']))
        pose_WS = pose_WV * pose_VS

        d2_dict['pose'] = {
            'wxyz': list(pose_WS.quat.elements),
            'tvec': list(pose_WS.tvec)
        }
        d2_dict['extrinsics'] = {
            'wxyz': list(pose_VS.quat.elements),
            'tvec': list(pose_VS.tvec)
        }

        d2_dict['ego_speed'] = np.linalg.norm(self._get_ego_velocity(datum))

        d2_dict['annotations'] = self.get_instance_annotations(
            _annotations, K, (height, width), pose_WS)

        return d2_dict

    def __getitem__(self, idx):
        """Return the dict for item ``idx``; the datum must be a key frame."""
        (datum_token, sample_token, scene_name, sample_idx,
         datum_name) = self.dataset_item_info[idx]
        datum = self.nusc.get('sample_data', datum_token)
        assert datum['is_key_frame']

        image_id, sample_id = self._build_id(scene_name, sample_idx,
                                             datum_name)
        return self._make_d2_dict(datum_token, datum, sample_token, image_id,
                                  sample_id)

    def getitem_by_datumtoken(self, datum_token):
        """Return the dict for an arbitrary datum token.

        Unlike ``__getitem__`` this neither requires a key frame nor builds
        ids: ``image_id`` and ``sample_id`` are both set to ``0``.
        """
        # idx = self.datumtoken2idx[datum_token]
        # ret = self.__getitem__(idx)
        datum = self.nusc.get('sample_data', datum_token)
        sample_token = datum['sample_token']
        return self._make_d2_dict(datum_token, datum, sample_token, 0, 0)
\ No newline at end of file
docker-hub/BEVFormer/BEVFormer/projects/mmdet3d_plugin/dd3d/datasets/transform_utils.py
0 → 100644
View file @
007f2e68
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
# Copyright 2021 Toyota Research Institute. All rights reserved.
# Adapted from detectron2:
# https://github.com/facebookresearch/detectron2/blob/master/detectron2/data/detection_utils.py
import
numpy
as
np
import
torch
from
detectron2.data
import
transforms
as
T
from
detectron2.structures
import
Boxes
,
BoxMode
,
Instances
from
projects.mmdet3d_plugin.dd3d.structures.boxes3d
import
Boxes3D
# Names exported by `from <this module> import *`.
__all__ = ["transform_instance_annotations", "annotations_to_instances"]
def transform_instance_annotations(
    annotation,
    transforms,
    image_size,
):
    """Apply geometric transforms to the boxes of a single instance annotation.

    Adapted from:
        https://github.com/facebookresearch/detectron2/blob/master/detectron2/data/detection_utils.py#L254

    Differences from the detectron2 original that are visible here:
        - The 2D box (``"bbox"``) is optional instead of assumed.
        - An optional 3D box (``"bbox3d"``) is transformed via
          ``transforms.apply_box3d``.

    Args:
        annotation (dict): Annotation of one instance. Modified in-place.
        transforms (TransformList or list[Transform]): A tuple/list is wrapped
            in ``T.TransformList`` first.
        image_size (tuple): the height, width of the transformed image.

    Returns:
        dict: The same ``annotation`` object. If ``"bbox"`` was present it is
        now the transformed box clipped to the image, and ``"bbox_mode"`` is
        ``BoxMode.XYXY_ABS``. If ``"bbox3d"`` was present it is replaced by the
        result of ``transforms.apply_box3d``.
    """
    if isinstance(transforms, (tuple, list)):
        transforms = T.TransformList(transforms)

    # (dennis.park) Here 2D bounding box is optional.
    has_box2d = "bbox" in annotation
    if has_box2d:
        assert "bbox_mode" in annotation, "'bbox' is present, but 'bbox_mode' is not."
        # bbox is 1d (per-instance bounding box)
        xyxy = BoxMode.convert(
            annotation["bbox"], annotation["bbox_mode"], BoxMode.XYXY_ABS)
        moved = transforms.apply_box(np.array([xyxy]))[0]
        # clip transformed bbox to image size: lower bound 0, upper bound
        # (w, h, w, h) built by reversing (h, w, h, w)
        upper_bound = list(image_size + image_size)[::-1]
        clipped = np.minimum(moved.clip(min=0), upper_bound)
        annotation["bbox"] = clipped
        annotation["bbox_mode"] = BoxMode.XYXY_ABS

    # Vertical flipping is not implemented (`flip_transform.py`). TODO: implement if needed.
    if "bbox3d" in annotation:
        box3d_vec = np.array(annotation["bbox3d"])
        annotation['bbox3d'] = transforms.apply_box3d(box3d_vec)

    return annotation
def _create_empty_instances(image_size):
    """Return an ``Instances`` for ``image_size`` that holds zero objects.

    The result carries empty ``gt_boxes``, an empty int64 ``gt_classes`` tensor
    and a ``gt_boxes3d`` built from no vectors with a 3x3 identity matrix.
    """
    no_boxes = Boxes([])
    no_classes = torch.tensor([], dtype=torch.int64)
    no_boxes3d = Boxes3D.from_vectors([], torch.eye(3, dtype=torch.float32))

    empty = Instances(image_size)
    empty.gt_boxes = no_boxes
    empty.gt_classes = no_classes
    empty.gt_boxes3d = no_boxes3d
    return empty
def annotations_to_instances(
    annos,
    image_size,
    intrinsics=None,
):
    """
    Create an :class:`Instances` object used by the models,
    from instance annotations in the dataset dict.

    Args:
        annos (list[dict]): a list of instance annotations in one image, each
            element for one instance. Every element needs "bbox", "bbox_mode"
            and "category_id". "bbox3d", "attribute_id" and "speed" are
            optional; their presence is decided from the first element.
        image_size (tuple): height, width
        intrinsics: Passed to ``Boxes3D.from_vectors``; required when the
            annotations carry "bbox3d".

    Returns:
        Instances:
            Contains "gt_boxes" and "gt_classes", plus "gt_boxes3d",
            "gt_attributes" and "gt_speeds" when the corresponding annotation
            keys are present. An empty ``annos`` yields the result of
            ``_create_empty_instances``.

    Raises:
        ValueError: If the number of 3D boxes differs from the number of 2D
            boxes.
        AssertionError: If "bbox3d" is present but ``intrinsics`` is None, or
            if the collected per-field lists differ in length.
    """
    if len(annos) == 0:
        return _create_empty_instances(image_size)

    first = annos[0]

    boxes = [
        BoxMode.convert(obj["bbox"], obj["bbox_mode"], BoxMode.XYXY_ABS)
        for obj in annos
    ]
    target = Instances(image_size)
    target.gt_boxes = Boxes(boxes)

    classes = [obj["category_id"] for obj in annos]
    classes = torch.tensor(classes, dtype=torch.int64)
    target.gt_classes = classes

    # Lengths of every per-annotation list actually collected. Only these are
    # compared at the end, so annotations without optional fields no longer
    # hit an unbound local in the consistency check.
    field_lengths = [len(boxes), len(classes)]

    if "bbox3d" in first:
        assert intrinsics is not None
        target.gt_boxes3d = Boxes3D.from_vectors(
            [anno['bbox3d'] for anno in annos], intrinsics)
        if len(target.gt_boxes3d) != target.gt_boxes.tensor.shape[0]:
            raise ValueError(
                f"The sizes of `gt_boxes3d` and `gt_boxes` do not match: a={len(target.gt_boxes3d)}, b={target.gt_boxes.tensor.shape[0]}."
            )

    # NOTE: add nuscenes attributes here
    # NOTE: instances will be filtered later
    # NuScenes attributes
    if "attribute_id" in first:
        attributes = [obj["attribute_id"] for obj in annos]
        target.gt_attributes = torch.tensor(attributes, dtype=torch.int64)
        field_lengths.append(len(attributes))

    # Speed (magnitude of velocity)
    if "speed" in first:
        speeds = [obj["speed"] for obj in annos]
        target.gt_speeds = torch.tensor(speeds, dtype=torch.float32)
        field_lengths.append(len(speeds))

    assert len(set(field_lengths)) == 1, \
        'the numbers of annotations should be the same'
    return target
docker-hub/BEVFormer/BEVFormer/projects/mmdet3d_plugin/dd3d/layers/iou_loss.py
0 → 100644
View file @
007f2e68
# Copyright 2021 Toyota Research Institute. All rights reserved.
# Adapted from AdelaiDet:
# https://github.com/aim-uofa/AdelaiDet/blob/master/adet/layers/iou_loss.py
import
torch
from
torch
import
nn
class IOULoss(nn.Module):
    """
    Intersection Over Union (IoU) loss which supports three
    different IoU computations:

    * IoU         (``'iou'``):        ``-log(iou)``
    * Linear IoU  (``'linear_iou'``): ``1 - iou``
    * gIoU        (``'giou'``):       ``1 - giou``

    Each box is given by four values (left, top, right, bottom); width is
    computed as ``left + right`` and height as ``top + bottom``.
    """

    def __init__(self, loc_loss_type='iou'):
        super().__init__()
        self.loc_loss_type = loc_loss_type

    def forward(self, pred, target, weight=None):
        """
        Args:
            pred: Nx4 predicted bounding boxes
            target: Nx4 target bounding boxes
            weight: N loss weight for each instance

        Returns:
            Scalar tensor: the (optionally weighted) sum of per-box losses.

        Raises:
            NotImplementedError: If ``loc_loss_type`` is not one of the three
                supported values.
        """
        p_left, p_top, p_right, p_bottom = (pred[:, col] for col in range(4))
        t_left, t_top, t_right, t_bottom = (target[:, col] for col in range(4))

        area_target = (t_left + t_right) * (t_top + t_bottom)
        area_pred = (p_left + p_right) * (p_top + p_bottom)

        # Overlap extents (element-wise minimum of each side).
        overlap_w = torch.min(p_left, t_left) + torch.min(p_right, t_right)
        overlap_h = torch.min(p_bottom, t_bottom) + torch.min(p_top, t_top)

        # Enclosing-box extents (element-wise maximum of each side).
        hull_w = torch.max(p_left, t_left) + torch.max(p_right, t_right)
        hull_h = torch.max(p_bottom, t_bottom) + torch.max(p_top, t_top)

        area_hull = hull_w * hull_h
        area_overlap = overlap_w * overlap_h
        area_union = area_target + area_pred - area_overlap

        # +1.0 on both sides keeps the ratio defined for zero-area unions.
        ious = (area_overlap + 1.0) / (area_union + 1.0)
        gious = ious - (area_hull - area_union) / area_hull

        mode = self.loc_loss_type
        if mode == 'iou':
            losses = -torch.log(ious)
        elif mode == 'linear_iou':
            losses = 1 - ious
        elif mode == 'giou':
            losses = 1 - gious
        else:
            raise NotImplementedError

        if weight is None:
            return losses.sum()
        return (losses * weight).sum()
docker-hub/BEVFormer/BEVFormer/projects/mmdet3d_plugin/dd3d/layers/normalization.py
0 → 100644
View file @
007f2e68
# Copyright 2021 Toyota Research Institute. All rights reserved.
# Adapted from AdelaiDet
# https://github.com/aim-uofa/AdelaiDet/
import
logging
import
torch
from
torch
import
nn
# Module-level logger named after this module.
LOG = logging.getLogger(__name__)
class Scale(nn.Module):
    """Multiply the input by a single learnable scalar.

    Args:
        init_value (float): Initial value of the ``scale`` parameter.
    """

    def __init__(self, init_value=1.0):
        super().__init__()
        initial = torch.FloatTensor([init_value])
        self.scale = nn.Parameter(initial)

    def forward(self, input):
        """Return ``input * scale``."""
        scaled = input * self.scale
        return scaled
class Offset(nn.Module):
    """Add a single learnable scalar to the input.

    Args:
        init_value (float): Initial value of the ``bias`` parameter.
    """

    def __init__(self, init_value=0.):
        super().__init__()
        initial = torch.FloatTensor([init_value])
        self.bias = nn.Parameter(initial)

    def forward(self, input):
        """Return ``input + bias``."""
        shifted = input + self.bias
        return shifted
class ModuleListDial(nn.ModuleList):
    """A ``ModuleList`` that applies its members one per call, round-robin.

    Each ``forward`` call runs the module at ``cur_position`` and then moves
    the position forward, wrapping back to 0 after the last module.
    """

    def __init__(self, modules=None):
        super().__init__(modules)
        self.cur_position = 0

    def forward(self, x):
        """Apply the current module to ``x`` and advance the dial."""
        current = self[self.cur_position]
        result = current(x)

        following = self.cur_position + 1
        self.cur_position = following
        if following >= len(self):
            self.cur_position = 0
        return result
docker-hub/BEVFormer/BEVFormer/projects/mmdet3d_plugin/dd3d/layers/smooth_l1_loss.py
0 → 100644
View file @
007f2e68
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
# Copyright 2021 Toyota Research Institute. All rights reserved.
# Adapted from fvcore:
# https://github.com/facebookresearch/fvcore/blob/master/fvcore/nn/smooth_l1_loss.py
import
torch
def smooth_l1_loss(input: torch.Tensor, target: torch.Tensor, beta: float, reduction: str = "none") -> torch.Tensor:
    """
    Smooth L1 loss defined in the Fast R-CNN paper as:

                  | 0.5 * x ** 2 / beta   if abs(x) < beta
    smoothl1(x) = |
                  | abs(x) - 0.5 * beta   otherwise,

    where x = input - target.

    Smooth L1 loss is related to Huber loss, which is defined as:

                | 0.5 * x ** 2                  if abs(x) < beta
     huber(x) = |
                | beta * (abs(x) - 0.5 * beta)  otherwise

    Smooth L1 loss is equal to huber(x) / beta. This leads to the following
    differences:

     - As beta -> 0, Smooth L1 loss converges to L1 loss, while Huber loss
       converges to a constant 0 loss.
     - As beta -> +inf, Smooth L1 converges to a constant 0 loss, while Huber loss
       converges to L2 loss.
     - For Smooth L1 loss, as beta varies, the L1 segment of the loss has a constant
       slope of 1. For Huber loss, the slope of the L1 segment is beta.

    Smooth L1 loss can be seen as exactly L1 loss, but with the abs(x) < beta
    portion replaced with a quadratic function such that at abs(x) = beta, its
    slope is 1. The quadratic segment smooths the L1 loss near x = 0.

    Args:
        input (Tensor): input tensor of any shape
        target (Tensor): target value tensor with the same shape as input
        beta (float): L1 to L2 change point.
            For beta values < 1e-5, L1 loss is computed.
        reduction: 'none' | 'mean' | 'sum'
                 'none': No reduction will be applied to the output.
                 'mean': The output will be averaged.
                 'sum': The output will be summed.
                 Any other value leaves the loss unreduced.

    Returns:
        The loss with the reduction option applied.

    Note:
        PyTorch's builtin "Smooth L1 loss" implementation does not actually
        implement Smooth L1 loss, nor does it implement Huber loss. It implements
        the special case of both in which they are equal (beta=1).
        See: https://pytorch.org/docs/stable/nn.html#torch.nn.SmoothL1Loss.
    """
    # (dennis.park) Make it work with mixed precision training.
    beta = torch.as_tensor(beta).to(input.dtype)
    if beta < 1e-5:
        # if beta == 0, then torch.where will result in nan gradients when
        # the chain rule is applied due to pytorch implementation details
        # (the False branch "0.5 * n ** 2 / 0" has an incoming gradient of
        # zeros, rather than "no gradient"). To avoid this issue, we define
        # small values of beta to be exactly l1 loss.
        loss = torch.abs(input - target)
    else:
        n = torch.abs(input - target)
        cond = n < beta
        # The division by beta is what makes the two pieces meet at
        # abs(x) == beta (both equal 0.5 * beta there); without it the loss
        # jumps at the change point whenever beta != 1.
        a = 0.5 * n ** 2 / beta
        b = n - 0.5 * beta
        # Cast both branches back so torch.where sees matching dtypes under
        # mixed precision.
        a, b = a.to(input.dtype), b.to(input.dtype)
        loss = torch.where(cond, a, b)

    if reduction == "mean":
        loss = loss.mean()
    elif reduction == "sum":
        loss = loss.sum()
    return loss
docker-hub/BEVFormer/BEVFormer/projects/mmdet3d_plugin/dd3d/modeling/__init__.py
0 → 100644
View file @
007f2e68
from
.nuscenes_dd3d
import
NuscenesDD3D
\ No newline at end of file
docker-hub/BEVFormer/BEVFormer/projects/mmdet3d_plugin/dd3d/modeling/core.py
0 → 100644
View file @
007f2e68
# Copyright 2021 Toyota Research Institute. All rights reserved.
import
torch
from
torch
import
nn
#from detectron2.modeling.meta_arch.build import META_ARCH_REGISTRY
from
detectron2.modeling.postprocessing
import
detector_postprocess
as
resize_instances
from
detectron2.structures
import
Instances
from
detectron2.layers
import
ShapeSpec
from
mmcv.runner
import
force_fp32
from
.fcos2d
import
FCOS2DHead
,
FCOS2DInference
,
FCOS2DLoss
from
.fcos3d
import
FCOS3DHead
,
FCOS3DInference
,
FCOS3DLoss
#from tridet.modeling.dd3d.postprocessing import nuscenes_sample_aggregate
from
.prepare_targets
import
DD3DTargetPreparer
#from tridet.modeling.feature_extractor import build_feature_extractor
from
projects.mmdet3d_plugin.dd3d.structures.image_list
import
ImageList
from
projects.mmdet3d_plugin.dd3d.utils.tensor2d
import
compute_features_locations
as
compute_locations_per_level
#@META_ARCH_REGISTRY.register()
class DD3D(nn.Module):
    """DD3D detection head operating on pre-computed multi-level features.

    Combines an FCOS-style 2D head/loss with an optional 3D box head/loss.
    The backbone, image normalisation and inference post-processing of the
    upstream implementation are intentionally not part of this port: the
    caller supplies the feature maps directly and only training is supported.

    Args:
        num_classes: number of object classes, forwarded to every sub-module.
        in_channels: channel count shared by all input feature levels.
        strides: iterable with one stride per feature level.
        fcos2d_cfg: extra keyword arguments for ``FCOS2DHead`` (``None`` means none).
        fcos2d_loss_cfg: extra keyword arguments for ``FCOS2DLoss``.
        fcos3d_cfg: extra keyword arguments for ``FCOS3DHead``.
        fcos3d_loss_cfg: extra keyword arguments for ``FCOS3DLoss``.
        target_assign_cfg: extra keyword arguments for ``DD3DTargetPreparer``.
        box3d_on: when False only the 2D head and loss are built and used.
        feature_locations_offset: forwarded as ``offset`` to the per-level
            location computation.
    """

    def __init__(self,
                 num_classes,
                 in_channels,
                 strides,
                 fcos2d_cfg=None,
                 fcos2d_loss_cfg=None,
                 fcos3d_cfg=None,
                 fcos3d_loss_cfg=None,
                 target_assign_cfg=None,
                 box3d_on=True,
                 feature_locations_offset="none"):
        super().__init__()
        # ``None`` replaces the former mutable ``dict()`` defaults; an omitted
        # argument still means "no extra keyword arguments".
        fcos2d_cfg = {} if fcos2d_cfg is None else fcos2d_cfg
        fcos2d_loss_cfg = {} if fcos2d_loss_cfg is None else fcos2d_loss_cfg
        fcos3d_cfg = {} if fcos3d_cfg is None else fcos3d_cfg
        fcos3d_loss_cfg = {} if fcos3d_loss_cfg is None else fcos3d_loss_cfg
        target_assign_cfg = {} if target_assign_cfg is None else target_assign_cfg

        # NOTE: no backbone is built here; the shape specs are synthesised from
        # ``in_channels`` and ``strides`` instead of being read from a backbone.
        self.backbone_output_shape = [ShapeSpec(channels=in_channels, stride=s) for s in strides]

        self.feature_locations_offset = feature_locations_offset

        self.fcos2d_head = FCOS2DHead(num_classes=num_classes, input_shape=self.backbone_output_shape, **fcos2d_cfg)
        self.fcos2d_loss = FCOS2DLoss(num_classes=num_classes, **fcos2d_loss_cfg)
        # NOTE: inference later
        # self.fcos2d_inference = FCOS2DInference(cfg)

        if box3d_on:
            self.fcos3d_head = FCOS3DHead(num_classes=num_classes, input_shape=self.backbone_output_shape, **fcos3d_cfg)
            self.fcos3d_loss = FCOS3DLoss(num_classes=num_classes, **fcos3d_loss_cfg)
            # NOTE: inference later
            # self.fcos3d_inference = FCOS3DInference(cfg)
            self.only_box2d = False
        else:
            self.only_box2d = True

        self.prepare_targets = DD3DTargetPreparer(
            num_classes=num_classes,
            input_shape=self.backbone_output_shape,
            box3d_on=box3d_on,
            **target_assign_cfg
        )

        # NOTE: inference later
        # self.postprocess_in_inference = cfg.DD3D.INFERENCE.DO_POSTPROCESS
        # self.do_nms = cfg.DD3D.INFERENCE.DO_NMS
        # self.do_bev_nms = cfg.DD3D.INFERENCE.DO_BEV_NMS
        # self.bev_nms_iou_thresh = cfg.DD3D.INFERENCE.BEV_NMS_IOU_THRESH
        # nuScenes inference aggregates detections over all 6 cameras.
        # self.nusc_sample_aggregate_in_inference = cfg.DD3D.INFERENCE.NUSC_SAMPLE_AGGREGATE

        self.num_classes = num_classes

        # NOTE: pixel mean/std normalisation is not needed in this port, so the
        # corresponding buffers, ``device`` property and ``preprocess_image``
        # of the upstream implementation are not defined.

    # ``apply_to`` must be a tuple of argument names. The previous value
    # ``('features')`` was a plain string that only matched by substring test.
    @force_fp32(apply_to=('features', ))
    def forward(self, features, batched_inputs):
        """Run the heads on ``features`` and return the training losses.

        Args:
            features: sequence of per-level feature maps; the last two
                dimensions of each are used as (height, width).
            batched_inputs: non-empty sequence of per-sample dicts. The keys
                ``'inv_intrinsics'`` and ``'instances'`` are looked up on the
                first element and, when present, read from every element.

        Returns:
            dict of losses (2D losses, plus 3D losses unless ``only_box2d``).

        Raises:
            AssertionError: in training mode when no ``'instances'`` are given.
            NotImplementedError: when the module is not in training mode.
        """
        # NOTE: images are not preprocessed here and inverse intrinsics are
        # taken directly from the inputs rather than derived from an ImageList.
        if 'inv_intrinsics' in batched_inputs[0]:
            inv_intrinsics = [x['inv_intrinsics'].to(features[0].device) for x in batched_inputs]
            inv_intrinsics = torch.stack(inv_intrinsics, dim=0)
        else:
            inv_intrinsics = None

        # NOTE: dense-depth ground truth and the backbone call of the upstream
        # implementation are omitted; features are passed in directly.

        if "instances" in batched_inputs[0]:
            gt_instances = [x["instances"].to(features[0].device) for x in batched_inputs]
        else:
            gt_instances = None

        locations = self.compute_locations(features)
        logits, box2d_reg, centerness, _ = self.fcos2d_head(features)
        if not self.only_box2d:
            box3d_quat, box3d_ctr, box3d_depth, box3d_size, box3d_conf, dense_depth = self.fcos3d_head(features)

        if self.training:
            assert gt_instances is not None
            feature_shapes = [x.shape[-2:] for x in features]
            training_targets = self.prepare_targets(locations, gt_instances, feature_shapes)
            # NOTE:
            # if gt_dense_depth is not None:
            #    training_targets.update({"dense_depth": gt_dense_depth})

            losses = {}
            fcos2d_loss, fcos2d_info = self.fcos2d_loss(logits, box2d_reg, centerness, training_targets)
            losses.update(fcos2d_loss)

            if not self.only_box2d:
                fcos3d_loss = self.fcos3d_loss(
                    box3d_quat, box3d_ctr, box3d_depth, box3d_size, box3d_conf, dense_depth, inv_intrinsics,
                    fcos2d_info, training_targets
                )
                losses.update(fcos3d_loss)
            return losses
        else:
            # TODO: do not support inference now
            raise NotImplementedError("DD3D inference is not supported yet.")
            # Everything below is unreachable. It is the upstream inference
            # path kept as a porting reference; it relies on names that are
            # not defined in this port (``images``, ``nuscenes_sample_aggregate``,
            # ``self.fcos2d_inference``, ``self.do_nms``, ...).
            pred_instances, fcos2d_info = self.fcos2d_inference(
                logits, box2d_reg, centerness, locations, images.image_sizes
            )
            if not self.only_box2d:
                # This adds 'pred_boxes3d' and 'scores_3d' to Instances in 'pred_instances' in place.
                self.fcos3d_inference(
                    box3d_quat, box3d_ctr, box3d_depth, box3d_size, box3d_conf, inv_intrinsics, pred_instances,
                    fcos2d_info
                )

                # 3D score == 2D score x confidence.
                score_key = "scores_3d"
            else:
                score_key = "scores"

            # Transpose to "image-first", i.e. (B, L)
            pred_instances = list(zip(*pred_instances))
            pred_instances = [Instances.cat(instances) for instances in pred_instances]

            # 2D NMS and pick top-K.
            if self.do_nms:
                pred_instances = self.fcos2d_inference.nms_and_top_k(pred_instances, score_key)

            if not self.only_box2d and self.do_bev_nms:
                # Bird-eye-view NMS.
                dummy_group_idxs = {i: [i] for i, _ in enumerate(pred_instances)}
                if 'pose' in batched_inputs[0]:
                    poses = [x['pose'] for x in batched_inputs]
                else:
                    poses = [x['extrinsics'] for x in batched_inputs]
                pred_instances = nuscenes_sample_aggregate(
                    pred_instances,
                    dummy_group_idxs,
                    self.num_classes,
                    poses,
                    iou_threshold=self.bev_nms_iou_thresh,
                    include_boxes3d_global=False
                )

            if self.postprocess_in_inference:
                processed_results = []
                for results_per_image, input_per_image, image_size in \
                        zip(pred_instances, batched_inputs, images.image_sizes):
                    height = input_per_image.get("height", image_size[0])
                    width = input_per_image.get("width", image_size[1])
                    r = resize_instances(results_per_image, height, width)
                    processed_results.append({"instances": r})
            else:
                processed_results = [{"instances": x} for x in pred_instances]

            return processed_results

    def compute_locations(self, features):
        """Return one location tensor per feature level.

        Each level's locations are computed from its (height, width), the
        stride stored for that level, and the feature's dtype and device.
        """
        locations = []
        in_strides = [x.stride for x in self.backbone_output_shape]
        for level, feature in enumerate(features):
            h, w = feature.size()[-2:]
            locations_per_level = compute_locations_per_level(
                h, w, in_strides[level], feature.dtype, feature.device, offset=self.feature_locations_offset
            )
            locations.append(locations_per_level)
        return locations

    def forward_train(self, features, batched_inputs):
        """Switch the module to training mode and run ``forward``."""
        self.train()
        return self.forward(features, batched_inputs)
\ No newline at end of file
docker-hub/BEVFormer/BEVFormer/projects/mmdet3d_plugin/dd3d/modeling/disentangled_box3d_loss.py
0 → 100644
View file @
007f2e68
# Copyright 2021 Toyota Research Institute. All rights reserved.
import
logging
import
torch
import
torch.nn
as
nn
from
projects.mmdet3d_plugin.dd3d.layers.smooth_l1_loss
import
smooth_l1_loss
LOG
=
logging
.
getLogger
(
__name__
)
class DisentangledBox3DLoss(nn.Module):
    """Disentangled 3D box corner loss.

    For each box component (quaternion, projected centre, depth, size) the
    ground-truth box is copied, that single component is replaced by the
    prediction, and a smooth-L1 loss is taken between the resulting corners
    and the ground-truth corners.

    Args:
        smooth_l1_loss_beta: ``beta`` forwarded to ``smooth_l1_loss``.
        max_loss_per_group: upper bound applied element-wise to each
            component's corner loss before reduction.
    """

    def __init__(self, smooth_l1_loss_beta, max_loss_per_group):
        super().__init__()
        self.smooth_l1_loss_beta = smooth_l1_loss_beta
        self.max_loss_per_group = max_loss_per_group

    def forward(self, box3d_pred, box3d_targets, locations, weights=None):
        """Compute the per-component losses and the entangled L1 distance.

        Args:
            box3d_pred: predicted boxes exposing ``quat``, ``proj_ctr``,
                ``depth``, ``size`` and ``corners``.
            box3d_targets: ground-truth boxes with the same interface plus
                ``clone()``.
            locations: unused; kept for interface compatibility.
            weights: optional per-box weights. When given, each box's mean
                corner loss is multiplied by its weight and the products are
                summed; otherwise the plain mean is returned.

        Returns:
            Tuple ``(disentangled_losses, entangled_l1_dist)`` where the first
            is a dict keyed ``"loss_box3d_<component>"`` and the second is the
            detached per-box mean absolute corner difference.
        """
        box3d_pred = box3d_pred.to(torch.float32)
        box3d_targets = box3d_targets.to(torch.float32)

        target_corners = box3d_targets.corners

        disentangled_losses = {}
        for component_key in ["quat", "proj_ctr", "depth", "size"]:
            disentangled_boxes = box3d_targets.clone()
            setattr(disentangled_boxes, component_key, getattr(box3d_pred, component_key))
            pred_corners = disentangled_boxes.to(torch.float32).corners

            loss = smooth_l1_loss(pred_corners, target_corners, beta=self.smooth_l1_loss_beta)

            # Bound the loss. ``Tensor.clamp`` is out-of-place, so the result
            # must be assigned back; previously it was discarded and the bound
            # never took effect.
            loss = loss.clamp(max=self.max_loss_per_group)

            # Rows of 24 values: presumably 8 corners x 3 coordinates per box.
            if weights is not None:
                # loss = torch.sum(loss.reshape(-1, 24) * weights.unsqueeze(-1))
                loss = torch.sum(loss.reshape(-1, 24).mean(dim=1) * weights)
            else:
                loss = loss.reshape(-1, 24).mean()

            disentangled_losses["loss_box3d_" + component_key] = loss

        entangled_l1_dist = (target_corners - box3d_pred.corners).detach().abs().reshape(-1, 24).mean(dim=1)

        return disentangled_losses, entangled_l1_dist
docker-hub/BEVFormer/BEVFormer/projects/mmdet3d_plugin/dd3d/modeling/fcos2d.py
0 → 100644
View file @
007f2e68
# Copyright 2021 Toyota Research Institute. All rights reserved.
# Adapted from AdelaiDet:
# https://github.com/aim-uofa/AdelaiDet
import
torch
from
fvcore.nn
import
sigmoid_focal_loss
from
torch
import
nn
from
torch.nn
import
functional
as
F
from
detectron2.layers
import
Conv2d
,
batched_nms
,
cat
,
get_norm
from
detectron2.structures
import
Boxes
,
Instances
from
detectron2.utils.comm
import
get_world_size
from
mmcv.runner
import
force_fp32
from
projects.mmdet3d_plugin.dd3d.layers.iou_loss
import
IOULoss
from
projects.mmdet3d_plugin.dd3d.layers.normalization
import
ModuleListDial
,
Scale
from
projects.mmdet3d_plugin.dd3d.utils.comm
import
reduce_sum
INF
=
100000000
def compute_ctrness_targets(reg_targets):
    """Return the centerness target for each row of 2D regression targets.

    Columns 0 and 2 are treated as the left/right pair and columns 1 and 3 as
    the top/bottom pair. The target is ``sqrt(min/max * min/max)`` over the
    two pairs. An empty input yields an empty result.
    """
    num_targets = len(reg_targets)
    if num_targets == 0:
        return reg_targets.new_zeros(num_targets)

    horizontal = reg_targets[:, [0, 2]]
    vertical = reg_targets[:, [1, 3]]

    horizontal_ratio = horizontal.min(dim=-1).values / horizontal.max(dim=-1).values
    vertical_ratio = vertical.min(dim=-1).values / vertical.max(dim=-1).values

    return torch.sqrt(horizontal_ratio * vertical_ratio)
class FCOS2DHead(nn.Module):
    """FCOS-style 2D detection head shared across feature levels.

    Builds a classification tower and a 2D-box tower (registered as
    ``cls_tower`` and ``box2d_tower``), followed by three 3x3 prediction
    convolutions: class logits, 4-channel box regression and 1-channel
    centerness.

    Args:
        num_classes: number of output channels of the class-logit predictor.
        input_shape: sequence of per-level specs exposing ``stride`` and
            ``channels``; all levels must share the same channel count.
        num_cls_convs: number of conv layers in the classification tower.
        num_box_convs: number of conv layers in the 2D-box tower.
        norm: normalisation name. In ``'v1'`` only ``"BN"`` adds a norm layer;
            ``"GN"``, ``"NaiveGN"`` and ``"SyncBN"`` raise NotImplementedError.
        use_deformable: must be falsy; a truthy value raises ValueError.
        use_scale: when True a per-level ``Scale`` module is applied to the
            box regression output.
        box2d_scale_init_factor: multiplied by each level's stride to get the
            initial value of that level's ``Scale``.
        version: ``'v1'`` or ``'v2'``; selects how the towers are built and
            the attribute name that holds the scales.
    """

    def __init__(self,
                 num_classes,
                 input_shape,
                 num_cls_convs=4,
                 num_box_convs=4,
                 norm='BN',
                 use_deformable=False,
                 use_scale=True,
                 box2d_scale_init_factor=1.0,
                 version='v2'):
        super().__init__()

        self.num_classes = num_classes
        self.in_strides = [shape.stride for shape in input_shape]
        self.num_levels = len(input_shape)

        self.use_scale = use_scale
        self.box2d_scale_init_factor = box2d_scale_init_factor
        self._version = version

        in_channels = [s.channels for s in input_shape]
        assert len(set(in_channels)) == 1, "Each level must have the same channel!"
        in_channels = in_channels[0]

        if use_deformable:
            raise ValueError("Not supported yet.")

        head_configs = {'cls': num_cls_convs, 'box2d': num_box_convs}

        # One tower per head; each is registered as '<head_name>_tower'.
        for head_name, num_convs in head_configs.items():
            tower = []
            if self._version == "v1":
                # v1: explicit conv -> (optional per-level BN) -> ReLU triples.
                for _ in range(num_convs):
                    conv_func = nn.Conv2d
                    tower.append(conv_func(in_channels, in_channels, kernel_size=3, stride=1, padding=1, bias=True))
                    if norm == "GN":
                        raise NotImplementedError()
                    elif norm == "NaiveGN":
                        raise NotImplementedError()
                    elif norm == "BN":
                        tower.append(ModuleListDial([nn.BatchNorm2d(in_channels) for _ in range(self.num_levels)]))
                    elif norm == "SyncBN":
                        raise NotImplementedError()
                    tower.append(nn.ReLU())
            elif self._version == "v2":
                # v2: norm and activation are passed into the Conv2d wrapper.
                for _ in range(num_convs):
                    if norm in ("BN", "FrozenBN", "SyncBN", "GN"):
                        # NOTE: need to add norm here!
                        # Each FPN level has its own batchnorm layer.
                        # NOTE: do not use dd3d train.py!
                        # "BN" is converted to "SyncBN" in distributed training (see train.py)
                        norm_layer = ModuleListDial([get_norm(norm, in_channels) for _ in range(self.num_levels)])
                    else:
                        norm_layer = get_norm(norm, in_channels)
                    tower.append(
                        Conv2d(
                            in_channels,
                            in_channels,
                            kernel_size=3,
                            stride=1,
                            padding=1,
                            # A conv bias is only used when no norm layer follows.
                            bias=norm_layer is None,
                            norm=norm_layer,
                            activation=F.relu
                        )
                    )
            else:
                raise ValueError(f"Invalid FCOS2D version: {self._version}")
            self.add_module(f'{head_name}_tower', nn.Sequential(*tower))

        self.cls_logits = nn.Conv2d(in_channels, self.num_classes, kernel_size=3, stride=1, padding=1)
        self.box2d_reg = nn.Conv2d(in_channels, 4, kernel_size=3, stride=1, padding=1)
        self.centerness = nn.Conv2d(in_channels, 1, kernel_size=3, stride=1, padding=1)

        if self.use_scale:
            # The attribute name differs between versions (``scales_reg`` vs
            # ``scales_box2d_reg``); ``forward`` picks the matching one.
            if self._version == "v1":
                self.scales_reg = nn.ModuleList([
                    Scale(init_value=stride * self.box2d_scale_init_factor) for stride in self.in_strides
                ])
            else:
                self.scales_box2d_reg = nn.ModuleList([
                    Scale(init_value=stride * self.box2d_scale_init_factor) for stride in self.in_strides
                ])

        self.init_weights()

    def init_weights(self):
        """Initialise conv weights of the towers and the predictors.

        Tower convolutions use Kaiming-normal (fan_out, relu); predictor
        convolutions use Kaiming-uniform with ``a=1``. Existing biases are
        set to zero.
        """

        for tower in [self.cls_tower, self.box2d_tower]:
            for l in tower.modules():
                if isinstance(l, nn.Conv2d):
                    torch.nn.init.kaiming_normal_(l.weight, mode='fan_out', nonlinearity='relu')
                    if l.bias is not None:
                        torch.nn.init.constant_(l.bias, 0)

        predictors = [self.cls_logits, self.box2d_reg, self.centerness]

        for modules in predictors:
            for l in modules.modules():
                if isinstance(l, nn.Conv2d):
                    torch.nn.init.kaiming_uniform_(l.weight, a=1)
                    if l.bias is not None:  # depth head may not have bias.
                        torch.nn.init.constant_(l.bias, 0)

    def forward(self, x):
        """Apply the head to every feature level in ``x``.

        Returns:
            Tuple ``(logits, box2d_reg, centerness, extra_output)``. The first
            three are lists with one tensor per level; ``extra_output`` is a
            dict whose ``'cls_tower_out'`` entry lists the classification
            tower output of each level.
        """
        logits = []
        box2d_reg = []
        centerness = []

        extra_output = {"cls_tower_out": []}

        for l, feature in enumerate(x):
            cls_tower_out = self.cls_tower(feature)
            bbox_tower_out = self.box2d_tower(feature)

            # 2D box
            logits.append(self.cls_logits(cls_tower_out))
            # Centerness is predicted from the box tower, not the class tower.
            centerness.append(self.centerness(bbox_tower_out))
            box_reg = self.box2d_reg(bbox_tower_out)
            if self.use_scale:
                # TODO: to optimize the runtime, apply this scaling in inference (and loss compute) only on FG pixels?
                if self._version == "v1":
                    box_reg = self.scales_reg[l](box_reg)
                else:
                    box_reg = self.scales_box2d_reg[l](box_reg)
            # Note that we use relu, as in the improved FCOS, instead of exp.
            box2d_reg.append(F.relu(box_reg))

            extra_output['cls_tower_out'].append(cls_tower_out)

        return logits, box2d_reg, centerness, extra_output
class FCOS2DLoss(nn.Module):
    """Training losses for the FCOS 2D head.

    Produces a focal classification loss over all locations, and a box
    regression loss plus a centerness loss over the foreground locations.

    Args:
        num_classes: number of classes (width of the flattened logits).
        focal_loss_alpha: ``alpha`` passed to ``sigmoid_focal_loss``.
        focal_loss_gamma: ``gamma`` passed to ``sigmoid_focal_loss``.
        loc_loss_type: argument passed to ``IOULoss``.
    """

    def __init__(
        self,
        num_classes,
        focal_loss_alpha=0.25,
        focal_loss_gamma=2.0,
        loc_loss_type='giou',
    ):
        super().__init__()
        self.focal_loss_alpha = focal_loss_alpha
        self.focal_loss_gamma = focal_loss_gamma

        self.box2d_reg_loss_fn = IOULoss(loc_loss_type)

        self.num_classes = num_classes

    @force_fp32(apply_to=('logits', 'box2d_reg', 'centerness'))
    def forward(self, logits, box2d_reg, centerness, targets):
        """Compute the 2D losses.

        Args:
            logits, box2d_reg, centerness: per-level lists of 4-D prediction
                maps; each is moved to channels-last and flattened.
            targets: dict providing ``'labels'``, ``'box2d_reg_targets'`` and
                ``'pos_inds'``.

        Returns:
            Tuple ``(loss_dict, extra_info)``. ``extra_info`` is empty when
            there are no foreground locations; otherwise it carries
            ``'loss_denom'`` and ``'centerness_targets'``.

        Raises:
            ValueError: if labels and regression targets differ in length.
        """
        labels = targets['labels']
        reg_targets = targets['box2d_reg_targets']
        pos_inds = targets["pos_inds"]

        if len(labels) != reg_targets.shape[0]:
            raise ValueError(
                f"The size of 'labels' and 'box2d_reg_targets' does not match: a={len(labels)}, b={reg_targets.shape[0]}"
            )

        def _flatten_levels(per_level, *trailing_dims):
            # (N, C, H, W) -> (N, H, W, C) -> (-1, *trailing_dims), then concatenate levels.
            return cat([lvl.permute(0, 2, 3, 1).reshape(-1, *trailing_dims) for lvl in per_level])

        flat_logits = _flatten_levels(logits, self.num_classes)
        flat_box2d_reg = _flatten_levels(box2d_reg, 4)
        flat_centerness = _flatten_levels(centerness)

        # -------------------
        # Classification loss
        # -------------------
        num_pos_local = pos_inds.numel()
        num_gpus = get_world_size()
        total_num_pos = reduce_sum(pos_inds.new_tensor([num_pos_local])).item()
        num_pos_avg = max(total_num_pos / num_gpus, 1.0)

        # One-hot targets: 1 at (foreground location, its label), 0 elsewhere.
        one_hot_target = torch.zeros_like(flat_logits)
        one_hot_target[pos_inds, labels[pos_inds]] = 1

        focal_sum = sigmoid_focal_loss(
            flat_logits,
            one_hot_target,
            alpha=self.focal_loss_alpha,
            gamma=self.focal_loss_gamma,
            reduction="sum",
        )
        loss_cls = focal_sum / num_pos_avg

        # NOTE: The rest of losses only consider foreground pixels.
        fg_box2d_reg = flat_box2d_reg[pos_inds]
        fg_reg_targets = reg_targets[pos_inds]
        fg_centerness = flat_centerness[pos_inds]

        # Centerness targets come from the 2D regression targets of foreground pixels.
        centerness_targets = compute_ctrness_targets(fg_reg_targets)

        # Denominator for all foreground losses.
        ctrness_targets_sum = centerness_targets.sum()
        loss_denom = max(reduce_sum(ctrness_targets_sum).item() / num_gpus, 1e-6)

        # NOTE: the early return stays after both reduce_sum calls.
        if num_pos_local == 0:
            empty_losses = {
                "loss_cls": loss_cls,
                "loss_box2d_reg": fg_box2d_reg.sum() * 0.,
                "loss_centerness": fg_centerness.sum() * 0.,
            }
            return empty_losses, {}

        # ----------------------
        # 2D box regression loss
        # ----------------------
        loss_box2d_reg = self.box2d_reg_loss_fn(fg_box2d_reg, fg_reg_targets, centerness_targets) / loss_denom

        # ---------------
        # Centerness loss
        # ---------------
        centerness_bce = F.binary_cross_entropy_with_logits(fg_centerness, centerness_targets, reduction="sum")
        loss_centerness = centerness_bce / num_pos_avg

        loss_dict = {"loss_cls": loss_cls, "loss_box2d_reg": loss_box2d_reg, "loss_centerness": loss_centerness}
        extra_info = {"loss_denom": loss_denom, "centerness_targets": centerness_targets}
        return loss_dict, extra_info
class FCOS2DInference():
    """Turns FCOS 2D head outputs into per-image detections.

    All thresholds are read from ``cfg.DD3D.FCOS2D.INFERENCE``; the class
    count comes from ``cfg.DD3D.NUM_CLASSES``.
    """

    def __init__(self, cfg):
        inference_cfg = cfg.DD3D.FCOS2D.INFERENCE
        self.thresh_with_ctr = inference_cfg.THRESH_WITH_CTR
        self.pre_nms_thresh = inference_cfg.PRE_NMS_THRESH
        self.pre_nms_topk = inference_cfg.PRE_NMS_TOPK
        self.post_nms_topk = inference_cfg.POST_NMS_TOPK
        self.nms_thresh = inference_cfg.NMS_THRESH
        self.num_classes = cfg.DD3D.NUM_CLASSES

    def __call__(self, logits, box2d_reg, centerness, locations, image_sizes):
        """Decode every feature level.

        Returns:
            Tuple ``(pred_instances, extra_info)``; both are lists with one
            entry per level. Each ``pred_instances`` entry is the list of
            per-image instances for that level, tagged with ``fpn_levels``.
        """
        pred_instances = []  # List[List[Instances]], shape = (L, B)
        extra_info = []
        per_level_inputs = zip(logits, box2d_reg, centerness, locations)
        for level_idx, (level_logits, level_box2d_reg, level_centerness, level_locations) in \
                enumerate(per_level_inputs):

            # List of Instances; one for each image.
            level_instances, level_extra_info = self.forward_for_single_feature_map(
                level_logits, level_box2d_reg, level_centerness, level_locations, image_sizes
            )

            for image_instances in level_instances:
                image_instances.fpn_levels = \
                    level_locations.new_ones(len(image_instances), dtype=torch.long) * level_idx

            pred_instances.append(level_instances)
            extra_info.append(level_extra_info)

        return pred_instances, extra_info

    def forward_for_single_feature_map(self, logits, box2d_reg, centerness, locations, image_sizes):
        """Decode one feature level into per-image instances.

        Returns:
            Tuple ``(results, extra_info)``: the list of per-image instances
            and a dict with the cached foreground indices, class indices and
            top-k indices (``None`` where no top-k selection happened).
        """
        batch_size, num_classes, _, __ = logits.shape

        # Channels-last and flattened over space, i.e. the same format as locations.
        scores = logits.permute(0, 2, 3, 1).reshape(batch_size, -1, num_classes).sigmoid()
        box2d_reg = box2d_reg.permute(0, 2, 3, 1).reshape(batch_size, -1, 4)
        centerness = centerness.permute(0, 2, 3, 1).reshape(batch_size, -1).sigmoid()
        centerness_weight = centerness[:, :, None]

        # If thresh_with_ctr is set, the classification scores are multiplied
        # by centerness before thresholding; otherwise only afterwards.
        if self.thresh_with_ctr:
            scores = scores * centerness_weight

        candidate_mask = scores > self.pre_nms_thresh

        pre_nms_topk = candidate_mask.reshape(batch_size, -1).sum(1).clamp(max=self.pre_nms_topk)

        if not self.thresh_with_ctr:
            scores = scores * centerness_weight

        results = []
        fg_inds_cache, topk_cache, class_inds_cache = [], [], []
        for img_idx in range(batch_size):
            image_mask = candidate_mask[img_idx]
            kept_scores = scores[img_idx][image_mask]

            candidate_inds = image_mask.nonzero(as_tuple=False)
            fg_inds = candidate_inds[:, 0]
            class_inds = candidate_inds[:, 1]

            # Cache the indices as they are before any top-k selection.
            fg_inds_cache.append(fg_inds)
            class_inds_cache.append(class_inds)

            kept_box2d_reg = box2d_reg[img_idx][fg_inds]
            kept_locations = locations[fg_inds]

            topk_limit = pre_nms_topk[img_idx]

            topk_indices = None
            if image_mask.sum().item() > topk_limit.item():
                kept_scores, topk_indices = kept_scores.topk(topk_limit, sorted=False)

                class_inds = class_inds[topk_indices]
                kept_box2d_reg = kept_box2d_reg[topk_indices]
                kept_locations = kept_locations[topk_indices]
            topk_cache.append(topk_indices)

            loc_x, loc_y = kept_locations[:, 0], kept_locations[:, 1]
            detections = torch.stack([
                loc_x - kept_box2d_reg[:, 0],
                loc_y - kept_box2d_reg[:, 1],
                loc_x + kept_box2d_reg[:, 2],
                loc_y + kept_box2d_reg[:, 3],
            ], dim=1)

            instances = Instances(image_sizes[img_idx])
            instances.pred_boxes = Boxes(detections)
            instances.scores = torch.sqrt(kept_scores)
            instances.pred_classes = class_inds
            instances.locations = kept_locations

            results.append(instances)

        extra_info = {
            "fg_inds_per_im": fg_inds_cache,
            "class_inds_per_im": class_inds_cache,
            "topk_indices": topk_cache
        }
        return results, extra_info

    def nms_and_top_k(self, instances_per_im, score_key_for_nms="scores"):
        """Apply multiclass NMS and a post-NMS cap to each image's instances.

        NMS runs only when ``nms_thresh > 0``; the cap applies only when
        ``post_nms_topk > 0`` and more detections than that remain.
        """
        results = []
        for instances in instances_per_im:
            if self.nms_thresh > 0:
                # Multiclass NMS.
                nms_scores = instances.get(score_key_for_nms)
                keep = batched_nms(
                    instances.pred_boxes.tensor, nms_scores, instances.pred_classes, self.nms_thresh
                )
                instances = instances[keep]
            num_detections = len(instances)

            # Limit to max_per_image detections **over all classes**
            if self.post_nms_topk > 0 and num_detections > self.post_nms_topk:
                scores = instances.scores
                # image_thresh, _ = torch.kthvalue(scores.cpu(), num_detections - self.post_nms_topk + 1)
                kth = num_detections - self.post_nms_topk + 1
                image_thresh = torch.kthvalue(scores, kth).values
                above_thresh = scores >= image_thresh.item()
                keep = torch.nonzero(above_thresh).squeeze(1)
                instances = instances[keep]
            results.append(instances)
        return results
docker-hub/BEVFormer/BEVFormer/projects/mmdet3d_plugin/dd3d/modeling/fcos3d.py
0 → 100644
View file @
007f2e68
# Copyright 2021 Toyota Research Institute. All rights reserved.
import
torch
import
torch.nn.functional
as
F
from
torch
import
nn
from
detectron2.layers
import
Conv2d
,
cat
,
get_norm
from
mmcv.runner
import
force_fp32
from
projects.mmdet3d_plugin.dd3d.layers.normalization
import
ModuleListDial
,
Offset
,
Scale
from
.disentangled_box3d_loss
import
DisentangledBox3DLoss
from
projects.mmdet3d_plugin.dd3d.structures.boxes3d
import
Boxes3D
from
projects.mmdet3d_plugin.dd3d.utils.geometry
import
allocentric_to_egocentric
,
unproject_points2d
EPS
=
1e-7
def predictions_to_boxes3d(
    quat,
    proj_ctr,
    depth,
    size,
    locations,
    inv_intrinsics,
    canon_box_sizes,
    min_depth,
    max_depth,
    scale_depth_by_focal_lengths_factor,
    scale_depth_by_focal_lengths=True,
    quat_is_allocentric=True,
    depth_is_distance=False
):
    """Convert raw 3D head predictions into a ``Boxes3D`` object.

    Steps, in order: normalise the quaternion along dim 1 (twice), optionally
    rescale depth by the pixel size taken from ``inv_intrinsics``, optionally
    divide depth by the norm of the unprojected location, clamp depth to
    ``[min_depth, max_depth]``, add ``locations`` to the projected centre,
    optionally convert the quaternion with ``allocentric_to_egocentric``, and
    map the size through ``(tanh + 1) * canon_box_sizes``.
    """
    # First pass guards against a zero norm with EPS.
    unit_quat = quat / quat.norm(dim=1, keepdim=True).clamp(min=EPS)
    # Second pass makes sure it is numerically unit-norm.
    unit_quat = unit_quat / unit_quat.norm(dim=1, keepdim=True)

    if scale_depth_by_focal_lengths:
        inv_focal_diag = torch.stack([inv_intrinsics[:, 0, 0], inv_intrinsics[:, 1, 1]], dim=-1)
        pixel_size = torch.norm(inv_focal_diag, dim=-1)
        depth = depth / (pixel_size * scale_depth_by_focal_lengths_factor)

    if depth_is_distance:
        ray_norm = unproject_points2d(locations, inv_intrinsics).norm(dim=1).clamp(min=EPS)
        depth = depth / ray_norm

    depth = depth.reshape(-1, 1).clamp(min_depth, max_depth)

    center = proj_ctr + locations

    if quat_is_allocentric:
        unit_quat = allocentric_to_egocentric(unit_quat, center, inv_intrinsics)

    # max size = 2 * canon_size
    box_size = (size.tanh() + 1.) * canon_box_sizes

    return Boxes3D(unit_quat, center, depth, box_size, inv_intrinsics)
class FCOS3DHead(nn.Module):
    """3D box prediction head applied to every feature level.

    Builds a shared conv tower (``box3d_tower``) followed by five groups of
    3x3 predictors: quaternion (4 channels per class), projected centre (2),
    depth (1), size (3) and confidence (1). With ``use_scale`` the centre,
    size, confidence and depth outputs are additionally passed through
    per-level ``Scale`` modules, and depth through a per-level ``Offset``.

    Args:
        num_classes: number of classes; predictors emit per-class channels
            unless ``class_agnostic`` is set.
        input_shape: sequence of per-level specs exposing ``stride`` and
            ``channels``; all levels must share the same channel count.
        num_convs: number of conv layers in the tower.
        norm: normalisation name passed to ``get_norm``.
        use_scale: enables the per-level scale/offset modules; the depth
            predictor has a bias only when this is False.
        depth_scale_init_factor: multiplied by each level's depth std to
            initialise that level's depth ``Scale``.
        proj_ctr_scale_init_factor: multiplied by each level's stride to
            initialise that level's projected-centre ``Scale``.
        use_per_level_predictors: build one predictor per level instead of a
            single shared one.
        class_agnostic: predict a single set of channels instead of one per class.
        use_deformable: must be falsy; a truthy value raises ValueError.
        mean_depth_per_level: per-level depth means, stored as a buffer.
        std_depth_per_level: per-level depth stds, stored as a buffer.
            NOTE(review): both depth arguments are passed unconditionally to
            ``torch.Tensor``, so the ``None`` defaults are presumably not
            usable and callers must supply them. Confirm.
    """

    def __init__(self,
                 num_classes,
                 input_shape,
                 num_convs=4,
                 norm='BN',
                 use_scale=True,
                 depth_scale_init_factor=0.3,
                 proj_ctr_scale_init_factor=1.0,
                 use_per_level_predictors=False,
                 class_agnostic=False,
                 use_deformable=False,
                 mean_depth_per_level=None,
                 std_depth_per_level=None,
                 ):
        super().__init__()
        self.num_classes = num_classes
        self.in_strides = [shape.stride for shape in input_shape]
        self.num_levels = len(input_shape)

        self.use_scale = use_scale
        self.depth_scale_init_factor = depth_scale_init_factor
        self.proj_ctr_scale_init_factor = proj_ctr_scale_init_factor
        self.use_per_level_predictors = use_per_level_predictors

        self.register_buffer("mean_depth_per_level", torch.Tensor(mean_depth_per_level))
        self.register_buffer("std_depth_per_level", torch.Tensor(std_depth_per_level))

        in_channels = [s.channels for s in input_shape]
        assert len(set(in_channels)) == 1, "Each level must have the same channel!"
        in_channels = in_channels[0]

        if use_deformable:
            raise ValueError("Not supported yet.")

        box3d_tower = []
        for i in range(num_convs):
            if norm in ("BN", "FrozenBN", "SyncBN", "GN"):
                # NOTE: need to add norm here!
                # Each FPN level has its own batchnorm layer.
                # NOTE: do not use dd3d train.py!
                # "BN" is converted to "SyncBN" in distributed training (see train.py)
                norm_layer = ModuleListDial([get_norm(norm, in_channels) for _ in range(self.num_levels)])
            else:
                norm_layer = get_norm(norm, in_channels)
            box3d_tower.append(
                Conv2d(
                    in_channels,
                    in_channels,
                    kernel_size=3,
                    stride=1,
                    padding=1,
                    # A conv bias is only used when no norm layer follows.
                    bias=norm_layer is None,
                    norm=norm_layer,
                    activation=F.relu
                )
            )
        self.add_module('box3d_tower', nn.Sequential(*box3d_tower))

        # These locals shadow the constructor argument / attribute on purpose:
        # they are the effective counts used to size the predictors below.
        num_classes = self.num_classes if not class_agnostic else 1
        num_levels = self.num_levels if use_per_level_predictors else 1

        # 3D box branches.
        self.box3d_quat = nn.ModuleList([
            Conv2d(in_channels, 4 * num_classes, kernel_size=3, stride=1, padding=1, bias=True)
            for _ in range(num_levels)
        ])
        self.box3d_ctr = nn.ModuleList([
            Conv2d(in_channels, 2 * num_classes, kernel_size=3, stride=1, padding=1, bias=True)
            for _ in range(num_levels)
        ])
        self.box3d_depth = nn.ModuleList([
            Conv2d(in_channels, 1 * num_classes, kernel_size=3, stride=1, padding=1, bias=(not self.use_scale))
            for _ in range(num_levels)
        ])
        self.box3d_size = nn.ModuleList([
            Conv2d(in_channels, 3 * num_classes, kernel_size=3, stride=1, padding=1, bias=True)
            for _ in range(num_levels)
        ])
        self.box3d_conf = nn.ModuleList([
            Conv2d(in_channels, 1 * num_classes, kernel_size=3, stride=1, padding=1, bias=True)
            for _ in range(num_levels)
        ])

        if self.use_scale:
            # The scale/offset lists always have one entry per level, even
            # when the predictors themselves are shared across levels.
            self.scales_proj_ctr = nn.ModuleList([
                Scale(init_value=stride * self.proj_ctr_scale_init_factor) for stride in self.in_strides
            ])
            # (pre-)compute (mean, std) of depth for each level, and determine the init value here.
            self.scales_size = nn.ModuleList([Scale(init_value=1.0) for _ in range(self.num_levels)])
            self.scales_conf = nn.ModuleList([Scale(init_value=1.0) for _ in range(self.num_levels)])

            self.scales_depth = nn.ModuleList([
                Scale(init_value=sigma * self.depth_scale_init_factor) for sigma in self.std_depth_per_level
            ])
            self.offsets_depth = nn.ModuleList([Offset(init_value=b) for b in self.mean_depth_per_level])

        self._init_weights()

    def _init_weights(self):
        """Initialise conv weights of the tower and the predictors.

        Tower convolutions use Kaiming-normal (fan_out, relu); predictor
        convolutions use Kaiming-uniform with ``a=1``. Existing biases are
        set to zero.
        """

        for l in self.box3d_tower.modules():
            if isinstance(l, nn.Conv2d):
                torch.nn.init.kaiming_normal_(l.weight, mode='fan_out', nonlinearity='relu')
                if l.bias is not None:
                    torch.nn.init.constant_(l.bias, 0)

        predictors = [self.box3d_quat, self.box3d_ctr, self.box3d_depth, self.box3d_size, self.box3d_conf]

        for modules in predictors:
            for l in modules.modules():
                if isinstance(l, nn.Conv2d):
                    torch.nn.init.kaiming_uniform_(l.weight, a=1)
                    if l.bias is not None:  # depth head may not have bias.
                        torch.nn.init.constant_(l.bias, 0)

    def forward(self, x):
        """Apply the head to every feature level in ``x``.

        Returns:
            Tuple ``(box3d_quat, box3d_ctr, box3d_depth, box3d_size,
            box3d_conf, dense_depth)``. The first five are lists with one
            tensor per level; ``dense_depth`` is always ``None`` here.
        """
        box3d_quat, box3d_ctr, box3d_depth, box3d_size, box3d_conf = [], [], [], [], []
        dense_depth = None
        for l, features in enumerate(x):
            box3d_tower_out = self.box3d_tower(features)

            # Predictors are indexed by level only when they are per-level;
            # the scale/offset modules below are always indexed by level.
            _l = l if self.use_per_level_predictors else 0

            # 3D box
            quat = self.box3d_quat[_l](box3d_tower_out)
            proj_ctr = self.box3d_ctr[_l](box3d_tower_out)
            depth = self.box3d_depth[_l](box3d_tower_out)
            size3d = self.box3d_size[_l](box3d_tower_out)
            conf3d = self.box3d_conf[_l](box3d_tower_out)

            if self.use_scale:
                # TODO: to optimize the runtime, apply this scaling in inference (and loss compute) only on FG pixels?
                proj_ctr = self.scales_proj_ctr[l](proj_ctr)
                size3d = self.scales_size[l](size3d)
                conf3d = self.scales_conf[l](conf3d)
                # Depth is scaled first and then offset.
                depth = self.offsets_depth[l](self.scales_depth[l](depth))

            box3d_quat.append(quat)
            box3d_ctr.append(proj_ctr)
            box3d_depth.append(depth)
            box3d_size.append(size3d)
            box3d_conf.append(conf3d)

        return box3d_quat, box3d_ctr, box3d_depth, box3d_size, box3d_conf, dense_depth
class FCOS3DLoss(nn.Module):
    """Loss for the 3D box predictions: disentangled box regression plus a 3D-confidence term.

    The regression part is delegated to ``DisentangledBox3DLoss``; the confidence part is a
    binary cross-entropy against ``exp(-error / temperature)``, weighted by the centerness targets.
    """

    def __init__(
        self,
        num_classes,
        min_depth=0.1,
        max_depth=80.0,
        box3d_loss_weight=2.0,
        conf3d_loss_weight=1.0,
        conf_3d_temperature=1.0,
        smooth_l1_loss_beta=0.05,
        max_loss_per_group=20,
        predict_allocentric_rot=True,
        scale_depth_by_focal_lengths=True,
        scale_depth_by_focal_lengths_factor=500.0,
        class_agnostic=False,
        predict_distance=False,
        canon_box_sizes=None
    ):
        super().__init__()

        # Class layout of the prediction tensors.
        self.num_classes = num_classes
        self.class_agnostic = class_agnostic

        # Settings forwarded verbatim to predictions_to_boxes3d().
        self.canon_box_sizes = canon_box_sizes
        self.min_depth = min_depth
        self.max_depth = max_depth
        self.predict_allocentric_rot = predict_allocentric_rot
        self.scale_depth_by_focal_lengths = scale_depth_by_focal_lengths
        self.scale_depth_by_focal_lengths_factor = scale_depth_by_focal_lengths_factor
        self.predict_distance = predict_distance

        # Loss function and weights.
        self.box3d_reg_loss_fn = DisentangledBox3DLoss(smooth_l1_loss_beta, max_loss_per_group)
        self.box3d_loss_weight = box3d_loss_weight
        self.conf3d_loss_weight = conf3d_loss_weight
        self.conf_3d_temperature = conf_3d_temperature

    @staticmethod
    def _zero_loss(levels):
        """Return a scalar zero that still depends on every tensor in `levels`."""
        return torch.stack([x.sum() * 0. for x in levels]).sum()

    @staticmethod
    def _flatten_levels(levels, trailing_shape):
        """Move channels last, reshape each level to (-1, *trailing_shape) and concatenate the levels."""
        return cat([x.permute(0, 2, 3, 1).reshape(-1, *trailing_shape) for x in levels])

    @staticmethod
    def _take_class(pred, class_idx):
        """Pick, along the last axis, the slice selected by `class_idx` (shape (P, 1, 1))."""
        if pred.dim() == 3:
            expanded = class_idx.repeat(1, pred.shape[1], 1)
            return torch.gather(pred, dim=2, index=expanded).squeeze(-1)
        return torch.gather(pred, dim=1, index=class_idx.squeeze(-1)).squeeze(-1)

    @force_fp32(apply_to=('box3d_quat', 'box3d_ctr', 'box3d_depth', 'box3d_size', 'box3d_conf', 'inv_intrinsics'))
    def forward(
        self, box3d_quat, box3d_ctr, box3d_depth, box3d_size, box3d_conf, dense_depth, inv_intrinsics, fcos2d_info,
        targets
    ):
        """Compute the 3D losses.

        Args:
            box3d_quat, box3d_ctr, box3d_depth, box3d_size, box3d_conf: per-level prediction
                tensors; each one is permuted with (0, 2, 3, 1) and flattened here.
            dense_depth: accepted for interface compatibility; not used by this loss.
            inv_intrinsics: indexed by the image index of every positive location.
            fcos2d_info: dict providing "centerness_targets" and "loss_denom".
            targets: dict providing "labels", "box3d_targets", "pos_inds", "locations", "im_inds".

        Returns:
            dict with "loss_conf3d" plus the entries returned by the box regression loss, or --
            when there is no positive location -- five zero-valued entries.

        Raises:
            ValueError: if "labels" and "box3d_targets" differ in length.
        """
        labels = targets['labels']
        box3d_targets = targets['box3d_targets']
        pos_inds = targets["pos_inds"]

        if pos_inds.numel() == 0:
            # No foreground: emit zeros that keep every prediction tensor in the graph.
            per_level_preds = {
                "loss_box3d_quat": box3d_quat,
                "loss_box3d_proj_ctr": box3d_ctr,
                "loss_box3d_depth": box3d_depth,
                "loss_box3d_size": box3d_size,
                "loss_conf3d": box3d_conf,
            }
            return {name: self._zero_loss(levels) for name, levels in per_level_preds.items()}

        if len(labels) != len(box3d_targets):
            raise ValueError(
                f"The size of 'labels' and 'box3d_targets' does not match: a={len(labels)}, b={len(box3d_targets)}"
            )

        head_classes = 1 if self.class_agnostic else self.num_classes
        fg_labels = labels[pos_inds]

        # ----------------------
        # 3D box disentangled loss
        # ----------------------
        box3d_targets = box3d_targets[pos_inds]
        quat_pred = self._flatten_levels(box3d_quat, (4, head_classes))[pos_inds]
        ctr_pred = self._flatten_levels(box3d_ctr, (2, head_classes))[pos_inds]
        depth_pred = self._flatten_levels(box3d_depth, (head_classes, ))[pos_inds]
        size_pred = self._flatten_levels(box3d_size, (3, head_classes))[pos_inds]
        conf_pred = self._flatten_levels(box3d_conf, (head_classes, ))[pos_inds]

        if self.class_agnostic:
            # Single shared predictor: just drop the trailing class axis.
            quat_pred = quat_pred.squeeze(-1)
            ctr_pred = ctr_pred.squeeze(-1)
            depth_pred = depth_pred.squeeze(-1)
            size_pred = size_pred.squeeze(-1)
            conf_pred = conf_pred.squeeze(-1)
        else:
            # Keep only the prediction that belongs to each location's target class.
            class_idx = fg_labels[..., None, None]
            quat_pred = self._take_class(quat_pred, class_idx)
            ctr_pred = self._take_class(ctr_pred, class_idx)
            depth_pred = self._take_class(depth_pred, class_idx)
            size_pred = self._take_class(size_pred, class_idx)
            conf_pred = self._take_class(conf_pred, class_idx)

        canon_box_sizes = quat_pred.new_tensor(self.canon_box_sizes)[fg_labels]

        locations = targets["locations"][pos_inds]
        im_inds = targets["im_inds"][pos_inds]
        inv_intrinsics = inv_intrinsics[im_inds]

        box3d_pred = predictions_to_boxes3d(
            quat_pred,
            ctr_pred,
            depth_pred,
            size_pred,
            locations,
            inv_intrinsics,
            canon_box_sizes,
            self.min_depth,
            self.max_depth,
            scale_depth_by_focal_lengths_factor=self.scale_depth_by_focal_lengths_factor,
            scale_depth_by_focal_lengths=self.scale_depth_by_focal_lengths,
            quat_is_allocentric=self.predict_allocentric_rot,
            depth_is_distance=self.predict_distance
        )

        centerness_targets = fcos2d_info["centerness_targets"]
        loss_denom = fcos2d_info["loss_denom"]

        raw_box3d_losses, box3d_l1_error = self.box3d_reg_loss_fn(
            box3d_pred, box3d_targets, locations, centerness_targets
        )
        weighted_box3d_losses = {
            name: self.box3d_loss_weight * value / loss_denom
            for name, value in raw_box3d_losses.items()
        }

        # Confidence target decays exponentially with the regression error.
        conf_3d_targets = torch.exp(-1. / self.conf_3d_temperature * box3d_l1_error)
        conf_bce = F.binary_cross_entropy_with_logits(conf_pred, conf_3d_targets, reduction='none')
        loss_conf3d = self.conf3d_loss_weight * (conf_bce * centerness_targets).sum() / loss_denom

        return {"loss_conf3d": loss_conf3d, **weighted_box3d_losses}
class FCOS3DInference:
    """Attach 3D boxes and 3D scores to the per-level, per-image 2D detections (in place)."""

    def __init__(self, cfg):
        fcos3d_cfg = cfg.DD3D.FCOS3D

        self.canon_box_sizes = fcos3d_cfg.CANONICAL_BOX3D_SIZES
        self.min_depth = fcos3d_cfg.MIN_DEPTH
        self.max_depth = fcos3d_cfg.MAX_DEPTH
        self.predict_allocentric_rot = fcos3d_cfg.PREDICT_ALLOCENTRIC_ROT
        self.scale_depth_by_focal_lengths = fcos3d_cfg.SCALE_DEPTH_BY_FOCAL_LENGTHS
        self.scale_depth_by_focal_lengths_factor = fcos3d_cfg.SCALE_DEPTH_BY_FOCAL_LENGTHS_FACTOR
        self.predict_distance = fcos3d_cfg.PREDICT_DISTANCE

        self.num_classes = cfg.DD3D.NUM_CLASSES
        self.class_agnostic = fcos3d_cfg.CLASS_AGNOSTIC_BOX3D

    @staticmethod
    def _take_class(pred, class_idx):
        """Pick, along the last axis, the slice selected by `class_idx` (shape (P, 1, 1))."""
        if pred.dim() == 3:
            expanded = class_idx.repeat(1, pred.shape[1], 1)
            return torch.gather(pred, dim=2, index=expanded).squeeze(-1)
        return torch.gather(pred, dim=1, index=class_idx.squeeze(-1)).squeeze(-1)

    def __call__(
        self, box3d_quat, box3d_ctr, box3d_depth, box3d_size, box3d_conf, inv_intrinsics, pred_instances, fcos2d_info
    ):
        """Process every feature level; `pred_instances` and `fcos2d_info` are indexed by level.

        Nothing is returned: the instances in `pred_instances` are updated in place.
        """
        # pred_instances: # List[List[Instances]], shape = (L, B)
        per_level_preds = zip(box3d_quat, box3d_ctr, box3d_depth, box3d_size, box3d_conf)
        for lvl, level_preds in enumerate(per_level_preds):
            # In-place modification: update per-level pred_instances.
            self.forward_for_single_feature_map(*level_preds, inv_intrinsics, pred_instances[lvl], fcos2d_info[lvl])

    def forward_for_single_feature_map(
        self, box3d_quat, box3d_ctr, box3d_depth, box3d_size, box3d_conf, inv_intrinsics, pred_instances, fcos2d_info
    ):
        """Decode the 3D predictions of one feature level and store them on each image's instances.

        For image `i`, the predictions are restricted to `fcos2d_info['fg_inds_per_im'][i]`, reduced
        to the detected class (unless class-agnostic), optionally filtered by
        `fcos2d_info['topk_indices'][i]`, and converted with `predictions_to_boxes3d`. The results
        are written to `pred_instances[i].pred_boxes3d` and `pred_instances[i].scores_3d`.
        """
        N = box3d_quat.shape[0]
        head_classes = 1 if self.class_agnostic else self.num_classes

        def channels_last(x, *trailing):
            return x.permute(0, 2, 3, 1).reshape(N, -1, *trailing)

        box3d_quat = channels_last(box3d_quat, 4, head_classes)
        box3d_ctr = channels_last(box3d_ctr, 2, head_classes)
        box3d_depth = channels_last(box3d_depth, head_classes)
        box3d_size = channels_last(box3d_size, 3, head_classes)
        box3d_conf = channels_last(box3d_conf, head_classes).sigmoid()

        for i in range(N):
            instances = pred_instances[i]
            fg_inds = fcos2d_info['fg_inds_per_im'][i]
            class_inds = fcos2d_info['class_inds_per_im'][i]
            topk_indices = fcos2d_info['topk_indices'][i]

            # Order: quaternion, projected center, depth, size, confidence.
            per_im = [
                box3d_quat[i][fg_inds],
                box3d_ctr[i][fg_inds],
                box3d_depth[i][fg_inds],
                box3d_size[i][fg_inds],
                box3d_conf[i][fg_inds],
            ]

            if self.class_agnostic:
                per_im = [pred.squeeze(-1) for pred in per_im]
            else:
                class_idx = class_inds[..., None, None]
                per_im = [self._take_class(pred, class_idx) for pred in per_im]

            if topk_indices is not None:
                per_im = [pred[topk_indices] for pred in per_im]

            quat_i, ctr_i, depth_i, size_i, conf_i = per_im

            # scores_per_im = pred_instances[i].scores.square()
            # NOTE: Before refactoring, the squared score was used. Is raw 2D score better?
            scores_3d_i = instances.scores * conf_i

            canon_box_sizes = box3d_quat.new_tensor(self.canon_box_sizes)[instances.pred_classes]
            inv_K = inv_intrinsics[i][None, ...].expand(len(quat_i), 3, 3)
            locations = instances.locations

            boxes3d_i = predictions_to_boxes3d(
                quat_i,
                ctr_i,
                depth_i,
                size_i,
                locations,
                inv_K,
                canon_box_sizes,
                self.min_depth,
                self.max_depth,
                scale_depth_by_focal_lengths_factor=self.scale_depth_by_focal_lengths_factor,
                scale_depth_by_focal_lengths=self.scale_depth_by_focal_lengths,
                quat_is_allocentric=self.predict_allocentric_rot,
                depth_is_distance=self.predict_distance
            )

            # In-place modification: add fields to instances.
            instances.pred_boxes3d = boxes3d_i
            instances.scores_3d = scores_3d_i
docker-hub/BEVFormer/BEVFormer/projects/mmdet3d_plugin/dd3d/modeling/nuscenes_dd3d.py
0 → 100644
View file @
007f2e68
# Copyright 2021 Toyota Research Institute. All rights reserved.
import
torch
import
torch.nn.functional
as
F
from
fvcore.nn.smooth_l1_loss
import
smooth_l1_loss
from
torch
import
nn
from
detectron2.layers
import
Conv2d
,
cat
#from detectron2.modeling.meta_arch.build import META_ARCH_REGISTRY
from
detectron2.modeling.postprocessing
import
detector_postprocess
as
resize_instances
from
detectron2.structures
import
Instances
from
detectron2.utils
import
comm
as
d2_comm
from
mmdet.models.builder
import
HEADS
from
mmcv.runner
import
force_fp32
from
projects.mmdet3d_plugin.dd3d.datasets.nuscenes
import
MAX_NUM_ATTRIBUTES
from
.core
import
DD3D
#from tridet.modeling.dd3d.postprocessing import get_group_idxs, nuscenes_sample_aggregate
from
.prepare_targets
import
DD3DTargetPreparer
from
projects.mmdet3d_plugin.dd3d.structures.boxes3d
import
Boxes3D
from
projects.mmdet3d_plugin.dd3d.structures.image_list
import
ImageList
from
projects.mmdet3d_plugin.dd3d.utils.comm
import
reduce_sum
# Sentinel "area" written into the location-to-ground-truth area matrix for pairs that must not
# be matched; a location whose minimum area is still INF is labelled as background.
INF = 100000000.
class NuscenesDD3DTargetPreparer(DD3DTargetPreparer):
    """Target preparer that additionally produces per-location NuScenes attribute and speed targets.

    Requires the base preparer to be built with 3D boxes enabled (`dd3d_enabled`).
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        assert self.dd3d_enabled, f"{type(self).__name__} requires dd3d_enabled = True"

    def __call__(self, locations, gt_instances, feature_shapes):
        """Build flattened training targets for all feature levels and images.

        Args:
            locations: list with one (num_locations, 2) tensor per feature level.
            gt_instances: list with one ground-truth container per image.
            feature_shapes: per-level (h, w); only used to reshape optional "box2d" targets.

        Returns:
            dict with "labels", "box2d_reg_targets", "locations", "target_inds", "im_inds",
            "fpn_levels", "pos_inds", "box3d_targets", "attributes", "speeds" and, when the
            per-location targets contain "box2d", "batched_box2d".
        """
        num_loc_list = [len(loc) for loc in locations]

        # compute locations to size ranges
        loc_to_size_range = []
        for lvl, loc_per_level in enumerate(locations):
            size_range = loc_per_level.new_tensor(self.sizes_of_interest[lvl])
            loc_to_size_range.append(size_range[None].expand(num_loc_list[lvl], -1))

        loc_to_size_range = torch.cat(loc_to_size_range, dim=0)
        locations = torch.cat(locations, dim=0)

        training_targets = self.compute_targets_for_locations(locations, gt_instances, loc_to_size_range, num_loc_list)

        num_images = len(gt_instances)
        training_targets["locations"] = [locations.clone() for _ in range(num_images)]
        training_targets["im_inds"] = [
            locations.new_ones(locations.size(0), dtype=torch.long) * i for i in range(num_images)
        ]

        # "box2d" (if present) keeps its image-first layout and is handled separately below.
        box2d = training_targets.pop("box2d", None)

        # transpose im first training_targets to level first ones
        training_targets = {k: self._transpose(v, num_loc_list) for k, v in training_targets.items()}

        training_targets["fpn_levels"] = [
            loc.new_ones(len(loc), dtype=torch.long) * level for level, loc in enumerate(training_targets["locations"])
        ]

        # Flatten targets: (L x B x H x W, TARGET_SIZE)
        labels = cat([x.reshape(-1) for x in training_targets["labels"]])
        box2d_reg_targets = cat([x.reshape(-1, 4) for x in training_targets["box2d_reg"]])

        target_inds = cat([x.reshape(-1) for x in training_targets["target_inds"]])
        locations = cat([x.reshape(-1, 2) for x in training_targets["locations"]])
        im_inds = cat([x.reshape(-1) for x in training_targets["im_inds"]])
        fpn_levels = cat([x.reshape(-1) for x in training_targets["fpn_levels"]])

        # Background locations carry the label `num_classes`.
        pos_inds = torch.nonzero(labels != self.num_classes).squeeze(1)

        targets = {
            "labels": labels,
            "box2d_reg_targets": box2d_reg_targets,
            "locations": locations,
            "target_inds": target_inds,
            "im_inds": im_inds,
            "fpn_levels": fpn_levels,
            "pos_inds": pos_inds
        }

        if self.dd3d_enabled:
            box3d_targets = Boxes3D.cat(training_targets["box3d"])
            targets.update({"box3d_targets": box3d_targets})

            if box2d is not None:
                # Original format is B x L x (H x W, 4)
                # Need to be in L x (B, 4, H, W).
                batched_box2d = []
                for lvl, per_lvl_box2d in enumerate(zip(*box2d)):
                    # B x (H x W, 4)
                    h, w = feature_shapes[lvl]
                    batched_box2d_lvl = torch.stack([x.T.reshape(4, h, w) for x in per_lvl_box2d], dim=0)
                    batched_box2d.append(batched_box2d_lvl)
                targets.update({"batched_box2d": batched_box2d})

        # Nuscenes targets -- attribute / speed
        attributes = cat([x.reshape(-1) for x in training_targets["attributes"]])
        speeds = cat([x.reshape(-1) for x in training_targets["speeds"]])

        targets.update({'attributes': attributes, 'speeds': speeds})

        return targets

    def compute_targets_for_locations(self, locations, targets, size_ranges, num_loc_list):
        """Assign every location of every image to at most one ground-truth box.

        A location is matched to the smallest-area box among those that contain it (or whose
        sample region contains it, when `center_sample` is set) and whose largest regression
        distance lies inside the location's size range. Unmatched locations get the label
        `num_classes`.

        Args:
            locations: (num_locations, 2) tensor of x/y coordinates, all levels concatenated.
            targets: list with one ground-truth container per image, exposing `gt_boxes`,
                `gt_classes`, `gt_boxes3d`, `gt_attributes` and `gt_speeds`.
            size_ranges: (num_locations, 2) tensor of [min, max] regression ranges.
            num_loc_list: number of locations per level, forwarded to `get_sample_region`.

        Returns:
            dict of per-image lists: "labels", "box2d_reg", "target_inds", "attributes",
            "speeds" and, when 3D boxes are enabled, "box3d".
        """
        labels = []
        box2d_reg = []
        box3d = []
        target_inds = []

        # NuScenes targets -- attribute / speed
        attributes, speeds = [], []

        xs, ys = locations[:, 0], locations[:, 1]
        num_locations = locations.size(0)

        num_targets = 0
        for targets_per_im in targets:
            bboxes = targets_per_im.gt_boxes.tensor
            labels_per_im = targets_per_im.gt_classes

            # no gt
            if bboxes.numel() == 0:
                labels.append(labels_per_im.new_zeros(num_locations) + self.num_classes)
                # reg_targets.append(locations.new_zeros((locations.size(0), 4)))
                box2d_reg.append(locations.new_zeros((num_locations, 4)))
                target_inds.append(labels_per_im.new_zeros(num_locations) - 1)

                if self.dd3d_enabled:
                    box3d.append(
                        Boxes3D(
                            locations.new_zeros(num_locations, 4),
                            locations.new_zeros(num_locations, 2),
                            locations.new_zeros(num_locations, 1),
                            locations.new_zeros(num_locations, 3),
                            locations.new_zeros(num_locations, 3, 3),
                        ).to(torch.float32)
                    )

                # NOTE: attributes and speeds.
                attributes.append(labels_per_im.new_zeros(num_locations))
                # Speeds are floating point (ground truth may be NaN), so the placeholder is built
                # from `locations` rather than from the integer-typed labels; otherwise tensors of
                # different dtypes end up being concatenated across images.
                speeds.append(locations.new_zeros(num_locations))
                continue

            area = targets_per_im.gt_boxes.area()

            # Signed distances from every location to the four sides of every box.
            left = xs[:, None] - bboxes[:, 0][None]
            top = ys[:, None] - bboxes[:, 1][None]
            right = bboxes[:, 2][None] - xs[:, None]
            bottom = bboxes[:, 3][None] - ys[:, None]
            # reg_targets_per_im = torch.stack([l, t, r, b], dim=2)
            box2d_reg_per_im = torch.stack([left, top, right, bottom], dim=2)

            if self.center_sample:
                is_in_boxes = self.get_sample_region(bboxes, num_loc_list, xs, ys)
            else:
                is_in_boxes = box2d_reg_per_im.min(dim=2)[0] > 0

            max_reg_targets_per_im = box2d_reg_per_im.max(dim=2)[0]
            # limit the regression range for each location
            is_cared_in_the_level = \
                (max_reg_targets_per_im >= size_ranges[:, [0]]) & \
                (max_reg_targets_per_im <= size_ranges[:, [1]])

            locations_to_gt_area = area[None].repeat(len(locations), 1)
            locations_to_gt_area[is_in_boxes == 0] = INF
            locations_to_gt_area[is_cared_in_the_level == 0] = INF

            # if there are still more than one objects for a location,
            # we choose the one with minimal area
            locations_to_min_area, locations_to_gt_inds = locations_to_gt_area.min(dim=1)

            box2d_reg_per_im = box2d_reg_per_im[range(len(locations)), locations_to_gt_inds]
            # Offset so that target indices are unique across the images of the batch.
            target_inds_per_im = locations_to_gt_inds + num_targets
            num_targets += len(targets_per_im)

            labels_per_im = labels_per_im[locations_to_gt_inds]
            labels_per_im[locations_to_min_area == INF] = self.num_classes

            labels.append(labels_per_im)
            box2d_reg.append(box2d_reg_per_im)
            target_inds.append(target_inds_per_im)

            if self.dd3d_enabled:
                # 3D box targets
                box3d.append(targets_per_im.gt_boxes3d[locations_to_gt_inds])

            # NuScenes targets -- attribute / speed
            attributes.append(targets_per_im.gt_attributes[locations_to_gt_inds])
            speeds.append(targets_per_im.gt_speeds[locations_to_gt_inds])

        ret = {"labels": labels, "box2d_reg": box2d_reg, "target_inds": target_inds}
        if self.dd3d_enabled:
            ret.update({"box3d": box3d})

        # NuScenes targets -- attribute / speed
        ret.update({"attributes": attributes, "speeds": speeds})

        return ret
class NuscenesLoss(nn.Module):
    """Centerness-weighted losses for the NuScenes attribute (cross-entropy) and speed (smooth L1) heads."""

    def __init__(self, attr_loss_weight=0.2, speed_loss_weight=0.2):
        super().__init__()
        self.attr_loss_weight = attr_loss_weight
        self.speed_loss_weight = speed_loss_weight

    @force_fp32(apply_to=('attr_logits', 'speeds'))
    def forward(self, attr_logits, speeds, fcos2d_info, targets):
        """Compute "loss_attr" and "loss_speed".

        Args:
            attr_logits: per-level attribute logits; flattened to (-1, MAX_NUM_ATTRIBUTES).
            speeds: per-level speed predictions; flattened to 1-D.
            fcos2d_info: dict providing "centerness_targets" for the positive locations.
            targets: dict providing "pos_inds", "attributes" and "speeds".

        Returns:
            dict with the keys "loss_attr" and "loss_speed". Each is a zero that still depends
            on the predictions when no valid target exists for it.
        """
        # Flatten predictions
        attr_logits = cat([x.permute(0, 2, 3, 1).reshape(-1, MAX_NUM_ATTRIBUTES) for x in attr_logits])
        speeds = cat([x.permute(0, 2, 3, 1).reshape(-1) for x in speeds])

        pos_inds = targets['pos_inds']
        has_positives = pos_inds.numel() != 0

        losses = {}

        # 1. Attributes
        attr_logits = attr_logits[pos_inds]
        target_attr = targets['attributes'][pos_inds]
        # No attrs associated with class, or just attr missing.
        valid_attr_mask = target_attr != MAX_NUM_ATTRIBUTES

        if has_positives:
            attr_weights = fcos2d_info['centerness_targets'][valid_attr_mask]
        else:
            attr_weights = attr_logits.new_tensor(0.0)
        # Denominator for all foreground losses -- re-computed for features with valid attributes.
        # NOTE: compute attr_weights_sum, and then feed it to reduce_sum() works, but not
        # reduce_sum(attr_weights.sum()) directly.
        # The reduction is executed even without positives, before any early return.
        attr_weights_sum = attr_weights.sum()
        attr_loss_denom = max(reduce_sum(attr_weights_sum).item() / d2_comm.get_world_size(), 1e-6)

        if valid_attr_mask.sum() == 0:
            losses.update({"loss_attr": attr_logits.sum() * 0.})
        else:
            attr_logits = attr_logits[valid_attr_mask]
            target_attr = target_attr[valid_attr_mask]

            # Per-location cross-entropy. With the default 'mean' reduction the centerness weights
            # would multiply a scalar and have no per-sample effect, unlike the speed loss below,
            # which is weighted element-wise.
            xent = F.cross_entropy(attr_logits, target_attr, reduction='none')
            loss_attr = (xent * attr_weights).sum() / attr_loss_denom

            losses.update({"loss_attr": self.attr_loss_weight * loss_attr})

        # 2. Speed
        speeds = speeds[pos_inds]
        target_speeds = targets['speeds'][pos_inds]
        # NOTE: some GT speeds are NaN.
        valid_gt_mask = torch.logical_not(torch.isnan(target_speeds))

        if has_positives:
            speed_weights = fcos2d_info['centerness_targets'][valid_gt_mask]
        else:
            speed_weights = speeds.new_tensor(0.0)
        # Denominator for all foreground losses -- re-computed for features with valid speeds.
        speed_weights_sum = speed_weights.sum()
        speed_loss_denom = max(reduce_sum(speed_weights_sum).item() / d2_comm.get_world_size(), 1e-6)

        # NOTE: move after reduce sum
        if not has_positives:
            # NOTE: This is probably un-reachable, because the training filter images with empty annotations.
            return {"loss_attr": attr_logits.sum() * 0., "loss_speed": speeds.sum() * 0.}

        if valid_gt_mask.sum() == 0:
            losses.update({"loss_speed": speeds.sum() * 0.})
        else:
            speeds = speeds[valid_gt_mask]
            target_speeds = target_speeds[valid_gt_mask]

            l1_error = smooth_l1_loss(speeds, target_speeds, beta=0.05)
            loss_speed = (l1_error * speed_weights).sum() / speed_loss_denom
            losses.update({"loss_speed": self.speed_loss_weight * loss_speed})

        return losses
class NuscenesInference:
    """Attach NuScenes attribute and speed predictions to detected instances (in place)."""

    def __init__(self, cfg):
        # Nothing to configure; `cfg` is accepted for interface compatibility only.
        pass

    def __call__(self, attr_logits, speeds, pred_instances, fcos2d_info):
        """Add 'pred_attributes' and 'pred_speeds' to the Instances in 'pred_instances'.

        All four arguments are per-level sequences; the batch size is read from the first
        level of `attr_logits`.
        """
        N = attr_logits[0].shape[0]
        per_level = zip(attr_logits, speeds, fcos2d_info, pred_instances)
        for lvl, (attr_logits_lvl, speed_lvl, info_lvl, instances_lvl) in enumerate(per_level):
            # Channels last, then one row per location.
            attr_flat = attr_logits_lvl.permute(0, 2, 3, 1).reshape(N, -1, MAX_NUM_ATTRIBUTES)
            speed_flat = speed_lvl.permute(0, 2, 3, 1).reshape(N, -1)

            for i in range(N):
                instances = instances_lvl[i]
                fg_inds = info_lvl['fg_inds_per_im'][i]
                topk_indices = info_lvl['topk_indices'][i]

                attr_i = attr_flat[i][fg_inds]
                speed_i = speed_flat[i][fg_inds]
                if topk_indices is not None:
                    attr_i = attr_i[topk_indices]
                    speed_i = speed_i[topk_indices]

                if len(attr_i) != 0:
                    instances.pred_attributes = attr_i.argmax(dim=1)
                    instances.pred_speeds = speed_i
                else:
                    # No detection: empty tensors matching the dtype/device of existing fields.
                    instances.pred_attributes = instances.pred_classes.new_tensor([])
                    instances.pred_speeds = instances.scores.new_tensor([])
@HEADS.register_module()
class NuscenesDD3D(DD3D):
    """DD3D head extended with NuScenes attribute and speed predictions.

    Both extra predictions are 3x3 convolutions applied to the `cls_tower_out` features
    returned by the 2D head. Only training is supported; calling the module in eval mode
    raises `NotImplementedError`.
    """

    def __init__(
        self,
        num_classes,
        in_channels,
        strides,
        fcos2d_cfg=None,
        fcos2d_loss_cfg=None,
        fcos3d_cfg=None,
        fcos3d_loss_cfg=None,
        target_assign_cfg=None,
        nusc_loss_weight=None,
        box3d_on=True,
        feature_locations_offset="none"
    ):
        """Build the base DD3D head, the attribute/speed heads and the NuScenes target preparer/loss.

        The `*_cfg` arguments and `nusc_loss_weight` are keyword dictionaries; `None` (the
        default) is treated as an empty dictionary.
        """
        # Fresh dictionaries per call instead of shared mutable default arguments.
        fcos2d_cfg = {} if fcos2d_cfg is None else fcos2d_cfg
        fcos2d_loss_cfg = {} if fcos2d_loss_cfg is None else fcos2d_loss_cfg
        fcos3d_cfg = {} if fcos3d_cfg is None else fcos3d_cfg
        fcos3d_loss_cfg = {} if fcos3d_loss_cfg is None else fcos3d_loss_cfg
        target_assign_cfg = {} if target_assign_cfg is None else target_assign_cfg
        nusc_loss_weight = {} if nusc_loss_weight is None else nusc_loss_weight

        super().__init__(
            num_classes,
            in_channels,
            strides,
            fcos2d_cfg=fcos2d_cfg,
            fcos2d_loss_cfg=fcos2d_loss_cfg,
            fcos3d_cfg=fcos3d_cfg,
            fcos3d_loss_cfg=fcos3d_loss_cfg,
            target_assign_cfg=target_assign_cfg,
            box3d_on=box3d_on,
            feature_locations_offset=feature_locations_offset
        )

        # --------------------------------------------------------------------------
        # NuScenes predictions -- attribute / speed, computed from cls_tower output.
        # --------------------------------------------------------------------------
        self.attr_logits = Conv2d(in_channels, MAX_NUM_ATTRIBUTES, kernel_size=3, stride=1, padding=1, bias=True)
        self.speed = Conv2d(in_channels, 1, kernel_size=3, stride=1, padding=1, bias=True, activation=F.relu)

        # init weights
        for head in [self.attr_logits, self.speed]:
            for layer in head.modules():
                if isinstance(layer, nn.Conv2d):
                    torch.nn.init.kaiming_uniform_(layer.weight, a=1)
                    if layer.bias is not None:
                        torch.nn.init.constant_(layer.bias, 0)

        # Re-define target preparer
        del self.prepare_targets
        self.prepare_targets = NuscenesDD3DTargetPreparer(
            num_classes=num_classes, input_shape=self.backbone_output_shape, box3d_on=box3d_on, **target_assign_cfg
        )

        self.nuscenes_loss = NuscenesLoss(**nusc_loss_weight)
        # NOTE: inference later
        # self.nuscenes_inference = NuscenesInference(cfg)
        # self.num_images_per_sample = cfg.DD3D.NUSC.INFERENCE.NUM_IMAGES_PER_SAMPLE
        # NOTE: NuScenes evaluator allows max. 500 detections per sample.
        # self.max_num_dets_per_sample = cfg.DD3D.NUSC.INFERENCE.MAX_NUM_DETS_PER_SAMPLE

    # `apply_to` must be a tuple; the bare string 'features' only worked through substring matching.
    @force_fp32(apply_to=('features', ))
    def forward(self, features, batched_inputs):
        """Run the 2D/3D/NuScenes heads on precomputed features and return the training losses.

        Args:
            features: list of per-level feature maps (used directly; no backbone is run here).
            batched_inputs: list of per-image dicts. "inv_intrinsics" and "instances" are read
                when present in the first element.

        Returns:
            dict of losses: the 2D losses, the 3D losses (unless `only_box2d`), and the
            NuScenes attribute/speed losses.

        Raises:
            NotImplementedError: when the module is not in training mode.
        """
        device = features[0].device

        # NOTE: directly use inv_intrinsics
        if 'inv_intrinsics' in batched_inputs[0]:
            inv_intrinsics = torch.stack([x['inv_intrinsics'].to(device) for x in batched_inputs], dim=0)
        else:
            inv_intrinsics = None

        if "instances" in batched_inputs[0]:
            gt_instances = [x["instances"].to(device) for x in batched_inputs]
        else:
            gt_instances = None

        locations = self.compute_locations(features)
        logits, box2d_reg, centerness, fcos2d_extra_output = self.fcos2d_head(features)
        if not self.only_box2d:
            box3d_quat, box3d_ctr, box3d_depth, box3d_size, box3d_conf, dense_depth = self.fcos3d_head(features)

        # --------------------------------------------------------------------------
        # NuScenes predictions -- attribute / speed, computed from cls_tower output.
        # --------------------------------------------------------------------------
        attr_logits, speeds = [], []
        for x in fcos2d_extra_output['cls_tower_out']:
            attr_logits.append(self.attr_logits(x))
            speeds.append(self.speed(x))

        if not self.training:
            # TODO: do not support inference now
            raise NotImplementedError("NuscenesDD3D does not support inference yet.")

        assert gt_instances is not None
        feature_shapes = [x.shape[-2:] for x in features]
        training_targets = self.prepare_targets(locations, gt_instances, feature_shapes)

        losses = {}
        fcos2d_loss, fcos2d_info = self.fcos2d_loss(logits, box2d_reg, centerness, training_targets)
        losses.update(fcos2d_loss)

        if not self.only_box2d:
            fcos3d_loss = self.fcos3d_loss(
                box3d_quat, box3d_ctr, box3d_depth, box3d_size, box3d_conf, dense_depth, inv_intrinsics, fcos2d_info,
                training_targets
            )
            losses.update(fcos3d_loss)

        # Nuscenes loss -- attribute / speed
        nuscenes_loss = self.nuscenes_loss(attr_logits, speeds, fcos2d_info, training_targets)
        losses.update(nuscenes_loss)
        return losses
docker-hub/BEVFormer/BEVFormer/projects/mmdet3d_plugin/dd3d/modeling/prepare_targets.py
0 → 100644
View file @
007f2e68
# Copyright 2021 Toyota Research Institute. All rights reserved.
import
torch
from
detectron2.layers
import
cat
from
projects.mmdet3d_plugin.dd3d.structures.boxes3d
import
Boxes3D
# Large sentinel value.
# NOTE(review): presumably marks location/ground-truth pairs that must not be matched, as the
# NuScenes target preparer does with its own INF -- the use in this module is not shown here; confirm.
INF = 100000000.
class
DD3DTargetPreparer
():
def
__init__
(
self
,
num_classes
,
input_shape
,
box3d_on
=
True
,
center_sample
=
True
,
pos_radius
=
1.5
,
sizes_of_interest
=
None
):
self
.
num_classes
=
num_classes
self
.
center_sample
=
center_sample
self
.
strides
=
[
shape
.
stride
for
shape
in
input_shape
]
self
.
radius
=
pos_radius
self
.
dd3d_enabled
=
box3d_on
# generate sizes of interest
# NOTE:
# soi = []
# prev_size = -1
# for s in sizes_of_interest:
# soi.append([prev_size, s])
# prev_size = s
# soi.append([prev_size, INF])
self
.
sizes_of_interest
=
sizes_of_interest
def __call__(self, locations, gt_instances, feature_shapes):
    """Build flattened, level-first training targets for one batch.

    Args:
        locations: sequence with one location tensor per feature level; the
            tensors are concatenated along dim 0 and later reshaped to (-1, 2).
        gt_instances: per-image ground-truth containers, forwarded unchanged
            to `compute_targets_for_locations`.
        feature_shapes: per-level `(h, w)` pairs; only read when the computed
            targets contain a "box2d" entry.

    Returns:
        dict with the flattened tensors "labels", "box2d_reg_targets",
        "locations", "target_inds", "im_inds", "fpn_levels" and "pos_inds".
        When `self.dd3d_enabled` is set it also holds "box3d_targets" and,
        if a "box2d" entry was produced, "batched_box2d".
    """
    points_per_level = [len(level_locs) for level_locs in locations]

    # Every location inherits the size-of-interest range of its own level.
    size_range_per_loc = torch.cat(
        [
            level_locs.new_tensor(self.sizes_of_interest[lvl])[None].expand(points_per_level[lvl], -1)
            for lvl, level_locs in enumerate(locations)
        ],
        dim=0,
    )
    all_locations = torch.cat(locations, dim=0)

    per_image = self.compute_targets_for_locations(
        all_locations, gt_instances, size_range_per_loc, points_per_level
    )

    num_images = len(gt_instances)
    total_locs = all_locations.size(0)
    per_image["locations"] = [all_locations.clone() for _ in range(num_images)]
    per_image["im_inds"] = [
        all_locations.new_ones(total_locs, dtype=torch.long) * im_idx for im_idx in range(num_images)
    ]

    # "box2d" (if any) keeps its image-first layout and is handled separately below.
    box2d = per_image.pop("box2d", None)

    # Regroup every remaining entry from image-first to level-first.
    per_level = {key: self._transpose(value, points_per_level) for key, value in per_image.items()}

    per_level["fpn_levels"] = [
        level_locs.new_ones(len(level_locs), dtype=torch.long) * lvl
        for lvl, level_locs in enumerate(per_level["locations"])
    ]

    def _flatten(key, *trailing):
        # Concatenate all levels of one entry into a single (L x B x H x W, ...) tensor.
        return cat([chunk.reshape(-1, *trailing) for chunk in per_level[key]])

    labels = _flatten("labels")
    box2d_reg_targets = _flatten("box2d_reg", 4)
    target_inds = _flatten("target_inds")
    flat_locations = _flatten("locations", 2)
    im_inds = _flatten("im_inds")
    fpn_levels = _flatten("fpn_levels")

    # Label value `num_classes` marks background; everything else is a positive.
    pos_inds = torch.nonzero(labels != self.num_classes).squeeze(1)

    targets = {
        "labels": labels,
        "box2d_reg_targets": box2d_reg_targets,
        "locations": flat_locations,
        "target_inds": target_inds,
        "im_inds": im_inds,
        "fpn_levels": fpn_levels,
        "pos_inds": pos_inds
    }

    if self.dd3d_enabled:
        targets.update({"box3d_targets": Boxes3D.cat(per_level["box3d"])})

        # NOTE(review): this branch is placed under `dd3d_enabled`; the flattened
        # source does not show the nesting level -- confirm against the original file.
        if box2d is not None:
            # Incoming layout is B x L x (H x W, 4); convert to L x (B, 4, H, W).
            batched_box2d = []
            for lvl, boxes_of_level in enumerate(zip(*box2d)):
                h, w = feature_shapes[lvl]
                stacked = torch.stack([im_boxes.T.reshape(4, h, w) for im_boxes in boxes_of_level], dim=0)
                batched_box2d.append(stacked)
            targets.update({"batched_box2d": batched_box2d})

    return targets
def compute_targets_for_locations(self, locations, targets, size_ranges, num_loc_list):
    """Assign every location a ground-truth box (or background), image by image.

    Args:
        locations: tensor whose columns 0 and 1 are read as x and y.
        targets: per-image ground-truth objects exposing `gt_boxes` (with
            `.tensor` and `.area()`), `gt_classes`, `len()` and, when
            `self.dd3d_enabled` is set, `gt_boxes3d`.
        size_ranges: per-location tensor whose columns 0 and 1 bound the
            largest allowed regression distance.
        num_loc_list: number of locations per level, forwarded to
            `get_sample_region` when `self.center_sample` is set.

    Returns:
        dict of per-image lists "labels", "box2d_reg" and "target_inds",
        plus "box3d" when `self.dd3d_enabled` is set.
    """
    n_loc = locations.size(0)
    xs = locations[:, 0]
    ys = locations[:, 1]

    labels, box2d_reg, target_inds = [], [], []
    if self.dd3d_enabled:
        box3d = []

    # Running offset that makes target indices unique across the whole batch.
    num_targets = 0
    for targets_per_im in targets:
        bboxes = targets_per_im.gt_boxes.tensor
        labels_per_im = targets_per_im.gt_classes

        if bboxes.numel() == 0:
            # No ground truth in this image: all background, zero regression, index -1.
            labels.append(labels_per_im.new_zeros(n_loc) + self.num_classes)
            box2d_reg.append(locations.new_zeros((n_loc, 4)))
            target_inds.append(labels_per_im.new_zeros(n_loc) - 1)
            if self.dd3d_enabled:
                placeholder = Boxes3D(
                    locations.new_zeros(n_loc, 4),
                    locations.new_zeros(n_loc, 2),
                    locations.new_zeros(n_loc, 1),
                    locations.new_zeros(n_loc, 3),
                    locations.new_zeros(n_loc, 3, 3),
                )
                box3d.append(placeholder.to(torch.float32))
            continue

        area = targets_per_im.gt_boxes.area()

        # Signed distances from each location to the left/top/right/bottom box sides,
        # stacked into a (num_locations, num_gt, 4) tensor.
        x_col = xs[:, None]
        y_col = ys[:, None]
        side_dists = torch.stack(
            [
                x_col - bboxes[:, 0][None],
                y_col - bboxes[:, 1][None],
                bboxes[:, 2][None] - x_col,
                bboxes[:, 3][None] - y_col,
            ],
            dim=2,
        )

        if self.center_sample:
            is_in_boxes = self.get_sample_region(bboxes, num_loc_list, xs, ys)
        else:
            is_in_boxes = side_dists.min(dim=2)[0] > 0

        # Keep only boxes whose largest side distance fits the location's size range.
        max_side_dist = side_dists.max(dim=2)[0]
        lower_ok = max_side_dist >= size_ranges[:, [0]]
        upper_ok = max_side_dist <= size_ranges[:, [1]]
        is_cared_in_the_level = lower_ok & upper_ok

        area_per_loc = area[None].repeat(n_loc, 1)
        area_per_loc[is_in_boxes == 0] = INF
        area_per_loc[is_cared_in_the_level == 0] = INF

        # Ambiguous locations go to the candidate box with the smallest area.
        min_area, gt_idx = area_per_loc.min(dim=1)

        box2d_reg.append(side_dists[range(n_loc), gt_idx])
        target_inds.append(gt_idx + num_targets)
        num_targets += len(targets_per_im)

        labels_per_im = labels_per_im[gt_idx]
        # A minimum of INF means no box qualified, so the location is background.
        labels_per_im[min_area == INF] = self.num_classes
        labels.append(labels_per_im)

        if self.dd3d_enabled:
            box3d.append(targets_per_im.gt_boxes3d[gt_idx])

    ret = {"labels": labels, "box2d_reg": box2d_reg, "target_inds": target_inds}
    if self.dd3d_enabled:
        ret.update({"box3d": box3d})

    return ret
def get_sample_region(self, boxes, num_loc_list, loc_xs, loc_ys):
    """Mark which locations fall inside the centre region of each box.

    For every level the centre region is a square of half-width
    `self.strides[level] * self.radius` around the box centre, clipped to
    the box itself.

    Args:
        boxes: tensor with one row of four box coordinates per ground truth
            (columns 0/2 are averaged for the x centre, 1/3 for the y centre).
        num_loc_list: number of locations on each level, in the order the
            locations are laid out in `loc_xs` / `loc_ys`.
        loc_xs: x coordinate of every location.
        loc_ys: y coordinate of every location.

    Returns:
        Boolean tensor of shape (num_locations, num_boxes). If there are no
        boxes/locations, or the first box's x centre sums to zero over the
        locations, an all-zero uint8 tensor shaped like `loc_xs` is returned.
    """
    num_gts = boxes.shape[0]
    num_locs = len(loc_xs)

    ctr_x = boxes[..., [0, 2]].sum(dim=-1) * 0.5
    ctr_y = boxes[..., [1, 3]].sum(dim=-1) * 0.5

    # Broadcast the boxes and their centres over all locations.
    boxes = boxes[None].expand(num_locs, num_gts, 4)
    ctr_x = ctr_x[None].expand(num_locs, num_gts)
    ctr_y = ctr_y[None].expand(num_locs, num_gts)

    # no gt
    if ctr_x.numel() == 0 or ctr_x[..., 0].sum() == 0:
        return loc_xs.new_zeros(loc_xs.shape, dtype=torch.uint8)

    region = boxes.new_zeros(boxes.shape)
    start = 0
    for level, num_loc in enumerate(num_loc_list):
        stop = start + num_loc
        rows = slice(start, stop)
        half_width = self.strides[level] * self.radius

        x_lo = ctr_x[rows] - half_width
        y_lo = ctr_y[rows] - half_width
        x_hi = ctr_x[rows] + half_width
        y_hi = ctr_y[rows] + half_width

        box_x0 = boxes[rows, :, 0]
        box_y0 = boxes[rows, :, 1]
        box_x1 = boxes[rows, :, 2]
        box_y1 = boxes[rows, :, 3]

        # Clip the centre square so it never extends beyond the box.
        region[rows, :, 0] = torch.where(x_lo > box_x0, x_lo, box_x0)
        region[rows, :, 1] = torch.where(y_lo > box_y0, y_lo, box_y0)
        region[rows, :, 2] = torch.where(x_hi > box_x1, box_x1, x_hi)
        region[rows, :, 3] = torch.where(y_hi > box_y1, box_y1, y_hi)
        start = stop

    xs_col = loc_xs[:, None]
    ys_col = loc_ys[:, None]
    margins = torch.stack(
        (
            xs_col - region[..., 0],
            ys_col - region[..., 1],
            region[..., 2] - xs_col,
            region[..., 3] - ys_col,
        ),
        -1,
    )
    # A location is inside only if it is strictly within all four sides.
    return margins.min(-1)[0] > 0
def _transpose(self, training_targets, num_loc_list):
    '''
    Transpose image-first training targets to level-first ones.

    Each per-image entry is split along dim 0 into chunks of the sizes given
    by `num_loc_list`; the chunks belonging to the same level are then
    concatenated across images. `Boxes3D` entries go through `Boxes3D.split`
    / `Boxes3D.cat`, everything else through `torch.split` / `torch.cat`.
    The type of the first entry decides which path is taken.

    Unlike a per-index overwrite, the input list is left untouched, so the
    caller's data is not replaced by split tuples as a side effect.

    :param training_targets: list with one entry (tensor or Boxes3D) per image
    :param num_loc_list: number of locations on each level
    :return: level first training targets (list with one entry per level)
    '''
    if isinstance(training_targets[0], Boxes3D):
        per_image_chunks = [boxes.split(num_loc_list, dim=0) for boxes in training_targets]
        concat = Boxes3D.cat
    else:
        per_image_chunks = [torch.split(tensor, num_loc_list, dim=0) for tensor in training_targets]
        concat = torch.cat

    # zip(*...) regroups the chunks so that each tuple holds one level across all images.
    return [concat(chunks_of_level, dim=0) for chunks_of_level in zip(*per_image_chunks)]
docker-hub/BEVFormer/BEVFormer/projects/mmdet3d_plugin/dd3d/structures/__init__.py
0 → 100644
View file @
007f2e68
# Copyright 2021 Toyota Research Institute. All rights reserved.
from
.image_list
import
ImageList
docker-hub/BEVFormer/BEVFormer/projects/mmdet3d_plugin/dd3d/structures/boxes3d.py
0 → 100644
View file @
007f2e68
# Copyright 2021 Toyota Research Institute. All rights reserved.
import
numpy
as
np
import
torch
from
pyquaternion
import
Quaternion
from
torch.cuda
import
amp
from
projects.mmdet3d_plugin.dd3d.utils.geometry
import
unproject_points2d
import
projects.mmdet3d_plugin.dd3d.structures.transform3d
as
t3d
# yapf: disable
# Sign pattern (+1 / -1) of the 8 box corners: one row per axis, one column per corner.
# `GenericBoxes3D.corners` transposes this to (8, 3), scales it by 0.5 and multiplies it
# with the box dimensions (reordered to l, w, h) to obtain corner offsets in the object frame.
BOX3D_CORNER_MAPPING = [
    [1, 1, 1, 1, -1, -1, -1, -1],
    [1, -1, -1, 1, 1, -1, -1, 1],
    [1, 1, -1, -1, 1, 1, -1, -1]
]
# yapf: enable
def quaternion_to_matrix(quaternions: torch.Tensor) -> torch.Tensor:
    """
    Build rotation matrices from quaternions.

    Every off-diagonal and diagonal term is scaled by 2 / |q|^2, so the
    normalisation of the quaternion is folded into the formula.

    Args:
        quaternions: tensor of shape (..., 4), real (scalar) component first.

    Returns:
        Tensor of shape (..., 3, 3) holding the rotation matrices.
    """
    w, x, y, z = torch.unbind(quaternions, -1)
    scale = 2.0 / (quaternions * quaternions).sum(-1)

    # The nine matrix entries in row-major order.
    entries = (
        1 - scale * (y * y + z * z),
        scale * (x * y - z * w),
        scale * (x * z + y * w),
        scale * (x * y + z * w),
        1 - scale * (x * x + z * z),
        scale * (y * z - x * w),
        scale * (x * z - y * w),
        scale * (y * z + x * w),
        1 - scale * (x * x + y * y),
    )
    flat = torch.stack(entries, -1)

    batch_shape = quaternions.shape[:-1]
    return flat.reshape(batch_shape + (3, 3))
def _to_tensor(x, dim):
    """Convert `x` to a float32 tensor, promoting 1-D input to shape (-1, dim).

    Accepted inputs are torch tensors, numpy arrays, lists, tuples and
    pyquaternion `Quaternion` objects (via their `elements`).

    Args:
        x: value to convert.
        dim: row width used when a 1-D input is reshaped to two dimensions.

    Returns:
        float32 tensor; 1-D inputs come back as (-1, dim), inputs with fewer
        or exactly two dimensions are otherwise returned with their shape.

    Raises:
        ValueError: if the type is unsupported or the result has more than
            two dimensions.
    """
    if isinstance(x, torch.Tensor):
        x = x.to(torch.float32)
    elif isinstance(x, (np.ndarray, list, tuple)):
        x = torch.tensor(x, dtype=torch.float32)
    elif isinstance(x, Quaternion):
        x = torch.tensor(x.elements, dtype=torch.float32)
    else:
        raise ValueError(f"Unsupported type: {type(x).__name__}")

    if x.ndim > 2:
        raise ValueError(f"Invalid shape of input: {x.shape.__str__()}")
    if x.ndim == 1:
        # A flat vector is interpreted as rows of width `dim`.
        return x.reshape(-1, dim)
    return x
class GenericBoxes3D():
    """Container for a batch of 3D boxes stored as float32 tensors.

    The constructor passes every argument through `_to_tensor`, so the
    fields end up as:
        quat: (N, 4) rotation quaternions (consumed by `quaternion_to_matrix`).
        tvec: (N, 3) translations, exposed through the `tvec` property.
        size: (N, 3) box dimensions; `corners` reads the columns as (w, l, h).
    """

    def __init__(self, quat, tvec, size):
        self.quat = _to_tensor(quat, dim=4)
        self._tvec = _to_tensor(tvec, dim=3)
        self.size = _to_tensor(size, dim=3)

    @property
    def tvec(self):
        """Translation of every box."""
        return self._tvec

    @property
    @amp.autocast(enabled=False)
    def corners(self):
        """Return the 8 corners of every box after rotation and translation.

        Autocast is disabled by the decorator and TF32 is switched off while
        the transform is evaluated. Both TF32 flags are saved individually
        and restored in a `finally` block, so the cuDNN flag keeps its own
        previous value and neither flag stays disabled if the computation
        raises.
        """
        prev_matmul_tf32 = torch.backends.cuda.matmul.allow_tf32
        prev_cudnn_tf32 = torch.backends.cudnn.allow_tf32
        torch.backends.cuda.matmul.allow_tf32 = False
        torch.backends.cudnn.allow_tf32 = False
        try:
            translation = t3d.Translate(self.tvec, device=self.device)

            R = quaternion_to_matrix(self.quat)
            # Need to transpose to make it work.
            rotation = t3d.Rotate(R=R.transpose(1, 2), device=self.device)

            tfm = rotation.compose(translation)

            _corners = 0.5 * self.quat.new_tensor(BOX3D_CORNER_MAPPING).T
            lwh = self.size[:, [1, 0, 2]]  # wlh -> lwh
            corners_in_obj_frame = lwh.unsqueeze(1) * _corners.unsqueeze(0)

            corners3d = tfm.transform_points(corners_in_obj_frame)
        finally:
            torch.backends.cuda.matmul.allow_tf32 = prev_matmul_tf32
            torch.backends.cudnn.allow_tf32 = prev_cudnn_tf32
        return corners3d

    @classmethod
    def from_vectors(cls, vecs, device="cpu"):
        """Build boxes from an iterable of pose vectors.

        Parameters
        ----------
        vecs: Iterable[np.ndarray]
            Iterable of 10D pose representation: elements 0-3 are the
            quaternion, 4-6 the translation and 7 onwards the size.
        device: str or torch.device
            Device on which the tensors are created.
        """
        quats, tvecs, sizes = [], [], []
        for vec in vecs:
            quats.append(vec[:4])
            tvecs.append(vec[4:7])
            sizes.append(vec[7:])

        quats = torch.as_tensor(quats, dtype=torch.float32, device=device)
        tvecs = torch.as_tensor(tvecs, dtype=torch.float32, device=device)
        # Explicit dtype for consistency with the other fields; the constructor
        # converts to float32 anyway.
        sizes = torch.as_tensor(sizes, dtype=torch.float32, device=device)

        return cls(quats, tvecs, sizes)

    @classmethod
    def cat(cls, boxes_list, dim=0):
        """Concatenate a list/tuple of boxes into a single container.

        An empty list yields an empty container.
        """
        assert isinstance(boxes_list, (list, tuple))
        if len(boxes_list) == 0:
            return cls(torch.empty(0), torch.empty(0), torch.empty(0))
        assert all([isinstance(box, GenericBoxes3D) for box in boxes_list])

        # use torch.cat (v.s. layers.cat) so the returned boxes never share storage with input
        quat = torch.cat([b.quat for b in boxes_list], dim=dim)
        tvec = torch.cat([b.tvec for b in boxes_list], dim=dim)
        size = torch.cat([b.size for b in boxes_list], dim=dim)

        cat_boxes = cls(quat, tvec, size)
        return cat_boxes

    def split(self, split_sizes, dim=0):
        """Split into consecutive chunks whose sizes must sum to `len(self)`."""
        assert sum(split_sizes) == len(self)
        quat_list = torch.split(self.quat, split_sizes, dim=dim)
        tvec_list = torch.split(self.tvec, split_sizes, dim=dim)
        size_list = torch.split(self.size, split_sizes, dim=dim)

        return [GenericBoxes3D(*x) for x in zip(quat_list, tvec_list, size_list)]

    def __getitem__(self, item):
        """Index the boxes; the result is always a 2-D (matrix) container.

        An `int` selects a single box and is returned with a leading
        dimension of 1; any other index must itself produce 2-D tensors.
        """
        if isinstance(item, int):
            return GenericBoxes3D(self.quat[item].view(1, -1), self.tvec[item].view(1, -1), self.size[item].view(1, -1))

        quat = self.quat[item]
        tvec = self.tvec[item]
        size = self.size[item]

        assert quat.dim() == 2, "Indexing on Boxes3D with {} failed to return a matrix!".format(item)
        assert tvec.dim() == 2, "Indexing on Boxes3D with {} failed to return a matrix!".format(item)
        assert size.dim() == 2, "Indexing on Boxes3D with {} failed to return a matrix!".format(item)

        return GenericBoxes3D(quat, tvec, size)

    def __len__(self):
        """Number of boxes (all fields must agree on it)."""
        assert len(self.quat) == len(self.tvec) == len(self.size)
        return self.quat.shape[0]

    def clone(self):
        """Return a copy whose tensors do not share storage with this one."""
        return GenericBoxes3D(self.quat.clone(), self.tvec.clone(), self.size.clone())

    def vectorize(self):
        """Concatenate quaternion, translation and size along dim 1."""
        xyz = self.tvec
        return torch.cat([self.quat, xyz, self.size], dim=1)

    @property
    def device(self):
        """Device of the quaternion tensor."""
        return self.quat.device

    def to(self, *args, **kwargs):
        """Apply `Tensor.to(*args, **kwargs)` to every field and rewrap the result."""
        quat = self.quat.to(*args, **kwargs)
        tvec = self.tvec.to(*args, **kwargs)
        size = self.size.to(*args, **kwargs)
        return GenericBoxes3D(quat, tvec, size)
class Boxes3D(GenericBoxes3D):
    """Vision-based 3D box container.

    The tvec is computed from projected center, depth, and intrinsics.

    Fields are stored exactly as passed in (no conversion is applied):
        quat: rotation quaternions.
        proj_ctr: projected 2D box centers.
        depth: depth of each box center.
        size: box dimensions.
        inv_intrinsics: one inverse intrinsics matrix per box, (N, 3, 3).
    """

    def __init__(self, quat, proj_ctr, depth, size, inv_intrinsics):
        # The base-class constructor is deliberately not called: `tvec` is a
        # derived property here and the tensors are kept as given.
        self.quat = quat
        self.proj_ctr = proj_ctr
        self.depth = depth
        self.size = size
        self.inv_intrinsics = inv_intrinsics

    @property
    def tvec(self):
        """3D center: the unprojected ray of `proj_ctr` scaled by `depth`."""
        ray = unproject_points2d(self.proj_ctr, self.inv_intrinsics)
        xyz = ray * self.depth
        return xyz

    @classmethod
    def from_vectors(cls, vecs, intrinsics, device="cpu"):
        """Build boxes from pose vectors and a camera intrinsics matrix.

        Parameters
        ----------
        vecs: Iterable[np.ndarray]
            Iterable of 10D pose representation: elements 0-3 are the
            quaternion, 4-6 the 3D center and 7 onwards the size.
        intrinsics: np.ndarray
            (3, 3) intrinsics matrix.
        device: str or torch.device
            Device on which the tensors are created.
        """
        if len(vecs) == 0:
            quats = torch.as_tensor([], dtype=torch.float32, device=device).view(-1, 4)
            proj_ctrs = torch.as_tensor([], dtype=torch.float32, device=device).view(-1, 2)
            depths = torch.as_tensor([], dtype=torch.float32, device=device).view(-1, 1)
            sizes = torch.as_tensor([], dtype=torch.float32, device=device).view(-1, 3)
            inv_intrinsics = torch.as_tensor([], dtype=torch.float32, device=device).view(-1, 3, 3)
            return cls(quats, proj_ctrs, depths, sizes, inv_intrinsics)

        quats, proj_ctrs, depths, sizes = [], [], [], []
        for vec in vecs:
            quat = vec[:4]

            # Project the 3D center with the intrinsics and normalise by the last coordinate.
            proj_ctr = intrinsics.dot(vec[4:7])
            proj_ctr = proj_ctr[:2] / proj_ctr[-1]

            depth = vec[6:7]

            size = vec[7:]

            quats.append(quat)
            proj_ctrs.append(proj_ctr)
            depths.append(depth)
            sizes.append(size)

        quats = torch.as_tensor(np.array(quats), dtype=torch.float32, device=device)
        proj_ctrs = torch.as_tensor(np.array(proj_ctrs), dtype=torch.float32, device=device)
        depths = torch.as_tensor(np.array(depths), dtype=torch.float32, device=device)
        sizes = torch.as_tensor(np.array(sizes), dtype=torch.float32, device=device)

        inv_intrinsics = np.linalg.inv(intrinsics)
        # Force float32 like every other field (and like the empty branch above);
        # without it a float64 intrinsics array would yield a float64 tensor here.
        inv_intrinsics = torch.as_tensor(
            inv_intrinsics[None, ...], dtype=torch.float32, device=device
        ).expand(len(vecs), 3, 3)

        return cls(quats, proj_ctrs, depths, sizes, inv_intrinsics)

    @classmethod
    def cat(cls, boxes_list, dim=0):
        """Concatenate a list/tuple of `Boxes3D` into a single container.

        An empty list yields a container of empty tensors.
        """
        assert isinstance(boxes_list, (list, tuple))
        if len(boxes_list) == 0:
            return cls(torch.empty(0), torch.empty(0), torch.empty(0), torch.empty(0), torch.empty(0))

        assert all([isinstance(box, Boxes3D) for box in boxes_list])

        # use torch.cat (v.s. layers.cat) so the returned boxes never share storage with input
        quat = torch.cat([b.quat for b in boxes_list], dim=dim)
        proj_ctr = torch.cat([b.proj_ctr for b in boxes_list], dim=dim)
        depth = torch.cat([b.depth for b in boxes_list], dim=dim)
        size = torch.cat([b.size for b in boxes_list], dim=dim)
        inv_intrinsics = torch.cat([b.inv_intrinsics for b in boxes_list], dim=dim)

        cat_boxes = cls(quat, proj_ctr, depth, size, inv_intrinsics)
        return cat_boxes

    def split(self, split_sizes, dim=0):
        """Split into consecutive chunks whose sizes must sum to `len(self)`."""
        assert sum(split_sizes) == len(self)
        quat_list = torch.split(self.quat, split_sizes, dim=dim)
        proj_ctr_list = torch.split(self.proj_ctr, split_sizes, dim=dim)
        depth_list = torch.split(self.depth, split_sizes, dim=dim)
        size_list = torch.split(self.size, split_sizes, dim=dim)
        inv_K_list = torch.split(self.inv_intrinsics, split_sizes, dim=dim)

        return [Boxes3D(*x) for x in zip(quat_list, proj_ctr_list, depth_list, size_list, inv_K_list)]

    def __getitem__(self, item):
        """Index the boxes; every field of the result keeps its batch dimension.

        An `int` selects a single box and is returned with a leading
        dimension of 1; any other index must itself keep the batch dimension.
        """
        if isinstance(item, int):
            return Boxes3D(
                self.quat[item].view(1, -1), self.proj_ctr[item].view(1, -1), self.depth[item].view(1, -1),
                self.size[item].view(1, -1), self.inv_intrinsics[item].view(1, 3, 3)
            )

        quat = self.quat[item]
        ctr = self.proj_ctr[item]
        depth = self.depth[item]
        size = self.size[item]
        inv_K = self.inv_intrinsics[item]

        assert quat.dim() == 2, "Indexing on Boxes3D with {} failed to return a matrix!".format(item)
        assert ctr.dim() == 2, "Indexing on Boxes3D with {} failed to return a matrix!".format(item)
        assert depth.dim() == 2, "Indexing on Boxes3D with {} failed to return a matrix!".format(item)
        assert size.dim() == 2, "Indexing on Boxes3D with {} failed to return a matrix!".format(item)
        assert inv_K.dim() == 3, "Indexing on Boxes3D with {} failed to return a matrix!".format(item)
        assert inv_K.shape[1:] == (3, 3), "Indexing on Boxes3D with {} failed to return a matrix!".format(item)

        return Boxes3D(quat, ctr, depth, size, inv_K)

    def __len__(self):
        """Number of boxes (all fields must agree on it)."""
        assert len(self.quat) == len(self.proj_ctr) == len(self.depth) == len(self.size) == len(self.inv_intrinsics)
        return self.quat.shape[0]

    def clone(self):
        """Return a copy whose tensors do not share storage with this one."""
        return Boxes3D(
            self.quat.clone(), self.proj_ctr.clone(), self.depth.clone(), self.size.clone(), self.inv_intrinsics.clone()
        )

    def to(self, *args, **kwargs):
        """Apply `Tensor.to(*args, **kwargs)` to every field and rewrap the result."""
        quat = self.quat.to(*args, **kwargs)
        proj_ctr = self.proj_ctr.to(*args, **kwargs)
        depth = self.depth.to(*args, **kwargs)
        size = self.size.to(*args, **kwargs)
        inv_K = self.inv_intrinsics.to(*args, **kwargs)
        return Boxes3D(quat, proj_ctr, depth, size, inv_K)
Prev
1
…
3
4
5
6
7
8
9
10
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment