Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
TS-MODELS-OPT
training
Autonomous-Driving-models
Commits
19472568
Commit
19472568
authored
Apr 08, 2026
by
雍大凯
Browse files
将子模块转换为普通目录
parent
51e55208
Changes
233
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
3313 additions
and
0 deletions
+3313
-0
docker-hub/MapTRv2/MapTR/projects/mmdet3d_plugin/models/utils/grid_mask.py
...2/MapTR/projects/mmdet3d_plugin/models/utils/grid_mask.py
+127
-0
docker-hub/MapTRv2/MapTR/projects/mmdet3d_plugin/models/utils/inverted_residual.py
...projects/mmdet3d_plugin/models/utils/inverted_residual.py
+128
-0
docker-hub/MapTRv2/MapTR/projects/mmdet3d_plugin/models/utils/make_divisible.py
...TR/projects/mmdet3d_plugin/models/utils/make_divisible.py
+25
-0
docker-hub/MapTRv2/MapTR/projects/mmdet3d_plugin/models/utils/position_embedding.py
...rojects/mmdet3d_plugin/models/utils/position_embedding.py
+34
-0
docker-hub/MapTRv2/MapTR/projects/mmdet3d_plugin/models/utils/se_layer.py
...v2/MapTR/projects/mmdet3d_plugin/models/utils/se_layer.py
+124
-0
docker-hub/MapTRv2/MapTR/projects/mmdet3d_plugin/models/utils/visual.py
...TRv2/MapTR/projects/mmdet3d_plugin/models/utils/visual.py
+24
-0
docker-hub/MapTRv2/MapTR/requirement.txt
docker-hub/MapTRv2/MapTR/requirement.txt
+2
-0
docker-hub/MapTRv2/MapTR/test.py
docker-hub/MapTRv2/MapTR/test.py
+7
-0
docker-hub/MapTRv2/MapTR/tools/analysis_tools/__init__.py
docker-hub/MapTRv2/MapTR/tools/analysis_tools/__init__.py
+0
-0
docker-hub/MapTRv2/MapTR/tools/analysis_tools/analyze_logs.py
...er-hub/MapTRv2/MapTR/tools/analysis_tools/analyze_logs.py
+201
-0
docker-hub/MapTRv2/MapTR/tools/analysis_tools/benchmark.py
docker-hub/MapTRv2/MapTR/tools/analysis_tools/benchmark.py
+98
-0
docker-hub/MapTRv2/MapTR/tools/analysis_tools/get_params.py
docker-hub/MapTRv2/MapTR/tools/analysis_tools/get_params.py
+10
-0
docker-hub/MapTRv2/MapTR/tools/analysis_tools/visual.py
docker-hub/MapTRv2/MapTR/tools/analysis_tools/visual.py
+477
-0
docker-hub/MapTRv2/MapTR/tools/create_data.py
docker-hub/MapTRv2/MapTR/tools/create_data.py
+305
-0
docker-hub/MapTRv2/MapTR/tools/data_converter/__init__.py
docker-hub/MapTRv2/MapTR/tools/data_converter/__init__.py
+1
-0
docker-hub/MapTRv2/MapTR/tools/data_converter/av2_converter.py
...r-hub/MapTRv2/MapTR/tools/data_converter/av2_converter.py
+204
-0
docker-hub/MapTRv2/MapTR/tools/data_converter/create_gt_database.py
.../MapTRv2/MapTR/tools/data_converter/create_gt_database.py
+338
-0
docker-hub/MapTRv2/MapTR/tools/data_converter/indoor_converter.py
...ub/MapTRv2/MapTR/tools/data_converter/indoor_converter.py
+108
-0
docker-hub/MapTRv2/MapTR/tools/data_converter/kitti_converter.py
...hub/MapTRv2/MapTR/tools/data_converter/kitti_converter.py
+546
-0
docker-hub/MapTRv2/MapTR/tools/data_converter/kitti_data_utils.py
...ub/MapTRv2/MapTR/tools/data_converter/kitti_data_utils.py
+554
-0
No files found.
docker-hub/MapTRv2/MapTR/projects/mmdet3d_plugin/models/utils/grid_mask.py
0 → 100644
View file @
19472568
import
torch
import
torch.nn
as
nn
import
numpy
as
np
from
PIL
import
Image
from
mmcv.runner
import
force_fp32
,
auto_fp16
class Grid(object):
    """GridMask-style augmentation applied to a single image tensor.

    A mask of ones is built on a canvas 1.5x the image size, stripes of
    length ``l`` are zeroed every ``d`` pixels along the enabled axes, the
    mask is rotated by a random angle and centre-cropped back to the image
    size before being multiplied with the image.

    Args:
        use_h (bool): Zero out horizontal stripes (rows) of the mask.
        use_w (bool): Zero out vertical stripes (columns) of the mask.
        rotate (int): Exclusive upper bound of the random rotation angle
            passed to ``PIL.Image.rotate``.
        offset (bool): If True, masked pixels are filled with uniform noise
            in [-1, 1) instead of zeros.
        ratio (float): Fraction of the grid period ``d`` that is masked.
            The value 1 selects a random stripe length instead.
        mode (int): ``1`` inverts the mask; any other value keeps it.
        prob (float): Probability of applying the augmentation.
    """

    def __init__(self, use_h, use_w, rotate=1, offset=False, ratio=0.5,
                 mode=0, prob=1.):
        self.use_h = use_h
        self.use_w = use_w
        self.rotate = rotate
        self.offset = offset
        self.ratio = ratio
        self.mode = mode
        # st_prob keeps the configured probability; prob is the one in use.
        self.st_prob = prob
        self.prob = prob

    def set_prob(self, epoch, max_epoch):
        """Scale the active probability linearly with training progress."""
        self.prob = self.st_prob * epoch / max_epoch

    def __call__(self, img, label):
        """Apply the grid mask to ``img`` and return ``(img, label)``.

        ``img`` is indexed with ``size(1)`` / ``size(2)`` as height / width,
        so it is presumably a (C, H, W) tensor -- verify against the caller.
        ``label`` is passed through untouched.
        """
        if np.random.rand() > self.prob:
            return img, label
        h = img.size(1)
        w = img.size(2)
        self.d1 = 2
        self.d2 = min(h, w)
        # Oversized canvas so the centre crop stays covered after rotation.
        hh = int(1.5 * h)
        ww = int(1.5 * w)
        d = np.random.randint(self.d1, self.d2)
        if self.ratio == 1:
            self.l = np.random.randint(1, d)
        else:
            self.l = min(max(int(d * self.ratio + 0.5), 1), d - 1)
        mask = np.ones((hh, ww), np.float32)
        st_h = np.random.randint(d)
        st_w = np.random.randint(d)
        if self.use_h:
            for i in range(hh // d):
                s = d * i + st_h
                t = min(s + self.l, hh)
                mask[s:t, :] *= 0
        if self.use_w:
            for i in range(ww // d):
                s = d * i + st_w
                t = min(s + self.l, ww)
                mask[:, s:t] *= 0

        r = np.random.randint(self.rotate)
        mask = Image.fromarray(np.uint8(mask))
        mask = mask.rotate(r)
        mask = np.asarray(mask)
        mask = mask[(hh - h) // 2:(hh - h) // 2 + h,
                    (ww - w) // 2:(ww - w) // 2 + w]

        # np.asarray() on a PIL image yields a read-only array; copy it so
        # torch.from_numpy() does not wrap non-writable memory (the same
        # approach GridMask.forward uses).
        mask = torch.from_numpy(mask.copy()).float()
        if self.mode == 1:
            mask = 1 - mask

        mask = mask.expand_as(img)
        if self.offset:
            offset = torch.from_numpy(
                2 * (np.random.rand(h, w) - 0.5)).float()
            # Noise is only kept where the mask removed the image content.
            offset = (1 - mask) * offset
            img = img * mask + offset
        else:
            img = img * mask

        return img, label
class GridMask(nn.Module):
    """GridMask augmentation as an ``nn.Module`` operating on a batch.

    One mask is sampled per forward call and shared by every image and
    channel of the batch. The augmentation is skipped in eval mode.

    Args:
        use_h (bool): Zero out horizontal stripes (rows) of the mask.
        use_w (bool): Zero out vertical stripes (columns) of the mask.
        rotate (int): Exclusive upper bound of the random rotation angle
            passed to ``PIL.Image.rotate``.
        offset (bool): If True, masked pixels are filled with uniform noise
            in [-1, 1) instead of zeros.
        ratio (float): Fraction of the grid period ``d`` that is masked.
        mode (int): ``1`` inverts the mask; any other value keeps it.
        prob (float): Probability of applying the augmentation.
    """

    def __init__(self, use_h, use_w, rotate=1, offset=False, ratio=0.5,
                 mode=0, prob=1.):
        super(GridMask, self).__init__()
        self.use_h = use_h
        self.use_w = use_w
        self.rotate = rotate
        self.offset = offset
        self.ratio = ratio
        self.mode = mode
        # st_prob keeps the configured probability; prob is the one in use.
        self.st_prob = prob
        self.prob = prob
        self.fp16_enable = False

    def set_prob(self, epoch, max_epoch):
        """Scale the active probability linearly with training progress."""
        self.prob = self.st_prob * epoch / max_epoch

    @auto_fp16()
    @torch._dynamo.disable
    def forward(self, x):
        """Mask a batch ``x`` of shape (N, C, H, W) and return the same shape.

        Returns ``x`` unchanged when the module is in eval mode or when the
        random draw exceeds ``self.prob``.
        """
        if np.random.rand() > self.prob or not self.training:
            return x
        n, c, h, w = x.size()
        # Fold batch and channel together so one (H, W) mask broadcasts.
        x = x.view(-1, h, w)
        # Oversized canvas so the centre crop stays covered after rotation.
        hh = int(1.5 * h)
        ww = int(1.5 * w)
        d = np.random.randint(2, h)
        self.l = min(max(int(d * self.ratio + 0.5), 1), d - 1)
        mask = np.ones((hh, ww), np.float32)
        st_h = np.random.randint(d)
        st_w = np.random.randint(d)
        if self.use_h:
            for i in range(hh // d):
                s = d * i + st_h
                t = min(s + self.l, hh)
                mask[s:t, :] *= 0
        if self.use_w:
            for i in range(ww // d):
                s = d * i + st_w
                t = min(s + self.l, ww)
                mask[:, s:t] *= 0

        r = np.random.randint(self.rotate)
        mask = Image.fromarray(np.uint8(mask))
        mask = mask.rotate(r)
        mask = np.asarray(mask)
        mask = mask[(hh - h) // 2:(hh - h) // 2 + h,
                    (ww - w) // 2:(ww - w) // 2 + w]

        # Follow the input's device instead of the default CUDA device so
        # the module also works on CPU and on non-default GPUs. The copy is
        # needed because np.asarray() on a PIL image is read-only.
        mask = torch.from_numpy(mask.copy()).to(device=x.device,
                                                dtype=x.dtype)
        if self.mode == 1:
            mask = 1 - mask
        mask = mask.expand_as(x)
        if self.offset:
            offset = torch.from_numpy(
                2 * (np.random.rand(h, w) - 0.5)).to(device=x.device,
                                                     dtype=x.dtype)
            # Noise is only kept where the mask removed the image content.
            x = x * mask + offset * (1 - mask)
        else:
            x = x * mask

        return x.view(n, c, h, w)
docker-hub/MapTRv2/MapTR/projects/mmdet3d_plugin/models/utils/inverted_residual.py
0 → 100644
View file @
19472568
# Copyright (c) OpenMMLab. All rights reserved.
import
torch.nn
as
nn
import
torch.utils.checkpoint
as
cp
from
mmcv.cnn
import
ConvModule
from
mmcv.cnn.bricks
import
DropPath
from
mmcv.runner
import
BaseModule
from
.se_layer
import
SELayer
class InvertedResidual(BaseModule):
    """Inverted residual block: expand (1x1) -> depthwise -> SE -> linear (1x1).

    Args:
        in_channels (int): Number of input channels.
        out_channels (int): Number of output channels.
        mid_channels (int): Number of channels fed to the depthwise conv.
        kernel_size (int): Kernel size of the depthwise conv. Default: 3.
        stride (int): Stride of the depthwise conv, 1 or 2. Default: 1.
        se_cfg (dict): Keyword arguments for ``SELayer``. Default: None,
            meaning no SE layer is built.
        with_expand_conv (bool): Whether to build the 1x1 expansion conv.
            When False, ``mid_channels`` must equal ``in_channels``.
            Default: True.
        conv_cfg (dict): Config of the convolution layers. Default: None.
        norm_cfg (dict): Config of the normalization layers.
            Default: dict(type='BN').
        act_cfg (dict): Config of the activation layers.
            Default: dict(type='ReLU').
        drop_path_rate (float): Stochastic depth rate. Defaults to 0.
        with_cp (bool): Run the block under ``torch.utils.checkpoint`` to
            trade compute for memory. Default: False.
        init_cfg (dict or list[dict], optional): Initialization config.
            Default: None

    Returns:
        Tensor: The output tensor.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 mid_channels,
                 kernel_size=3,
                 stride=1,
                 se_cfg=None,
                 with_expand_conv=True,
                 conv_cfg=None,
                 norm_cfg=dict(type='BN'),
                 act_cfg=dict(type='ReLU'),
                 drop_path_rate=0.,
                 with_cp=False,
                 init_cfg=None):
        super().__init__(init_cfg)
        # The identity shortcut is only valid when the shape is preserved.
        self.with_res_shortcut = (stride == 1 and in_channels == out_channels)
        assert stride in [1, 2], f'stride must in [1, 2]. ' \
            f'But received {stride}.'
        self.with_cp = with_cp
        if drop_path_rate > 0:
            self.drop_path = DropPath(drop_path_rate)
        else:
            self.drop_path = nn.Identity()
        self.with_se = se_cfg is not None
        self.with_expand_conv = with_expand_conv

        assert not self.with_se or isinstance(se_cfg, dict)
        assert self.with_expand_conv or mid_channels == in_channels

        # Settings shared by every ConvModule of the block.
        shared_cfg = dict(conv_cfg=conv_cfg, norm_cfg=norm_cfg)

        if self.with_expand_conv:
            self.expand_conv = ConvModule(
                in_channels=in_channels,
                out_channels=mid_channels,
                kernel_size=1,
                stride=1,
                padding=0,
                act_cfg=act_cfg,
                **shared_cfg)
        self.depthwise_conv = ConvModule(
            in_channels=mid_channels,
            out_channels=mid_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=kernel_size // 2,
            groups=mid_channels,
            act_cfg=act_cfg,
            **shared_cfg)
        if self.with_se:
            self.se = SELayer(**se_cfg)
        # The projection conv is linear: no activation afterwards.
        self.linear_conv = ConvModule(
            in_channels=mid_channels,
            out_channels=out_channels,
            kernel_size=1,
            stride=1,
            padding=0,
            act_cfg=None,
            **shared_cfg)

    def forward(self, x):
        """Run the block, optionally under gradient checkpointing."""

        def _inner_forward(inputs):
            feats = inputs
            if self.with_expand_conv:
                feats = self.expand_conv(feats)
            feats = self.depthwise_conv(feats)
            if self.with_se:
                feats = self.se(feats)
            feats = self.linear_conv(feats)
            if not self.with_res_shortcut:
                return feats
            return inputs + self.drop_path(feats)

        # Checkpointing only makes sense when gradients flow through x.
        if self.with_cp and x.requires_grad:
            return cp.checkpoint(_inner_forward, x)
        return _inner_forward(x)
\ No newline at end of file
docker-hub/MapTRv2/MapTR/projects/mmdet3d_plugin/models/utils/make_divisible.py
0 → 100644
View file @
19472568
# Copyright (c) OpenMMLab. All rights reserved.
def make_divisible(value, divisor, min_value=None, min_ratio=0.9):
    """Round a channel count to a multiple of ``divisor``.

    The value is rounded to the nearest multiple of ``divisor`` and clamped
    from below by ``min_value``. If the result falls below
    ``min_ratio * value`` one more ``divisor`` is added.

    Args:
        value (int): The original channel number.
        divisor (int): The divisor the result must be a multiple of.
        min_value (int): Lower bound of the result. Default: None, which
            means the bound equals ``divisor``.
        min_ratio (float): Minimum allowed ratio of the rounded channel
            number to the original one. Default: 0.9.

    Returns:
        int: The modified output channel number.
    """
    lower_bound = divisor if min_value is None else min_value
    rounded = int(value + divisor / 2) // divisor * divisor
    result = rounded if rounded > lower_bound else lower_bound
    # Rounding down must not shrink the value by more than (1 - min_ratio).
    if result < min_ratio * value:
        result = result + divisor
    return result
\ No newline at end of file
docker-hub/MapTRv2/MapTR/projects/mmdet3d_plugin/models/utils/position_embedding.py
0 → 100644
View file @
19472568
import
torch
import
torch.nn
as
nn
import
math
class RelPositionEmbedding(nn.Module):
    """Learned projection of a sinusoidal encoding of relative grid position.

    Every cell of an ``H x W`` grid is described by
    ``(cos(pi*y), sin(pi*y), cos(pi*x), sin(pi*x))`` with ``y`` and ``x``
    normalised to ``[0, 1]``; the 4 features are projected to
    ``num_pos_feats`` channels by a bias-free linear layer and optionally
    layer-normalised.

    Args:
        num_pos_feats (int): Output feature size per position. Default: 64.
        pos_norm (bool): Apply ``LayerNorm`` to the projected features.
            Default: True.
    """

    def __init__(self, num_pos_feats=64, pos_norm=True):
        super().__init__()
        self.num_pos_feats = num_pos_feats
        self.fc = nn.Linear(4, self.num_pos_feats, bias=False)
        self.pos_norm = pos_norm
        if self.pos_norm:
            self.norm = nn.LayerNorm(self.num_pos_feats)

    @staticmethod
    def _axis_encoding(length, device):
        """Return the ``(length, 2)`` cos/sin encoding of one grid axis."""
        # max(..., 1) keeps a length-1 axis at coordinate 0 instead of
        # producing 0 / 0 = NaN; longer axes are unaffected.
        coords = (torch.arange(length) / float(max(length - 1, 1))).to(device)
        angles = coords * math.pi
        return torch.stack((torch.cos(angles), torch.sin(angles)), dim=1)

    def forward(self, tensor):
        """Compute the position embedding for the grid of ``tensor``.

        Args:
            tensor (Tensor): 4-D tensor; only its last two sizes (H, W) and
                its device are used.

        Returns:
            Tensor: Embedding of shape ``(H * W, num_pos_feats)`` in
            row-major order.
        """
        _, _, H, W = tensor.shape
        y_axis = self._axis_encoding(H, tensor.device)
        y_axis = y_axis.reshape(H, 1, 2).repeat(1, W, 1).reshape(H * W, 2)

        x_axis = self._axis_encoding(W, tensor.device)
        x_axis = x_axis.reshape(1, W, 2).repeat(H, 1, 1).reshape(H * W, 2)

        x_pos = torch.cat((y_axis, x_axis), dim=1)
        x_pos = self.fc(x_pos)

        if self.pos_norm:
            x_pos = self.norm(x_pos)
        return x_pos
\ No newline at end of file
docker-hub/MapTRv2/MapTR/projects/mmdet3d_plugin/models/utils/se_layer.py
0 → 100644
View file @
19472568
# Copyright (c) OpenMMLab. All rights reserved.
import
mmcv
import
torch
import
torch.nn
as
nn
from
mmcv.cnn
import
ConvModule
from
mmcv.runner
import
BaseModule
class SELayer(BaseModule):
    """Squeeze-and-Excitation channel attention.

    Args:
        channels (int): Number of input (and output) channels.
        ratio (int): Squeeze ratio; the bottleneck has
            ``int(channels/ratio)`` channels. Default: 16.
        conv_cfg (None or dict): Config of the convolution layers.
            Default: None.
        act_cfg (dict or Sequence[dict]): Activation config. A single dict
            is used for both convolutions; a pair configures the first and
            second convolution respectively.
            Default: (dict(type='ReLU'), dict(type='Sigmoid'))
        init_cfg (dict or list[dict], optional): Initialization config.
            Default: None
    """

    def __init__(self,
                 channels,
                 ratio=16,
                 conv_cfg=None,
                 act_cfg=(dict(type='ReLU'), dict(type='Sigmoid')),
                 init_cfg=None):
        super().__init__(init_cfg)
        if isinstance(act_cfg, dict):
            act_cfg = (act_cfg, act_cfg)
        assert len(act_cfg) == 2
        assert mmcv.is_tuple_of(act_cfg, dict)
        squeeze_act, excite_act = act_cfg[0], act_cfg[1]
        squeezed_channels = int(channels / ratio)

        self.global_avgpool = nn.AdaptiveAvgPool2d(1)
        self.conv1 = ConvModule(
            in_channels=channels,
            out_channels=squeezed_channels,
            kernel_size=1,
            stride=1,
            conv_cfg=conv_cfg,
            act_cfg=squeeze_act)
        self.conv2 = ConvModule(
            in_channels=squeezed_channels,
            out_channels=channels,
            kernel_size=1,
            stride=1,
            conv_cfg=conv_cfg,
            act_cfg=excite_act)

    def forward(self, x):
        """Rescale ``x`` channel-wise by the learned attention weights."""
        weights = self.conv2(self.conv1(self.global_avgpool(x)))
        return x * weights
class DyReLU(BaseModule):
    """Dynamic ReLU (DyReLU) module.

    See `Dynamic ReLU <https://arxiv.org/abs/2003.10027>`_ for details.
    Current implementation is specialized for task-aware attention in DyHead.
    HSigmoid arguments in default act_cfg follow DyHead official code.
    https://github.com/microsoft/DynamicHead/blob/master/dyhead/dyrelu.py

    Args:
        channels (int): The input (and output) channels of DyReLU module.
        ratio (int): Squeeze ratio in Squeeze-and-Excitation-like module,
            the intermediate channel will be ``int(channels/ratio)``.
            Default: 4.
        conv_cfg (None or dict): Config dict for convolution layer.
            Default: None, which means using conv2d.
        act_cfg (dict or Sequence[dict]): Config dict for activation layer.
            If act_cfg is a dict, two activation layers will be configured
            by this dict. If act_cfg is a sequence of dicts, the first
            activation layer will be configured by the first dict and the
            second activation layer will be configured by the second dict.
            Default: (dict(type='ReLU'), dict(type='HSigmoid', bias=3.0,
            divisor=6.0))
        init_cfg (dict or list[dict], optional): Initialization config dict.
            Default: None
    """

    def __init__(self,
                 channels,
                 ratio=4,
                 conv_cfg=None,
                 act_cfg=(dict(type='ReLU'),
                          dict(type='HSigmoid', bias=3.0, divisor=6.0)),
                 init_cfg=None):
        super().__init__(init_cfg=init_cfg)
        if isinstance(act_cfg, dict):
            act_cfg = (act_cfg, act_cfg)
        assert len(act_cfg) == 2
        assert mmcv.is_tuple_of(act_cfg, dict)
        self.channels = channels
        self.expansion = 4  # for a1, b1, a2, b2
        self.global_avgpool = nn.AdaptiveAvgPool2d(1)
        self.conv1 = ConvModule(
            in_channels=channels,
            out_channels=int(channels / ratio),
            kernel_size=1,
            stride=1,
            conv_cfg=conv_cfg,
            act_cfg=act_cfg[0])
        self.conv2 = ConvModule(
            in_channels=int(channels / ratio),
            out_channels=channels * self.expansion,
            kernel_size=1,
            stride=1,
            conv_cfg=conv_cfg,
            act_cfg=act_cfg[1])

    def forward(self, x):
        """Apply the dynamic activation and return a tensor shaped like ``x``."""
        coeffs = self.global_avgpool(x)
        coeffs = self.conv1(coeffs)
        # With the default HSigmoid the value range is [-0.5, 0.5].
        coeffs = self.conv2(coeffs) - 0.5
        # One chunk of ``channels`` coefficients per slope / intercept.
        a1, b1, a2, b2 = torch.split(coeffs, self.channels, dim=1)
        a1 = a1 * 2.0 + 1.0  # [-1.0, 1.0] + 1.0
        a2 = a2 * 2.0  # [-1.0, 1.0]
        out = torch.max(x * a1 + b1, x * a2 + b2)
        # The activation result must be returned; a bare ``return`` here
        # would hand ``None`` to the caller.
        return out
\ No newline at end of file
docker-hub/MapTRv2/MapTR/projects/mmdet3d_plugin/models/utils/visual.py
0 → 100644
View file @
19472568
import
torch
from
torchvision.utils
import
make_grid
import
torchvision
import
matplotlib.pyplot
as
plt
import
cv2
def convert_color(img_path):
    """Re-save the image at ``img_path`` in place using the viridis colormap.

    The file is read as a single-channel grayscale image and written back
    to the same path.
    """
    plt.figure()
    gray = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    colormap = plt.get_cmap('viridis')
    plt.imsave(img_path, gray, cmap=colormap)
    plt.close()
def save_tensor(tensor, path, pad_value=254.0,):
    """Save a tensor as an image grid at ``path`` and recolor it.

    Args:
        tensor (Tensor): Tensor to visualise. A 3-D tensor gets a channel
            dimension inserted at index 1 before the grid is built.
        path (str): Destination image path.
        pad_value (float): Value used for the padding between grid cells.
    """
    print('save_tensor', path)
    tensor = tensor.detach().cpu()
    # The boolean check has to happen before the cast to float; once the
    # tensor is float the dtype information is gone and the check can
    # never succeed.
    is_bool = tensor.dtype == torch.bool
    tensor = tensor.to(torch.float)
    if is_bool:
        tensor = tensor * 255
    if len(tensor.shape) == 3:
        tensor = tensor.unsqueeze(1)
    grid = make_grid(tensor, pad_value=pad_value, normalize=False)
    torchvision.utils.save_image(grid, path)
    convert_color(path)
docker-hub/MapTRv2/MapTR/requirement.txt
0 → 100644
View file @
19472568
shapely==1.8.5.post1
av2
\ No newline at end of file
docker-hub/MapTRv2/MapTR/test.py
0 → 100644
View file @
19472568
import
torch
# Scratch check: build one CUDA tensor from a Python list of two tensors.
# NOTE(review): x is (2, 3) and y is (3, 3), so the list is ragged and the
# conversion presumably fails -- confirm whether that is the intent.
x = torch.rand(2, 3)
y = torch.rand(3, 3)
z = torch.as_tensor([x, y], device="cuda")
print(z.shape)
\ No newline at end of file
docker-hub/MapTRv2/MapTR/tools/analysis_tools/__init__.py
0 → 100644
View file @
19472568
docker-hub/MapTRv2/MapTR/tools/analysis_tools/analyze_logs.py
0 → 100644
View file @
19472568
# Copyright (c) OpenMMLab. All rights reserved.
import
argparse
import
json
import
numpy
as
np
import
seaborn
as
sns
from
collections
import
defaultdict
from
matplotlib
import
pyplot
as
plt
def cal_train_time(log_dicts, args):
    """Print per-epoch and overall iteration-time statistics for each log.

    Args:
        log_dicts (list[dict]): One dict per log file mapping an epoch to a
            dict of metric lists; the ``'time'`` list is read here.
        args: Namespace providing ``json_logs`` (log names, parallel to
            ``log_dicts``) and ``include_outliers`` (keep the first time
            value of every epoch when True).

    Raises:
        ValueError: If a log contains no epochs.
    """
    for i, log_dict in enumerate(log_dicts):
        print(f'{"-" * 5}Analyze train time of {args.json_logs[i]}{"-" * 5}')
        # The first logged value of an epoch is dropped unless requested.
        first = 0 if args.include_outliers else 1
        all_times = [log_dict[epoch]['time'][first:] for epoch in log_dict]
        if not all_times:
            raise ValueError(
                f'{args.json_logs[i]} does not contain any epoch')
        # Epochs may hold different numbers of iterations (e.g. an aborted
        # last epoch), so average each epoch separately instead of stacking
        # the lists into one rectangular array.
        epoch_ave_time = np.array([np.mean(times) for times in all_times])
        slowest_epoch = epoch_ave_time.argmax()
        fastest_epoch = epoch_ave_time.argmin()
        std_over_epoch = epoch_ave_time.std()
        print(f'slowest epoch {slowest_epoch + 1}, '
              f'average time is {epoch_ave_time[slowest_epoch]:.4f}')
        print(f'fastest epoch {fastest_epoch + 1}, '
              f'average time is {epoch_ave_time[fastest_epoch]:.4f}')
        print(f'time std over epochs is {std_over_epoch:.4f}')
        print(f'average iter time: {np.mean(np.concatenate(all_times)):.4f}'
              f' s/iter')
        print()
def plot_curve(log_dicts, args):
    """Plot the requested metrics of every log and show or save the figure.

    Args:
        log_dicts (list[dict]): One dict per log file mapping an epoch to a
            dict of metric lists.
        args: Namespace providing ``backend``, ``style``, ``legend``,
            ``json_logs``, ``keys``, ``interval``, ``mode``, ``title`` and
            ``out``.

    Raises:
        KeyError: If a requested metric is missing from the first sampled
            epoch of a log.
    """
    if args.backend is not None:
        plt.switch_backend(args.backend)
    sns.set_style(args.style)
    # if legend is None, use {filename}_{key} as legend
    legend = args.legend
    if legend is None:
        legend = []
        for json_log in args.json_logs:
            for metric in args.keys:
                legend.append(f'{json_log}_{metric}')
    # One legend entry is needed per (log, metric) pair.
    assert len(legend) == (len(args.json_logs) * len(args.keys))
    metrics = args.keys

    num_metrics = len(metrics)
    for i, log_dict in enumerate(log_dicts):
        epochs = list(log_dict.keys())
        for j, metric in enumerate(metrics):
            print(f'plot curve of {args.json_logs[i]}, metric is {metric}')
            if metric not in log_dict[epochs[args.interval - 1]]:
                raise KeyError(
                    f'{args.json_logs[i]} does not contain metric {metric}')

            if args.mode == 'eval':
                # Eval mode: one point every ``interval`` epochs.
                if min(epochs) == args.interval:
                    x0 = args.interval
                else:
                    # if current training is resumed from previous checkpoint
                    # we lost information in early epochs
                    # `xs` should start according to `min(epochs)`
                    if min(epochs) % args.interval == 0:
                        x0 = min(epochs)
                    else:
                        # find the first epoch that do eval
                        x0 = min(epochs) + args.interval - \
                            min(epochs) % args.interval
                xs = np.arange(x0, max(epochs) + 1, args.interval)
                ys = []
                for epoch in epochs[args.interval - 1::args.interval]:
                    ys += log_dict[epoch][metric]

                # if training is aborted before eval of the last epoch
                # `xs` and `ys` will have different length and cause an error
                # check if `ys[-1]` is empty here
                # (``epoch`` still holds the last value of the loop above)
                if not log_dict[epoch][metric]:
                    xs = xs[:-1]

                ax = plt.gca()
                ax.set_xticks(xs)
                plt.xlabel('epoch')
                plt.plot(xs, ys, label=legend[i * num_metrics + j], marker='o')
            else:
                # Any other mode: one point per logged iteration.
                xs = []
                ys = []
                # Last logged iteration index of the first sampled epoch,
                # used to offset the iterations of later epochs.
                num_iters_per_epoch = \
                    log_dict[epochs[args.interval-1]]['iter'][-1]
                for epoch in epochs[args.interval - 1::args.interval]:
                    iters = log_dict[epoch]['iter']
                    # Drop the trailing entry when it was logged in 'val'
                    # mode.
                    if log_dict[epoch]['mode'][-1] == 'val':
                        iters = iters[:-1]
                    xs.append(
                        np.array(iters) + (epoch - 1) * num_iters_per_epoch)
                    ys.append(np.array(log_dict[epoch][metric][:len(iters)]))
                xs = np.concatenate(xs)
                ys = np.concatenate(ys)
                plt.xlabel('iter')
                plt.plot(
                    xs, ys, label=legend[i * num_metrics + j], linewidth=0.5)
            plt.legend()
        if args.title is not None:
            plt.title(args.title)
    # Without an output path the figure is shown interactively.
    if args.out is None:
        plt.show()
    else:
        print(f'save curve to: {args.out}')
        plt.savefig(args.out)
        plt.cla()
def add_plot_parser(subparsers):
    """Register the ``plot_curve`` sub-command and its options."""
    plot_parser = subparsers.add_parser(
        'plot_curve', help='parser for plotting curves')
    # (flag, keyword arguments) in the order they appear in ``--help``.
    option_specs = (
        ('json_logs',
         dict(type=str, nargs='+', help='path of train log in json format')),
        ('--keys',
         dict(type=str,
              nargs='+',
              default=['mAP_0.25'],
              help='the metric that you want to plot')),
        ('--title', dict(type=str, help='title of figure')),
        ('--legend',
         dict(type=str, nargs='+', default=None, help='legend of each plot')),
        ('--backend', dict(type=str, default=None, help='backend of plt')),
        ('--style', dict(type=str, default='dark', help='style of plt')),
        ('--out', dict(type=str, default=None)),
        ('--mode', dict(type=str, default='train')),
        ('--interval', dict(type=int, default=1)),
    )
    for flag, options in option_specs:
        plot_parser.add_argument(flag, **options)
def add_time_parser(subparsers):
    """Register the ``cal_train_time`` sub-command and its options."""
    time_parser = subparsers.add_parser(
        'cal_train_time',
        help='parser for computing the average time per training iteration')
    time_parser.add_argument(
        'json_logs',
        nargs='+',
        type=str,
        help='path of train log in json format')
    outlier_help = ('include the first value of every epoch when computing '
                    'the average time')
    time_parser.add_argument(
        '--include-outliers', action='store_true', help=outlier_help)
def parse_args():
    """Parse the command line of the log analysis tool.

    The selected sub-command name is stored in ``args.task``.
    """
    root_parser = argparse.ArgumentParser(description='Analyze Json Log')
    # Only plotting curves and computing the average train time are
    # supported at the moment.
    task_parsers = root_parser.add_subparsers(dest='task', help='task parser')
    add_plot_parser(task_parsers)
    add_time_parser(task_parsers)
    return root_parser.parse_args()
def load_json_logs(json_logs):
    """Load line-delimited JSON training logs.

    Args:
        json_logs (list[str]): Paths of the log files.

    Returns:
        list[dict]: One dict per log file. Keys are epochs; each value maps
        a metric name (e.g. memory, bbox_mAP) to the list of values logged
        for that metric during the epoch, in file order.
    """
    log_dicts = [dict() for _ in json_logs]
    for json_log, log_dict in zip(json_logs, log_dicts):
        with open(json_log, 'r') as log_file:
            for line in log_file:
                line = line.strip()
                # Blank lines (e.g. a trailing newline) carry no record and
                # would make json.loads raise.
                if not line:
                    continue
                log = json.loads(line)
                # skip lines without `epoch` field
                if 'epoch' not in log:
                    continue
                epoch = log.pop('epoch')
                epoch_log = log_dict.setdefault(epoch, defaultdict(list))
                for k, v in log.items():
                    epoch_log[k].append(v)
    return log_dicts
def main():
    """Entry point: parse the CLI, load the logs and run the chosen task.

    Raises:
        ValueError: If no known sub-command was given.
    """
    args = parse_args()

    # Explicit dispatch table instead of eval() on a command-line value.
    tasks = {
        'plot_curve': plot_curve,
        'cal_train_time': cal_train_time,
    }
    if args.task not in tasks:
        raise ValueError(
            f'unknown task {args.task!r}, expected one of {sorted(tasks)}')

    json_logs = args.json_logs
    for json_log in json_logs:
        assert json_log.endswith('.json'), \
            f'{json_log} is not a .json log file'

    log_dicts = load_json_logs(json_logs)

    tasks[args.task](log_dicts, args)
if
__name__
==
'__main__'
:
main
()
docker-hub/MapTRv2/MapTR/tools/analysis_tools/benchmark.py
0 → 100644
View file @
19472568
# Copyright (c) OpenMMLab. All rights reserved.
import
argparse
import
time
import
torch
from
mmcv
import
Config
from
mmcv.parallel
import
MMDataParallel
from
mmcv.runner
import
load_checkpoint
,
wrap_fp16_model
import
sys
sys
.
path
.
append
(
'.'
)
from
projects.mmdet3d_plugin.datasets.builder
import
build_dataloader
from
projects.mmdet3d_plugin.datasets
import
custom_build_dataset
# from mmdet3d.datasets import build_dataloader, build_dataset
from
mmdet3d.models
import
build_detector
#from tools.misc.fuse_conv_bn import fuse_module
def parse_args():
    """Parse the command line of the benchmark script.

    Returns:
        argparse.Namespace: With ``config``, ``checkpoint``, ``samples``,
        ``log_interval`` and ``fuse_conv_bn``.
    """
    parser = argparse.ArgumentParser(description='MMDet benchmark a model')
    parser.add_argument('config', help='test config file path')
    parser.add_argument('--checkpoint', default=None, help='checkpoint file')
    # type=int is required: without it a value given on the command line
    # stays a string and breaks the integer comparisons in the benchmark
    # loop.
    parser.add_argument(
        '--samples', type=int, default=2000, help='samples to benchmark')
    parser.add_argument(
        '--log-interval', type=int, default=50, help='interval of logging')
    parser.add_argument(
        '--fuse-conv-bn',
        action='store_true',
        help='Whether to fuse conv and bn, this will slightly increase '
        'the inference speed')
    args = parser.parse_args()
    return args
def main():
    """Benchmark inference speed of the configured model on the test set.

    The first ``num_warmup`` iterations are excluded from the timing. The
    running FPS is printed every ``--log-interval`` samples and the overall
    FPS once ``--samples`` samples have been processed.
    """
    args = parse_args()
    # Normalise to int so values passed on the command line behave like
    # the integer defaults.
    num_samples = int(args.samples)
    log_interval = int(args.log_interval)

    cfg = Config.fromfile(args.config)
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    cfg.model.pretrained = None
    cfg.data.test.test_mode = True

    # build the dataloader
    # TODO: support multiple images per gpu (only minor changes are needed)
    print(cfg.data.test)
    dataset = custom_build_dataset(cfg.data.test)
    data_loader = build_dataloader(
        dataset,
        samples_per_gpu=1,
        workers_per_gpu=cfg.data.workers_per_gpu,
        dist=False,
        shuffle=False)

    # build the model and load checkpoint
    cfg.model.train_cfg = None
    model = build_detector(cfg.model, test_cfg=cfg.get('test_cfg'))
    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is not None:
        wrap_fp16_model(model)
    if args.checkpoint is not None:
        load_checkpoint(model, args.checkpoint, map_location='cpu')
    # NOTE(review): --fuse-conv-bn is parsed but currently has no effect.

    model = MMDataParallel(model, device_ids=[0])

    model.eval()

    # the first several iterations may be very slow so skip them
    num_warmup = 5
    pure_inf_time = 0

    # benchmark with several samples and take the average
    for i, data in enumerate(data_loader):
        # Synchronize around the forward pass so the wall-clock interval
        # covers the GPU work.
        torch.cuda.synchronize()
        start_time = time.perf_counter()
        with torch.no_grad():
            model(return_loss=False, rescale=True, **data)

        torch.cuda.synchronize()
        elapsed = time.perf_counter() - start_time

        if i >= num_warmup:
            pure_inf_time += elapsed
            if (i + 1) % log_interval == 0:
                fps = (i + 1 - num_warmup) / pure_inf_time
                print(f'Done image [{i + 1:<3}/ {num_samples}], '
                      f'fps: {fps:.1f} img / s')

        if (i + 1) == num_samples:
            # ``elapsed`` of this iteration is already part of
            # ``pure_inf_time``; adding it again would bias the result.
            if pure_inf_time > 0:
                fps = (i + 1 - num_warmup) / pure_inf_time
                print(f'Overall fps: {fps:.1f} img / s')
            else:
                print(f'Overall fps: n/a (all {num_samples} samples '
                      f'fell into the {num_warmup} warm-up iterations)')
            break
if
__name__
==
'__main__'
:
main
()
docker-hub/MapTRv2/MapTR/tools/analysis_tools/get_params.py
0 → 100644
View file @
19472568
import
torch
# Count the total number of elements stored in a checkpoint's state dict.
file_path = './ckpts/bevformer_v4.pth'
# NOTE(review): torch.load unpickles the file; only run this on checkpoints
# from a trusted source.
model = torch.load(file_path, map_location='cpu')
# Accumulate under a dedicated name instead of rebinding the builtin ``all``.
num_params = sum(
    tensor.nelement() for tensor in model['state_dict'].values())
print(num_params)
# smaller 63374123
# v4 69140395
docker-hub/MapTRv2/MapTR/tools/analysis_tools/visual.py
0 → 100644
View file @
19472568
# Based on https://github.com/nutonomy/nuscenes-devkit
# ---------------------------------------------
# Modified by Zhiqi Li
# ---------------------------------------------
import
mmcv
from
nuscenes.nuscenes
import
NuScenes
from
PIL
import
Image
from
nuscenes.utils.geometry_utils
import
view_points
,
box_in_image
,
BoxVisibility
,
transform_matrix
from
typing
import
Tuple
,
List
,
Iterable
import
matplotlib.pyplot
as
plt
import
numpy
as
np
from
PIL
import
Image
from
matplotlib
import
rcParams
from
matplotlib.axes
import
Axes
from
pyquaternion
import
Quaternion
from
PIL
import
Image
from
matplotlib
import
rcParams
from
matplotlib.axes
import
Axes
from
pyquaternion
import
Quaternion
from
tqdm
import
tqdm
from
nuscenes.utils.data_classes
import
LidarPointCloud
,
RadarPointCloud
,
Box
from
nuscenes.utils.geometry_utils
import
view_points
,
box_in_image
,
BoxVisibility
,
transform_matrix
from
nuscenes.eval.common.data_classes
import
EvalBoxes
,
EvalBox
from
nuscenes.eval.detection.data_classes
import
DetectionBox
from
nuscenes.eval.detection.utils
import
category_to_detection_name
from
nuscenes.eval.detection.render
import
visualize_sample
# Camera channel names, of the kind used as keys into a sample's 'data' dict.
# NOTE(review): the rendering functions in this file build their own local
# `cams` lists, so this module-level list looks unused here -- confirm before
# relying on or removing it.
cams = ['CAM_FRONT',
        'CAM_FRONT_RIGHT',
        'CAM_BACK_RIGHT',
        'CAM_BACK',
        'CAM_BACK_LEFT',
        'CAM_FRONT_LEFT']
import
numpy
as
np
import
matplotlib.pyplot
as
plt
from
nuscenes.utils.data_classes
import
LidarPointCloud
,
RadarPointCloud
,
Box
from
PIL
import
Image
from
matplotlib
import
rcParams
def render_annotation(
        anntoken: str,
        margin: float = 10,
        view: np.ndarray = np.eye(4),
        box_vis_level: BoxVisibility = BoxVisibility.ANY,
        out_path: str = 'render.png',
        extra_info: bool = False) -> None:
    """
    Render selected annotation in the LIDAR view and in every camera where it is visible.

    Relies on the module-level global `nusc` (a NuScenes instance).
    :param anntoken: Sample_annotation token.
    :param margin: How many meters in each direction to include in LIDAR view.
    :param view: LIDAR view point. The default array is shared between calls and is only read here.
    :param box_vis_level: If sample_data is an image, this sets required visibility for boxes.
    :param out_path: Optional path to save the rendered figure to disk.
    :param extra_info: Whether to render extra information below camera view.
    """
    ann_record = nusc.get('sample_annotation', anntoken)
    sample_record = nusc.get('sample', ann_record['sample_token'])
    assert 'LIDAR_TOP' in sample_record['data'].keys(), 'Error: No LIDAR_TOP in data, unable to render.'

    # Collect every camera channel in which the annotation passes the visibility test (possibly none).
    cams = [key for key in sample_record['data'].keys() if 'CAM' in key]
    select_cams = []
    for cam in cams:
        _, boxes, _ = nusc.get_sample_data(sample_record['data'][cam], box_vis_level=box_vis_level,
                                           selected_anntokens=[anntoken])
        if len(boxes) > 0:
            select_cams.append(cam)
    # assert len(boxes) > 0, 'Error: Could not find image where annotation is visible. ' \
    #                      'Try using e.g. BoxVisibility.ANY.'
    # assert len(boxes) < 2, 'Error: Found multiple annotations. Something is wrong!'

    num_cam = len(select_cams)

    # squeeze=False keeps the result two-dimensional even for a single panel. Without it,
    # plt.subplots(1, 1) returns a bare Axes object and `axes[0]` fails whenever the
    # annotation is not visible in any camera.
    _, axes = plt.subplots(1, num_cam + 1, figsize=(18, 9), squeeze=False)
    axes = axes[0]
    # From here on `select_cams` holds sample_data tokens instead of channel names.
    select_cams = [sample_record['data'][cam] for cam in select_cams]
    print('bbox in cams:', select_cams)

    # Plot LIDAR view.
    lidar = sample_record['data']['LIDAR_TOP']
    data_path, boxes, camera_intrinsic = nusc.get_sample_data(lidar, selected_anntokens=[anntoken])
    LidarPointCloud.from_file(data_path).render_height(axes[0], view=view)
    for box in boxes:
        c = np.array(get_color(box.name)) / 255.0
        box.render(axes[0], view=view, colors=(c, c, c))
        # Crop the LIDAR panel to the box extent plus `margin` on every side.
        corners = view_points(boxes[0].corners(), view, False)[:2, :]
        axes[0].set_xlim([np.min(corners[0, :]) - margin, np.max(corners[0, :]) + margin])
        axes[0].set_ylim([np.min(corners[1, :]) - margin, np.max(corners[1, :]) + margin])
        axes[0].axis('off')
        axes[0].set_aspect('equal')

    # Plot CAMERA view.
    for i in range(1, num_cam + 1):
        cam = select_cams[i - 1]
        data_path, boxes, camera_intrinsic = nusc.get_sample_data(cam, selected_anntokens=[anntoken])
        im = Image.open(data_path)
        axes[i].imshow(im)
        axes[i].set_title(nusc.get('sample_data', cam)['channel'])
        axes[i].axis('off')
        axes[i].set_aspect('equal')
        for box in boxes:
            c = np.array(get_color(box.name)) / 255.0
            box.render(axes[i], view=camera_intrinsic, normalize=True, colors=(c, c, c))

        # Limit the panel to the image extent; the y axis is flipped so the image origin is top-left.
        axes[i].set_xlim(0, im.size[0])
        axes[i].set_ylim(im.size[1], 0)

    # Print extra information about the annotation below the camera view.
    if extra_info:
        rcParams['font.family'] = 'monospace'

        w, l, h = ann_record['size']
        category = ann_record['category_name']
        lidar_points = ann_record['num_lidar_pts']
        radar_points = ann_record['num_radar_pts']

        sample_data_record = nusc.get('sample_data', sample_record['data']['LIDAR_TOP'])
        pose_record = nusc.get('ego_pose', sample_data_record['ego_pose_token'])
        dist = np.linalg.norm(np.array(pose_record['translation']) - np.array(ann_record['translation']))

        information = '\n'.join(['category: {}'.format(category),
                                 '',
                                 '# lidar points: {0:>4}'.format(lidar_points),
                                 '# radar points: {0:>4}'.format(radar_points),
                                 '',
                                 'distance: {:>7.3f}m'.format(dist),
                                 '',
                                 'width: {:>7.3f}m'.format(w),
                                 'length: {:>7.3f}m'.format(l),
                                 'height: {:>7.3f}m'.format(h)])

        plt.annotate(information, (0, 0), (0, -20), xycoords='axes fraction', textcoords='offset points', va='top')

    if out_path is not None:
        plt.savefig(out_path)
def get_sample_data(sample_data_token: str,
                    box_vis_level: BoxVisibility = BoxVisibility.ANY,
                    selected_anntokens=None,
                    use_flat_vehicle_coordinates: bool = False):
    """
    Look up the file path of a sample_data record together with its annotation boxes.
    The boxes are moved into the sensor frame (or into the flat ego frame when requested).
    Relies on the module-level global `nusc`.
    :param sample_data_token: Sample_data token.
    :param box_vis_level: If sample_data is an image, this sets required visibility for boxes.
    :param selected_anntokens: If provided only return the selected annotation.
    :param use_flat_vehicle_coordinates: Instead of the current sensor's coordinate frame, use ego frame which is
                                         aligned to z-plane in the world.
    :return: (data_path, boxes, camera_intrinsic <np.array: 3, 3>); camera_intrinsic is None for
             non-camera sensors.
    """
    # Database records describing the sensor reading, its calibration and the ego pose.
    sample_data = nusc.get('sample_data', sample_data_token)
    calibration = nusc.get('calibrated_sensor', sample_data['calibrated_sensor_token'])
    sensor = nusc.get('sensor', calibration['sensor_token'])
    ego_pose = nusc.get('ego_pose', sample_data['ego_pose_token'])

    data_path = nusc.get_sample_data_path(sample_data_token)

    # Intrinsics and image size only exist for cameras.
    is_camera = sensor['modality'] == 'camera'
    if is_camera:
        cam_intrinsic = np.array(calibration['camera_intrinsic'])
        imsize = (sample_data['width'], sample_data['height'])
    else:
        cam_intrinsic, imsize = None, None

    # Either the explicitly requested annotations or all boxes of this sample_data.
    if selected_anntokens is None:
        candidates = nusc.get_boxes(sample_data_token)
    else:
        candidates = [nusc.get_box(token) for token in selected_anntokens]

    box_list = []
    for box in candidates:
        if use_flat_vehicle_coordinates:
            # Ego frame, keeping only the yaw component so the frame stays parallel to the world z plane.
            yaw = Quaternion(ego_pose['rotation']).yaw_pitch_roll[0]
            box.translate(-np.array(ego_pose['translation']))
            yaw_only = Quaternion(scalar=np.cos(yaw / 2), vector=[0, 0, np.sin(yaw / 2)])
            box.rotate(yaw_only.inverse)
        else:
            # Global frame -> ego vehicle frame.
            box.translate(-np.array(ego_pose['translation']))
            box.rotate(Quaternion(ego_pose['rotation']).inverse)

            # Ego vehicle frame -> sensor frame.
            box.translate(-np.array(calibration['translation']))
            box.rotate(Quaternion(calibration['rotation']).inverse)

        # For cameras keep only the boxes that satisfy the requested visibility level.
        if not is_camera or box_in_image(box, cam_intrinsic, imsize, vis_level=box_vis_level):
            box_list.append(box)

    return data_path, box_list, cam_intrinsic
def get_predicted_data(sample_data_token: str,
                       box_vis_level: BoxVisibility = BoxVisibility.ANY,
                       selected_anntokens=None,
                       use_flat_vehicle_coordinates: bool = False,
                       pred_anns=None
                       ):
    """
    Returns the data path of a sample_data record together with the given predicted boxes,
    transformed into the current sensor's coordinate frame.
    Relies on the module-level global `nusc`.
    Note that the boxes in `pred_anns` are translated/rotated IN PLACE, so a caller must pass
    fresh box objects for every sample_data it queries.
    :param sample_data_token: Sample_data token.
    :param box_vis_level: If sample_data is an image, this sets required visibility for boxes.
    :param selected_anntokens: Not used by this function; kept for signature compatibility.
    :param use_flat_vehicle_coordinates: Instead of the current sensor's coordinate frame, use ego frame which is
                                         aligned to z-plane in the world.
    :param pred_anns: Iterable of predicted boxes to transform. None is treated as "no predictions".
    :return: (data_path, boxes, camera_intrinsic <np.array: 3, 3>); camera_intrinsic is None for
             non-camera sensors.
    """

    # Retrieve sensor & pose records
    sd_record = nusc.get('sample_data', sample_data_token)
    cs_record = nusc.get('calibrated_sensor', sd_record['calibrated_sensor_token'])
    sensor_record = nusc.get('sensor', cs_record['sensor_token'])
    pose_record = nusc.get('ego_pose', sd_record['ego_pose_token'])

    data_path = nusc.get_sample_data_path(sample_data_token)

    if sensor_record['modality'] == 'camera':
        cam_intrinsic = np.array(cs_record['camera_intrinsic'])
        imsize = (sd_record['width'], sd_record['height'])
    else:
        cam_intrinsic = None
        imsize = None

    # The boxes come from the caller rather than from the nuScenes database. The default
    # pred_anns=None used to be iterated directly and raised a TypeError.
    boxes = [] if pred_anns is None else pred_anns

    # Make list of Box objects including coord system transforms.
    box_list = []
    for box in boxes:
        if use_flat_vehicle_coordinates:
            # Move box to ego vehicle coord system parallel to world z plane.
            yaw = Quaternion(pose_record['rotation']).yaw_pitch_roll[0]
            box.translate(-np.array(pose_record['translation']))
            box.rotate(Quaternion(scalar=np.cos(yaw / 2), vector=[0, 0, np.sin(yaw / 2)]).inverse)
        else:
            # Move box to ego vehicle coord system.
            box.translate(-np.array(pose_record['translation']))
            box.rotate(Quaternion(pose_record['rotation']).inverse)

            # Move box to sensor coord system.
            box.translate(-np.array(cs_record['translation']))
            box.rotate(Quaternion(cs_record['rotation']).inverse)

        # For cameras drop the boxes that do not satisfy the requested visibility level.
        if sensor_record['modality'] == 'camera' and not \
                box_in_image(box, cam_intrinsic, imsize, vis_level=box_vis_level):
            continue
        box_list.append(box)

    return data_path, box_list, cam_intrinsic
def lidiar_render(sample_token, data, out_path=None):
    """Render ground-truth and predicted boxes of one sample via ``visualize_sample``.

    Relies on the module-level global ``nusc``.

    Args:
        sample_token (str): Token of the sample to render.
        data (dict): Prediction results; ``data['results'][sample_token]``
            is iterated as a list of dicts providing at least the keys
            'sample_token', 'translation', 'size', 'rotation', 'velocity',
            'detection_name' and 'attribute_name'.
        out_path (str, optional): Output path prefix; ``'_bev'`` is appended
            to it. When None, ``savepath=None`` is passed on to
            ``visualize_sample``.
    """
    bbox_gt_list = []
    bbox_pred_list = []
    anns = nusc.get('sample', sample_token)['anns']
    for ann in anns:
        content = nusc.get('sample_annotation', ann)
        # Annotations whose category has no detection class cannot be turned
        # into a DetectionBox; skip them explicitly instead of relying on a
        # bare ``except`` to swallow the resulting error.
        detection_name = category_to_detection_name(content['category_name'])
        if detection_name is None:
            continue
        try:
            bbox_gt_list.append(DetectionBox(
                sample_token=content['sample_token'],
                translation=tuple(content['translation']),
                size=tuple(content['size']),
                rotation=tuple(content['rotation']),
                velocity=nusc.box_velocity(content['token'])[:2],
                ego_translation=(0.0, 0.0, 0.0) if 'ego_translation' not in content
                else tuple(content['ego_translation']),
                num_pts=-1 if 'num_pts' not in content else int(content['num_pts']),
                detection_name=detection_name,
                detection_score=-1.0 if 'detection_score' not in content else float(content['detection_score']),
                attribute_name=''))
        except AssertionError:
            # NOTE(review): presumably DetectionBox validates its fields with
            # assertions -- confirm. Such boxes stay skipped (best effort),
            # while unrelated errors are no longer hidden.
            continue

    bbox_anns = data['results'][sample_token]
    for content in bbox_anns:
        bbox_pred_list.append(DetectionBox(
            sample_token=content['sample_token'],
            translation=tuple(content['translation']),
            size=tuple(content['size']),
            rotation=tuple(content['rotation']),
            velocity=tuple(content['velocity']),
            ego_translation=(0.0, 0.0, 0.0) if 'ego_translation' not in content
            else tuple(content['ego_translation']),
            num_pts=-1 if 'num_pts' not in content else int(content['num_pts']),
            detection_name=content['detection_name'],
            detection_score=-1.0 if 'detection_score' not in content else float(content['detection_score']),
            attribute_name=content['attribute_name']))
    gt_annotations = EvalBoxes()
    pred_annotations = EvalBoxes()
    gt_annotations.add_boxes(sample_token, bbox_gt_list)
    pred_annotations.add_boxes(sample_token, bbox_pred_list)
    print('green is ground truth')
    print('blue is the predited result')
    # The default out_path=None used to raise a TypeError in ``None + '_bev'``.
    savepath = None if out_path is None else out_path + '_bev'
    visualize_sample(nusc, sample_token, gt_annotations, pred_annotations, savepath=savepath)
def get_color(category_name: str):
    """
    Provides the default colors based on the category names.
    This method works for the general nuScenes categories, as well as the nuScenes detection categories.
    Relies on the module-level global `nusc` and its `colormap` mapping.
    :param category_name: Category name to look up.
    :return: The colormap entry of the matching category, or [0, 0, 0] when nothing matches.
    """
    # Names that are mapped explicitly; these take precedence over the substring search below.
    explicit_keys = {
        'bicycle': 'vehicle.bicycle',
        'construction_vehicle': 'vehicle.construction',
        'traffic_cone': 'movable_object.trafficcone',
    }
    if category_name in explicit_keys:
        return nusc.colormap[explicit_keys[category_name]]

    # Otherwise the first colormap key that contains the name as a substring wins.
    for key in nusc.colormap.keys():
        if category_name in key:
            return nusc.colormap[key]
    return [0, 0, 0]
def render_sample_data(
        sample_toekn: str,
        with_anns: bool = True,
        box_vis_level: BoxVisibility = BoxVisibility.ANY,
        axes_limit: float = 40,
        ax=None,
        nsweeps: int = 1,
        out_path: str = None,
        underlay_map: bool = True,
        use_flat_vehicle_coordinates: bool = True,
        show_lidarseg: bool = False,
        show_lidarseg_legend: bool = False,
        filter_lidarseg_labels=None,
        lidarseg_preds_bin_path: str = None,
        verbose: bool = True,
        show_panoptic: bool = False,
        pred_data=None,
      ) -> None:
    """
    Render the predictions and the ground truth of one sample for its six cameras.

    First calls `lidiar_render` for the sample, then draws the camera images on a grid of axes:
    the first two rows show the predicted boxes, the two rows below show the ground-truth boxes.
    Relies on the module-level global `nusc`.
    :param sample_toekn: Sample token.
    :param with_anns: Whether to draw box annotations.
    :param box_vis_level: Required visibility of a box in the image for it to be drawn.
    :param ax: Grid of axes indexed as ax[row, col]; a 4 x 3 grid is created when None.
    :param out_path: Optional path prefix; the figure is saved to out_path + '_camera'.
    :param lidarseg_preds_bin_path: Only affects the titles: when set, '(predictions)' is appended.
    :param verbose: Whether to display the image after it is rendered.
    :param pred_data: Prediction results; pred_data['results'][sample_toekn] is read as a list of
        records with 'translation', 'size', 'rotation', 'detection_name' and 'detection_score'.
    The parameters axes_limit, nsweeps, underlay_map, use_flat_vehicle_coordinates, show_lidarseg,
    show_lidarseg_legend, filter_lidarseg_labels and show_panoptic are accepted but not read here.
    """
    lidiar_render(sample_toekn, pred_data, out_path=out_path)
    sample = nusc.get('sample', sample_toekn)
    # sample = data['results'][sample_token_list[0]][0]
    cams = [
        'CAM_FRONT_LEFT',
        'CAM_FRONT',
        'CAM_FRONT_RIGHT',
        'CAM_BACK_LEFT',
        'CAM_BACK',
        'CAM_BACK_RIGHT',
    ]
    if ax is None:
        _, ax = plt.subplots(4, 3, figsize=(24, 18))

    # Suffix shared by all panel titles.
    title_suffix = '(predictions)' if lidarseg_preds_bin_path else ''

    j = 0
    for ind, cam in enumerate(cams):
        sample_data_token = sample['data'][cam]

        sd_record = nusc.get('sample_data', sample_data_token)
        sensor_modality = sd_record['sensor_modality']

        if sensor_modality in ['lidar', 'radar']:
            assert False
        elif sensor_modality == 'camera':
            # The prediction boxes are rebuilt for every camera because get_predicted_data
            # transforms the boxes it receives in place.
            score_filtered = [
                Box(record['translation'], record['size'], Quaternion(record['rotation']),
                    name=record['detection_name'], token='predicted')
                for record in pred_data['results'][sample_toekn]
                if record['detection_score'] > 0.2
            ]
            data_path, boxes_pred, camera_intrinsic = get_predicted_data(
                sample_data_token, box_vis_level=box_vis_level, pred_anns=score_filtered)
            _, boxes_gt, _ = nusc.get_sample_data(sample_data_token, box_vis_level=box_vis_level)

            # The fourth camera starts the second row of the grid.
            if ind == 3:
                j += 1
            ind = ind % 3

            image = Image.open(data_path)
            # mmcv.imwrite(np.array(data)[:,:,::-1], f'{cam}.png')
            width, height = image.size[0], image.size[1]

            # (axes, boxes) pairs: predictions in row j, ground truth two rows below.
            panels = ((ax[j, ind], boxes_pred), (ax[j + 2, ind], boxes_gt))

            # Show image.
            for panel, _ in panels:
                panel.imshow(image)

            # Show boxes.
            if with_anns:
                for panel, panel_boxes in panels:
                    for box in panel_boxes:
                        c = np.array(get_color(box.name)) / 255.0
                        box.render(panel, view=camera_intrinsic, normalize=True, colors=(c, c, c))

            # Limit visible range (y axis flipped so the image origin is top-left).
            for panel, _ in panels:
                panel.set_xlim(0, width)
                panel.set_ylim(height, 0)
        else:
            raise ValueError("Error: Unknown sensor modality!")

        titled_panels = (
            (ax[j, ind], 'PRED: {} {labels_type}'),
            (ax[j + 2, ind], 'GT:{} {labels_type}'),
        )
        for panel, title in titled_panels:
            panel.axis('off')
            panel.set_title(title.format(sd_record['channel'], labels_type=title_suffix))
            panel.set_aspect('equal')

    if out_path is not None:
        plt.savefig(out_path + '_camera', bbox_inches='tight', pad_inches=0, dpi=200)
    if verbose:
        plt.show()
    plt.close()
if __name__ == '__main__':
    # `nusc` is read as a module-level global by the rendering functions of this file.
    nusc = NuScenes(version='v1.0-trainval', dataroot='./data/nuscenes', verbose=True)
    # render_annotation('7603b030b42a4b1caa8c443ccc1a7d52')
    bevformer_results = mmcv.load('test/bevformer_base/Thu_Jun__9_16_22_37_2022/pts_bbox/results_nusc.json')
    sample_token_list = list(bevformer_results['results'].keys())
    # Render at most the first ten samples. Slicing avoids an IndexError when the result file
    # holds fewer than ten samples, and the loop variable no longer shadows the builtin `id`.
    for sample_token in sample_token_list[:10]:
        render_sample_data(sample_token, pred_data=bevformer_results, out_path=sample_token)
docker-hub/MapTRv2/MapTR/tools/create_data.py
0 → 100644
View file @
19472568
# ---------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
# ---------------------------------------------
# Modified by Zhiqi Li
# ---------------------------------------------
from
data_converter.create_gt_database
import
create_groundtruth_database
from
data_converter
import
nuscenes_converter
as
nuscenes_converter
from
data_converter
import
lyft_converter
as
lyft_converter
from
data_converter
import
kitti_converter
as
kitti
from
data_converter
import
indoor_converter
as
indoor
import
argparse
from
os
import
path
as
osp
import
sys
sys
.
path
.
append
(
'.'
)
def kitti_data_prep(root_path, info_prefix, version, out_dir):
    """Generate the Kitti info files, 2D annotations and groundtruth database.

    The info files and the reduced point clouds are created first, then 2D
    annotations are exported for the train, val, trainval and test info
    files (in that order), and finally the groundtruth database is built.

    Args:
        root_path (str): Path of dataset root.
        info_prefix (str): The prefix of info filenames.
        version (str): Dataset version; the value 'mask' enables
            ``with_mask`` for the groundtruth database.
        out_dir (str): Output directory of the groundtruth database info.
    """
    kitti.create_kitti_info_file(root_path, info_prefix)
    kitti.create_reduced_point_cloud(root_path, info_prefix)

    # One info file per split, all located directly under ``root_path``.
    info_suffixes = (
        '_infos_train.pkl',
        '_infos_val.pkl',
        '_infos_trainval.pkl',
        '_infos_test.pkl',
    )
    info_paths = [
        osp.join(root_path, f'{info_prefix}{suffix}')
        for suffix in info_suffixes
    ]
    for info_path in info_paths:
        kitti.export_2d_annotation(root_path, info_path)

    gt_info_path = f'{out_dir}/{info_prefix}_infos_train.pkl'
    create_groundtruth_database(
        'KittiDataset',
        root_path,
        info_prefix,
        gt_info_path,
        relative_path=False,
        mask_anno_path='instances_train.json',
        with_mask=(version == 'mask'))
def nuscenes_data_prep(root_path,
                       can_bus_root_path,
                       info_prefix,
                       version,
                       dataset_name,
                       out_dir,
                       max_sweeps=10):
    """Generate the nuScenes info files and export their 2D annotations.

    For version 'v1.0-test' only the temporal test info file is exported;
    for every other version the temporal train and val info files are.

    Args:
        root_path (str): Path of dataset root.
        can_bus_root_path (str): Root path of the CAN bus data, forwarded
            to ``create_nuscenes_infos``.
        info_prefix (str): The prefix of info filenames.
        version (str): Dataset version.
        dataset_name (str): The dataset class name. Not read by this
            function (the groundtruth database step is disabled).
        out_dir (str): Output directory of the generated info files.
        max_sweeps (int): Number of input consecutive frames. Default: 10
    """
    nuscenes_converter.create_nuscenes_infos(
        root_path, out_dir, can_bus_root_path, info_prefix,
        version=version, max_sweeps=max_sweeps)

    # Pick the info files that exist for this dataset version.
    if version == 'v1.0-test':
        info_suffixes = ('_infos_temporal_test.pkl',)
    else:
        info_suffixes = ('_infos_temporal_train.pkl',
                         '_infos_temporal_val.pkl')

    info_paths = [
        osp.join(out_dir, f'{info_prefix}{suffix}')
        for suffix in info_suffixes
    ]
    for info_path in info_paths:
        nuscenes_converter.export_2d_annotation(
            root_path, info_path, version=version)
    # create_groundtruth_database(dataset_name, root_path, info_prefix,
    #                             f'{out_dir}/{info_prefix}_infos_train.pkl')
def lyft_data_prep(root_path, info_prefix, version, max_sweeps=10):
    """Generate the '.pkl' info files of the Lyft dataset.

    Only the basic infos are created here; no ground truth database or 2D
    annotations are generated by this function.

    Args:
        root_path (str): Path of dataset root.
        info_prefix (str): The prefix of info filenames.
        version (str): Dataset version.
        max_sweeps (int, optional): Number of input consecutive frames.
            Defaults to 10.
    """
    converter_options = {'version': version, 'max_sweeps': max_sweeps}
    lyft_converter.create_lyft_infos(
        root_path, info_prefix, **converter_options)
def scannet_data_prep(root_path, info_prefix, out_dir, workers):
    """Create the info file of the scannet dataset.

    Thin wrapper that forwards all arguments to
    ``indoor.create_indoor_info_file``.

    Args:
        root_path (str): Path of dataset root.
        info_prefix (str): The prefix of info filenames.
        out_dir (str): Output directory of the generated info file.
        workers (int): Number of threads to be used.
    """
    create_infos = indoor.create_indoor_info_file
    create_infos(root_path, info_prefix, out_dir, workers=workers)
def s3dis_data_prep(root_path, info_prefix, out_dir, workers):
    """Create the info file of the s3dis dataset.

    Thin wrapper that forwards all arguments to
    ``indoor.create_indoor_info_file``.

    Args:
        root_path (str): Path of dataset root.
        info_prefix (str): The prefix of info filenames.
        out_dir (str): Output directory of the generated info file.
        workers (int): Number of threads to be used.
    """
    create_infos = indoor.create_indoor_info_file
    create_infos(root_path, info_prefix, out_dir, workers=workers)
def sunrgbd_data_prep(root_path, info_prefix, out_dir, workers):
    """Create the info file of the sunrgbd dataset.

    Thin wrapper that forwards all arguments to
    ``indoor.create_indoor_info_file``.

    Args:
        root_path (str): Path of dataset root.
        info_prefix (str): The prefix of info filenames.
        out_dir (str): Output directory of the generated info file.
        workers (int): Number of threads to be used.
    """
    create_infos = indoor.create_indoor_info_file
    create_infos(root_path, info_prefix, out_dir, workers=workers)
def waymo_data_prep(root_path,
                    info_prefix,
                    version,
                    out_dir,
                    workers,
                    max_sweeps=5):
    """Prepare the info file for waymo dataset.

    Converts the 'training', 'validation' and 'testing' splits to KITTI
    format (the validation split is written into the 'training' directory),
    then creates the waymo info files and the groundtruth database.

    Args:
        root_path (str): Path of dataset root.
        info_prefix (str): The prefix of info filenames.
        version (str): Dataset version. Not read by this function.
        out_dir (str): Output directory of the generated info file.
        workers (int): Number of threads to be used.
        max_sweeps (int): Number of input consecutive frames. Default: 5
            Here we store pose information of these frames for later use.
    """
    # Imported lazily inside the function rather than at module import time.
    from tools.data_converter import waymo_converter as waymo

    splits = ['training', 'validation', 'testing']
    for i, split in enumerate(splits):
        load_dir = osp.join(root_path, 'waymo_format', split)
        if split == 'validation':
            save_dir = osp.join(out_dir, 'kitti_format', 'training')
        else:
            save_dir = osp.join(out_dir, 'kitti_format', split)
        converter = waymo.Waymo2KITTI(
            load_dir,
            save_dir,
            prefix=str(i),
            workers=workers,
            # The split is called 'testing' in ``splits``; the former
            # comparison against 'test' could never be true, so test_mode
            # was never enabled.
            test_mode=(split == 'testing'))
        converter.convert()
    # Generate waymo infos
    out_dir = osp.join(out_dir, 'kitti_format')
    kitti.create_waymo_info_file(out_dir, info_prefix, max_sweeps=max_sweeps)
    create_groundtruth_database(
        'WaymoDataset',
        out_dir,
        info_prefix,
        f'{out_dir}/{info_prefix}_infos_train.pkl',
        relative_path=False,
        with_mask=False)
# Command-line interface of the data converter. The arguments are parsed at
# module level, i.e. as soon as this file is executed or imported.
parser = argparse.ArgumentParser(description='Data converter arg parser')
parser.add_argument('dataset', metavar='kitti', help='name of the dataset')
parser.add_argument(
    '--root-path',
    type=str,
    default='./data/kitti',
    help='specify the root path of dataset')
parser.add_argument(
    '--canbus',
    type=str,
    default='./data',
    help='specify the root path of nuScenes canbus')
parser.add_argument(
    '--version',
    type=str,
    default='v1.0',
    required=False,
    help='specify the dataset version, no need for kitti')
parser.add_argument(
    '--max-sweeps',
    type=int,
    default=10,
    required=False,
    help='specify sweeps of lidar per example')
# ``required`` must be the boolean False: the string 'False' is truthy, which
# made --out-dir mandatory and its default value unreachable.
parser.add_argument(
    '--out-dir',
    type=str,
    default='./data/kitti',
    required=False,
    help='name of info pkl')
parser.add_argument('--extra-tag', type=str, default='kitti')
parser.add_argument(
    '--workers', type=int, default=4, help='number of threads to be used')
args = parser.parse_args()
if __name__ == '__main__':
    # Dispatch on the requested dataset; an unknown name does nothing.
    dataset = args.dataset
    if dataset == 'kitti':
        kitti_data_prep(
            root_path=args.root_path,
            info_prefix=args.extra_tag,
            version=args.version,
            out_dir=args.out_dir)
    elif dataset == 'nuscenes' and args.version != 'v1.0-mini':
        # Full dataset: process the trainval version first, then the test
        # version.
        for version_suffix in ('-trainval', '-test'):
            nuscenes_data_prep(
                root_path=args.root_path,
                can_bus_root_path=args.canbus,
                info_prefix=args.extra_tag,
                version=f'{args.version}{version_suffix}',
                dataset_name='NuScenesDataset',
                out_dir=args.out_dir,
                max_sweeps=args.max_sweeps)
    elif dataset == 'nuscenes' and args.version == 'v1.0-mini':
        # The mini version is used as given, without a suffix.
        nuscenes_data_prep(
            root_path=args.root_path,
            can_bus_root_path=args.canbus,
            info_prefix=args.extra_tag,
            version=f'{args.version}',
            dataset_name='NuScenesDataset',
            out_dir=args.out_dir,
            max_sweeps=args.max_sweeps)
    elif dataset == 'lyft':
        # Train version first, then the test version.
        for version_suffix in ('-train', '-test'):
            lyft_data_prep(
                root_path=args.root_path,
                info_prefix=args.extra_tag,
                version=f'{args.version}{version_suffix}',
                max_sweeps=args.max_sweeps)
    elif dataset == 'waymo':
        waymo_data_prep(
            root_path=args.root_path,
            info_prefix=args.extra_tag,
            version=args.version,
            out_dir=args.out_dir,
            workers=args.workers,
            max_sweeps=args.max_sweeps)
    elif dataset in ('scannet', 's3dis', 'sunrgbd'):
        # The three indoor datasets share one call signature.
        indoor_preps = {
            'scannet': scannet_data_prep,
            's3dis': s3dis_data_prep,
            'sunrgbd': sunrgbd_data_prep,
        }
        indoor_preps[dataset](
            root_path=args.root_path,
            info_prefix=args.extra_tag,
            out_dir=args.out_dir,
            workers=args.workers)
docker-hub/MapTRv2/MapTR/tools/data_converter/__init__.py
0 → 100644
View file @
19472568
# Copyright (c) OpenMMLab. All rights reserved.
docker-hub/MapTRv2/MapTR/tools/data_converter/av2_converter.py
0 → 100644
View file @
19472568
from
functools
import
partial
from
multiprocessing
import
Pool
import
multiprocessing
from
random
import
sample
import
time
import
mmcv
import
logging
from
pathlib
import
Path
from
os
import
path
as
osp
import
os
from
av2.datasets.sensor.av2_sensor_dataloader
import
AV2SensorDataLoader
from
av2.map.lane_segment
import
LaneMarkType
,
LaneSegment
from
av2.map.map_api
import
ArgoverseStaticMap
from
tqdm
import
tqdm
import
argparse
# Names of the ring cameras queried for every sample. The stereo pair is
# currently commented out and therefore not collected.
CAM_NAMES = [
    'ring_front_center',
    'ring_front_right',
    'ring_front_left',
    'ring_rear_right',
    'ring_rear_left',
    'ring_side_right',
    'ring_side_left',
    # 'stereo_front_left', 'stereo_front_right',
]
# Log ids excluded from conversion; they are listed as failing logs in av2:
# https://github.com/argoverse/av2-api/blob/05b7b661b7373adb5115cf13378d344d2ee43906/src/av2/map/README.md#training-online-map-inference-models
FAIL_LOGS = [
    '75e8adad-50a6-3245-8726-5e612db3d165',
    '54bc6dbc-ebfb-3fba-b5b3-57f88b4b79ca',
    'af170aac-8465-3d7b-82c5-64147e94af7d',
    '6e106cf8-f6dd-38f6-89c8-9be7a71e7275',
    '01bb304d-7bd8-35f8-bbef-7086b688e35e',
    '453e5558-6363-38e3-bf9b-42b5ba0a6f1d',
]
def parse_args():
    """Parse the command-line options of the AV2 data converter.

    Returns:
        argparse.Namespace: Namespace with ``data_root`` (str, ``None`` when
            the option is omitted) and ``nproc`` (int, default 64).
    """
    cli = argparse.ArgumentParser(description='Data converter arg parser')
    cli.add_argument(
        '--data-root', type=str, help='specify the root path of dataset')
    cli.add_argument(
        '--nproc',
        type=int,
        default=64,
        required=False,
        help='workers to process data')
    return cli.parse_args()
def create_av2_infos_mp(root_path,
                        info_prefix,
                        dest_path=None,
                        split='train',
                        num_multithread=64):
    """Create info file of av2 dataset.

    Given the raw data, generate its related info file in pkl format. The
    dumped object is a dict with two keys: ``samples`` (list of per-frame
    dicts, each tagged with a running ``sample_idx``) and ``id2map`` (log id
    -> dict with ``divider``, ``ped_crossing`` and ``boundary`` entries).

    Args:
        root_path (str): Path of the data root. The split name is appended
            to it to locate the logs.
        info_prefix (str): Prefix of the info file to be generated.
        dest_path (str): Path to store generated file, default to
            ``<root_path>/<split>``.
        split (str): Split of the data.
            Default: 'train'
        num_multithread (int): Size of the ``multiprocessing.Pool`` used to
            collect the samples of the individual logs.
            Default: 64

    Raises:
        RuntimeError: If a log's ``map`` directory does not contain exactly
            one ``log_map_archive_*.json`` file.
    """
    root_path = osp.join(root_path, split)
    if dest_path is None:
        dest_path = root_path

    loader = AV2SensorDataLoader(Path(root_path), Path(root_path))
    # Skip the logs listed in FAIL_LOGS.
    log_ids = [
        log_id for log_id in loader.get_log_ids() if log_id not in FAIL_LOGS
    ]

    print('collecting samples...')
    start_time = time.time()

    print('num cpu:', multiprocessing.cpu_count())
    print(f'using {num_multithread} threads')

    # to suppress logging from av2.utils.synchronization_database
    sdb_logger = logging.getLogger('av2.utils.synchronization_database')
    prev_level = sdb_logger.level
    sdb_logger.setLevel(logging.CRITICAL)

    fn = partial(get_data_from_logid, loader=loader, data_root=root_path)
    try:
        # Pool.map blocks until every log is processed and returns the
        # results in the same order as ``log_ids``; the context manager
        # releases the workers even when a worker raises.
        with Pool(num_multithread) as pool:
            results = pool.map(fn, log_ids)
    finally:
        # Always restore the logger level, also on failure.
        sdb_logger.setLevel(prev_level)

    samples = []
    discarded = 0
    sample_idx = 0
    for _samples, _discarded in results:
        # Number the samples consecutively across all logs.
        for sample in _samples:
            sample['sample_idx'] = sample_idx
            sample_idx += 1
        samples += _samples
        discarded += _discarded

    print(f'{len(samples)} available samples, {discarded} samples discarded')

    id2map = {}
    for log_id in log_ids:
        log_map_dirpath = Path(osp.join(root_path, log_id, "map"))
        vector_data_fnames = sorted(
            log_map_dirpath.glob("log_map_archive_*.json"))
        if len(vector_data_fnames) != 1:
            raise RuntimeError(
                f"JSON file containing vector map data is missing (searched in {log_map_dirpath})"
            )
        avm = ArgoverseStaticMap.from_json(vector_data_fnames[0])

        map_elements = {}
        map_elements['divider'] = get_divider(avm)
        map_elements['ped_crossing'] = get_ped(avm)
        map_elements['boundary'] = get_boundary(avm)
        id2map[log_id] = map_elements

    print('collected in {}s'.format(time.time() - start_time))
    infos = dict(samples=samples, id2map=id2map)

    info_path = osp.join(dest_path,
                         '{}_map_infos_{}.pkl'.format(info_prefix, split))
    print(f'saving results to {info_path}')
    mmcv.dump(infos, info_path)
def get_divider(avm):
    """Collect the marked lane boundaries of a map as divider polylines.

    Args:
        avm: Map object exposing ``get_scenario_lane_segments()``.

    Returns:
        list: The ``xyz`` attribute of every left/right lane boundary whose
            mark type is not ``LaneMarkType.NONE``.
    """
    dividers = []
    for segment in avm.get_scenario_lane_segments():
        mark_types = [segment.left_mark_type, segment.right_mark_type]
        boundaries = [segment.left_lane_boundary, segment.right_lane_boundary]
        for mark_type, boundary in zip(mark_types, boundaries):
            if mark_type in [LaneMarkType.NONE, ]:
                # Unmarked boundary: not a divider.
                continue
            dividers.append(boundary.xyz)
    return dividers
def get_boundary(avm):
    """Collect the drivable-area outlines of a map.

    Args:
        avm: Map object exposing ``get_scenario_vector_drivable_areas()``.

    Returns:
        list: The ``xyz`` attribute of every drivable area, in iteration
            order.
    """
    return [area.xyz for area in avm.get_scenario_vector_drivable_areas()]
def get_ped(avm):
    """Collect the pedestrian-crossing polygons of a map.

    Args:
        avm: Map object exposing ``get_scenario_ped_crossings()``.

    Returns:
        list: The ``polygon`` attribute of every pedestrian crossing, in
            iteration order.
    """
    return [crossing.polygon for crossing in avm.get_scenario_ped_crossings()]
def get_data_from_logid(log_id, loader: AV2SensorDataLoader, data_root):
    """Build the sample dicts of one log.

    Every lidar timestamp of the log is used to look up the closest image of
    each camera in ``CAM_NAMES`` and the closest lidar sweep. A timestamp is
    discarded when any of those lookups returns ``None``.

    Args:
        log_id (str): Id of the log to process.
        loader (AV2SensorDataLoader): Loader used for all lookups.
        data_root: Not used by this function.

    Returns:
        tuple[list[dict], int]: The collected samples and the number of
            discarded timestamps.
    """
    kept = []
    num_discarded = 0

    # We use lidar timestamps to query all sensors.
    # The frequency is 10Hz
    lidar_timestamps = loader._sdb.per_log_lidar_timestamps_index[log_id]
    for ts in lidar_timestamps:
        img_fpaths = [
            loader.get_closest_img_fpath(log_id, cam_name, ts)
            for cam_name in CAM_NAMES
        ]
        lidar_fpath = loader.get_closest_lidar_fpath(log_id, ts)

        # If bad sensor synchronization, discard the sample
        if None in img_fpaths or lidar_fpath is None:
            num_discarded += 1
            continue

        cams = {}
        for cam_name, img_fpath in zip(CAM_NAMES, img_fpaths):
            pinhole_cam = loader.get_log_pinhole_camera(log_id, cam_name)
            cams[cam_name] = dict(
                img_fpath=str(img_fpath),
                intrinsics=pinhole_cam.intrinsics.K,
                extrinsics=pinhole_cam.extrinsics,
            )

        city_SE3_ego = loader.get_city_SE3_ego(log_id, int(ts))
        sample = dict(
            e2g_translation=city_SE3_ego.translation,
            e2g_rotation=city_SE3_ego.rotation,
            cams=cams,
            lidar_fpath=str(lidar_fpath),
            # map_fpath=map_fname,
            timestamp=str(ts),
            log_id=log_id,
            token=str(log_id + '_' + str(ts)))
        kept.append(sample)

    return kept, num_discarded
if __name__ == '__main__':
    args = parse_args()
    # Generate one info file per split, stored directly under the data root.
    for name in ['train', 'val', 'test']:
        create_av2_infos_mp(
            root_path=args.data_root,
            split=name,
            info_prefix='av2',
            dest_path=args.data_root,
            # Forward --nproc; previously the option was parsed but ignored
            # and the pool always used the function default.
            num_multithread=args.nproc)
\ No newline at end of file
docker-hub/MapTRv2/MapTR/tools/data_converter/create_gt_database.py
0 → 100644
View file @
19472568
# Copyright (c) OpenMMLab. All rights reserved.
import
mmcv
import
numpy
as
np
import
pickle
from
mmcv
import
track_iter_progress
from
mmcv.ops
import
roi_align
from
os
import
path
as
osp
from
pycocotools
import
mask
as
maskUtils
from
pycocotools.coco
import
COCO
from
mmdet3d.core.bbox
import
box_np_ops
as
box_np_ops
from
mmdet3d.datasets
import
build_dataset
from
mmdet.core.evaluation.bbox_overlaps
import
bbox_overlaps
def _poly2mask(mask_ann, img_h, img_w):
    """Decode a COCO-style mask annotation with ``pycocotools``.

    Args:
        mask_ann (list | dict): Polygon list, uncompressed RLE (a dict whose
            ``counts`` is a list) or an RLE dict that is passed to the
            decoder unchanged.
        img_h (int): Image height passed to ``frPyObjects``.
        img_w (int): Image width passed to ``frPyObjects``.

    Returns:
        The result of ``maskUtils.decode`` for the annotation.
    """
    if not isinstance(mask_ann, list):
        if isinstance(mask_ann['counts'], list):
            # uncompressed RLE
            encoded = maskUtils.frPyObjects(mask_ann, img_h, img_w)
        else:
            # rle
            encoded = mask_ann
        return maskUtils.decode(encoded)

    # polygon -- a single object might consist of multiple parts
    # we merge all parts into one mask rle code
    part_rles = maskUtils.frPyObjects(mask_ann, img_h, img_w)
    return maskUtils.decode(maskUtils.merge(part_rles))
def
_parse_coco_ann_info
(
ann_info
):
gt_bboxes
=
[]
gt_labels
=
[]
gt_bboxes_ignore
=
[]
gt_masks_ann
=
[]
for
i
,
ann
in
enumerate
(
ann_info
):
if
ann
.
get
(
'ignore'
,
False
):
continue
x1
,
y1
,
w
,
h
=
ann
[
'bbox'
]
if
ann
[
'area'
]
<=
0
:
continue
bbox
=
[
x1
,
y1
,
x1
+
w
,
y1
+
h
]
if
ann
.
get
(
'iscrowd'
,
False
):
gt_bboxes_ignore
.
append
(
bbox
)
else
:
gt_bboxes
.
append
(
bbox
)
gt_masks_ann
.
append
(
ann
[
'segmentation'
])
if
gt_bboxes
:
gt_bboxes
=
np
.
array
(
gt_bboxes
,
dtype
=
np
.
float32
)
gt_labels
=
np
.
array
(
gt_labels
,
dtype
=
np
.
int64
)
else
:
gt_bboxes
=
np
.
zeros
((
0
,
4
),
dtype
=
np
.
float32
)
gt_labels
=
np
.
array
([],
dtype
=
np
.
int64
)
if
gt_bboxes_ignore
:
gt_bboxes_ignore
=
np
.
array
(
gt_bboxes_ignore
,
dtype
=
np
.
float32
)
else
:
gt_bboxes_ignore
=
np
.
zeros
((
0
,
4
),
dtype
=
np
.
float32
)
ann
=
dict
(
bboxes
=
gt_bboxes
,
bboxes_ignore
=
gt_bboxes_ignore
,
masks
=
gt_masks_ann
)
return
ann
def crop_image_patch_v2(pos_proposals, pos_assigned_gt_inds, gt_masks):
    """Crop fixed-size mask targets for proposals using ``roi_align``.

    Args:
        pos_proposals (torch.Tensor): Proposal boxes; a running index column
            is prepended to form the RoIs.
        pos_assigned_gt_inds (torch.Tensor): Indices selecting, along dim 0
            of ``gt_masks``, the mask assigned to each proposal.
        gt_masks (np.ndarray): Masks, converted with ``torch.from_numpy``.

    Returns:
        torch.Tensor: Output of ``roi_align`` with dim 1 squeezed.
    """
    import torch
    from torch.nn.modules.utils import _pair

    device = pos_proposals.device
    num_pos = pos_proposals.size(0)

    # One fake batch index per proposal, in the proposals' dtype.
    roi_ids = torch.arange(num_pos, device=device)
    roi_ids = roi_ids.to(dtype=pos_proposals.dtype)[:, None]
    rois = torch.cat([roi_ids, pos_proposals], dim=1)  # Nx5
    rois = rois.to(device=device)

    out_size = _pair(28)

    selected = torch.from_numpy(gt_masks).to(device)
    selected = selected.index_select(0, pos_assigned_gt_inds)
    selected = selected.to(dtype=rois.dtype)

    # Use RoIAlign could apparently accelerate the training (~0.1s/iter)
    aligned = roi_align(selected, rois, out_size[::-1], 1.0, 0, True)
    return aligned.squeeze(1)
def crop_image_patch(pos_proposals, gt_masks, pos_assigned_gt_inds, org_img):
    """Crop masked image patches and mask patches for each proposal.

    Args:
        pos_proposals (np.ndarray): Boxes as rows of ``x1, y1, x2, y2``;
            they are cast to int32 before cropping.
        gt_masks (list[np.ndarray]): Per-object masks indexed by
            ``pos_assigned_gt_inds``.
        pos_assigned_gt_inds (Sequence[int]): Mask index for each proposal.
        org_img (np.ndarray): Image that is multiplied by the mask
            (broadcast over the last axis) before cropping.

    Returns:
        tuple[list, list]: Image patches and mask patches, one per proposal.
    """
    img_patches = []
    masks = []
    for idx, proposal in enumerate(pos_proposals):
        gt_mask = gt_masks[pos_assigned_gt_inds[idx]]
        x1, y1, x2, y2 = proposal.astype(np.int32)
        # Box corners are treated as inclusive; keep at least one pixel.
        w = np.maximum(x2 - x1 + 1, 1)
        h = np.maximum(y2 - y1 + 1, 1)
        rows = slice(y1, y1 + h)
        cols = slice(x1, x1 + w)

        masks.append(gt_mask[rows, cols])
        img_patches.append((gt_mask[..., None] * org_img)[rows, cols])
    return img_patches, masks
def create_groundtruth_database(dataset_class_name,
                                data_path,
                                info_prefix,
                                info_path=None,
                                mask_anno_path=None,
                                used_classes=None,
                                database_save_path=None,
                                db_info_save_path=None,
                                relative_path=True,
                                add_rgb=False,
                                lidar_only=False,
                                bev_only=False,
                                coors_range=None,
                                with_mask=False):
    """Given the raw data, generate the ground truth database.

    For every object of every sample, the points inside its 3D box are
    shifted so the box center is the origin and written to
    ``<database_save_path>/<sample_idx>_<name>_<i>.bin``. A dict mapping
    class name to a list of per-object info dicts is pickled to
    ``db_info_save_path``.

    Args:
        dataset_class_name (str): Name of the input dataset.
        data_path (str): Path of the data.
        info_prefix (str): Prefix of the info file.
        info_path (str): Path of the info file.
            Default: None.
        mask_anno_path (str): Path of the mask_anno, relative to
            ``data_path``. Only read when ``with_mask`` is True.
            Default: None.
        used_classes (list[str]): Classes have been used. ``None`` keeps
            every class.
            Default: None.
        database_save_path (str): Path to save database. ``None`` resolves
            to ``<data_path>/<info_prefix>_gt_database``.
            Default: None.
        db_info_save_path (str): Path to save db_info. ``None`` resolves to
            ``<data_path>/<info_prefix>_dbinfos_train.pkl``.
            Default: None.
        relative_path (bool): Whether to use relative path. Accepted but not
            read by this function.
            Default: True.
        add_rgb (bool): Accepted but not read by this function.
            Default: False.
        lidar_only (bool): Accepted but not read by this function.
            Default: False.
        bev_only (bool): Accepted but not read by this function.
            Default: False.
        coors_range: Accepted but not read by this function.
            Default: None.
        with_mask (bool): Whether to use mask.
            Default: False.
    """
    print(f'Create GT Database of {dataset_class_name}')
    dataset_cfg = dict(
        type=dataset_class_name, data_root=data_path, ann_file=info_path)
    if dataset_class_name == 'KittiDataset':
        file_client_args = dict(backend='disk')
        dataset_cfg.update(
            test_mode=False,
            split='training',
            modality=dict(
                use_lidar=True,
                use_depth=False,
                use_lidar_intensity=True,
                use_camera=with_mask,
            ),
            pipeline=[
                dict(
                    type='LoadPointsFromFile',
                    coord_type='LIDAR',
                    load_dim=4,
                    use_dim=4,
                    file_client_args=file_client_args),
                dict(
                    type='LoadAnnotations3D',
                    with_bbox_3d=True,
                    with_label_3d=True,
                    file_client_args=file_client_args)
            ])

    elif dataset_class_name == 'NuScenesDataset':
        dataset_cfg.update(
            use_valid_flag=True,
            pipeline=[
                dict(
                    type='LoadPointsFromFile',
                    coord_type='LIDAR',
                    load_dim=5,
                    use_dim=5),
                dict(
                    type='LoadPointsFromMultiSweeps',
                    sweeps_num=10,
                    use_dim=[0, 1, 2, 3, 4],
                    pad_empty_sweeps=True,
                    remove_close=True),
                dict(
                    type='LoadAnnotations3D',
                    with_bbox_3d=True,
                    with_label_3d=True)
            ])

    elif dataset_class_name == 'WaymoDataset':
        file_client_args = dict(backend='disk')
        dataset_cfg.update(
            test_mode=False,
            split='training',
            modality=dict(
                use_lidar=True,
                use_depth=False,
                use_lidar_intensity=True,
                use_camera=False,
            ),
            pipeline=[
                dict(
                    type='LoadPointsFromFile',
                    coord_type='LIDAR',
                    load_dim=6,
                    use_dim=5,
                    file_client_args=file_client_args),
                dict(
                    type='LoadAnnotations3D',
                    with_bbox_3d=True,
                    with_label_3d=True,
                    file_client_args=file_client_args)
            ])

    dataset = build_dataset(dataset_cfg)

    if database_save_path is None:
        database_save_path = osp.join(data_path, f'{info_prefix}_gt_database')
    if db_info_save_path is None:
        db_info_save_path = osp.join(data_path,
                                     f'{info_prefix}_dbinfos_train.pkl')
    mmcv.mkdir_or_exist(database_save_path)
    all_db_infos = dict()
    if with_mask:
        # Map image file name -> COCO image id for the mask annotations.
        coco = COCO(osp.join(data_path, mask_anno_path))
        imgIds = coco.getImgIds()
        file2id = dict()
        for i in imgIds:
            info = coco.loadImgs([i])[0]
            file2id.update({info['file_name']: i})

    group_counter = 0
    for j in track_iter_progress(list(range(len(dataset)))):
        input_dict = dataset.get_data_info(j)
        dataset.pre_pipeline(input_dict)
        example = dataset.pipeline(input_dict)
        annos = example['ann_info']
        image_idx = example['sample_idx']
        points = example['points'].tensor.numpy()
        gt_boxes_3d = annos['gt_bboxes_3d'].tensor.numpy()
        names = annos['gt_names']
        # Maps the sample-local group id to a database-wide group id.
        group_dict = dict()
        if 'group_ids' in annos:
            group_ids = annos['group_ids']
        else:
            group_ids = np.arange(gt_boxes_3d.shape[0], dtype=np.int64)
        difficulty = np.zeros(gt_boxes_3d.shape[0], dtype=np.int32)
        if 'difficulty' in annos:
            difficulty = annos['difficulty']

        num_obj = gt_boxes_3d.shape[0]
        point_indices = box_np_ops.points_in_rbbox(points, gt_boxes_3d)

        if with_mask:
            # prepare masks
            gt_boxes = annos['gt_bboxes']
            img_path = osp.split(example['img_info']['filename'])[-1]
            if img_path not in file2id.keys():
                print(f'skip image {img_path} for empty mask')
                continue
            img_id = file2id[img_path]
            kins_annIds = coco.getAnnIds(imgIds=img_id)
            kins_raw_info = coco.loadAnns(kins_annIds)
            kins_ann_info = _parse_coco_ann_info(kins_raw_info)
            h, w = annos['img_shape'][:2]
            gt_masks = [
                _poly2mask(mask, h, w) for mask in kins_ann_info['masks']
            ]
            # get mask inds based on iou mapping
            bbox_iou = bbox_overlaps(kins_ann_info['bboxes'], gt_boxes)
            mask_inds = bbox_iou.argmax(axis=0)
            valid_inds = (bbox_iou.max(axis=0) > 0.5)

            # mask the image
            # use more precise crop when it is ready
            # object_img_patches = np.ascontiguousarray(
            #     np.stack(object_img_patches, axis=0).transpose(0, 3, 1, 2))
            # crop image patches using roi_align
            # object_img_patches = crop_image_patch_v2(
            #     torch.Tensor(gt_boxes),
            #     torch.Tensor(mask_inds).long(), object_img_patches)
            object_img_patches, object_masks = crop_image_patch(
                gt_boxes, gt_masks, mask_inds, annos['img'])

        for i in range(num_obj):
            filename = f'{image_idx}_{names[i]}_{i}.bin'
            abs_filepath = osp.join(database_save_path, filename)
            rel_filepath = osp.join(f'{info_prefix}_gt_database', filename)

            # save point clouds and image patches for each object
            gt_points = points[point_indices[:, i]]
            gt_points[:, :3] -= gt_boxes_3d[i, :3]

            if with_mask:
                if object_masks[i].sum() == 0 or not valid_inds[i]:
                    # Skip object for empty or invalid mask
                    continue
                img_patch_path = abs_filepath + '.png'
                mask_patch_path = abs_filepath + '.mask.png'
                mmcv.imwrite(object_img_patches[i], img_patch_path)
                mmcv.imwrite(object_masks[i], mask_patch_path)

            # tofile() emits raw bytes, so the file must be opened in binary
            # mode (text mode can translate bytes on some platforms).
            with open(abs_filepath, 'wb') as f:
                gt_points.tofile(f)

            if (used_classes is None) or names[i] in used_classes:
                db_info = {
                    'name': names[i],
                    'path': rel_filepath,
                    'image_idx': image_idx,
                    'gt_idx': i,
                    'box3d_lidar': gt_boxes_3d[i],
                    'num_points_in_gt': gt_points.shape[0],
                    'difficulty': difficulty[i],
                }
                local_group_id = group_ids[i]
                # if local_group_id >= 0:
                if local_group_id not in group_dict:
                    group_dict[local_group_id] = group_counter
                    group_counter += 1
                db_info['group_id'] = group_dict[local_group_id]
                if 'score' in annos:
                    db_info['score'] = annos['score'][i]
                if with_mask:
                    db_info.update({'box2d_camera': gt_boxes[i]})
                if names[i] in all_db_infos:
                    all_db_infos[names[i]].append(db_info)
                else:
                    all_db_infos[names[i]] = [db_info]

    for k, v in all_db_infos.items():
        print(f'load {len(v)} {k} database infos')

    with open(db_info_save_path, 'wb') as f:
        pickle.dump(all_db_infos, f)
docker-hub/MapTRv2/MapTR/tools/data_converter/indoor_converter.py
0 → 100644
View file @
19472568
# Copyright (c) OpenMMLab. All rights reserved.
import
mmcv
import
numpy
as
np
import
os
from
tools.data_converter.s3dis_data_utils
import
S3DISData
,
S3DISSegData
from
tools.data_converter.scannet_data_utils
import
ScanNetData
,
ScanNetSegData
from
tools.data_converter.sunrgbd_data_utils
import
SUNRGBDData
def create_indoor_info_file(data_path,
                            pkl_prefix='sunrgbd',
                            save_path=None,
                            use_v1=False,
                            workers=4):
    """Create indoor information file.

    Collect the infos of the raw data and dump them as pkl files. For
    'scannet' and 's3dis' the segmentation infos are generated as well.

    Args:
        data_path (str): Path of the data.
        pkl_prefix (str): Dataset name, one of 'sunrgbd', 'scannet' or
            's3dis'; also the prefix of the saved pkl. Default: 'sunrgbd'.
        save_path (str): Directory of the pkl to be saved; ``data_path`` is
            used when None. Default: None.
        use_v1 (bool): Whether to use v1 (forwarded to ``SUNRGBDData``).
            Default: False.
        workers (int): Number of threads to be used. Default: 4.
    """
    assert os.path.exists(data_path)
    assert pkl_prefix in ['sunrgbd', 'scannet', 's3dis'], \
        f'unsupported indoor dataset {pkl_prefix}'
    if save_path is None:
        save_path = data_path
    assert os.path.exists(save_path)

    def _label_weight(x):
        # label weights are used to balance classes with different number
        # of points
        return 1.0 / np.log(1.2 + x)

    if pkl_prefix == 's3dis':
        # S3DIS doesn't have a fixed train-val split
        # it has 6 areas instead, so we generate info file for each of them
        # in training, we will use dataset to wrap different areas
        for split in [f'Area_{i}' for i in range(1, 7)]:
            dataset = S3DISData(root_path=data_path, split=split)
            info = dataset.get_infos(num_workers=workers, has_label=True)
            filename = os.path.join(save_path,
                                    f'{pkl_prefix}_infos_{split}.pkl')
            mmcv.dump(info, filename, 'pkl')
            print(f'{pkl_prefix} info {split} file is saved to {filename}')
            seg_dataset = S3DISSegData(
                data_root=data_path,
                ann_file=filename,
                split=split,
                num_points=4096,
                label_weight_func=_label_weight)
            seg_dataset.get_seg_infos()
        return

    if pkl_prefix not in ['sunrgbd', 'scannet']:
        return

    is_scannet = pkl_prefix == 'scannet'

    # generate infos for both detection and segmentation task
    train_filename = os.path.join(save_path, f'{pkl_prefix}_infos_train.pkl')
    val_filename = os.path.join(save_path, f'{pkl_prefix}_infos_val.pkl')
    if is_scannet:
        # ScanNet has a train-val-test split
        train_dataset = ScanNetData(root_path=data_path, split='train')
        val_dataset = ScanNetData(root_path=data_path, split='val')
        test_dataset = ScanNetData(root_path=data_path, split='test')
        test_filename = os.path.join(save_path,
                                     f'{pkl_prefix}_infos_test.pkl')
    else:
        # SUN RGB-D has a train-val split
        train_dataset = SUNRGBDData(
            root_path=data_path, split='train', use_v1=use_v1)
        val_dataset = SUNRGBDData(
            root_path=data_path, split='val', use_v1=use_v1)

    infos_train = train_dataset.get_infos(num_workers=workers, has_label=True)
    mmcv.dump(infos_train, train_filename, 'pkl')
    print(f'{pkl_prefix} info train file is saved to {train_filename}')

    infos_val = val_dataset.get_infos(num_workers=workers, has_label=True)
    mmcv.dump(infos_val, val_filename, 'pkl')
    print(f'{pkl_prefix} info val file is saved to {val_filename}')

    if not is_scannet:
        return

    infos_test = test_dataset.get_infos(num_workers=workers, has_label=False)
    mmcv.dump(infos_test, test_filename, 'pkl')
    print(f'{pkl_prefix} info test file is saved to {test_filename}')

    # generate infos for the semantic segmentation task
    # e.g. re-sampled scene indexes and label weights
    # scene indexes are used to re-sample rooms with different number of points
    # label weight computation function is adopted from
    # https://github.com/charlesq34/pointnet2/blob/master/scannet/scannet_dataset.py#L24
    train_dataset = ScanNetSegData(
        data_root=data_path,
        ann_file=train_filename,
        split='train',
        num_points=8192,
        label_weight_func=_label_weight)
    # TODO: do we need to generate on val set?
    val_dataset = ScanNetSegData(
        data_root=data_path,
        ann_file=val_filename,
        split='val',
        num_points=8192,
        label_weight_func=_label_weight)
    # no need to generate for test set
    train_dataset.get_seg_infos()
    val_dataset.get_seg_infos()
docker-hub/MapTRv2/MapTR/tools/data_converter/kitti_converter.py
0 → 100644
View file @
19472568
# Copyright (c) OpenMMLab. All rights reserved.
import
mmcv
import
numpy
as
np
from
collections
import
OrderedDict
from
nuscenes.utils.geometry_utils
import
view_points
from
pathlib
import
Path
from
mmdet3d.core.bbox
import
box_np_ops
from
.kitti_data_utils
import
get_kitti_image_info
,
get_waymo_image_info
from
.nuscenes_converter
import
post_process_coords
# Category names exported to the COCO-style 2D annotations; the position in
# this tuple is used as the category id.
kitti_categories = (
    'Pedestrian',
    'Cyclist',
    'Car',
)
def convert_to_kitti_info_version2(info):
    """convert kitti info v1 to v2 if possible.

    The dict is modified in place. When any of the ``image``, ``calib`` or
    ``point_cloud`` keys is missing, all three are (re)built from the flat
    v1 keys.

    Args:
        info (dict): Info of the input kitti data.
            - image (dict): image info
            - calib (dict): calibration info
            - point_cloud (dict): point cloud info
    """
    already_v2 = ('image' in info and 'calib' in info
                  and 'point_cloud' in info)
    if already_v2:
        return

    info['image'] = dict(
        image_shape=info['img_shape'],
        image_idx=info['image_idx'],
        image_path=info['img_path'],
    )
    info['calib'] = dict(
        R0_rect=info['calib/R0_rect'],
        Tr_velo_to_cam=info['calib/Tr_velo_to_cam'],
        P2=info['calib/P2'],
    )
    info['point_cloud'] = dict(velodyne_path=info['velodyne_path'])
def
_read_imageset_file
(
path
):
with
open
(
path
,
'r'
)
as
f
:
lines
=
f
.
readlines
()
return
[
int
(
line
)
for
line
in
lines
]
def _calculate_num_points_in_gt(data_path,
                                infos,
                                relative_path,
                                remove_outside=True,
                                num_features=4):
    """Add ``num_points_in_gt`` to the annotations of every info, in place.

    Args:
        data_path (str): Data root, prepended to the velodyne path when
            ``relative_path`` is True.
        infos (list[dict]): Infos providing ``point_cloud``, ``image``,
            ``calib`` and ``annos``.
        relative_path (bool): Whether the stored velodyne path is relative
            to ``data_path``.
        remove_outside (bool): Whether to filter the points with
            ``box_np_ops.remove_outside_points`` first. Default: True.
        num_features (int): Number of float32 values per point in the
            velodyne file. Default: 4.
    """
    for info in mmcv.track_iter_progress(infos):
        pc_info = info['point_cloud']
        image_info = info['image']
        calib = info['calib']

        velo_file = pc_info['velodyne_path']
        if relative_path:
            velo_file = str(Path(data_path) / velo_file)
        points_v = np.fromfile(velo_file, dtype=np.float32, count=-1)
        points_v = points_v.reshape([-1, num_features])

        rect = calib['R0_rect']
        Trv2c = calib['Tr_velo_to_cam']
        P2 = calib['P2']
        if remove_outside:
            points_v = box_np_ops.remove_outside_points(
                points_v, rect, Trv2c, P2, image_info['image_shape'])

        # points_v = points_v[points_v[:, 0] > 0]
        annos = info['annos']
        # Only the first num_obj entries are used as boxes; 'DontCare'
        # entries get -1 below.
        num_obj = sum(1 for n in annos['name'] if n != 'DontCare')
        # annos = kitti.filter_kitti_anno(annos, ['DontCare'])
        dims = annos['dimensions'][:num_obj]
        loc = annos['location'][:num_obj]
        rots = annos['rotation_y'][:num_obj]
        gt_boxes_camera = np.concatenate(
            [loc, dims, rots[..., np.newaxis]], axis=1)
        gt_boxes_lidar = box_np_ops.box_camera_to_lidar(
            gt_boxes_camera, rect, Trv2c)
        inside = box_np_ops.points_in_rbbox(points_v[:, :3], gt_boxes_lidar)

        num_ignored = len(annos['dimensions']) - num_obj
        counts = np.concatenate([inside.sum(0), -np.ones([num_ignored])])
        annos['num_points_in_gt'] = counts.astype(np.int32)
def create_kitti_info_file(data_path,
                           pkl_prefix='kitti',
                           save_path=None,
                           relative_path=True):
    """Create info file of KITTI dataset.

    Given the raw data, generate its related info file in pkl format. Files
    for the train, val, trainval and test splits are written.

    Args:
        data_path (str): Path of the data root.
        pkl_prefix (str): Prefix of the info file to be generated.
        save_path (str): Path to save the info file; ``data_path`` is used
            when None.
        relative_path (bool): Whether to use relative path.
    """
    split_dir = Path(data_path) / 'ImageSets'
    train_img_ids = _read_imageset_file(str(split_dir / 'train.txt'))
    val_img_ids = _read_imageset_file(str(split_dir / 'val.txt'))
    test_img_ids = _read_imageset_file(str(split_dir / 'test.txt'))

    print('Generate info. this may take several minutes.')
    save_path = Path(data_path) if save_path is None else Path(save_path)

    # Options shared by every get_kitti_image_info call below.
    common = dict(velodyne=True, calib=True, relative_path=relative_path)

    kitti_infos_train = get_kitti_image_info(
        data_path, training=True, image_ids=train_img_ids, **common)
    _calculate_num_points_in_gt(data_path, kitti_infos_train, relative_path)
    filename = save_path / f'{pkl_prefix}_infos_train.pkl'
    print(f'Kitti info train file is saved to {filename}')
    mmcv.dump(kitti_infos_train, filename)

    kitti_infos_val = get_kitti_image_info(
        data_path, training=True, image_ids=val_img_ids, **common)
    _calculate_num_points_in_gt(data_path, kitti_infos_val, relative_path)
    filename = save_path / f'{pkl_prefix}_infos_val.pkl'
    print(f'Kitti info val file is saved to {filename}')
    mmcv.dump(kitti_infos_val, filename)

    filename = save_path / f'{pkl_prefix}_infos_trainval.pkl'
    print(f'Kitti info trainval file is saved to {filename}')
    mmcv.dump(kitti_infos_train + kitti_infos_val, filename)

    # The test split has no labels, so no point counting is done for it.
    kitti_infos_test = get_kitti_image_info(
        data_path,
        training=False,
        label_info=False,
        image_ids=test_img_ids,
        **common)
    filename = save_path / f'{pkl_prefix}_infos_test.pkl'
    print(f'Kitti info test file is saved to {filename}')
    mmcv.dump(kitti_infos_test, filename)
def create_waymo_info_file(data_path,
                           pkl_prefix='waymo',
                           save_path=None,
                           relative_path=True,
                           max_sweeps=5):
    """Create info file of waymo dataset.

    Given the raw data, generate its related info file in pkl format. Only
    the train split is generated, and only for ids divisible by 5; the val
    and test generation is commented out below.

    Args:
        data_path (str): Path of the data root.
        pkl_prefix (str): Prefix of the info file to be generated.
        save_path (str | None): Path to save the info file; ``data_path``
            is used when None.
        relative_path (bool): Whether to use relative path.
        max_sweeps (int): Max sweeps before the detection frame to be used.
    """
    split_dir = Path(data_path) / 'ImageSets'
    all_train_ids = _read_imageset_file(str(split_dir / 'train.txt'))
    # val_img_ids = _read_imageset_file(str(imageset_folder / 'val.txt'))
    # test_img_ids = _read_imageset_file(str(imageset_folder / 'test.txt'))
    # Keep every id that is a multiple of 5.
    train_img_ids = [idx for idx in all_train_ids if idx % 5 == 0]

    print('Generate info. this may take several minutes.')
    save_path = Path(data_path) if save_path is None else Path(save_path)

    waymo_infos_train = get_waymo_image_info(
        data_path,
        training=True,
        velodyne=True,
        calib=True,
        pose=True,
        image_ids=train_img_ids,
        relative_path=relative_path,
        max_sweeps=max_sweeps)
    _calculate_num_points_in_gt(
        data_path,
        waymo_infos_train,
        relative_path,
        num_features=6,
        remove_outside=False)
    filename = save_path / f'{pkl_prefix}_infos_train.pkl'
    print(f'Waymo info train file is saved to {filename}')
    mmcv.dump(waymo_infos_train, filename)
    #
    # waymo_infos_val = get_waymo_image_info(
    #     data_path,
    #     training=True,
    #     velodyne=True,
    #     calib=True,
    #     pose=True,
    #     image_ids=val_img_ids,
    #     relative_path=relative_path,
    #     max_sweeps=max_sweeps)
    # _calculate_num_points_in_gt(
    #     data_path,
    #     waymo_infos_val,
    #     relative_path,
    #     num_features=6,
    #     remove_outside=False)
    # filename = save_path / f'{pkl_prefix}_infos_val.pkl'
    # print(f'Waymo info val file is saved to {filename}')
    # mmcv.dump(waymo_infos_val, filename)
    # filename = save_path / f'{pkl_prefix}_infos_trainval.pkl'
    # print(f'Waymo info trainval file is saved to {filename}')
    # mmcv.dump(waymo_infos_train + waymo_infos_val, filename)
    # waymo_infos_test = get_waymo_image_info(
    #     data_path,
    #     training=False,
    #     label_info=False,
    #     velodyne=True,
    #     calib=True,
    #     pose=True,
    #     image_ids=test_img_ids,
    #     relative_path=relative_path,
    #     max_sweeps=max_sweeps)
    # filename = save_path / f'{pkl_prefix}_infos_test.pkl'
    # print(f'Waymo info test file is saved to {filename}')
    # mmcv.dump(waymo_infos_test, filename)
def _create_reduced_point_cloud(data_path,
                                info_path,
                                save_path=None,
                                back=False,
                                num_features=4,
                                front_camera_id=2):
    """Create reduced point clouds for given info.

    For every info, the velodyne points are filtered with
    ``box_np_ops.remove_outside_points`` and written as raw float data to a
    file named after the original velodyne file.

    Args:
        data_path (str): Path of original data.
        info_path (str): Path of data info.
        save_path (str | None): Path to save reduced point cloud data.
            When None, a sibling directory of the velodyne directory with
            the suffix ``_reduced`` is used (and created if missing).
            Default: None.
        back (bool): Whether to flip the points to back. The x coordinate
            is negated before filtering and ``_back`` is appended to the
            output file name. Default: False.
        num_features (int): Number of point features. Default: 4.
        front_camera_id (int): The referenced/front camera ID. Default: 2.
    """
    kitti_infos = mmcv.load(info_path)

    for info in mmcv.track_iter_progress(kitti_infos):
        pc_info = info['point_cloud']
        image_info = info['image']
        calib = info['calib']

        v_path = pc_info['velodyne_path']
        v_path = Path(data_path) / v_path
        points_v = np.fromfile(
            str(v_path), dtype=np.float32,
            count=-1).reshape([-1, num_features])
        rect = calib['R0_rect']
        if front_camera_id == 2:
            P2 = calib['P2']
        else:
            P2 = calib[f'P{str(front_camera_id)}']
        Trv2c = calib['Tr_velo_to_cam']
        # first remove z < 0 points
        # keep = points_v[:, -1] > 0
        # points_v = points_v[keep]
        # then remove outside.
        if back:
            points_v[:, 0] = -points_v[:, 0]
        points_v = box_np_ops.remove_outside_points(points_v, rect, Trv2c, P2,
                                                    image_info['image_shape'])
        if save_path is None:
            save_dir = v_path.parent.parent / (v_path.parent.stem + '_reduced')
            # exist_ok avoids the check-then-create race of a separate
            # exists() test.
            save_dir.mkdir(exist_ok=True)
        else:
            save_dir = Path(save_path)
        # Convert to str before appending the suffix: ``Path += str`` raises
        # TypeError, which broke the save_path=None / back=True combination.
        save_filename = str(save_dir / v_path.name)
        if back:
            save_filename += '_back'
        # tofile() writes raw bytes, so open the file in binary mode.
        with open(save_filename, 'wb') as f:
            points_v.tofile(f)
def create_reduced_point_cloud(data_path,
                               pkl_prefix,
                               train_info_path=None,
                               val_info_path=None,
                               test_info_path=None,
                               save_path=None,
                               with_back=False):
    """Create reduced point clouds for training/validation/testing.

    Args:
        data_path (str): Path of original data.
        pkl_prefix (str): Prefix of info files.
        train_info_path (str | None): Path of training set info.
            Default: None, i.e. ``<data_path>/<pkl_prefix>_infos_train.pkl``.
        val_info_path (str | None): Path of validation set info.
            Default: None, i.e. ``<data_path>/<pkl_prefix>_infos_val.pkl``.
        test_info_path (str | None): Path of test set info.
            Default: None, i.e. ``<data_path>/<pkl_prefix>_infos_test.pkl``.
        save_path (str | None): Path to save reduced point cloud data.
        with_back (bool): Whether to flip the points to back. When True, a
            second pass over all three splits is run with ``back=True``.
    """
    data_root = Path(data_path)
    if train_info_path is None:
        train_info_path = data_root / f'{pkl_prefix}_infos_train.pkl'
    if val_info_path is None:
        val_info_path = data_root / f'{pkl_prefix}_infos_val.pkl'
    if test_info_path is None:
        test_info_path = data_root / f'{pkl_prefix}_infos_test.pkl'

    stages = (
        ('create reduced point cloud for training set', train_info_path),
        ('create reduced point cloud for validation set', val_info_path),
        ('create reduced point cloud for testing set', test_info_path),
    )
    for message, split_info_path in stages:
        print(message)
        _create_reduced_point_cloud(data_path, split_info_path, save_path)

    if not with_back:
        return
    for _, split_info_path in stages:
        _create_reduced_point_cloud(
            data_path, split_info_path, save_path, back=True)
def export_2d_annotation(root_path, info_path, mono3d=True):
    """Export 2d annotation from the info file and raw data.

    The result is dumped in COCO layout (``annotations``, ``images``,
    ``categories``) next to the info file, using the info path without its
    last four characters as prefix and ``.coco.json`` as suffix.

    Args:
        root_path (str): Root path of the raw data.
        info_path (str): Path of the info file.
        mono3d (bool): Whether to export mono3d annotation. Default: True.
    """
    from os import path as osp

    # get bbox annotations for camera
    kitti_infos = mmcv.load(info_path)
    cat2Ids = [
        dict(id=cat_id, name=cat_name)
        for cat_id, cat_name in enumerate(kitti_categories)
    ]
    images = []
    annotations = []
    coco_2d_dict = dict(
        annotations=annotations, images=images, categories=cat2Ids)

    for info in mmcv.track_iter_progress(kitti_infos):
        coco_infos = get_2d_boxes(info, occluded=[0, 1, 2, 3], mono3d=mono3d)
        image_info = info['image']
        # The image is read only to obtain its height and width.
        (height, width, _) = mmcv.imread(
            osp.join(root_path, image_info['image_path'])).shape
        calib = info['calib']
        images.append(
            dict(
                file_name=image_info['image_path'],
                id=image_info['image_idx'],
                Tri2v=calib['Tr_imu_to_velo'],
                Trv2c=calib['Tr_velo_to_cam'],
                rect=calib['R0_rect'],
                cam_intrinsic=calib['P2'],
                width=width,
                height=height))
        for coco_info in coco_infos:
            if coco_info is None:
                continue
            # add an empty key for coco format
            coco_info['segmentation'] = []
            # Annotation ids are consecutive across the whole file.
            coco_info['id'] = len(annotations)
            annotations.append(coco_info)

    if mono3d:
        json_prefix = f'{info_path[:-4]}_mono3d'
    else:
        json_prefix = f'{info_path[:-4]}'
    mmcv.dump(coco_2d_dict, f'{json_prefix}.coco.json')
def get_2d_boxes(info, occluded, mono3d=True):
    """Get the 2D annotation records for a given info.

    Each kept 3D box is projected into the image with the ``P2`` calibration
    matrix and turned into a record via ``generate_record``.

    Note:
        ``info['annos']`` is filtered IN PLACE: after the call every array in
        it only holds the entries whose occlusion state is in ``occluded``.

    Args:
        info (dict): Information of the given sample data. Reads
            ``info['calib']['P0']``, ``info['calib']['P2']``,
            ``info['image']`` and, when present, ``info['annos']``.
        occluded: Collection of occlusion states to keep (membership is
            tested with ``in``). KITTI states:
            0 = fully visible, 1 = partly occluded, 2 = largely occluded,
            3 = unknown, -1 = DontCare
        mono3d (bool): Whether to get boxes with mono3d annotation.

    Return:
        list[dict]: List of 2D annotation records for this sample. Entries
            may be ``None`` when ``generate_record`` rejects the category.
            The list is empty when ``info`` has no ``'annos'`` key.
    """
    # Get calibration information
    P2 = info['calib']['P2']

    repro_recs = []
    # if no annotations in info (test dataset), then return
    if 'annos' not in info:
        return repro_recs

    # Get all the annotation with the specified visibilties.
    ann_dicts = info['annos']
    mask = [(ocld in occluded) for ocld in ann_dicts['occluded']]
    # NOTE: rebinding the values mutates info['annos'] itself.
    # Assumes every value is a numpy array indexable by a boolean list.
    for k in ann_dicts.keys():
        ann_dicts[k] = ann_dicts[k][mask]

    # convert dict of list to list of dict
    ann_recs = []
    for i in range(len(ann_dicts['occluded'])):
        ann_rec = {}
        for k in ann_dicts.keys():
            ann_rec[k] = ann_dicts[k][i]
        ann_recs.append(ann_rec)

    for ann_idx, ann_rec in enumerate(ann_recs):
        # Augment sample_annotation with token information.
        ann_rec['sample_annotation_token'] = \
            f"{info['image']['image_idx']}.{ann_idx}"
        ann_rec['sample_data_token'] = info['image']['image_idx']
        sample_data_token = info['image']['image_idx']

        # Add leading axes so the three pieces can be concatenated on axis 1.
        loc = ann_rec['location'][np.newaxis, :]
        dim = ann_rec['dimensions'][np.newaxis, :]
        rot = ann_rec['rotation_y'][np.newaxis, np.newaxis]
        # transform the center from [0.5, 1.0, 0.5] to [0.5, 0.5, 0.5]
        dst = np.array([0.5, 0.5, 0.5])
        src = np.array([0.5, 1.0, 0.5])
        loc = loc + dim * (dst - src)
        # x shift derived from the P2 and P0 translation terms; it is only
        # applied to loc_3d (exported as bbox_cam3d), not to loc.
        offset = (info['calib']['P2'][0, 3] - info['calib']['P0'][0, 3]) \
            / info['calib']['P2'][0, 0]
        loc_3d = np.copy(loc)
        loc_3d[0, 0] += offset
        gt_bbox_3d = np.concatenate([loc, dim, rot], axis=1).astype(np.float32)

        # Filter out the corners that are not in front of the calibrated
        # sensor.
        corners_3d = box_np_ops.center_to_corner_box3d(
            gt_bbox_3d[:, :3],
            gt_bbox_3d[:, 3:6],
            gt_bbox_3d[:, 6], [0.5, 0.5, 0.5],
            axis=1)
        corners_3d = corners_3d[0].T  # (1, 8, 3) -> (3, 8)
        in_front = np.argwhere(corners_3d[2, :] > 0).flatten()
        corners_3d = corners_3d[:, in_front]

        # Project 3d box to 2d.
        camera_intrinsic = P2
        corner_coords = view_points(corners_3d, camera_intrinsic,
                                    True).T[:, :2].tolist()

        # Keep only corners that fall within the image.
        final_coords = post_process_coords(corner_coords)

        # Skip if the convex hull of the re-projected corners
        # does not intersect the image canvas.
        if final_coords is None:
            continue
        else:
            min_x, min_y, max_x, max_y = final_coords

        # Generate dictionary record to be included in the .json file.
        repro_rec = generate_record(ann_rec, min_x, min_y, max_x, max_y,
                                    sample_data_token,
                                    info['image']['image_path'])

        # If mono3d=True, add 3D annotations in camera coordinates
        if mono3d and (repro_rec is not None):
            repro_rec['bbox_cam3d'] = np.concatenate(
                [loc_3d, dim, rot],
                axis=1).astype(np.float32).squeeze().tolist()
            repro_rec['velo_cam3d'] = -1  # no velocity in KITTI

            # The projected center uses loc (without the x offset).
            center3d = np.array(loc).reshape([1, 3])
            center2d = box_np_ops.points_cam2img(
                center3d, camera_intrinsic, with_depth=True)
            repro_rec['center2d'] = center2d.squeeze().tolist()
            # normalized center2D + depth
            # samples with depth < 0 will be removed
            if repro_rec['center2d'][2] <= 0:
                continue

            repro_rec['attribute_name'] = -1  # no attribute in KITTI
            repro_rec['attribute_id'] = -1

        # A None record (rejected category) is appended as-is; callers are
        # expected to skip it.
        repro_recs.append(repro_rec)

    return repro_recs
def generate_record(ann_rec, x1, y1, x2, y2, sample_data_token, filename):
    """Build one COCO-style 2D annotation record from a 3D annotation.

    Args:
        ann_rec (dict): Original 3d annotation record. Must contain a
            ``'name'`` entry, which becomes the category name.
        x1 (float): Minimum value of the x coordinate.
        y1 (float): Minimum value of the y coordinate.
        x2 (float): Maximum value of the x coordinate.
        y2 (float): Maximum value of the y coordinate.
        sample_data_token (str): Sample data token.
        filename (str): The corresponding image file where the annotation
            is present.

    Returns:
        dict | None: A sample 2D annotation record, or ``None`` when the
            category name is not listed in ``kitti_categories``.

            - file_name (str): file name
            - image_id (str): sample data token
            - area (float): 2d box area
            - category_name (str): category name
            - category_id (int): category id
            - bbox (list[float]): left x, top y, dx, dy of 2d box
            - iscrowd (int): whether the area is crowd
    """
    renamed_keys = {
        'name': 'category_name',
        'num_points_in_gt': 'num_lidar_pts',
        'sample_annotation_token': 'sample_annotation_token',
        'sample_data_token': 'sample_data_token',
    }

    # Intermediate record: only used here to look up the category name.
    repro_rec = OrderedDict()
    repro_rec['sample_data_token'] = sample_data_token
    for src_key, value in ann_rec.items():
        dst_key = renamed_keys.get(src_key)
        if dst_key is not None:
            repro_rec[dst_key] = value
    repro_rec['bbox_corners'] = [x1, y1, x2, y2]
    repro_rec['filename'] = filename

    coco_rec = {
        'file_name': filename,
        'image_id': sample_data_token,
        'area': (y2 - y1) * (x2 - x1),
    }

    cat_name = repro_rec['category_name']
    if cat_name not in kitti_categories:
        return None

    coco_rec['category_name'] = cat_name
    coco_rec['category_id'] = kitti_categories.index(cat_name)
    coco_rec['bbox'] = [x1, y1, x2 - x1, y2 - y1]
    coco_rec['iscrowd'] = 0
    return coco_rec
docker-hub/MapTRv2/MapTR/tools/data_converter/kitti_data_utils.py
0 → 100644
View file @
19472568
# Copyright (c) OpenMMLab. All rights reserved.
import
numpy
as
np
from
collections
import
OrderedDict
from
concurrent
import
futures
as
futures
from
os
import
path
as
osp
from
pathlib
import
Path
from
skimage
import
io
def get_image_index_str(img_idx, use_prefix_id=False):
    """Format a sample index as a zero-padded decimal string.

    Args:
        img_idx (int): Index of the sample.
        use_prefix_id (bool): Pad to 7 digits instead of 6. Default: False.

    Returns:
        str: The zero-padded index.
    """
    width = 7 if use_prefix_id else 6
    return f'{img_idx:0{width}d}'
def get_kitti_info_path(idx,
                        prefix,
                        info_type='image_2',
                        file_tail='.png',
                        training=True,
                        relative_path=True,
                        exist_check=True,
                        use_prefix_id=False):
    """Build the path ``<training|testing>/<info_type>/<index><file_tail>``.

    Args:
        idx (int): Index of the sample.
        prefix (str): Dataset root directory.
        info_type (str): Sub-directory name. Default: 'image_2'.
        file_tail (str): File extension appended to the index.
            Default: '.png'.
        training (bool): Use the 'training' split directory, otherwise
            'testing'. Default: True.
        relative_path (bool): Return the path relative to ``prefix`` instead
            of the path joined with ``prefix``. Default: True.
        exist_check (bool): Raise if the file is missing. Default: True.
        use_prefix_id (bool): Forwarded to ``get_image_index_str``.
            Default: False.

    Returns:
        str: The requested path.

    Raises:
        ValueError: If ``exist_check`` is set and the file does not exist
            under ``prefix``.
    """
    file_name = get_image_index_str(idx, use_prefix_id) + file_tail
    split_dir = 'training' if training else 'testing'
    file_path = Path(split_dir) / info_type / file_name
    full_path = Path(prefix) / file_path

    if exist_check and not full_path.exists():
        raise ValueError('file not exist: {}'.format(file_path))
    return str(file_path) if relative_path else str(full_path)
def get_image_path(idx,
                   prefix,
                   training=True,
                   relative_path=True,
                   exist_check=True,
                   info_type='image_2',
                   use_prefix_id=False):
    """Return the path of the ``.png`` image of sample ``idx``.

    Thin wrapper around ``get_kitti_info_path``; all arguments are forwarded
    unchanged with the file extension fixed to '.png'.
    """
    return get_kitti_info_path(
        idx,
        prefix,
        info_type=info_type,
        file_tail='.png',
        training=training,
        relative_path=relative_path,
        exist_check=exist_check,
        use_prefix_id=use_prefix_id)
def get_label_path(idx,
                   prefix,
                   training=True,
                   relative_path=True,
                   exist_check=True,
                   info_type='label_2',
                   use_prefix_id=False):
    """Return the path of the ``.txt`` label file of sample ``idx``.

    Thin wrapper around ``get_kitti_info_path``; all arguments are forwarded
    unchanged with the file extension fixed to '.txt'.
    """
    return get_kitti_info_path(
        idx,
        prefix,
        info_type=info_type,
        file_tail='.txt',
        training=training,
        relative_path=relative_path,
        exist_check=exist_check,
        use_prefix_id=use_prefix_id)
def get_velodyne_path(idx,
                      prefix,
                      training=True,
                      relative_path=True,
                      exist_check=True,
                      use_prefix_id=False):
    """Return the path of the ``.bin`` file of sample ``idx`` in 'velodyne'.

    Thin wrapper around ``get_kitti_info_path`` with the sub-directory fixed
    to 'velodyne' and the file extension fixed to '.bin'.
    """
    return get_kitti_info_path(
        idx,
        prefix,
        info_type='velodyne',
        file_tail='.bin',
        training=training,
        relative_path=relative_path,
        exist_check=exist_check,
        use_prefix_id=use_prefix_id)
def get_calib_path(idx,
                   prefix,
                   training=True,
                   relative_path=True,
                   exist_check=True,
                   use_prefix_id=False):
    """Return the path of the ``.txt`` file of sample ``idx`` in 'calib'.

    Thin wrapper around ``get_kitti_info_path`` with the sub-directory fixed
    to 'calib' and the file extension fixed to '.txt'.
    """
    return get_kitti_info_path(
        idx,
        prefix,
        info_type='calib',
        file_tail='.txt',
        training=training,
        relative_path=relative_path,
        exist_check=exist_check,
        use_prefix_id=use_prefix_id)
def get_pose_path(idx,
                  prefix,
                  training=True,
                  relative_path=True,
                  exist_check=True,
                  use_prefix_id=False):
    """Return the path of the ``.txt`` file of sample ``idx`` in 'pose'.

    Thin wrapper around ``get_kitti_info_path`` with the sub-directory fixed
    to 'pose' and the file extension fixed to '.txt'.
    """
    return get_kitti_info_path(
        idx,
        prefix,
        info_type='pose',
        file_tail='.txt',
        training=training,
        relative_path=relative_path,
        exist_check=exist_check,
        use_prefix_id=use_prefix_id)
def get_label_anno(label_path):
    """Parse a KITTI-format label file into a dict of numpy arrays.

    Each non-blank line describes one object with whitespace-separated
    fields: name, truncated, occluded, alpha, bbox (4 values), dimensions
    (3 values), location (3 values), rotation_y and an optional 16th field
    that is stored as the score.

    Blank lines are ignored and any run of whitespace separates fields, so a
    trailing empty line or doubled spaces no longer crash the parser.

    Args:
        label_path (str): Path of the label file.

    Returns:
        dict: Annotation arrays with keys ``name``, ``truncated``,
            ``occluded``, ``alpha``, ``bbox`` (N, 4), ``dimensions`` (N, 3),
            ``location`` (N, 3), ``rotation_y`` (N,), ``score``, ``index``
            and ``group_ids``, where N is the number of label lines.
    """
    with open(label_path, 'r') as f:
        # split() without an argument collapses repeated whitespace; the
        # strip() filter drops empty lines that would otherwise yield [''].
        content = [line.split() for line in f if line.strip()]

    num_gt = len(content)
    num_objects = sum(1 for x in content if x[0] != 'DontCare')

    annotations = {}
    annotations['name'] = np.array([x[0] for x in content])
    annotations['truncated'] = np.array([float(x[1]) for x in content])
    annotations['occluded'] = np.array([int(x[2]) for x in content])
    annotations['alpha'] = np.array([float(x[3]) for x in content])
    annotations['bbox'] = np.array(
        [[float(v) for v in x[4:8]] for x in content]).reshape(-1, 4)
    # dimensions will convert hwl format to standard lhw(camera) format.
    annotations['dimensions'] = np.array(
        [[float(v) for v in x[8:11]]
         for x in content]).reshape(-1, 3)[:, [2, 0, 1]]
    annotations['location'] = np.array(
        [[float(v) for v in x[11:14]] for x in content]).reshape(-1, 3)
    annotations['rotation_y'] = np.array(
        [float(x[14]) for x in content]).reshape(-1)

    # The first line decides whether the file carries a 16th (score) field.
    if content and len(content[0]) == 16:  # have score
        annotations['score'] = np.array([float(x[15]) for x in content])
    else:
        annotations['score'] = np.zeros((annotations['bbox'].shape[0], ))

    # The first num_objects entries get running indices, the rest -1; this
    # labels DontCare entries with -1 only when they come last in the file.
    index = list(range(num_objects)) + [-1] * (num_gt - num_objects)
    annotations['index'] = np.array(index, dtype=np.int32)
    annotations['group_ids'] = np.arange(num_gt, dtype=np.int32)
    return annotations
def
_extend_matrix
(
mat
):
mat
=
np
.
concatenate
([
mat
,
np
.
array
([[
0.
,
0.
,
0.
,
1.
]])],
axis
=
0
)
return
mat
def get_kitti_image_info(path,
                         training=True,
                         label_info=True,
                         velodyne=False,
                         calib=False,
                         image_ids=7481,
                         extend_matrix=True,
                         num_worker=8,
                         relative_path=True,
                         with_imageshape=True):
    """Collect per-sample info dicts for a KITTI-style dataset.

    KITTI annotation format version 2:
    {
        [optional]points: [N, 3+] point cloud
        [optional, for kitti]image: {
            image_idx: ...
            image_path: ...
            image_shape: ...
        }
        point_cloud: {
            num_features: 4
            velodyne_path: ...
        }
        [optional, for kitti]calib: {
            R0_rect: ...
            Tr_velo_to_cam: ...
            P2: ...
        }
        annos: {
            location: [num_gt, 3] array
            dimensions: [num_gt, 3] array
            rotation_y: [num_gt] angle array
            name: [num_gt] ground truth name array
            [optional]difficulty: kitti difficulty
            [optional]group_ids: used for multi-part object
        }
    }

    Args:
        path (str): Dataset root directory.
        training (bool): Read from the training split. Default: True.
        label_info (bool): Load label files into ``annos``. Default: True.
        velodyne (bool): Record the velodyne file path. Default: False.
        calib (bool): Load the calibration file. Default: False.
        image_ids (int | list[int]): Sample indices; an int ``n`` stands for
            ``range(n)``. Default: 7481.
        extend_matrix (bool): Extend calibration matrices to 4x4.
            Default: True.
        num_worker (int): Size of the thread pool. Default: 8.
        relative_path (bool): Store paths relative to ``path``.
            Default: True.
        with_imageshape (bool): Read every image to record its height and
            width. Default: True.

    Returns:
        list[dict]: One info dict per entry of ``image_ids``, in order.
    """
    data_root = Path(path)
    if not isinstance(image_ids, list):
        image_ids = list(range(image_ids))

    def _parse_row(row_text, stop, shape):
        # Field 0 of a calibration line is its label; the numbers follow.
        numbers = [float(tok) for tok in row_text.split(' ')[1:stop]]
        return np.array(numbers).reshape(shape)

    def _load_calib(idx):
        calib_path = get_calib_path(idx, path, training, relative_path=False)
        with open(calib_path, 'r') as f:
            lines = f.readlines()

        projections = [_parse_row(lines[cam], 13, [3, 4]) for cam in range(4)]
        if extend_matrix:
            projections = [_extend_matrix(proj) for proj in projections]

        R0_rect = _parse_row(lines[4], 10, [3, 3])
        if extend_matrix:
            rect_4x4 = np.zeros([4, 4], dtype=R0_rect.dtype)
            rect_4x4[3, 3] = 1.
            rect_4x4[:3, :3] = R0_rect
        else:
            rect_4x4 = R0_rect

        Tr_velo_to_cam = _parse_row(lines[5], 13, [3, 4])
        Tr_imu_to_velo = _parse_row(lines[6], 13, [3, 4])
        if extend_matrix:
            Tr_velo_to_cam = _extend_matrix(Tr_velo_to_cam)
            Tr_imu_to_velo = _extend_matrix(Tr_imu_to_velo)

        calib_info = {}
        for key, proj in zip(('P0', 'P1', 'P2', 'P3'), projections):
            calib_info[key] = proj
        calib_info['R0_rect'] = rect_4x4
        calib_info['Tr_velo_to_cam'] = Tr_velo_to_cam
        calib_info['Tr_imu_to_velo'] = Tr_imu_to_velo
        return calib_info

    def _build_info(idx):
        pc_info = {'num_features': 4}
        image_info = {'image_idx': idx}
        annotations = None

        if velodyne:
            pc_info['velodyne_path'] = get_velodyne_path(
                idx, path, training, relative_path)

        image_info['image_path'] = get_image_path(idx, path, training,
                                                  relative_path)
        if with_imageshape:
            img_path = image_info['image_path']
            if relative_path:
                img_path = str(data_root / img_path)
            image_info['image_shape'] = np.array(
                io.imread(img_path).shape[:2], dtype=np.int32)

        if label_info:
            label_path = get_label_path(idx, path, training, relative_path)
            if relative_path:
                label_path = str(data_root / label_path)
            annotations = get_label_anno(label_path)

        info = {'image': image_info, 'point_cloud': pc_info}
        if calib:
            info['calib'] = _load_calib(idx)

        if annotations is not None:
            info['annos'] = annotations
            add_difficulty_to_annos(info)
        return info

    with futures.ThreadPoolExecutor(num_worker) as executor:
        image_infos = executor.map(_build_info, image_ids)

    return list(image_infos)
def get_waymo_image_info(path,
                         training=True,
                         label_info=True,
                         velodyne=False,
                         calib=False,
                         pose=False,
                         image_ids=7481,
                         extend_matrix=True,
                         num_worker=8,
                         relative_path=True,
                         with_imageshape=True,
                         max_sweeps=5):
    """Collect per-sample info dicts for a Waymo dataset in KITTI layout.

    Waymo annotation format version like KITTI:
    {
        [optional]points: [N, 3+] point cloud
        [optional, for kitti]image: {
            image_idx: ...
            image_path: ...
            image_shape: ...
        }
        point_cloud: {
            num_features: 6
            velodyne_path: ...
        }
        [optional, for kitti]calib: {
            R0_rect: ...
            Tr_velo_to_cam0: ...
            P0: ...
        }
        annos: {
            location: [num_gt, 3] array
            dimensions: [num_gt, 3] array
            rotation_y: [num_gt] angle array
            name: [num_gt] ground truth name array
            [optional]difficulty: kitti difficulty
            [optional]group_ids: used for multi-part object
        }
    }

    Args:
        path (str): Dataset root directory.
        training (bool): Read from the training split. Default: True.
        label_info (bool): Load label files into ``annos``. Default: True.
        velodyne (bool): Record the velodyne path and the frame timestamp.
            Default: False.
        calib (bool): Load the calibration file. Default: False.
        pose (bool): Load the pose file. Default: False.
        image_ids (int | list[int]): Sample indices; an int ``n`` stands for
            ``range(n)``. Default: 7481.
        extend_matrix (bool): Extend calibration matrices to 4x4.
            Default: True.
        num_worker (int): Size of the thread pool. Default: 8.
        relative_path (bool): Store paths relative to ``path``.
            Default: True.
        with_imageshape (bool): Read every image to record its height and
            width. Default: True.
        max_sweeps (int): Maximum number of preceding frames recorded under
            ``sweeps``. Default: 5.

    Returns:
        list[dict]: One info dict per entry of ``image_ids``, in order.
    """
    data_root = Path(path)
    if not isinstance(image_ids, list):
        image_ids = list(range(image_ids))

    def _parse_row(row_text, stop, shape):
        # Field 0 of a calibration line is its label; the numbers follow.
        numbers = [float(tok) for tok in row_text.split(' ')[1:stop]]
        return np.array(numbers).reshape(shape)

    def _read_timestamp(velodyne_path, num_features):
        raw = np.fromfile(Path(path) / velodyne_path, dtype=np.float32)
        frame = np.copy(raw).reshape(-1, num_features)
        # values of the last dim are all the timestamp
        return np.int64(frame[0, -1])

    def _load_calib(idx):
        calib_path = get_calib_path(
            idx, path, training, relative_path=False, use_prefix_id=True)
        with open(calib_path, 'r') as f:
            lines = f.readlines()

        projections = [_parse_row(lines[cam], 13, [3, 4]) for cam in range(5)]
        if extend_matrix:
            projections = [_extend_matrix(proj) for proj in projections]

        R0_rect = _parse_row(lines[5], 10, [3, 3])
        if extend_matrix:
            rect_4x4 = np.zeros([4, 4], dtype=R0_rect.dtype)
            rect_4x4[3, 3] = 1.
            rect_4x4[:3, :3] = R0_rect
        else:
            rect_4x4 = R0_rect

        Tr_velo_to_cam = _parse_row(lines[6], 13, [3, 4])
        if extend_matrix:
            Tr_velo_to_cam = _extend_matrix(Tr_velo_to_cam)

        calib_info = {}
        for key, proj in zip(('P0', 'P1', 'P2', 'P3', 'P4'), projections):
            calib_info[key] = proj
        calib_info['R0_rect'] = rect_4x4
        calib_info['Tr_velo_to_cam'] = Tr_velo_to_cam
        return calib_info

    def _collect_sweeps(idx, num_features):
        # Walk backwards frame by frame until a velodyne file is missing or
        # max_sweeps frames have been gathered.
        sweeps = []
        prev_idx = idx
        while len(sweeps) < max_sweeps:
            prev_idx -= 1
            prev_info = {}
            prev_info['velodyne_path'] = get_velodyne_path(
                prev_idx,
                path,
                training,
                relative_path,
                exist_check=False,
                use_prefix_id=True)
            if not osp.exists(Path(path) / prev_info['velodyne_path']):
                break
            prev_info['timestamp'] = _read_timestamp(
                prev_info['velodyne_path'], num_features)
            prev_pose_path = get_pose_path(
                prev_idx,
                path,
                training,
                relative_path=False,
                use_prefix_id=True)
            prev_info['pose'] = np.loadtxt(prev_pose_path)
            sweeps.append(prev_info)
        return sweeps

    def _build_info(idx):
        info = {}
        pc_info = {'num_features': 6}
        image_info = {'image_idx': idx}
        annotations = None

        if velodyne:
            pc_info['velodyne_path'] = get_velodyne_path(
                idx, path, training, relative_path, use_prefix_id=True)
            info['timestamp'] = _read_timestamp(pc_info['velodyne_path'],
                                                pc_info['num_features'])

        image_info['image_path'] = get_image_path(
            idx,
            path,
            training,
            relative_path,
            info_type='image_0',
            use_prefix_id=True)
        if with_imageshape:
            img_path = image_info['image_path']
            if relative_path:
                img_path = str(data_root / img_path)
            image_info['image_shape'] = np.array(
                io.imread(img_path).shape[:2], dtype=np.int32)

        if label_info:
            label_path = get_label_path(
                idx,
                path,
                training,
                relative_path,
                info_type='label_all',
                use_prefix_id=True)
            if relative_path:
                label_path = str(data_root / label_path)
            annotations = get_label_anno(label_path)

        info['image'] = image_info
        info['point_cloud'] = pc_info
        if calib:
            info['calib'] = _load_calib(idx)

        if pose:
            pose_path = get_pose_path(
                idx, path, training, relative_path=False, use_prefix_id=True)
            info['pose'] = np.loadtxt(pose_path)

        if annotations is not None:
            info['annos'] = annotations
            # The value parsed into 'score' is stored as 'camera_id' here.
            info['annos']['camera_id'] = info['annos'].pop('score')
            add_difficulty_to_annos(info)

        info['sweeps'] = _collect_sweeps(idx, pc_info['num_features'])
        return info

    with futures.ThreadPoolExecutor(num_worker) as executor:
        image_infos = executor.map(_build_info, image_ids)

    return list(image_infos)
def kitti_anno_to_label_file(annos, folder):
    """Write each annotation dict as a KITTI label text file.

    One file named ``<image index>.txt`` is written into ``folder`` per
    entry of ``annos``, with one ``kitti_result_line`` per box.

    Args:
        annos (list[dict]): Annotations; each entry provides
            ``metadata['image_idx']`` and per-box arrays ``name``, ``alpha``,
            ``bbox``, ``location``, ``dimensions``, ``rotation_y``, ``score``.
        folder (str): Output directory.
    """
    out_dir = Path(folder)
    exported_fields = ('name', 'alpha', 'bbox', 'location', 'dimensions',
                       'rotation_y', 'score')
    for anno in annos:
        image_idx = anno['metadata']['image_idx']
        num_boxes = anno['bbox'].shape[0]
        label_lines = [
            kitti_result_line({key: anno[key][j]
                               for key in exported_fields})
            for j in range(num_boxes)
        ]
        label_file = out_dir / f'{get_image_index_str(image_idx)}.txt'
        with open(label_file, 'w') as f:
            f.write('\n'.join(label_lines))
def add_difficulty_to_annos(info):
    """Compute a difficulty level per annotation and store it in ``info``.

    A box qualifies for a level when its occlusion and truncation do not
    exceed the level's maxima and its 2D bbox height is above the level's
    minimum. Levels are 0 (easy), 1 (moderate), 2 (hard) and -1 (none).

    Args:
        info (dict): Sample info whose ``info['annos']`` holds the arrays
            ``dimensions``, ``bbox``, ``occluded`` and ``truncated``.
            ``info['annos']['difficulty']`` is written as an int32 array.

    Returns:
        list[int]: The difficulty of every annotation, in order.
    """
    min_height = [40, 25,
                  25]  # minimum height for evaluated groundtruth/detections
    max_occlusion = [
        0, 1, 2
    ]  # maximum occlusion level of the groundtruth used for evaluation
    max_trunc = [
        0.15, 0.3, 0.5
    ]  # maximum truncation level of the groundtruth used for evaluation
    annos = info['annos']
    dims = annos['dimensions']  # lhw format
    bbox = annos['bbox']
    height = bbox[:, 3] - bbox[:, 1]
    occlusion = annos['occluded']
    truncation = annos['truncated']

    num_boxes = len(dims)
    # Use the builtin bool: the np.bool alias was removed in NumPy 1.24 and
    # raises AttributeError there.
    easy_mask = np.ones((num_boxes, ), dtype=bool)
    moderate_mask = np.ones((num_boxes, ), dtype=bool)
    hard_mask = np.ones((num_boxes, ), dtype=bool)
    level_masks = (easy_mask, moderate_mask, hard_mask)

    for i, (h, o, t) in enumerate(zip(height, occlusion, truncation)):
        for level, mask in enumerate(level_masks):
            if (o > max_occlusion[level] or h <= min_height[level]
                    or t > max_trunc[level]):
                mask[i] = False

    # A box is reported at the first level it newly qualifies for.
    is_easy = easy_mask
    is_moderate = np.logical_xor(easy_mask, moderate_mask)
    is_hard = np.logical_xor(hard_mask, moderate_mask)

    diff = []
    for i in range(num_boxes):
        if is_easy[i]:
            diff.append(0)
        elif is_moderate[i]:
            diff.append(1)
        elif is_hard[i]:
            diff.append(2)
        else:
            diff.append(-1)
    annos['difficulty'] = np.array(diff, np.int32)
    return diff
def kitti_result_line(result_dict, precision=4):
    """Format one detection/annotation as a KITTI label line.

    Fields are emitted in the fixed order name, truncated, occluded, alpha,
    bbox, dimensions, location, rotation_y, score. Fields missing from
    ``result_dict`` (or given as ``None``) fall back to their defaults.

    Args:
        result_dict (dict): Field name to value. ``name`` and ``bbox`` have
            no default.
        precision (int): Number of decimals for float fields. Default: 4.

    Returns:
        str: The fields joined by single spaces.

    Raises:
        ValueError: If a field without default is explicitly set to ``None``.
    """
    float_spec = '.{}f'.format(precision)
    defaults = OrderedDict([
        ('name', None),
        ('truncated', -1),
        ('occluded', -1),
        ('alpha', -10),
        ('bbox', None),
        ('dimensions', [-1, -1, -1]),
        ('location', [-1000, -1000, -1000]),
        ('rotation_y', -10),
        ('score', 0.0),
    ])
    scalar_fields = ('truncated', 'alpha', 'rotation_y', 'score')
    vector_fields = ('bbox', 'dimensions', 'location')

    # Start from "unset" for every known field, then overlay the input.
    merged = OrderedDict((field, None) for field in defaults)
    for field, value in result_dict.items():
        if defaults[field] is None and value is None:
            raise ValueError('you must specify a value for {}'.format(field))
        merged[field] = value

    tokens = []
    for field, value in merged.items():
        if field == 'name':
            tokens.append(value)
        elif field in scalar_fields:
            if value is None:
                tokens.append(str(defaults[field]))
            else:
                tokens.append(format(value, float_spec))
        elif field == 'occluded':
            # Occlusion is written as-is, without float formatting.
            if value is None:
                tokens.append(str(defaults[field]))
            else:
                tokens.append('{}'.format(value))
        elif field in vector_fields:
            if value is None:
                tokens.extend(str(v) for v in defaults[field])
            else:
                tokens.extend(format(v, float_spec) for v in value)
        else:
            raise ValueError('unknown key. supported key:{}'.format(
                merged.keys()))
    return ' '.join(tokens)
Prev
1
…
6
7
8
9
10
11
12
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment