Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
TS-MODELS-OPT
training
Autonomous-Driving-models
Commits
d2b71343
Commit
d2b71343
authored
Apr 08, 2026
by
雍大凯
Browse files
add code
parent
69e57885
Changes
259
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
5442 additions
and
0 deletions
+5442
-0
docker-hub/FlashOCC/Flashocc/tools/analysis_tools/benchmark.py
...r-hub/FlashOCC/Flashocc/tools/analysis_tools/benchmark.py
+144
-0
docker-hub/FlashOCC/Flashocc/tools/analysis_tools/benchmark_sequential.py
...OCC/Flashocc/tools/analysis_tools/benchmark_sequential.py
+148
-0
docker-hub/FlashOCC/Flashocc/tools/analysis_tools/benchmark_trt.py
...b/FlashOCC/Flashocc/tools/analysis_tools/benchmark_trt.py
+282
-0
docker-hub/FlashOCC/Flashocc/tools/analysis_tools/benchmark_view_transformer.py
...ashocc/tools/analysis_tools/benchmark_view_transformer.py
+150
-0
docker-hub/FlashOCC/Flashocc/tools/analysis_tools/get_flops.py
...r-hub/FlashOCC/Flashocc/tools/analysis_tools/get_flops.py
+133
-0
docker-hub/FlashOCC/Flashocc/tools/analysis_tools/vis.py
docker-hub/FlashOCC/Flashocc/tools/analysis_tools/vis.py
+308
-0
docker-hub/FlashOCC/Flashocc/tools/analysis_tools/vis_occ.py
docker-hub/FlashOCC/Flashocc/tools/analysis_tools/vis_occ.py
+415
-0
docker-hub/FlashOCC/Flashocc/tools/convert_bevdet_to_TRT.py
docker-hub/FlashOCC/Flashocc/tools/convert_bevdet_to_TRT.py
+560
-0
docker-hub/FlashOCC/Flashocc/tools/create_data_bevdet.py
docker-hub/FlashOCC/Flashocc/tools/create_data_bevdet.py
+149
-0
docker-hub/FlashOCC/Flashocc/tools/data_converter/__init__.py
...er-hub/FlashOCC/Flashocc/tools/data_converter/__init__.py
+1
-0
docker-hub/FlashOCC/Flashocc/tools/data_converter/__pycache__/__init__.cpython-310.pyc
...tools/data_converter/__pycache__/__init__.cpython-310.pyc
+0
-0
docker-hub/FlashOCC/Flashocc/tools/data_converter/__pycache__/nuscenes_converter.cpython-310.pyc
..._converter/__pycache__/nuscenes_converter.cpython-310.pyc
+0
-0
docker-hub/FlashOCC/Flashocc/tools/data_converter/create_gt_database.py
...shOCC/Flashocc/tools/data_converter/create_gt_database.py
+624
-0
docker-hub/FlashOCC/Flashocc/tools/data_converter/indoor_converter.py
...lashOCC/Flashocc/tools/data_converter/indoor_converter.py
+121
-0
docker-hub/FlashOCC/Flashocc/tools/data_converter/kitti_converter.py
...FlashOCC/Flashocc/tools/data_converter/kitti_converter.py
+624
-0
docker-hub/FlashOCC/Flashocc/tools/data_converter/kitti_data_utils.py
...lashOCC/Flashocc/tools/data_converter/kitti_data_utils.py
+619
-0
docker-hub/FlashOCC/Flashocc/tools/data_converter/lyft_converter.py
.../FlashOCC/Flashocc/tools/data_converter/lyft_converter.py
+271
-0
docker-hub/FlashOCC/Flashocc/tools/data_converter/lyft_data_fixer.py
...FlashOCC/Flashocc/tools/data_converter/lyft_data_fixer.py
+39
-0
docker-hub/FlashOCC/Flashocc/tools/data_converter/nuimage_converter.py
...ashOCC/Flashocc/tools/data_converter/nuimage_converter.py
+226
-0
docker-hub/FlashOCC/Flashocc/tools/data_converter/nuscenes_converter.py
...shOCC/Flashocc/tools/data_converter/nuscenes_converter.py
+628
-0
No files found.
docker-hub/FlashOCC/Flashocc/tools/analysis_tools/benchmark.py
0 → 100644
View file @
d2b71343
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import os
import sys
import time

# The working directory (expected to be the repository root) has to be on the
# module search path BEFORE the project-local `tools` package is imported
# below; inserting it after the imports is too late to help that import.
sys.path.insert(0, os.getcwd())
print(sys.path)

import torch  # noqa: E402
from mmcv import Config  # noqa: E402
from mmcv.parallel import MMDataParallel  # noqa: E402
from mmcv.runner import load_checkpoint, wrap_fp16_model  # noqa: E402
from mmdet3d.datasets import build_dataloader, build_dataset  # noqa: E402
from mmdet3d.models import build_detector  # noqa: E402
from tools.misc.fuse_conv_bn import fuse_module  # noqa: E402
def parse_args():
    """Parse the command line of the benchmark script.

    Returns:
        argparse.Namespace: ``config`` and ``checkpoint`` paths, the integer
        options ``samples`` and ``log_interval``, and the boolean flags
        ``fuse_conv_bn``, ``w_pano``, ``w_panoproc`` and ``no_acceleration``.
    """
    parser = argparse.ArgumentParser(description='MMDet benchmark a model')
    parser.add_argument('config', help='test config file path')
    parser.add_argument('checkpoint', help='checkpoint file')
    # type=int is required: without it a value given on the command line is
    # a str, which breaks the `%` and `==` arithmetic done on it by main().
    parser.add_argument(
        '--samples', type=int, default=500, help='samples to benchmark')
    parser.add_argument(
        '--log-interval', type=int, default=50, help='interval of logging')
    parser.add_argument(
        '--fuse-conv-bn',
        action='store_true',
        help='Whether to fuse conv and bn, this will slightly increase '
        'the inference speed')
    parser.add_argument('--w_pano', action='store_true')
    parser.add_argument('--w_panoproc', action='store_true')
    parser.add_argument(
        '--no-acceleration',
        action='store_true',
        help='Omit the pre-computation acceleration')
    args = parser.parse_args()
    return args
def _import_plugin_modules(cfg, config_path):
    """Import the plugin package referenced by ``cfg``, if there is one.

    Nothing happens unless ``cfg.plugin`` exists and is truthy. The package
    name is built from the directory of ``cfg.plugin_dir`` when that key is
    present, otherwise from the directory of ``config_path``, by replacing
    the ``/`` separators with dots.
    """
    if not (hasattr(cfg, 'plugin') and cfg.plugin):
        return
    import importlib
    if hasattr(cfg, 'plugin_dir'):
        module_path = '.'.join(os.path.dirname(cfg.plugin_dir).split('/'))
        print(module_path)
    else:
        # Fall back to the directory that holds the config file.
        module_path = '.'.join(os.path.dirname(config_path).split('/'))
    importlib.import_module(module_path)


def main():
    """Benchmark the inference speed of a detector on the test dataset.

    Builds the test dataloader and the model from the config, loads the
    checkpoint, then times ``model(...)`` once per sample between two
    ``torch.cuda.synchronize()`` calls. The first ``num_warmup`` iterations
    are excluded from the accumulated time. Progress is printed every
    ``--log-interval`` samples and the overall result after ``--samples``
    samples.
    """
    args = parse_args()
    # Coerce defensively so the arithmetic below also works if the values
    # arrive as strings from the command line.
    num_samples = int(args.samples)
    log_interval = int(args.log_interval)

    cfg = Config.fromfile(args.config)
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    cfg.model.pretrained = None
    cfg.data.test.test_mode = True

    # import modules from plugin/xx, registry will be updated
    _import_plugin_modules(cfg, args.config)

    # build the dataloader
    # TODO: support multiple images per gpu (only minor changes are needed)
    dataset = build_dataset(cfg.data.test)
    data_loader = build_dataloader(
        dataset,
        samples_per_gpu=1,
        workers_per_gpu=0,
        dist=False,
        shuffle=False)

    # build the model and load checkpoint
    if not args.no_acceleration:
        cfg.model.img_view_transformer.accelerate = True
    cfg.model.train_cfg = None
    model = build_detector(cfg.model, test_cfg=cfg.get('test_cfg'))
    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is not None:
        wrap_fp16_model(model)
    load_checkpoint(model, args.checkpoint, map_location='cpu')
    if args.fuse_conv_bn:
        model = fuse_module(model)

    model = MMDataParallel(model, device_ids=[0])
    model.eval()

    # the first several iterations may be very slow so skip them
    num_warmup = 5
    pure_inf_time = 0

    # benchmark with several samples and take the average
    for i, data in enumerate(data_loader):
        torch.cuda.synchronize()
        start_time = time.perf_counter()
        with torch.no_grad():
            model(
                return_loss=False,
                rescale=True,
                w_pano=args.w_pano,
                w_panoproc=args.w_panoproc,
                **data)
        torch.cuda.synchronize()
        elapsed = time.perf_counter() - start_time

        if i >= num_warmup:
            pure_inf_time += elapsed
            if (i + 1) % log_interval == 0:
                fps = (i + 1 - num_warmup) / pure_inf_time
                print(f'Done image [{i + 1:<3}/ {args.samples}], '
                      f'fps: {fps:.1f} img / s')

        if (i + 1) == num_samples:
            if pure_inf_time <= 0:
                # Every executed sample fell inside the warm-up window.
                print(f'Only {i + 1} samples were run, which does not '
                      f'exceed the {num_warmup} warm-up iterations; '
                      'no fps to report')
                break
            # `elapsed` of this iteration is already part of pure_inf_time;
            # adding it a second time would bias the reported fps downwards.
            fps = (i + 1 - num_warmup) / pure_inf_time
            print(f'Overall \nfps: {fps:.2f} img / s '
                  f'\ninference time: {1000 / fps:.2f} ms')
            break


if __name__ == '__main__':
    main()
docker-hub/FlashOCC/Flashocc/tools/analysis_tools/benchmark_sequential.py
0 → 100644
View file @
d2b71343
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import os
import sys
import time

# The working directory (expected to be the repository root) has to be on the
# module search path BEFORE the project-local `tools` package is imported
# below; inserting it after the imports is too late to help that import.
sys.path.insert(0, os.getcwd())
print(sys.path)

import torch  # noqa: E402
from mmcv import Config  # noqa: E402
from mmcv.parallel import MMDataParallel  # noqa: E402
from mmcv.runner import load_checkpoint, wrap_fp16_model  # noqa: E402
from mmdet3d.datasets import build_dataloader, build_dataset  # noqa: E402
from mmdet3d.models import build_detector  # noqa: E402
from tools.misc.fuse_conv_bn import fuse_module  # noqa: E402
def parse_args():
    """Parse the command line of the sequential benchmark script.

    Returns:
        argparse.Namespace: ``config`` and ``checkpoint`` paths, the integer
        options ``samples`` and ``log_interval``, and the boolean flags
        ``fuse_conv_bn``, ``w_pano``, ``w_panoproc`` and ``no_acceleration``.
    """
    parser = argparse.ArgumentParser(description='MMDet benchmark a model')
    parser.add_argument('config', help='test config file path')
    parser.add_argument('checkpoint', help='checkpoint file')
    # type=int is required: without it a value given on the command line is
    # a str, which breaks the `%` and `==` arithmetic done on it by main().
    parser.add_argument(
        '--samples', type=int, default=400, help='samples to benchmark')
    parser.add_argument(
        '--log-interval', type=int, default=50, help='interval of logging')
    parser.add_argument(
        '--fuse-conv-bn',
        action='store_true',
        help='Whether to fuse conv and bn, this will slightly increase '
        'the inference speed')
    parser.add_argument('--w_pano', action='store_true')
    parser.add_argument('--w_panoproc', action='store_true')
    parser.add_argument(
        '--no-acceleration',
        action='store_true',
        help='Omit the pre-computation acceleration')
    args = parser.parse_args()
    return args
def _import_plugin_modules(cfg, config_path):
    """Import the plugin package referenced by ``cfg``, if there is one.

    Nothing happens unless ``cfg.plugin`` exists and is truthy. The package
    name is built from the directory of ``cfg.plugin_dir`` when that key is
    present, otherwise from the directory of ``config_path``, by replacing
    the ``/`` separators with dots.
    """
    if not (hasattr(cfg, 'plugin') and cfg.plugin):
        return
    import importlib
    if hasattr(cfg, 'plugin_dir'):
        module_path = '.'.join(os.path.dirname(cfg.plugin_dir).split('/'))
        print(module_path)
    else:
        # Fall back to the directory that holds the config file.
        module_path = '.'.join(os.path.dirname(config_path).split('/'))
    importlib.import_module(module_path)


def main():
    """Benchmark the sequential inference path of a detector.

    For every sample, ``model.module.extract_img_feat(..., pred_prev=True)``
    is run first, outside the timed region, and its two results are fed to
    the timed ``model(..., sequential=True, feat_prev=...)`` call. The first
    ``num_warmup`` iterations are excluded from the accumulated time.
    Progress is printed every ``--log-interval`` samples and the overall
    result after ``--samples`` samples.
    """
    args = parse_args()
    # Coerce defensively so the arithmetic below also works if the values
    # arrive as strings from the command line.
    num_samples = int(args.samples)
    log_interval = int(args.log_interval)

    cfg = Config.fromfile(args.config)
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    cfg.model.pretrained = None
    cfg.data.test.test_mode = True

    # import modules from plugin/xx, registry will be updated
    _import_plugin_modules(cfg, args.config)

    # build the dataloader
    # TODO: support multiple images per gpu (only minor changes are needed)
    dataset = build_dataset(cfg.data.test)
    data_loader = build_dataloader(
        dataset,
        samples_per_gpu=1,
        workers_per_gpu=0,
        dist=False,
        shuffle=False)

    # build the model and load checkpoint
    cfg.model.train_cfg = None
    # NOTE: the key spelling ("transfromation") is kept exactly as written;
    # it is a config key read elsewhere in the project.
    cfg.model.align_after_view_transfromation = True
    if not args.no_acceleration:
        cfg.model.img_view_transformer.accelerate = True
    model = build_detector(cfg.model, test_cfg=cfg.get('test_cfg'))
    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is not None:
        wrap_fp16_model(model)
    load_checkpoint(model, args.checkpoint, map_location='cpu')
    if args.fuse_conv_bn:
        model = fuse_module(model)

    model = MMDataParallel(model, device_ids=[0])
    model.eval()

    # the first several iterations may be very slow so skip them
    num_warmup = 5
    pure_inf_time = 0

    # benchmark with several samples and take the average
    for i, data in enumerate(data_loader):
        # Untimed preparation step whose outputs feed the timed call below.
        inputs = [d.cuda() for d in data['img_inputs'][0]]
        with torch.no_grad():
            feat_prev, inputs = model.module.extract_img_feat(
                inputs, pred_prev=True, img_metas=None)
        data['img_inputs'][0] = inputs

        torch.cuda.synchronize()
        start_time = time.perf_counter()
        with torch.no_grad():
            model(
                return_loss=False,
                rescale=True,
                sequential=True,
                feat_prev=feat_prev,
                w_pano=args.w_pano,
                w_panoproc=args.w_panoproc,
                **data)
        torch.cuda.synchronize()
        elapsed = time.perf_counter() - start_time

        if i >= num_warmup:
            pure_inf_time += elapsed
            if (i + 1) % log_interval == 0:
                fps = (i + 1 - num_warmup) / pure_inf_time
                print(f'Done image [{i + 1:<3}/ {args.samples}], '
                      f'fps: {fps:.1f} img / s')

        if (i + 1) == num_samples:
            if pure_inf_time <= 0:
                # Every executed sample fell inside the warm-up window.
                print(f'Only {i + 1} samples were run, which does not '
                      f'exceed the {num_warmup} warm-up iterations; '
                      'no fps to report')
                break
            # `elapsed` of this iteration is already part of pure_inf_time;
            # adding it a second time would bias the reported fps downwards.
            fps = (i + 1 - num_warmup) / pure_inf_time
            print(f'Overall \nfps: {fps:.2f} img / s '
                  f'\ninference time: {1000 / fps:.2f} ms')
            break


if __name__ == '__main__':
    main()
\ No newline at end of file
docker-hub/FlashOCC/Flashocc/tools/analysis_tools/benchmark_trt.py
0 → 100644
View file @
d2b71343
import
time
from
typing
import
Dict
,
Optional
,
Sequence
,
Union
import
os
from
os
import
path
as
osp
import
sys
sys
.
path
.
insert
(
0
,
os
.
getcwd
())
import
tensorrt
as
trt
import
torch
import
torch.onnx
from
mmcv
import
Config
from
mmdeploy.backend.tensorrt
import
load_tensorrt_plugin
try
:
# If mmdet version > 2.23.0, compat_cfg would be imported and
# used from mmdet instead of mmdet3d.
from
mmdet.utils
import
compat_cfg
except
ImportError
:
from
mmdet3d.utils
import
compat_cfg
import
argparse
from
mmdet3d.core
import
bbox3d2result
from
mmdet3d.core.bbox.structures.box_3d_mode
import
LiDARInstance3DBoxes
from
mmdet3d.datasets
import
build_dataloader
,
build_dataset
from
mmdet3d.models
import
build_model
def parse_args():
    """Parse the command line of the TensorRT benchmark script.

    Returns:
        argparse.Namespace: ``config`` and ``engine`` paths, the integer
        option ``samples`` and the boolean flags ``postprocessing``,
        ``eval`` and ``prefetch``.
    """
    parser = argparse.ArgumentParser(description='Deploy BEVDet with Tensorrt')
    parser.add_argument('config', help='deploy config file path')
    parser.add_argument('engine', help='checkpoint file')
    # type=int is required: without it a value given on the command line is
    # a str and the `(i + 1) == args.samples` test in main() never matches.
    parser.add_argument(
        '--samples', type=int, default=500, help='samples to benchmark')
    parser.add_argument('--postprocessing', action='store_true')
    parser.add_argument('--eval', action='store_true')
    parser.add_argument(
        '--prefetch',
        action='store_true',
        help='use prefetch to accelerate the data loading, '
        'the inference speed is sightly degenerated due '
        'to the computational occupancy of prefetch')
    args = parser.parse_args()
    return args
def torch_dtype_from_trt(dtype: trt.DataType) -> torch.dtype:
    """Convert a TensorRT data type to the matching torch dtype.

    Args:
        dtype (trt.DataType): The data type in tensorrt.

    Returns:
        torch.dtype: The corresponding data type in torch.

    Raises:
        TypeError: If ``dtype`` is none of bool, int8, int32, float16 or
            float32.
    """
    # Candidates are compared in the same order as a plain if/elif chain.
    known_types = (
        (trt.bool, torch.bool),
        (trt.int8, torch.int8),
        (trt.int32, torch.int32),
        (trt.float16, torch.float16),
        (trt.float32, torch.float32),
    )
    for trt_type, torch_type in known_types:
        if dtype == trt_type:
            return torch_type
    raise TypeError(f'{dtype} is not supported by torch')
class TRTWrapper(torch.nn.Module):
    """Run a TensorRT engine on torch CUDA tensors.

    Args:
        engine (str | trt.ICudaEngine): Path of a serialized engine file, or
            an engine object that is used as is.
        output_names (Sequence[str], optional): Names of the output bindings.
            When omitted, every binding that is not an input is treated as an
            output (in no particular order, because a set difference is
            used).
    """

    def __init__(self,
                 engine: Union[str, trt.ICudaEngine],
                 output_names: Optional[Sequence[str]] = None) -> None:
        super().__init__()
        self.engine = engine
        if isinstance(self.engine, str):
            # A str is a path: read the file and deserialize the engine.
            with trt.Logger() as logger, trt.Runtime(logger) as runtime:
                with open(self.engine, mode='rb') as f:
                    engine_bytes = f.read()
                self.engine = runtime.deserialize_cuda_engine(engine_bytes)
        self.context = self.engine.create_execution_context()
        names = [_ for _ in self.engine]
        input_names = list(filter(self.engine.binding_is_input, names))
        self._input_names = input_names
        self._output_names = output_names

        if self._output_names is None:
            output_names = list(set(names) - set(input_names))
            self._output_names = output_names

    def forward(self, inputs: Dict[str, torch.Tensor]):
        """Enqueue one inference on the current CUDA stream.

        Args:
            inputs (Dict[str, torch.Tensor]): Input tensors keyed by binding
                name.

        Returns:
            dict: Freshly allocated CUDA output tensors keyed by output name.
            The launch is asynchronous, so the caller must synchronize the
            stream before relying on their content.
        """
        bindings = [None] * (len(self._input_names) + len(self._output_names))
        # `.contiguous()` returns a new tensor when the input is not already
        # contiguous. Keep a reference to it until the launch is enqueued;
        # otherwise the copy is released immediately and its memory may be
        # handed to the output buffers allocated below.
        live_inputs = []
        for input_name, input_tensor in inputs.items():
            idx = self.engine.get_binding_index(input_name)
            self.context.set_binding_shape(idx, tuple(input_tensor.shape))
            contiguous_input = input_tensor.contiguous()
            live_inputs.append(contiguous_input)
            bindings[idx] = contiguous_input.data_ptr()

        # create output tensors
        outputs = {}
        for output_name in self._output_names:
            idx = self.engine.get_binding_index(output_name)
            dtype = torch_dtype_from_trt(self.engine.get_binding_dtype(idx))
            shape = tuple(self.context.get_binding_shape(idx))

            device = torch.device('cuda')
            output = torch.zeros(size=shape, dtype=dtype, device=device)
            outputs[output_name] = output
            bindings[idx] = output.data_ptr()
        self.context.execute_async_v2(bindings,
                                      torch.cuda.current_stream().cuda_stream)
        return outputs
def get_plugin_names():
    """Return the names of all creators in the TensorRT plugin registry."""
    plugin_names = []
    for creator in trt.get_plugin_registry().plugin_creator_list:
        plugin_names.append(creator.name)
    return plugin_names
def _import_plugin_modules(cfg, config_path):
    """Import the plugin package referenced by ``cfg``, if there is one.

    Nothing happens unless ``cfg.plugin`` exists and is truthy. The package
    name is built from the directory of ``cfg.plugin_dir`` when that key is
    present, otherwise from the directory of ``config_path``, by replacing
    the ``/`` separators with dots.
    """
    if not (hasattr(cfg, 'plugin') and cfg.plugin):
        return
    import importlib
    if hasattr(cfg, 'plugin_dir'):
        module_path = '.'.join(os.path.dirname(cfg.plugin_dir).split('/'))
        print(module_path)
    else:
        # Fall back to the directory that holds the config file.
        module_path = '.'.join(os.path.dirname(config_path).split('/'))
    importlib.import_module(module_path)


def main():
    """Benchmark (and optionally evaluate) a TensorRT engine.

    The torch model is built with ``'TRT'`` appended to the configured model
    type; it supplies the engine inputs other than the image (via
    ``get_bev_pool_input`` on the first sample) and the post-processing.
    Each sample is timed from just before the engine call until after the
    optional post-processing. The first ``num_warmup`` iterations are
    excluded from the accumulated time. With ``--eval`` the loop runs over
    the whole dataset and the collected results are passed to
    ``dataset.evaluate``.
    """
    load_tensorrt_plugin()

    args = parse_args()
    if args.eval:
        args.postprocessing = True
        print('Warnings: evaluation requirement detected, set '
              'postprocessing=True for evaluation purpose')
    # Coerce defensively so the comparison below also works if the value
    # arrives as a string from the command line.
    num_samples = int(args.samples)

    cfg = Config.fromfile(args.config)
    cfg.model.pretrained = None
    cfg.model.type = cfg.model.type + 'TRT'
    cfg = compat_cfg(cfg)
    cfg.gpu_ids = [0]

    if not args.prefetch:
        cfg.data.test_dataloader.workers_per_gpu = 0

    # import modules from plugin/xx, registry will be updated
    _import_plugin_modules(cfg, args.config)

    # build dataloader
    assert cfg.data.test.test_mode
    test_dataloader_default_args = dict(
        samples_per_gpu=1, workers_per_gpu=2, dist=False, shuffle=False)
    test_loader_cfg = {
        **test_dataloader_default_args,
        **cfg.data.get('test_dataloader', {})
    }
    dataset = build_dataset(cfg.data.test)
    data_loader = build_dataloader(dataset, **test_loader_cfg)

    # build the model
    cfg.model.train_cfg = None
    model = build_model(cfg.model, test_cfg=cfg.get('test_cfg'))

    # build tensorrt model
    with_det = bool(cfg.model.get('wdet3d', True))
    with_occ = bool(cfg.model.get('wocc', True))
    if with_det and with_occ:
        num_outputs = 1 + 6 * len(model.pts_bbox_head.task_heads)
    elif with_det:
        num_outputs = 6 * len(model.pts_bbox_head.task_heads)
    elif with_occ:
        num_outputs = 1
    else:
        # Raising a bare string is itself a TypeError; use a real exception.
        raise ValueError(
            'At least one of wdet3d and wocc must be set as True!!')
    trt_model = TRTWrapper(args.engine,
                           [f'output_{k}' for k in range(num_outputs)])

    num_warmup = 50
    pure_inf_time = 0

    init_ = True
    metas = dict()
    # benchmark with several samples and take the average
    results = list()
    model_name = model.__class__.__name__
    pool_keys = ('ranks_bev', 'ranks_depth', 'ranks_feat', 'interval_starts',
                 'interval_lengths')
    for i, data in enumerate(data_loader):
        if init_:
            # Extra engine inputs are computed once, from the first sample,
            # and reused for every following sample.
            inputs = [t.cuda() for t in data['img_inputs'][0]]
            pool_input = None
            mlp_input = None
            if model_name in ['FBOCCTRT', 'FBOCC2DTRT']:
                pool_input = model.get_bev_pool_input(
                    inputs, img_metas=data['img_metas'])
            elif model_name in ['BEVDetOCCTRT']:
                pool_input = model.get_bev_pool_input(inputs)
            elif model_name in ['BEVDepthOCCTRT']:
                pool_input, mlp_input = model.get_bev_pool_input(inputs)
            # Any other model class leaves `metas` empty.
            if pool_input is not None:
                metas = {
                    key: pool_input[k].int().contiguous()
                    for k, key in enumerate(pool_keys)
                }
                if model_name in ['BEVDepthOCCTRT']:
                    metas['mlp_input'] = mlp_input
            init_ = False

        img = data['img_inputs'][0][0].cuda().squeeze(0).contiguous()
        if img.shape[0] > 6:
            img = img[:6]

        torch.cuda.synchronize()
        start_time = time.perf_counter()
        trt_output = trt_model.forward(dict(img=img, **metas))

        # postprocessing
        if args.postprocessing:
            if with_det:
                trt_output_det = [
                    trt_output[f'output_{k}']
                    for k in range(6 * len(model.pts_bbox_head.task_heads))
                ]
                pred = model.result_deserialize(trt_output_det)
                img_metas = [dict(box_type_3d=LiDARInstance3DBoxes)]
                bbox_list = model.pts_bbox_head.get_bboxes(
                    pred, img_metas, rescale=True)
                bbox_results = [
                    bbox3d2result(bboxes, scores, labels)
                    for bboxes, scores, labels in bbox_list
                ]
            if with_occ:
                # occupancy
                # NOTE(review): 'output_6' is hard-coded, which presumably
                # assumes a single detection task head -- confirm.
                occ_key = 'output_6' if with_det else 'output_0'
                # List[(Dx, Dy, Dz), (Dx, Dy, Dz), ...]
                occ_preds = model.occ_head.get_occ(trt_output[occ_key])

            if args.eval:
                if with_det and not with_occ:
                    results.append(bbox_results[0])
                elif with_det and with_occ:
                    results.append({
                        'pts_bbox': bbox_results[0],
                        'pred_occ': occ_preds[0]
                    })
                elif with_occ:
                    results.append(occ_preds[0])

        torch.cuda.synchronize()
        elapsed = time.perf_counter() - start_time

        if i >= num_warmup:
            pure_inf_time += elapsed
            if (i + 1) % 50 == 0:
                fps = (i + 1 - num_warmup) / pure_inf_time
                print(f'Done image [{i + 1:<3}/ {args.samples}], '
                      f'fps: {fps:.2f} img / s')

        if (i + 1) == num_samples:
            if pure_inf_time <= 0:
                # Every executed sample fell inside the warm-up window.
                print(f'Only {i + 1} samples were run, which does not '
                      f'exceed the {num_warmup} warm-up iterations; '
                      'no fps to report')
            else:
                # `elapsed` of this iteration is already part of
                # pure_inf_time; adding it again would bias the fps.
                fps = (i + 1 - num_warmup) / pure_inf_time
                print(f'Overall \nfps: {fps:.2f} img / s '
                      f'\ninference time: {1000 / fps:.2f} ms')
            if not args.eval:
                return

    if not args.eval:
        # The dataset held fewer samples than requested; nothing to evaluate.
        return
    eval_kwargs = cfg.get('evaluation', {}).copy()
    # hard-code way to remove EvalHook args
    for key in [
            'interval', 'tmpdir', 'start', 'gpu_collect', 'save_best', 'rule'
    ]:
        eval_kwargs.pop(key, None)
    # NOTE(review): args.eval is a boolean flag here, so `metric` is passed
    # as True -- confirm that dataset.evaluate accepts this.
    eval_kwargs.update(dict(metric=args.eval))
    print(dataset.evaluate(results, **eval_kwargs))


if __name__ == '__main__':
    main()
docker-hub/FlashOCC/Flashocc/tools/analysis_tools/benchmark_view_transformer.py
0 → 100644
View file @
d2b71343
# Copyright (c) OpenMMLab. All rights reserved.
import
argparse
import
time
import
numpy
as
np
import
torch
from
mmcv
import
Config
from
mmcv.parallel
import
MMDataParallel
from
mmcv.runner
import
load_checkpoint
from
mmdet3d.datasets
import
build_dataloader
,
build_dataset
from
mmdet3d.models
import
build_detector
def parse_args():
    """Parse the command line of the view-transformer benchmark script.

    Returns:
        argparse.Namespace: ``config`` and ``checkpoint`` paths, the integer
        options ``samples`` and ``log_interval``, and the boolean flags
        ``mem_only`` and ``no_acceleration``.
    """
    parser = argparse.ArgumentParser(description='MMDet benchmark a model')
    parser.add_argument('config', help='test config file path')
    parser.add_argument('checkpoint', help='checkpoint file')
    # type=int is required: without it a value given on the command line is
    # a str, which breaks the `%` and `==` arithmetic done on it by main().
    parser.add_argument(
        '--samples', type=int, default=1000, help='samples to benchmark')
    parser.add_argument(
        '--log-interval', type=int, default=50, help='interval of logging')
    parser.add_argument(
        '--mem-only',
        action='store_true',
        help='Conduct the memory analysis only')
    parser.add_argument(
        '--no-acceleration',
        action='store_true',
        help='Omit the pre-computation acceleration')
    args = parser.parse_args()
    return args
def main():
    """Measure memory use and speed of the BEVDet view transformer.

    For every sample the image encoder and the depth net are run under
    ``torch.no_grad()`` to produce the inputs of the view transformer. On the
    first sample a memory analysis is printed (and the function returns
    ``None`` when ``--mem-only`` is given). Afterwards only
    ``view_transformer.view_transform`` is timed; the first ``num_warmup``
    iterations are excluded from the accumulated time.

    Returns:
        float | None: Overall frames per second once ``--samples`` samples
        have been processed; ``None`` if the run stops earlier.
    """
    args = parse_args()
    # Coerce defensively so the arithmetic below also works if the values
    # arrive as strings from the command line.
    num_samples = int(args.samples)
    log_interval = int(args.log_interval)

    cfg = Config.fromfile(args.config)
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    cfg.model.pretrained = None
    cfg.data.test.test_mode = True

    # build the dataloader
    # TODO: support multiple images per gpu (only minor changes are needed)
    dataset = build_dataset(cfg.data.test)
    data_loader = build_dataloader(
        dataset,
        samples_per_gpu=1,
        workers_per_gpu=cfg.data.workers_per_gpu,
        dist=False,
        shuffle=False)

    # build the model and load checkpoint
    if not args.no_acceleration:
        cfg.model.img_view_transformer.accelerate = True
    cfg.model.train_cfg = None
    assert cfg.model.type == 'BEVDet', \
        'Please use class BEVDet for ' \
        'view transformation inference ' \
        'speed estimation instead of %s' % cfg.model.type
    model = build_detector(cfg.model, test_cfg=cfg.get('test_cfg'))
    load_checkpoint(model, args.checkpoint, map_location='cpu')
    model = MMDataParallel(model, device_ids=[0])
    model.eval()

    # the first several iterations may be very slow so skip them
    num_warmup = 100
    pure_inf_time = 0
    view_transformer = model.module.img_view_transformer
    D = view_transformer.D
    out_channels = view_transformer.out_channels
    depth_net = view_transformer.depth_net

    # benchmark with several samples and take the average
    for i, data in enumerate(data_loader):
        with torch.no_grad():
            img_feat, _ = \
                model.module.image_encoder(data['img_inputs'][0][0].cuda())
            B, N, C, H, W = img_feat.shape
            x = depth_net(img_feat.reshape(B * N, C, H, W))
            # The first D channels are softmaxed into `depth`; the next
            # `out_channels` channels are passed on as `tran_feat`.
            depth_digit = x[:, :D, ...]
            tran_feat = x[:, D:D + out_channels, ...]
            depth = depth_digit.softmax(dim=1)
        vt_input = [img_feat] + [
            d.cuda() for d in data['img_inputs'][0][1:]
        ]

        if i == 0:
            precomputed_memory_allocated = 0.0
            if view_transformer.accelerate:
                start_mem_allocated = torch.cuda.memory_allocated()
                view_transformer.pre_compute(vt_input)
                end_mem_allocated = torch.cuda.memory_allocated()
                precomputed_memory_allocated = \
                    end_mem_allocated - start_mem_allocated
                ref_max_mem_allocated = torch.cuda.max_memory_allocated()
                # occupy the memory
                # (the name is kept on purpose: dropping the reference would
                # release the buffer again)
                size = (ref_max_mem_allocated - end_mem_allocated) // 4
                occupy_tensor = torch.zeros(
                    size=(size, ), device='cuda', dtype=torch.float32)
            print('Memory analysis: \n'
                  'precomputed_memory_allocated : %d B / %.01f MB \n' %
                  (precomputed_memory_allocated,
                   precomputed_memory_allocated / 1024 / 1024))
            start_mem_allocated = torch.cuda.memory_allocated()
            bev_feat = view_transformer.view_transform_core(
                vt_input, depth, tran_feat)[0]
            end_max_mem_allocated = torch.cuda.max_memory_allocated()
            peak_memory_allocated = \
                end_max_mem_allocated - start_mem_allocated
            total_memory_requirement = \
                precomputed_memory_allocated + peak_memory_allocated
            print('Memory analysis: \n'
                  'Memory requirement : %d B / %.01f MB \n' %
                  (total_memory_requirement,
                   total_memory_requirement / 1024 / 1024))
            if args.mem_only:
                return

        torch.cuda.synchronize()
        start_time = time.perf_counter()
        with torch.no_grad():
            view_transformer.view_transform(vt_input, depth, tran_feat)[0]
        torch.cuda.synchronize()
        elapsed = time.perf_counter() - start_time

        if i >= num_warmup:
            pure_inf_time += elapsed
            if (i + 1) % log_interval == 0:
                fps = (i + 1 - num_warmup) / pure_inf_time
                print(f'Done image [{i + 1:<3}/ {args.samples}], '
                      f'fps: {fps:.1f} img / s')

        if (i + 1) == num_samples:
            if pure_inf_time <= 0:
                # Every executed sample fell inside the warm-up window.
                print(f'Only {i + 1} samples were run, which does not '
                      f'exceed the {num_warmup} warm-up iterations; '
                      'no fps to report')
                return None
            # `elapsed` of this iteration is already part of pure_inf_time;
            # adding it a second time would bias the reported fps downwards.
            fps = (i + 1 - num_warmup) / pure_inf_time
            print(f'Overall fps: {fps:.1f} img / s')
            return fps
if __name__ == '__main__':
    # Run the benchmark `repeat_times` times, pausing five seconds after
    # each run, then report mean and standard deviation of the fps values.
    repeat_times = 1
    collected = []
    for _ in range(repeat_times):
        run_fps = main()
        time.sleep(5)
        collected.append(run_fps)
    fps_list = np.array(collected, dtype=np.float32)
    fps_mean = fps_list.mean()
    fps_std = np.sqrt(fps_list.var())
    print(f'Mean Overall fps: {fps_mean:.4f} +'
          f' {fps_std:.4f} img / s')
docker-hub/FlashOCC/Flashocc/tools/analysis_tools/get_flops.py
0 → 100644
View file @
d2b71343
# Copyright (c) OpenMMLab. All rights reserved.
import
argparse
import
os
import
sys
sys
.
path
.
insert
(
0
,
os
.
getcwd
())
import
torch
from
mmcv
import
Config
,
DictAction
from
mmdet3d.models
import
build_model
try
:
from
mmcv.cnn
import
get_model_complexity_info
except
ImportError
:
raise
ImportError
(
'Please upgrade mmcv to >0.6.2'
)
def parse_args():
    """Build the argument parser of the FLOPs counter and parse ``sys.argv``.

    Returns:
        argparse.Namespace: ``config`` (path), ``shape`` (list of ints),
        ``modality`` (one of 'point', 'image', 'multi') and ``cfg_options``
        (config overrides collected by ``DictAction``, or ``None``).
    """
    cfg_options_help = (
        'override some settings in the used config, the key-value pair '
        'in xxx=yyy format will be merged into config file. If the value to '
        'be overwritten is a list, it should be like key="[a,b]" or key=a,b '
        'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" '
        'Note that the quotation marks are necessary and that no white space '
        'is allowed.')

    arg_parser = argparse.ArgumentParser(description='Train a detector')
    arg_parser.add_argument('config', help='train config file path')
    arg_parser.add_argument(
        '--shape',
        type=int,
        nargs='+',
        default=[40000, 4],
        help='input point cloud size')
    arg_parser.add_argument(
        '--modality',
        type=str,
        default='point',
        choices=['point', 'image', 'multi'],
        help='input data modality')
    arg_parser.add_argument(
        '--cfg-options',
        nargs='+',
        action=DictAction,
        help=cfg_options_help)
    return arg_parser.parse_args()
def construct_input(input_shape):
    """Build a dummy ``img_inputs`` batch for the FLOPs counter.

    The tensors are created on CUDA when it is available and on the CPU
    otherwise, so the function no longer fails on hosts without a GPU.

    Args:
        input_shape (tuple[int]): Trailing dimensions of the first tensor;
            its full shape is ``(1, 6, *input_shape)``.

    Returns:
        dict: ``{'img_inputs': [...]}`` holding seven tensors, in order: an
        uninitialized tensor of shape ``(1, 6, *input_shape)``, two
        references to the same ``(1, 6, 4, 4)`` identity stack, two
        references to the same ``(1, 6, 3, 3)`` identity stack, ones of shape
        ``(1, 6, 3)`` and a ``(1, 3, 3)`` identity.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    rot = torch.eye(4).float().to(device).view(1, 1, 4, 4).expand(1, 6, 4, 4)
    intrins = torch.eye(3).float().to(device).view(1, 1, 3,
                                                   3).expand(1, 6, 3, 3)
    # Only the shape of this tensor matters; its content stays
    # uninitialized.
    imgs = torch.ones(()).new_empty((1, 6, *input_shape)).to(device)
    dummy = dict(img_inputs=[
        imgs,
        rot,
        rot,
        intrins,
        intrins,
        torch.ones((1, 6, 3)).to(device),
        torch.eye(3).float().to(device).view(1, 3, 3),
    ])
    return dummy
def main():
    """Print the FLOPs and parameter count of the model described by a config.

    The input shape is derived from ``--modality`` and ``--shape``; the
    'multi' modality is rejected. The model is built from the config, moved
    to CUDA when available, and its ``forward`` is replaced by
    ``forward_dummy`` before ``get_model_complexity_info`` is called with
    ``construct_input`` as input constructor.
    """
    args = parse_args()

    modality = args.modality
    shape_len = len(args.shape)
    if modality == 'point':
        assert shape_len == 2, 'invalid input shape'
        input_shape = tuple(args.shape)
    elif modality == 'image':
        if shape_len == 1:
            side = args.shape[0]
            input_shape = (3, side, side)
        elif shape_len == 2:
            input_shape = (3, ) + tuple(args.shape)
        else:
            raise ValueError('invalid input shape')
    elif modality == 'multi':
        raise NotImplementedError(
            'FLOPs counter is currently not supported for models with '
            'multi-modality input')

    cfg = Config.fromfile(args.config)

    if args.cfg_options is not None:
        cfg.merge_from_dict(args.cfg_options)

    # import modules from plugin/xx, registry will be updated
    if hasattr(cfg, 'plugin') and cfg.plugin:
        import importlib
        if hasattr(cfg, 'plugin_dir'):
            package_dir = os.path.dirname(cfg.plugin_dir)
            package_name = '.'.join(package_dir.split('/'))
            print(package_name)
        else:
            # import dir is the dirpath for the config file
            package_dir = os.path.dirname(args.config)
            package_name = '.'.join(package_dir.split('/'))
        importlib.import_module(package_name)

    model = build_model(
        cfg.model,
        train_cfg=cfg.get('train_cfg'),
        test_cfg=cfg.get('test_cfg'))
    if torch.cuda.is_available():
        model.cuda()
    model.eval()

    # The counter drives `forward`, so the model must offer a dummy forward.
    if not hasattr(model, 'forward_dummy'):
        raise NotImplementedError(
            'FLOPs counter is currently not supported for {}'.format(
                model.__class__.__name__))
    model.forward = model.forward_dummy

    flops, params = get_model_complexity_info(
        model, input_shape, input_constructor=construct_input)
    split_line = '=' * 30
    report = (f'{split_line}\nInput shape: {input_shape}\n'
              f'Flops: {flops}\nParams: {params}\n{split_line}')
    print(report)
    print('!!!Please be cautious if you use the results in papers. '
          'You may need to check if all ops are supported and verify that the '
          'flops computation is correct.')


if __name__ == '__main__':
    main()
docker-hub/FlashOCC/Flashocc/tools/analysis_tools/vis.py
0 → 100644
View file @
d2b71343
# Copyright (c) Phigent Robotics. All rights reserved.
import
argparse
import
json
import
os
import
pickle
import
cv2
import
numpy
as
np
from
pyquaternion.quaternion
import
Quaternion
from
mmdet3d.core.bbox.structures.lidar_box3d
import
LiDARInstance3DBoxes
as
LB
def check_point_in_img(points, height, width):
    """Flag the points that lie inside an image of the given size.

    Args:
        points: Array indexed as ``points[:, 0]`` (compared against
            ``width``) and ``points[:, 1]`` (compared against ``height``).
        height: Exclusive upper bound for column 1.
        width: Exclusive upper bound for column 0.

    Returns:
        Boolean mask, True where ``0 <= points[:, 0] < width`` and
        ``0 <= points[:, 1] < height``.
    """
    xs = points[:, 0]
    ys = points[:, 1]
    above_origin = np.logical_and(xs >= 0, ys >= 0)
    below_limits = np.logical_and(xs < width, ys < height)
    return np.logical_and(above_origin, below_limits)
def depth2color(depth):
    """Map a scalar depth value onto a six-colour ramp.

    ``depth`` is first normalised to ``gray = (depth + 2.5) / 3.0`` and
    clamped to [0, 1]; the result is a linear interpolation between the two
    neighbouring entries of the colour table.

    Args:
        depth (float): Value to colourise; -2.5 and below map to the first
            table entry, 0.5 and above to the last one.

    Returns:
        tuple[float, float, float]: The interpolated colour, each channel in
        [0, 200].
    """
    gray = max(0, min((depth + 2.5) / 3.0, 1.0))
    max_lumi = 200
    colors = np.array(
        [[max_lumi, 0, max_lumi], [max_lumi, 0, 0], [max_lumi, max_lumi, 0],
         [0, max_lumi, 0], [0, max_lumi, max_lumi], [0, 0, max_lumi]],
        dtype=np.float32)
    if gray == 1:
        # The upper end has no "next" colour to interpolate towards.
        return tuple(colors[-1].tolist())
    num_rank = len(colors) - 1
    # `np.int` was only an alias of the builtin `int` and no longer exists
    # in current NumPy releases; the builtin behaves identically here.
    rank = np.floor(gray * num_rank).astype(int)
    diff = (gray - rank / num_rank) * num_rank
    return tuple(
        (colors[rank] + (colors[rank + 1] - colors[rank]) * diff).tolist())
def lidar2img(points_lidar, camrera_info):
    """Project points from the lidar frame into image coordinates.

    Args:
        points_lidar: Array of shape (N, 3) with points in the lidar frame.
        camrera_info (dict): Must provide 'sensor2lidar_rotation',
            'sensor2lidar_translation' and 'cam_intrinsic'.

    Returns:
        tuple: ``(points_img, valid)`` where ``points_img`` has shape (N, 2)
        and ``valid`` is a boolean mask that is True for points whose third
        camera-frame coordinate is greater than 0.5.
    """
    # Append a column of ones to obtain homogeneous coordinates.
    ones_column = np.ones((points_lidar.shape[0], 1),
                          dtype=points_lidar.dtype)
    points_h = np.concatenate([points_lidar, ones_column], axis=1)

    # Assemble the 4x4 camera-to-lidar transform and invert it.
    cam_to_lidar = np.eye(4, dtype=np.float32)
    cam_to_lidar[:3, :3] = camrera_info['sensor2lidar_rotation']
    cam_to_lidar[:3, 3] = camrera_info['sensor2lidar_translation']
    lidar_to_cam = np.linalg.inv(cam_to_lidar)

    points_camera = (points_h @ lidar_to_cam.T)[:, :3]
    valid = points_camera[:, -1] > 0.5

    # Divide by the third coordinate, then apply the intrinsic matrix.
    normalized = points_camera / points_camera[:, 2:3]
    projected = normalized @ camrera_info['cam_intrinsic'].T
    return projected[:, :2], valid
def get_lidar2global(infos):
    """Compose the 4x4 lidar->global transform of one sample.

    Args:
        infos: mapping with ``'lidar2ego_rotation'`` and
            ``'ego2global_rotation'`` (passed to ``Quaternion``) plus
            ``'lidar2ego_translation'`` and ``'ego2global_translation'``.

    Returns:
        ``(4, 4)`` float32 matrix equal to ``ego2global @ lidar2ego``.
    """

    def _rigid_transform(rotation, translation):
        # Homogeneous matrix from a quaternion rotation and a translation.
        matrix = np.eye(4, dtype=np.float32)
        matrix[:3, :3] = Quaternion(rotation).rotation_matrix
        matrix[:3, 3] = translation
        return matrix

    lidar_to_ego = _rigid_transform(infos['lidar2ego_rotation'],
                                    infos['lidar2ego_translation'])
    ego_to_global = _rigid_transform(infos['ego2global_rotation'],
                                     infos['ego2global_translation'])
    return ego_to_global @ lidar_to_ego
def parse_args():
    """Parse the command line of the detection visualisation script.

    Returns:
        argparse.Namespace: the positional ``res`` path plus the optional
        settings declared in the table below.
    """
    cli = argparse.ArgumentParser(
        description='Visualize the predicted '
        'result of nuScenes')

    # (flag, add_argument keyword arguments), registered in this order.
    argument_table = [
        ('res', dict(help='Path to the predicted result in json format')),
        ('--show-range',
         dict(type=int, default=50, help='Range of visualization in BEV')),
        ('--canva-size',
         dict(type=int, default=1000, help='Size of canva in pixel')),
        ('--vis-frames',
         dict(
             type=int,
             default=500,
             help='Number of frames for visualization')),
        ('--scale-factor',
         dict(
             type=int,
             default=4,
             help='Trade-off between image-view and bev in size of '
             'the visualized canvas')),
        ('--vis-thred',
         dict(
             type=float,
             default=0.3,
             help='Threshold the predicted results')),
        ('--draw-gt', dict(action='store_true')),
        ('--version',
         dict(type=str, default='val', help='Version of nuScenes dataset')),
        ('--root_path',
         dict(
             type=str,
             default='./data/nuscenes',
             help='Path to nuScenes dataset')),
        ('--save_path',
         dict(
             type=str,
             default='./vis',
             help='Path to save visualization results')),
        ('--format',
         dict(
             type=str,
             default='video',
             choices=['video', 'image'],
             help='The desired format of the visualization result')),
        ('--fps', dict(type=int, default=20, help='Frame rate of video')),
        ('--video-prefix',
         dict(type=str, default='vis', help='name of video')),
    ]
    for flag, options in argument_table:
        cli.add_argument(flag, **options)
    return cli.parse_args()
# Line colours handed to cv2.line in main(), keyed by int(pred_flag):
# 0 -> ground-truth box, 1 -> predicted box.
color_map = {0: (255, 255, 0), 1: (0, 255, 255)}
def main():
    """Render predicted (and optionally ground-truth) boxes for each frame.

    For every sample the six camera images are annotated with projected 3D
    boxes, a bird's-eye-view canvas is drawn with the lidar points and the
    box footprints, and both are fused into one picture that is either
    written as ``<token>.jpg`` or appended to an MP4 video, depending on
    ``--format``.
    """
    args = parse_args()
    # load predicted results
    with open(args.res, 'r') as res_file:
        res = json.load(res_file)
    # load dataset information
    info_path = \
        args.root_path + '/bevdetv2-nuscenes_infos_%s.pkl' % args.version
    with open(info_path, 'rb') as info_file:
        dataset = pickle.load(info_file)
    # prepare save path and medium
    vis_dir = args.save_path
    if not os.path.exists(vis_dir):
        os.makedirs(vis_dir)
    print('saving visualized result to %s' % vis_dir)
    scale_factor = args.scale_factor
    canva_size = args.canva_size
    show_range = args.show_range
    if args.format == 'video':
        fourcc = cv2.VideoWriter_fourcc(*'MP4V')
        vout = cv2.VideoWriter(
            os.path.join(vis_dir, '%s.mp4' % args.video_prefix), fourcc,
            args.fps, (int(1600 / scale_factor * 3),
                       int(900 / scale_factor * 2 + canva_size)))

    # Corner-index pairs forming the box outline in BEV / image view.
    draw_boxes_indexes_bev = [(0, 1), (1, 2), (2, 3), (3, 0)]
    draw_boxes_indexes_img_view = [(0, 1), (1, 2), (2, 3), (3, 0), (4, 5),
                                   (5, 6), (6, 7), (7, 4), (0, 4), (1, 5),
                                   (2, 6), (3, 7)]
    views = [
        'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT', 'CAM_BACK_LEFT',
        'CAM_BACK', 'CAM_BACK_RIGHT'
    ]
    print('start visualizing results')
    num_frames = min(args.vis_frames, len(dataset['infos']))
    for cnt, infos in enumerate(dataset['infos'][:num_frames]):
        if cnt % 10 == 0:
            print('%d/%d' % (cnt, num_frames))
        # collect instances
        pred_res = res['results'][infos['token']]
        # One row per detection: translation + size + [yaw + pi/2].
        pred_boxes = [
            det['translation'] + det['size'] +
            [Quaternion(det['rotation']).yaw_pitch_roll[0] + np.pi / 2]
            for det in pred_res
        ]
        if len(pred_boxes) == 0:
            corners_lidar = np.zeros((0, 3), dtype=np.float32)
        else:
            pred_boxes = np.array(pred_boxes, dtype=np.float32)
            boxes = LB(pred_boxes, origin=(0.5, 0.5, 0.0))
            corners_global = boxes.corners.numpy().reshape(-1, 3)
            corners_global = np.concatenate(
                [corners_global,
                 np.ones([corners_global.shape[0], 1])],
                axis=1)
            # Predictions are in the global frame; bring them to lidar.
            l2g = get_lidar2global(infos)
            corners_lidar = corners_global @ np.linalg.inv(l2g).T
            corners_lidar = corners_lidar[:, :3]
        # ``np.bool`` / ``np.int`` were aliases of the builtins and have
        # been removed from NumPy, so the builtins are used directly.
        pred_flag = np.ones((corners_lidar.shape[0] // 8, ), dtype=bool)
        scores = [det['detection_score'] for det in pred_res]
        if args.draw_gt:
            gt_boxes = infos['gt_boxes']
            # Edited in place: yaw shifted by pi/2 and columns 3/4 swapped
            # before the corners are computed.
            gt_boxes[:, -1] = gt_boxes[:, -1] + np.pi / 2
            width = gt_boxes[:, 4].copy()
            gt_boxes[:, 4] = gt_boxes[:, 3]
            gt_boxes[:, 3] = width
            corners_lidar_gt = \
                LB(infos['gt_boxes'],
                   origin=(0.5, 0.5, 0.5)).corners.numpy().reshape(-1, 3)
            corners_lidar = np.concatenate([corners_lidar, corners_lidar_gt],
                                           axis=0)
            gt_flag = np.ones((corners_lidar_gt.shape[0] // 8), dtype=bool)
            pred_flag = np.concatenate(
                [pred_flag, np.logical_not(gt_flag)], axis=0)
            scores = scores + [0 for _ in range(infos['gt_boxes'].shape[0])]
        scores = np.array(scores, dtype=np.float32)
        # Ascending order, so higher-scoring boxes are drawn last.
        sort_ids = np.argsort(scores)

        # image view
        imgs = []
        for view in views:
            img = cv2.imread(infos['cams'][view]['data_path'])
            # draw instances
            corners_img, valid = lidar2img(corners_lidar, infos['cams'][view])
            valid = np.logical_and(
                valid,
                check_point_in_img(corners_img, img.shape[0], img.shape[1]))
            valid = valid.reshape(-1, 8)
            corners_img = corners_img.reshape(-1, 8, 2).astype(int)
            for aid in range(valid.shape[0]):
                for index in draw_boxes_indexes_img_view:
                    # Draw an edge only when both end points are visible.
                    if valid[aid, index[0]] and valid[aid, index[1]]:
                        cv2.line(
                            img,
                            tuple(corners_img[aid, index[0]]),
                            tuple(corners_img[aid, index[1]]),
                            color=color_map[int(pred_flag[aid])],
                            thickness=scale_factor)
            imgs.append(img)

        # bird-eye-view
        canvas = np.zeros((int(canva_size), int(canva_size), 3),
                          dtype=np.uint8)
        # draw lidar points
        lidar_points = np.fromfile(infos['lidar_path'], dtype=np.float32)
        lidar_points = lidar_points.reshape(-1, 5)[:, :3]
        lidar_points[:, 1] = -lidar_points[:, 1]
        lidar_points[:, :2] = \
            (lidar_points[:, :2] + show_range) / show_range / 2.0 * canva_size
        # Test all points against the canvas bounds in one vectorised call.
        in_canvas = check_point_in_img(lidar_points, canvas.shape[1],
                                       canvas.shape[0])
        for p in lidar_points[in_canvas]:
            color = depth2color(p[2])
            cv2.circle(
                canvas, (int(p[0]), int(p[1])),
                radius=0,
                color=color,
                thickness=1)

        # draw instances
        corners_lidar = corners_lidar.reshape(-1, 8, 3)
        corners_lidar[:, :, 1] = -corners_lidar[:, :, 1]
        bottom_corners_bev = corners_lidar[:, [0, 3, 7, 4], :2]
        bottom_corners_bev = \
            (bottom_corners_bev + show_range) / show_range / 2.0 * canva_size
        bottom_corners_bev = np.round(bottom_corners_bev).astype(np.int32)
        center_bev = corners_lidar[:, [0, 3, 7, 4], :2].mean(axis=1)
        head_bev = corners_lidar[:, [0, 4], :2].mean(axis=1)
        center_canvas = \
            (center_bev + show_range) / show_range / 2.0 * canva_size
        center_canvas = center_canvas.astype(np.int32)
        head_canvas = (head_bev + show_range) / show_range / 2.0 * canva_size
        head_canvas = head_canvas.astype(np.int32)

        for rid in sort_ids:
            score = scores[rid]
            # Only predictions are thresholded; GT boxes are always drawn.
            if score < args.vis_thred and pred_flag[rid]:
                continue
            score = min(score * 2.0, 1.0) if pred_flag[rid] else 1.0
            color = color_map[int(pred_flag[rid])]
            line_color = [color[0] * score, color[1] * score, color[2] * score]
            for index in draw_boxes_indexes_bev:
                cv2.line(
                    canvas,
                    bottom_corners_bev[rid, index[0]],
                    bottom_corners_bev[rid, index[1]],
                    line_color,
                    thickness=1)
            # Segment from the box centre to the midpoint of corners 0 and 4.
            cv2.line(
                canvas,
                center_canvas[rid],
                head_canvas[rid],
                line_color,
                1,
                lineType=8)

        # fuse image-view and bev
        img = np.zeros((900 * 2 + canva_size * scale_factor, 1600 * 3, 3),
                       dtype=np.uint8)
        img[:900, :, :] = np.concatenate(imgs[:3], axis=1)
        # Back cameras are mirrored horizontally before being tiled.
        img_back = np.concatenate(
            [imgs[3][:, ::-1, :], imgs[4][:, ::-1, :], imgs[5][:, ::-1, :]],
            axis=1)
        img[900 + canva_size * scale_factor:, :, :] = img_back
        img = cv2.resize(img, (int(1600 / scale_factor * 3),
                               int(900 / scale_factor * 2 + canva_size)))
        w_begin = int((1600 * 3 / scale_factor - canva_size) // 2)
        img[int(900 / scale_factor):int(900 / scale_factor) + canva_size,
            w_begin:w_begin + canva_size, :] = canvas

        if args.format == 'image':
            cv2.imwrite(os.path.join(vis_dir, '%s.jpg' % infos['token']), img)
        elif args.format == 'video':
            vout.write(img)
    if args.format == 'video':
        vout.release()
if
__name__
==
'__main__'
:
main
()
docker-hub/FlashOCC/Flashocc/tools/analysis_tools/vis_occ.py
0 → 100644
View file @
d2b71343
import
os
import
mmcv
import
open3d
as
o3d
import
numpy
as
np
import
torch
import
pickle
import
math
from
typing
import
Tuple
,
List
,
Dict
,
Iterable
import
argparse
import
cv2
# Occupancy-state codes. NOTE(review): NOT_OBSERVED, FREE, OCCUPIED and the
# BINARY_* values are not referenced in the visible part of this file.
NOT_OBSERVED = -1
FREE = 0
OCCUPIED = 1
# Class id treated as "free space"; main() hides voxels carrying this label.
FREE_LABEL = 17
BINARY_OBSERVED = 1
BINARY_NOT_OBSERVED = 0

# Per-axis voxel edge length; main() passes this list to show_occ().
VOXEL_SIZE = [0.4, 0.4, 0.4]
# voxel2points() adds entries 0..2 as the per-axis origin offset.
# Presumably laid out as [x_min, y_min, z_min, x_max, y_max, z_max] -- confirm.
POINT_CLOUD_RANGE = [-40, -40, -1, 40, 40, 5.4]
# NOTE(review): the three constants below are not referenced in the visible
# part of this file.
SPTIAL_SHAPE = [200, 200, 16]
TGT_VOXEL_SIZE = [0.4, 0.4, 0.4]
TGT_POINT_CLOUD_RANGE = [-40, -40, -1, 40, 40, 5.4]
# Colour table indexed by semantic class id (row index); each row has four
# 0-255 channels. show_occ() divides the table by 255 and show_point_cloud()
# forwards only the first three channels to Open3D.
colormap_to_colors = np.array(
    [
        [0,   0,   0, 255],  # 0 undefined
        [112, 128, 144, 255],  # 1 barrier
        [220, 20, 60, 255],  # 2 bicycle
        [255, 127, 80, 255],  # 3 bus
        [255, 158, 0, 255],  # 4 car
        [233, 150, 70, 255],  # 5 construction vehicle
        [255, 61, 99, 255],  # 6 motorcycle
        [0, 0, 230, 255],  # 7 pedestrian
        [47, 79, 79, 255],  # 8 traffic cone
        [255, 140, 0, 255],  # 9 trailer
        [255, 99, 71, 255],  # 10 truck
        [0, 207, 191, 255],  # 11 driveable surface
        [175, 0, 75, 255],  # 12 other flat
        [75, 0, 75, 255],  # 13 sidewalk
        [112, 180, 60, 255],  # 14 terrain
        [222, 184, 135, 255],  # 15 manmade
        [0, 175, 0, 255],  # 16 vegetation
    ], dtype=np.float32)
def voxel2points(voxel, occ_show, voxelSize):
    """Convert the selected cells of a voxel grid into metric coordinates.

    Args:
        voxel: (Dx, Dy, Dz) tensor of class ids.
        occ_show: (Dx, Dy, Dz) mask selecting the cells to export.
        voxelSize: (dx, dy, dz) edge length per axis.

    Returns:
        points: (N, 3) tensor, ``index * voxelSize + POINT_CLOUD_RANGE[axis]``
            for each of the three axes.
        labels: (N,) class id of every selected cell.
        occIdx: tuple (x_idx, y_idx, z_idx) as returned by ``torch.where``.
    """
    occ_idx = torch.where(occ_show)
    # One (N, 1) column per axis: grid index scaled to metres plus the
    # lower bound of the range on that axis.
    axis_columns = [
        occ_idx[axis][:, None] * voxelSize[axis] + POINT_CLOUD_RANGE[axis]
        for axis in range(3)
    ]
    points = torch.cat(axis_columns, dim=1)  # (N, 3) 3: (x, y, z)
    return points, voxel[occ_idx], occ_idx
def voxel_profile(voxel, voxel_size):
    """Describe every voxel as a 7-value axis-aligned box.

    Args:
        voxel: (N, 3) tensor of (x, y, z) positions.
        voxel_size: (vx, vy, vz) edge lengths.

    Returns:
        (N, 7) tensor laid out as (x, y, z - vz/2, vx, vy, vz, 0); the
        trailing zero is the yaw.
    """
    num_voxels = voxel.shape[0]

    # Keep x and y, shift z down by half a voxel height.
    lowered_z = voxel[:, 2][:, None] - voxel_size[2] / 2
    centers = torch.cat((voxel[:, :2], lowered_z), dim=1)

    # Broadcast each edge length into its own (N, 1) column.
    extents = torch.cat(
        [
            torch.tensor(voxel_size[axis]).repeat(num_voxels)[:, None]
            for axis in range(3)
        ],
        dim=1)

    yaw = torch.zeros_like(centers[:, 0:1])
    return torch.cat((centers, extents, yaw), dim=1)
def rotz(t):
    """Return the 3x3 matrix of a rotation by angle ``t`` about the z-axis.

    Args:
        t: scalar tensor holding the angle passed to ``torch.cos`` /
            ``torch.sin``.
    """
    cos_t, sin_t = torch.cos(t), torch.sin(t)
    rows = [
        [cos_t, -sin_t, 0],
        [sin_t, cos_t, 0],
        [0, 0, 1],
    ]
    return torch.tensor(rows)
def my_compute_box_3d(center, size, heading_angle):
    """Compute the eight corners of axis-aligned boxes.

    Args:
        center: (N, 3) tensor (x, y, z_bottom); the z value is raised by
            half the box height to obtain the geometric centre. The tensor
            is NOT modified.
        size: (N, 3) tensor (vx, vy, vz).
        heading_angle: (N, 1) tensor; accepted for interface compatibility
            but not used, the boxes are never rotated.

    Returns:
        corners_3d: (N, 8, 3) tensor. Corners 0-3 lie on the upper face and
        corners 4-7 on the lower face.
    """
    # Half extents as (N, 1) columns. The x offsets use size[:, 1] and the
    # y offsets use size[:, 0], exactly as the box convention of this file.
    half_l = (size[:, 1] / 2).unsqueeze(1)
    half_w = (size[:, 0] / 2).unsqueeze(1)
    half_h = (size[:, 2] / 2).unsqueeze(1)

    x_corners = torch.cat(
        [-half_l, half_l, half_l, -half_l, -half_l, half_l, half_l, -half_l],
        dim=1)[..., None]
    y_corners = torch.cat(
        [half_w, half_w, -half_w, -half_w, half_w, half_w, -half_w, -half_w],
        dim=1)[..., None]
    z_corners = torch.cat(
        [half_h, half_h, half_h, half_h, -half_h, -half_h, -half_h, -half_h],
        dim=1)[..., None]
    corners_3d = torch.cat([x_corners, y_corners, z_corners], dim=2)

    corners_3d[..., 0] += center[:, 0:1]
    corners_3d[..., 1] += center[:, 1:2]
    # Lift z by half the height here instead of writing the shifted value
    # back into ``center``, which would silently alter the caller's tensor.
    corners_3d[..., 2] += center[:, 2:3] + half_h
    return corners_3d
def show_point_cloud(points: np.ndarray, colors=True, points_colors=None, bbox3d=None, voxelize=False,
                     bbox_corners=None, linesets=None, vis=None, offset=[0, 0, 0], large_voxel=True, voxel_size=0.4):
    """Add a coloured point cloud (or its voxel grid) to an Open3D visualizer.

    :param points: (N, 3) 3: (x, y, z)
    :param colors: when False the points are added without per-point colours.
    :param points_colors: (N, 4); only the first three channels are used.
        Required when ``colors`` is True.
    :param bbox3d: voxel grid boxes (N, 7) 7: (center, wlh, yaw=0); accepted
        but not used by this function.
    :param voxelize: when True the voxel wireframes are drawn, which needs
        ``bbox_corners`` and ``linesets``.
    :param bbox_corners: (N, 8, 3) voxel corner coordinates used to draw the
        voxel wireframes.
    :param linesets: corner-index pairs (reshaped to (-1, 2)) describing the
        wireframe edges.
    :param vis: existing visualizer; a new window is created when None.
    :param offset: translation added to the points and wireframe corners.
    :param large_voxel: add a VoxelGrid built from the points instead of
        the raw point cloud.
    :param voxel_size: cell size of that VoxelGrid.
    :return: the visualizer that received the geometries.
    """
    if vis is None:
        vis = o3d.visualization.VisualizerWithKeyCallback()
        vis.create_window()
    if isinstance(offset, (list, tuple)):
        offset = np.array(offset)

    pcd = o3d.geometry.PointCloud()
    pcd.points = o3d.utility.Vector3dVector(points + offset)
    if colors:
        pcd.colors = o3d.utility.Vector3dVector(points_colors[:, :3])
    mesh_frame = o3d.geometry.TriangleMesh.create_coordinate_frame(
        size=1, origin=[0, 0, 0])

    if large_voxel:
        # Build the grid only when it is actually displayed.
        voxel_grid = o3d.geometry.VoxelGrid.create_from_point_cloud(
            pcd, voxel_size=voxel_size)
        vis.add_geometry(voxel_grid)
    else:
        vis.add_geometry(pcd)

    if voxelize:
        line_sets = o3d.geometry.LineSet()
        # Use ``o3d.utility`` as above; the ``o3d.open3d`` submodule path is
        # not available in current Open3D releases.
        line_sets.points = o3d.utility.Vector3dVector(
            bbox_corners.reshape((-1, 3)) + offset)
        line_sets.lines = o3d.utility.Vector2iVector(
            linesets.reshape((-1, 2)))
        line_sets.paint_uniform_color((0, 0, 0))
        vis.add_geometry(line_sets)

    vis.add_geometry(mesh_frame)

    # NOTE: drawing the ego vehicle from generate_the_ego_car() is disabled.
    return vis
def show_occ(occ_state, occ_show, voxel_size, vis=None, offset=[0, 0, 0]):
    """Draw the selected occupancy voxels, with wireframes, in Open3D.

    Args:
        occ_state: (Dx, Dy, Dz), cls_id
        occ_show: (Dx, Dy, Dz), bool
        voxel_size: [0.4, 0.4, 0.4]
        vis: Visualizer
        offset: translation forwarded to ``show_point_cloud``.

    Returns:
        The visualizer returned by ``show_point_cloud``.
    """
    palette = colormap_to_colors / 255
    # points: (N, 3) 3: (x, y, z); labels: (N, ) cls_id
    points, labels, _ = voxel2points(occ_state, occ_show, voxel_size)
    # Wrap the class ids so they always index a valid palette row.
    point_colors = palette[labels % len(palette)]  # (N, 4)

    # (N, 7) 7: (x, y, z - dz/2, dx, dy, dz, 0)
    boxes = voxel_profile(points, voxel_size)
    # (N, 8, 3)
    box_corners = my_compute_box_3d(boxes[:, 0:3], boxes[:, 3:6],
                                    boxes[:, 6:7])
    num_boxes = box_corners.shape[0]

    # The 12 edges of one box as pairs of local corner indices.
    local_edges = torch.tensor([[0, 1], [1, 2], [2, 3], [3, 0], [4, 5],
                                [5, 6], [6, 7], [7, 4], [0, 4], [1, 5],
                                [2, 6], [3, 7]])
    # Index of the first corner of each box in the flattened corner list.
    first_corner_ids = torch.arange(0, num_boxes * 8, 8)
    # (1, 12, 2) + (N, 1, 1) --> (N, 12, 2): edges now hold global corner
    # ids in the range [0, N * 8).
    edges = local_edges[None] + first_corner_ids[:, None, None]

    # NOTE: the displayed grid cell size is fixed at 0.4 here rather than
    # being derived from ``voxel_size``.
    return show_point_cloud(
        points=points.numpy(),
        colors=True,
        points_colors=point_colors,
        voxelize=True,
        bbox3d=boxes.numpy(),
        bbox_corners=box_corners.numpy(),
        linesets=edges.numpy(),
        vis=vis,
        offset=offset,
        large_voxel=True,
        voxel_size=0.4)
def generate_the_ego_car():
    """Generate a dense grid of points standing in for the ego vehicle.

    The grid is built from a fixed range ``[-2, -1, 0, 2, 1, 1.5]`` and a
    cell size of 0.1 per axis.

    NOTE(review): ``np.meshgrid`` receives the y index range first, yet
    column 0 of the grid is scaled with the x dimension/extent (and column
    1 with the y ones), and the columns are swapped again on output. The
    resulting point block is therefore not centred on the origin. This is
    reproduced unchanged; confirm the intended layout before relying on it.

    Returns:
        (N, 3) float array of point coordinates.
    """
    lower = (-2, -1, 0)
    upper = (2, 1, 1.5)
    cell = (0.1, 0.1, 0.1)

    extent = [hi - lo for lo, hi in zip(lower, upper)]
    dims = [int(span / step) for span, step in zip(extent, cell)]

    index_grid = np.stack(
        np.meshgrid(
            np.arange(dims[1]), np.arange(dims[0]), np.arange(dims[2])),
        axis=-1).reshape(-1, 3)

    # Cell-centre coordinate for each grid column, using the dimension,
    # extent and lower bound of the same axis number.
    scaled = [
        (index_grid[:, axis:axis + 1] + 0.5) / dims[axis] * extent[axis] +
        lower[axis] for axis in range(3)
    ]
    return np.concatenate((scaled[1], scaled[0], scaled[2]), axis=-1)
def parse_args():
    """Parse the command line of the occupancy visualisation script.

    Returns:
        argparse.Namespace: the positional ``res`` path plus the optional
        settings declared in the table below.
    """
    cli = argparse.ArgumentParser(
        description='Visualize the predicted '
        'result of nuScenes')

    # (flag, add_argument keyword arguments), registered in this order.
    argument_table = [
        ('res', dict(help='Path to the predicted result')),
        ('--canva-size',
         dict(type=int, default=1000, help='Size of canva in pixel')),
        ('--vis-frames',
         dict(
             type=int,
             default=500,
             help='Number of frames for visualization')),
        ('--scale-factor',
         dict(
             type=int,
             default=4,
             help='Trade-off between image-view and bev in size of '
             'the visualized canvas')),
        ('--version',
         dict(type=str, default='val', help='Version of nuScenes dataset')),
        ('--draw-gt', dict(action='store_true')),
        ('--root_path',
         dict(
             type=str,
             default='./data/nuscenes',
             help='Path to nuScenes dataset')),
        ('--save_path',
         dict(
             type=str,
             default='./vis',
             help='Path to save visualization results')),
        ('--format',
         dict(
             type=str,
             default='image',
             choices=['video', 'image'],
             help='The desired format of the visualization result')),
        ('--fps', dict(type=int, default=10, help='Frame rate of video')),
        ('--video-prefix',
         dict(type=str, default='vis', help='name of video')),
    ]
    for flag, options in argument_table:
        cli.add_argument(flag, **options)
    return cli.parse_args()
def main():
    """Render predicted (and optionally ground-truth) occupancy per frame.

    For every sample the predicted voxels are shown in an Open3D window,
    the rendered view is captured and pasted between the six camera
    images, and the result is written either as PNG files under
    ``<save_path>/<scene>/<token>/`` or as one frame of an MP4 video,
    depending on ``--format``.
    """
    args = parse_args()
    # load predicted results
    results_dir = args.res

    # load dataset information
    info_path = \
        args.root_path + '/bevdetv2-nuscenes_infos_%s.pkl' % args.version
    with open(info_path, 'rb') as info_file:
        dataset = pickle.load(info_file)
    # prepare save path and medium
    vis_dir = args.save_path
    if not os.path.exists(vis_dir):
        os.makedirs(vis_dir)
    print('saving visualized result to %s' % vis_dir)
    scale_factor = args.scale_factor
    canva_size = args.canva_size
    if args.format == 'video':
        fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
        vout = cv2.VideoWriter(
            os.path.join(vis_dir, '%s.mp4' % args.video_prefix), fourcc,
            args.fps, (int(1600 / scale_factor * 3),
                       int(900 / scale_factor * 2 + canva_size)))

    views = [
        'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT', 'CAM_BACK_LEFT',
        'CAM_BACK', 'CAM_BACK_RIGHT'
    ]
    print('start visualizing results')

    vis = o3d.visualization.VisualizerWithKeyCallback()
    vis.create_window()

    num_frames = min(args.vis_frames, len(dataset['infos']))
    for cnt, info in enumerate(dataset['infos'][:num_frames]):
        if cnt % 10 == 0:
            print('%d/%d' % (cnt, num_frames))

        scene_name = info['scene_name']
        sample_token = info['token']

        pred_occ_path = os.path.join(results_dir, scene_name, sample_token,
                                     'pred.npz')
        gt_occ_path = info['occ_path']

        pred_occ = np.load(pred_occ_path)['pred']
        gt_data = np.load(
            os.path.join(args.root_path, gt_occ_path, 'labels.npz'))
        voxel_label = gt_data['semantics']
        camera_mask = gt_data['mask_camera']

        # load imgs
        imgs = [cv2.imread(info['cams'][view]['data_path']) for view in views]

        # occ_canvas: show non-free voxels that pass the camera mask.
        voxel_show = np.logical_and(pred_occ != FREE_LABEL, camera_mask)
        voxel_size = VOXEL_SIZE
        vis = show_occ(
            torch.from_numpy(pred_occ),
            torch.from_numpy(voxel_show),
            voxel_size=voxel_size,
            vis=vis,
            offset=[0, pred_occ.shape[0] * voxel_size[0] * 1.2 * 0, 0])

        if args.draw_gt:
            # Ground truth is placed next to the prediction, shifted along
            # y by 1.2 grid widths.
            voxel_show = np.logical_and(voxel_label != FREE_LABEL,
                                        camera_mask)
            vis = show_occ(
                torch.from_numpy(voxel_label),
                torch.from_numpy(voxel_show),
                voxel_size=voxel_size,
                vis=vis,
                offset=[0, voxel_label.shape[0] * voxel_size[0] * 1.2 * 1, 0])

        # Fixed camera pose for every frame.
        view_control = vis.get_view_control()
        view_control.set_lookat(np.array([-0.185, 0.513, 3.485]))
        view_control.set_front(np.array([-0.974, -0.055, 0.221]))
        view_control.set_up(np.array([0.221, 0.014, 0.975]))
        view_control.set_zoom(0.08)

        opt = vis.get_render_option()
        opt.background_color = np.asarray([1, 1, 1])
        opt.line_width = 5

        vis.poll_events()
        vis.update_renderer()
        # NOTE(review): run() presumably blocks until the window is closed
        # by the user -- confirm this per-frame interaction is intended.
        vis.run()

        # Capture the render and convert it to an 8-bit image with the
        # channel order reversed for OpenCV.
        occ_canvas = vis.capture_screen_float_buffer(do_render=True)
        occ_canvas = np.asarray(occ_canvas)
        occ_canvas = (occ_canvas * 255).astype(np.uint8)
        occ_canvas = occ_canvas[..., [2, 1, 0]]
        occ_canvas_resize = cv2.resize(
            occ_canvas, (canva_size, canva_size),
            interpolation=cv2.INTER_CUBIC)

        vis.clear_geometries()

        big_img = np.zeros((900 * 2 + canva_size * scale_factor, 1600 * 3, 3),
                           dtype=np.uint8)
        big_img[:900, :, :] = np.concatenate(imgs[:3], axis=1)
        # Back cameras are mirrored horizontally before being tiled.
        img_back = np.concatenate(
            [imgs[3][:, ::-1, :], imgs[4][:, ::-1, :], imgs[5][:, ::-1, :]],
            axis=1)
        big_img[900 + canva_size * scale_factor:, :, :] = img_back
        big_img = cv2.resize(big_img, (int(1600 / scale_factor * 3),
                                       int(900 / scale_factor * 2 + canva_size)))
        w_begin = int((1600 * 3 / scale_factor - canva_size) // 2)
        big_img[int(900 / scale_factor):int(900 / scale_factor) + canva_size,
                w_begin:w_begin + canva_size, :] = occ_canvas_resize

        if args.format == 'image':
            out_dir = os.path.join(vis_dir, f'{scene_name}', f'{sample_token}')
            mmcv.mkdir_or_exist(out_dir)
            for i, img in enumerate(imgs):
                cv2.imwrite(os.path.join(out_dir, f'img{i}.png'), img)
            cv2.imwrite(os.path.join(out_dir, 'occ.png'), occ_canvas)
            cv2.imwrite(os.path.join(out_dir, 'overall.png'), big_img)
        elif args.format == 'video':
            # Plain frame counter; the previous ``{cnt:{cnt}}`` used the
            # counter as its own field width and padded the text with
            # ``cnt`` characters.
            cv2.putText(big_img, f'{cnt}', (5, 15),
                        fontFace=cv2.FONT_HERSHEY_COMPLEX,
                        color=(0, 0, 0), fontScale=0.5)
            cv2.putText(big_img, f'{scene_name}', (5, 35),
                        fontFace=cv2.FONT_HERSHEY_COMPLEX,
                        color=(0, 0, 0), fontScale=0.5)
            cv2.putText(big_img, f'{sample_token[:5]}', (5, 55),
                        fontFace=cv2.FONT_HERSHEY_COMPLEX,
                        color=(0, 0, 0), fontScale=0.5)
            vout.write(big_img)

    if args.format == 'video':
        vout.release()
    vis.destroy_window()
if
__name__
==
'__main__'
:
main
()
\ No newline at end of file
docker-hub/FlashOCC/Flashocc/tools/convert_bevdet_to_TRT.py
0 → 100644
View file @
d2b71343
import
argparse
import
sys
import
os
sys
.
path
.
insert
(
0
,
os
.
getcwd
())
import
torch.onnx
from
mmcv
import
Config
from
mmdeploy.backend.tensorrt.utils
import
save
,
search_cuda_version
try
:
# If mmdet version > 2.23.0, compat_cfg would be imported and
# used from mmdet instead of mmdet3d.
from
mmdet.utils
import
compat_cfg
except
ImportError
:
from
mmdet3d.utils
import
compat_cfg
import
os
from
typing
import
Dict
,
Optional
,
Sequence
,
Union
import
h5py
import
mmcv
import
numpy
as
np
import
onnx
import
pycuda.driver
as
cuda
import
tensorrt
as
trt
import
torch
import
tqdm
from
mmcv.runner
import
load_checkpoint
from
mmdeploy.apis.core
import
no_mp
from
mmdeploy.backend.tensorrt.calib_utils
import
HDF5Calibrator
from
mmdeploy.backend.tensorrt.init_plugins
import
load_tensorrt_plugin
from
mmdeploy.utils
import
load_config
from
packaging
import
version
from
torch.utils.data
import
DataLoader
from
mmdet3d.datasets
import
build_dataloader
,
build_dataset
from
mmdet3d.models
import
build_model
from
mmdet.datasets
import
replace_ImageToTensor
from
tools.misc.fuse_conv_bn
import
fuse_module
class HDF5CalibratorBEVDet(HDF5Calibrator):
    """HDF5 calibrator variant that feeds BEVDet inputs to TensorRT.

    The ``'img'`` input is uploaded as float32, every other input as int32.
    """

    def get_batch(self, names: Sequence[str], **kwargs) -> list:
        """Upload the next calibration sample and return its device buffers.

        Args:
            names: input tensor names requested by TensorRT.

        Returns:
            One device allocation per name, or ``None`` once all
            ``self.dataset_length`` samples have been consumed.
        """
        if self.count >= self.dataset_length:
            return None

        if self.count % 100 == 0:
            print('%d/%d' % (self.count, self.dataset_length))

        sample_key = str(self.count)
        device_buffers = []
        for name in names:
            target_dtype = np.float32 if name == 'img' else np.int32
            host_array = self.calib_data[name][sample_key][...].astype(
                target_dtype)

            # tile the tensor so we can keep the same distribute
            opt_shape = self.input_shapes[name]['opt_shape']
            repeats = [
                int(np.ceil(wanted / available))
                for wanted, available in zip(opt_shape, host_array.shape)
            ]
            host_array = np.tile(host_array, repeats)

            # Crop the tiled array back down to exactly ``opt_shape``.
            crop = tuple(slice(0, extent) for extent in opt_shape)
            host_array = host_array[crop]

            device_ptr = cuda.mem_alloc(host_array.nbytes)
            cuda.memcpy_htod(device_ptr, np.ascontiguousarray(host_array))
            self.buffers[name] = device_ptr

            device_buffers.append(self.buffers[name])

        self.count += 1
        return device_buffers
def parse_args():
    """Parse the command line of the BEVDet -> TensorRT conversion script.

    Returns:
        argparse.Namespace: positional ``config``, ``checkpoint`` and
        ``work_dir`` plus the optional switches declared below.
    """
    cli = argparse.ArgumentParser(description='Deploy BEVDet with Tensorrt')

    # (flag, add_argument keyword arguments), registered in this order.
    argument_table = [
        ('config', dict(help='deploy config file path')),
        ('checkpoint', dict(help='checkpoint file')),
        ('work_dir', dict(help='work dir to save file')),
        ('--prefix',
         dict(default='bevdet', help='prefix of the save file name')),
        ('--fp16',
         dict(action='store_true', help='Whether to use tensorrt fp16')),
        ('--int8',
         dict(action='store_true', help='Whether to use tensorrt int8')),
        ('--fuse-conv-bn',
         dict(
             action='store_true',
             help='Whether to fuse conv and bn, this will slightly increase'
             'the inference speed')),
        ('--calib_num', dict(type=int, help='num to calib')),
    ]
    for flag, options in argument_table:
        cli.add_argument(flag, **options)
    return cli.parse_args()
def get_plugin_names():
    """Return the names of all plugin creators in the TensorRT registry."""
    registry = trt.get_plugin_registry()
    creators = registry.plugin_creator_list
    return [creator.name for creator in creators]
def create_calib_input_data_impl(calib_file: str,
                                 dataloader: DataLoader,
                                 model_partition: bool = False,
                                 metas: list = None,
                                 calib_num=None) -> None:
    """Write the INT8 calibration inputs of BEVDet into an HDF5 file.

    The file contains the group ``calib_data/end2end`` with one sub-group
    for ``img`` and one for each of ``ranks_bev``, ``ranks_depth``,
    ``ranks_feat``, ``interval_starts`` and ``interval_lengths``. Every
    sample is stored as a dataset named after its index.

    Args:
        calib_file: path of the HDF5 file to create (overwritten).
        dataloader: yields dicts whose ``['img_inputs'][0][0]`` entry is
            the image tensor of one sample.
        model_partition: must be False; partitioned models are not
            supported.
        metas: tensors in the same order as the five keys listed above.
            They are converted once and the same values are stored for
            every sample. Defaults to an empty list.
        calib_num: maximum number of samples to store; ``None`` stores the
            whole dataloader.

    Raises:
        AssertionError: if ``model_partition`` is True.
    """
    # Checked before the file is opened so that an unsupported call does
    # not truncate an existing calibration file.
    assert not model_partition
    metas = [] if metas is None else metas

    input_keys = [
        'ranks_bev', 'ranks_depth', 'ranks_feat', 'interval_starts',
        'interval_lengths'
    ]
    with h5py.File(calib_file, mode='w') as file:
        calib_data_group = file.create_group('calib_data')
        # create end2end group
        input_data_group = calib_data_group.create_group('end2end')
        input_group_img = input_data_group.create_group('img')
        input_groups = [
            input_data_group.create_group(input_key)
            for input_key in input_keys
        ]
        metas = [meta.int().detach().cpu().numpy() for meta in metas]

        for data_id, input_data in enumerate(tqdm.tqdm(dataloader)):
            # save end2end data
            # ``>=`` so that exactly ``calib_num`` samples (ids
            # 0 .. calib_num - 1) are stored; ``>`` stored one extra.
            if (calib_num is not None) and (data_id >= calib_num):
                break
            input_tensor = input_data['img_inputs'][0][0]
            input_ndarray = input_tensor.squeeze(0).detach().cpu().numpy()
            input_group_img.create_dataset(
                str(data_id),
                shape=input_ndarray.shape,
                compression='gzip',
                compression_opts=4,
                data=input_ndarray)
            for kid in range(len(input_keys)):
                input_groups[kid].create_dataset(
                    str(data_id),
                    shape=metas[kid].shape,
                    compression='gzip',
                    compression_opts=4,
                    data=metas[kid])
            file.flush()
def create_calib_input_data(calib_file: str,
                            deploy_cfg: Union[str, mmcv.Config],
                            model_cfg: Union[str, mmcv.Config],
                            model_checkpoint: Optional[str] = None,
                            dataset_cfg: Optional[Union[str,
                                                        mmcv.Config]] = None,
                            dataset_type: str = 'val',
                            device: str = 'cpu',
                            metas: list = [None],
                            calib_num=None) -> None:
    """Build the calibration dataset used for post-training quantization.

    Args:
        calib_file (str): Path of the calibration file to write.
        deploy_cfg (str | mmcv.Config): Deployment config file or
            Config object.
        model_cfg (str | mmcv.Config): Model config file or Config object.
        model_checkpoint (str): Checkpoint path of the PyTorch model,
            defaults to `None`. Not used by this function.
        dataset_cfg (Optional[Union[str, mmcv.Config]], optional): Config
            providing the calibration dataset. Falls back to `model_cfg`
            when None. Defaults to None.
        dataset_type (str, optional): The dataset type. Defaults to 'val'.
        device (str, optional): Device to create dataset. Defaults to 'cpu'.
        metas (list): forwarded unchanged to
            `create_calib_input_data_impl`.
        calib_num: forwarded unchanged to `create_calib_input_data_impl`.
    """
    with no_mp():
        dataset_cfg = model_cfg if dataset_cfg is None else dataset_cfg

        # load cfg if necessary
        deploy_cfg, model_cfg = load_config(deploy_cfg, model_cfg)

        # Second fallback kept from the original flow, now against the
        # loaded model config.
        dataset_cfg = model_cfg if dataset_cfg is None else dataset_cfg

        # load dataset_cfg if necessary
        loaded_cfgs = load_config(dataset_cfg)
        dataset_cfg = loaded_cfgs[0]

        from mmdeploy.apis.utils import build_task_processor
        processor = build_task_processor(model_cfg, deploy_cfg, device)
        calib_dataset = processor.build_dataset(dataset_cfg, dataset_type)
        # Batch size 1, one worker, fixed sample order.
        calib_loader = processor.build_dataloader(
            calib_dataset, 1, 1, dist=False, shuffle=False)

        create_calib_input_data_impl(
            calib_file,
            calib_loader,
            model_partition=False,
            metas=metas,
            calib_num=calib_num)
def from_onnx(onnx_model: Union[str, onnx.ModelProto],
              output_file_prefix: str,
              input_shapes: Dict[str, Sequence[int]],
              max_workspace_size: int = 0,
              fp16_mode: bool = False,
              int8_mode: bool = False,
              int8_param: Optional[dict] = None,
              device_id: int = 0,
              log_level: trt.Logger.Severity = trt.Logger.ERROR,
              **kwargs) -> trt.ICudaEngine:
    """Create a tensorrt engine from ONNX.

    Modified from mmdeploy.backend.tensorrt.utils.from_onnx

    Args:
        onnx_model: path of an ONNX file or an already loaded model.
        output_file_prefix: the engine is saved to
            ``output_file_prefix + '.engine'``.
        input_shapes: per input name, a mapping with ``'min_shape'``,
            ``'opt_shape'`` and ``'max_shape'``.
        max_workspace_size: workspace size handed to the builder config.
        fp16_mode: enable the FP16 builder flag.
        int8_mode: enable the INT8 builder flag; requires ``int8_param``.
        int8_param: mapping with ``'calib_file'`` and ``'model_type'`` and
            optionally ``'algorithm'``.
        device_id: value exported as ``CUDA_DEVICE`` while
            ``pycuda.autoinit`` is imported.
        log_level: severity of the TensorRT logger.

    Returns:
        The built engine.

    Raises:
        RuntimeError: if the ONNX model cannot be parsed.
        AssertionError: if ``int8_mode`` is set without ``int8_param`` or
            the engine could not be built.
    """
    import os

    # Expose ``device_id`` through CUDA_DEVICE only for the duration of
    # the pycuda.autoinit import, then restore the previous state.
    previous_device = os.environ.get('CUDA_DEVICE', None)
    os.environ['CUDA_DEVICE'] = str(device_id)
    import pycuda.autoinit  # noqa:F401
    if previous_device is None:
        os.environ.pop('CUDA_DEVICE')
    else:
        os.environ['CUDA_DEVICE'] = previous_device

    load_tensorrt_plugin()

    # create builder and network
    trt_logger = trt.Logger(log_level)
    builder = trt.Builder(trt_logger)
    explicit_batch_flag = 1 << int(
        trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
    network = builder.create_network(explicit_batch_flag)

    # parse onnx
    onnx_parser = trt.OnnxParser(network, trt_logger)
    if isinstance(onnx_model, str):
        onnx_model = onnx.load(onnx_model)
    if not onnx_parser.parse(onnx_model.SerializeToString()):
        error_msgs = ''.join(f'{onnx_parser.get_error(error_id)}\n'
                             for error_id in range(onnx_parser.num_errors))
        raise RuntimeError(f'Failed to parse onnx, {error_msgs}')

    # config builder
    # Releases before TensorRT 8 also need the settings on the builder.
    is_pre_trt8 = version.parse(trt.__version__) < version.parse('8')
    if is_pre_trt8:
        builder.max_workspace_size = max_workspace_size

    config = builder.create_builder_config()
    config.max_workspace_size = max_workspace_size

    cuda_version = search_cuda_version()
    if cuda_version is not None:
        cuda_major = int(cuda_version.split('.')[0])
        if cuda_major < 11:
            # cu11 support cublasLt, so cudnn heuristic tactic should disable CUBLAS_LT # noqa E501
            cublas_lt_bit = 1 << int(trt.TacticSource.CUBLAS_LT)
            config.set_tactic_sources(
                config.get_tactic_sources() - cublas_lt_bit)

    profile = builder.create_optimization_profile()
    for input_name, shape_spec in input_shapes.items():
        profile.set_shape(input_name, shape_spec['min_shape'],
                          shape_spec['opt_shape'], shape_spec['max_shape'])
    config.add_optimization_profile(profile)

    if fp16_mode:
        if is_pre_trt8:
            builder.fp16_mode = fp16_mode
        config.set_flag(trt.BuilderFlag.FP16)

    if int8_mode:
        config.set_flag(trt.BuilderFlag.INT8)
        assert int8_param is not None
        calibration_algo = int8_param.get(
            'algorithm', trt.CalibrationAlgoType.ENTROPY_CALIBRATION_2)
        config.int8_calibrator = HDF5CalibratorBEVDet(
            int8_param['calib_file'],
            input_shapes,
            model_type=int8_param['model_type'],
            device_id=device_id,
            algorithm=calibration_algo)
        if is_pre_trt8:
            builder.int8_mode = int8_mode
            builder.int8_calibrator = config.int8_calibrator

    # create engine
    engine = builder.build_engine(network, config)
    assert engine is not None, 'Failed to create TensorRT engine'

    engine_path = output_file_prefix + '.engine'
    save(engine, engine_path)
    print('Save engine at ', engine_path)
    return engine
def _module_path_from_dir(path):
    """Return the directory part of ``path`` as a dotted module path."""
    return '.'.join(os.path.dirname(path).split('/'))


def _fixed_shape_profile(shape):
    """Return a min/opt/max shape entry in which all three are ``shape``."""
    return dict(min_shape=shape, opt_shape=shape, max_shape=shape)


def _export_and_check_onnx(model, onnx_input, onnx_path, input_names,
                           output_names, dynamic_axes):
    """Export ``model`` to ``onnx_path`` and report the ONNX checker result."""
    torch.onnx.export(
        model,
        onnx_input,
        onnx_path,
        opset_version=11,
        dynamic_axes=dynamic_axes,
        input_names=input_names,
        output_names=output_names)
    print('output_names:', output_names)
    print('====== onnx is saved at : ', onnx_path)

    # check onnx model
    onnx_model = onnx.load(onnx_path)
    try:
        onnx.checker.check_model(onnx_model)
    except Exception as err:
        # Best effort on purpose: a failed check is reported (with its
        # reason) but does not abort the conversion.
        print('ONNX Model Incorrect:', err)
    else:
        print('ONNX Model Correct')


def main():
    """Export the configured occupancy model to ONNX and build a TRT engine.

    Steps: parse the command line, build the test dataloader and the
    ``<type>TRT`` variant of the model, take the first batch to trace two
    ONNX files (raw outputs and ``_with_argmax``), optionally create INT8
    calibration data, and finally build the TensorRT engine from the raw
    ONNX file.

    Raises:
        RuntimeError: If the dataloader yields no batch.
        ValueError: If the model class is not one of the supported TRT
            wrappers, or if neither ``wdet3d`` nor ``wocc`` is enabled.
    """
    args = parse_args()
    max_workspace_size = 200 * 200 * 256 * (2**8)
    os.makedirs(args.work_dir, exist_ok=True)

    load_tensorrt_plugin()
    assert 'bev_pool_v2' in get_plugin_names(), \
        'bev_pool_v2 is not in the plugin list of tensorrt, ' \
        'please install mmdeploy from ' \
        'https://github.com/HuangJunJie2017/mmdeploy.git'

    model_prefix = args.prefix
    if args.int8:
        model_prefix = model_prefix + '_int8'
    elif args.fp16:
        model_prefix = model_prefix + '_fp16'

    cfg = Config.fromfile(args.config)
    cfg.model.pretrained = None
    cfg.model.type = cfg.model.type + 'TRT'
    cfg = compat_cfg(cfg)
    cfg.gpu_ids = [0]

    # import modules from plugin/xx, registry will be updated
    if hasattr(cfg, 'plugin') and cfg.plugin:
        import importlib
        if hasattr(cfg, 'plugin_dir'):
            module_path = _module_path_from_dir(cfg.plugin_dir)
            print(module_path)
        else:
            # import dir is the dirpath for the config file
            module_path = _module_path_from_dir(args.config)
        importlib.import_module(module_path)

    # build the dataloader
    test_dataloader_default_args = dict(
        samples_per_gpu=1, workers_per_gpu=2, dist=False, shuffle=False)

    if isinstance(cfg.data.test, dict):
        cfg.data.test.test_mode = True
        if cfg.data.test_dataloader.get('samples_per_gpu', 1) > 1:
            # Replace 'ImageToTensor' to 'DefaultFormatBundle'
            cfg.data.test.pipeline = replace_ImageToTensor(
                cfg.data.test.pipeline)
    elif isinstance(cfg.data.test, list):
        for ds_cfg in cfg.data.test:
            ds_cfg.test_mode = True
        if cfg.data.test_dataloader.get('samples_per_gpu', 1) > 1:
            for ds_cfg in cfg.data.test:
                ds_cfg.pipeline = replace_ImageToTensor(ds_cfg.pipeline)

    test_loader_cfg = {
        **test_dataloader_default_args,
        **cfg.data.get('test_dataloader', {})
    }

    dataset = build_dataset(cfg.data.test)
    data_loader = build_dataloader(dataset, **test_loader_cfg)

    # build the model and load checkpoint
    cfg.model.train_cfg = None
    model = build_model(cfg.model, test_cfg=cfg.get('test_cfg'))

    if os.path.exists(args.checkpoint):
        load_checkpoint(model, args.checkpoint, map_location='cpu')
    else:
        print(args.checkpoint, " does not exists!")
    if args.fuse_conv_bn:
        model_prefix = model_prefix + '_fuse'
        model = fuse_module(model)
    model.cuda()
    model.eval()

    # NOTE: paths are built by plain concatenation, so ``args.work_dir``
    # has to end with a path separator for the files to land inside it.
    onnx_path = args.work_dir + model_prefix + '.onnx'
    argmax_onnx_path = args.work_dir + model_prefix + '_with_argmax.onnx'

    # Only the first batch is needed to trace the model.
    data = next(iter(data_loader), None)
    if data is None:
        raise RuntimeError(
            'The test dataloader is empty; one sample is needed for export.')

    inputs = [t.cuda() for t in data['img_inputs'][0]]
    img = inputs[0].squeeze(0)
    if img.shape[0] > 6:
        img = img[:6]

    model_name = model.__class__.__name__
    mlp_input = None
    if model_name in ('FBOCCTRT', 'FBOCC2DTRT'):
        metas = model.get_bev_pool_input(inputs, img_metas=data['img_metas'])
    elif model_name == 'BEVDetOCCTRT':
        metas = model.get_bev_pool_input(inputs)
    elif model_name == 'BEVDepthOCCTRT':
        metas, mlp_input = model.get_bev_pool_input(inputs)
    else:
        raise ValueError(f'Unsupported model type for export: {model_name}')

    # ``metas`` is consumed in the order 1, 2, 0, 3, 4 so that the tensors
    # line up with ``input_names`` below.
    onnx_input = (img.float().contiguous(), ) + tuple(
        metas[idx].int().contiguous() for idx in (1, 2, 0, 3, 4))
    input_names = [
        'img', 'ranks_depth', 'ranks_feat', 'ranks_bev', 'interval_starts',
        'interval_lengths'
    ]
    if model_name == 'BEVDepthOCCTRT':
        # mlp_input is exported with a static shape (no dynamic axis).
        onnx_input = onnx_input + (mlp_input, )
        input_names.append('mlp_input')
    dynamic_axes = {
        "ranks_depth": {0: 'M'},
        "ranks_feat": {0: 'M'},
        "ranks_bev": {0: 'M'},
        "interval_starts": {0: 'N'},
        "interval_lengths": {0: 'N'},
    }

    with torch.no_grad():
        if (model.wdet3d == True) and (model.wocc == False):  # noqa: E712
            num_outputs = 6 * len(model.pts_bbox_head.task_heads)
        elif (model.wdet3d == True) and (model.wocc == True):  # noqa: E712
            num_outputs = 1 + 6 * len(model.pts_bbox_head.task_heads)
        elif (model.wdet3d == False) and (model.wocc == True):  # noqa: E712
            num_outputs = 1
        else:
            # The original raised a bare string here, which itself fails
            # with a TypeError; raise a real exception instead.
            raise ValueError(
                " At least one of wdet3d and wocc is set as True!! ")
        output_names = [f'output_{j}' for j in range(num_outputs)]

        model.forward = model.forward_ori
        _export_and_check_onnx(model, onnx_input, onnx_path, input_names,
                               output_names, dynamic_axes)

        model.forward = model.forward_with_argmax
        _export_and_check_onnx(model, onnx_input, argmax_onnx_path,
                               input_names, ['cls_occ_label'], dynamic_axes)

    # convert to tensorrt
    num_points = metas[0].shape[0]
    num_intervals = metas[3].shape[0]
    # Every profile is pinned to the shapes of the traced sample.
    input_shapes = dict(img=_fixed_shape_profile(img.shape))
    for name in ('ranks_depth', 'ranks_feat', 'ranks_bev'):
        input_shapes[name] = dict(
            min_shape=[num_points],
            opt_shape=[num_points],
            max_shape=[num_points])
    for name in ('interval_starts', 'interval_lengths'):
        input_shapes[name] = dict(
            min_shape=[num_intervals],
            opt_shape=[num_intervals],
            max_shape=[num_intervals])

    deploy_cfg = dict(
        backend_config=dict(
            type='tensorrt',
            common_config=dict(
                fp16_mode=args.fp16,
                max_workspace_size=max_workspace_size,
                int8_mode=args.int8),
            model_inputs=[dict(input_shapes=input_shapes)]),
        codebase_config=dict(
            type='mmdet3d', task='VoxelDetection', model_type='end2end'))

    calib_path = os.path.join(args.work_dir, 'calib_data.h5')
    if args.int8:
        create_calib_input_data(
            calib_path,
            deploy_cfg,
            args.config,
            args.checkpoint,
            dataset_cfg=None,
            dataset_type='val',
            device='cuda:0',
            metas=metas,
            calib_num=args.calib_num)

    # The calibration file is only read by from_onnx when int8_mode is set;
    # it is left on disk afterwards.
    from_onnx(
        onnx_path,
        args.work_dir + model_prefix,
        fp16_mode=args.fp16,
        int8_mode=args.int8,
        int8_param=dict(calib_file=calib_path, model_type='end2end'),
        max_workspace_size=max_workspace_size,
        input_shapes=input_shapes)
# Run the ONNX/TensorRT conversion only when executed as a script.
if __name__ == '__main__':
    main()
docker-hub/FlashOCC/Flashocc/tools/create_data_bevdet.py
0 → 100644
View file @
d2b71343
# Copyright (c) OpenMMLab. All rights reserved.
import
pickle
import
numpy
as
np
from
nuscenes
import
NuScenes
from
nuscenes.utils.data_classes
import
Box
from
pyquaternion
import
Quaternion
from
tools.data_converter
import
nuscenes_converter
as
nuscenes_converter
# Maps each annotation category name to a detection class name. Categories
# mapped to 'ignore' are dropped by get_gt, because 'ignore' is not listed
# in `classes` below.
map_name_from_general_to_detection = {
    'human.pedestrian.adult': 'pedestrian',
    'human.pedestrian.child': 'pedestrian',
    'human.pedestrian.wheelchair': 'ignore',
    'human.pedestrian.stroller': 'ignore',
    'human.pedestrian.personal_mobility': 'ignore',
    'human.pedestrian.police_officer': 'pedestrian',
    'human.pedestrian.construction_worker': 'pedestrian',
    'animal': 'ignore',
    'vehicle.car': 'car',
    'vehicle.motorcycle': 'motorcycle',
    'vehicle.bicycle': 'bicycle',
    'vehicle.bus.bendy': 'bus',
    'vehicle.bus.rigid': 'bus',
    'vehicle.truck': 'truck',
    'vehicle.construction': 'construction_vehicle',
    'vehicle.emergency.ambulance': 'ignore',
    'vehicle.emergency.police': 'ignore',
    'vehicle.trailer': 'trailer',
    'movable_object.barrier': 'barrier',
    'movable_object.trafficcone': 'traffic_cone',
    'movable_object.pushable_pullable': 'ignore',
    'movable_object.debris': 'ignore',
    'static_object.bicycle_rack': 'ignore',
}
# Detection classes; the position of a name in this list is the integer
# label that get_gt emits for it.
classes = [
    'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',
    'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
]
def get_gt(info):
    """Build ground-truth boxes and labels for one sample in the ego frame.

    Annotations are skipped when their category does not map to one of
    ``classes`` or when they contain no lidar and no radar points.

    Args:
        info (dict): Sample info. Read keys are
            ``info['cams']['CAM_FRONT']`` (for ``ego2global_rotation`` and
            ``ego2global_translation``) and ``info['ann_infos']``.

    Returns:
        tuple[list, list]: ``gt_boxes`` holds one array per kept annotation,
        concatenated from centre (3), size ``wlh`` reordered with
        ``[1, 0, 2]`` (3), yaw (1) and the first two velocity components
        (2). ``gt_labels`` holds the matching indices into ``classes``.
    """
    front_cam = info['cams']['CAM_FRONT']
    # Inverse of the ego->global pose: shift by -translation, then apply
    # the inverse rotation.
    to_ego_shift = -np.array(front_cam['ego2global_translation'])
    to_ego_rot = Quaternion(front_cam['ego2global_rotation']).inverse

    gt_boxes = []
    gt_labels = []
    for ann in info['ann_infos']:
        det_name = map_name_from_general_to_detection[ann['category_name']]
        if det_name not in classes:
            continue
        if ann['num_lidar_pts'] + ann['num_radar_pts'] <= 0:
            continue

        # Use ego coordinate.
        box = Box(
            ann['translation'],
            ann['size'],
            Quaternion(ann['rotation']),
            velocity=ann['velocity'],
        )
        box.translate(to_ego_shift)
        box.rotate(to_ego_rot)

        parts = [
            np.array(box.center),
            np.array(box.wlh)[[1, 0, 2]],
            np.array([box.orientation.yaw_pitch_roll[0]]),
            np.array(box.velocity[:2]),
        ]
        gt_boxes.append(np.concatenate(parts))
        gt_labels.append(classes.index(det_name))
    return gt_boxes, gt_labels
def nuscenes_data_prep(root_path, info_prefix, version, max_sweeps=10):
    """Prepare data related to nuScenes dataset.

    This function only forwards its arguments to
    ``nuscenes_converter.create_nuscenes_infos``; which files that call
    produces is not visible here (presumably the '.pkl' info files that
    ``add_ann_adj_info`` later loads - confirm against the converter).

    Args:
        root_path (str): Path of dataset root.
        info_prefix (str): The prefix of info filenames.
        version (str): Dataset version.
        max_sweeps (int, optional): Number of input consecutive frames.
            Default: 10
    """
    nuscenes_converter.create_nuscenes_infos(
        root_path, info_prefix, version=version, max_sweeps=max_sweeps)
def add_ann_adj_info(extra_tag):
    """Add annotation, scene and occupancy-path fields to the info files.

    For both the ``train`` and ``val`` split, the file
    ``./data/nuscenes//<extra_tag>_infos_<split>.pkl`` is loaded, every
    entry of ``dataset['infos']`` is extended in place, and the result is
    written back to the same file.

    Fields set on each info entry:
        * ``ann_infos``: the ``(gt_boxes, gt_labels)`` pair from ``get_gt``.
        * ``scene_token`` / ``scene_name``: taken from the nuScenes sample
          and scene records.
        * ``occ_path``: ``./data/nuscenes/gts/<scene name>/<sample token>``.

    Args:
        extra_tag (str): Prefix of the info filenames.
    """
    nuscenes_version = 'v1.0-trainval'
    dataroot = './data/nuscenes/'
    nuscenes = NuScenes(nuscenes_version, dataroot)
    for split in ['train', 'val']:
        info_file = '%s/%s_infos_%s.pkl' % (dataroot, extra_tag, split)
        # Context manager so the read handle is closed before the same
        # file is reopened for writing below.
        with open(info_file, 'rb') as fid:
            dataset = pickle.load(fid)

        infos = dataset['infos']
        num_infos = len(infos)
        for idx, info in enumerate(infos):
            if idx % 10 == 0:
                print('%d/%d' % (idx, num_infos))

            # get sweep adjacent frame info
            sample = nuscenes.get('sample', info['token'])
            ann_infos = []
            for ann in sample['anns']:
                ann_info = nuscenes.get('sample_annotation', ann)
                velocity = nuscenes.box_velocity(ann_info['token'])
                if np.any(np.isnan(velocity)):
                    velocity = np.zeros(3)
                ann_info['velocity'] = velocity
                ann_infos.append(ann_info)

            # get_gt reads info['ann_infos'], so the raw annotations are
            # stored first and then replaced by the processed result.
            info['ann_infos'] = ann_infos
            info['ann_infos'] = get_gt(info)

            info['scene_token'] = sample['scene_token']
            scene = nuscenes.get('scene', sample['scene_token'])
            info['scene_name'] = scene['name']
            info['occ_path'] = \
                './data/nuscenes/gts/%s/%s' % (scene['name'], info['token'])

        with open(info_file, 'wb') as fid:
            pickle.dump(dataset, fid)
# Script entry: create the info files for the trainval split, then enrich
# them with annotation / scene / occupancy-path fields.
if __name__ == '__main__':
    dataset = 'nuscenes'  # not read by the code below
    version = 'v1.0'
    train_version = f'{version}-trainval'
    root_path = 'data/nuscenes'
    extra_tag = 'bevdetv2-nuscenes'
    nuscenes_data_prep(
        root_path=root_path,
        info_prefix=extra_tag,
        version=train_version,
        max_sweeps=0)

    print('add_ann_infos')
    add_ann_adj_info(extra_tag)
docker-hub/FlashOCC/Flashocc/tools/data_converter/__init__.py
0 → 100644
View file @
d2b71343
# Copyright (c) OpenMMLab. All rights reserved.
docker-hub/FlashOCC/Flashocc/tools/data_converter/__pycache__/__init__.cpython-310.pyc
0 → 100644
View file @
d2b71343
File added
docker-hub/FlashOCC/Flashocc/tools/data_converter/__pycache__/nuscenes_converter.cpython-310.pyc
0 → 100644
View file @
d2b71343
File added
docker-hub/FlashOCC/Flashocc/tools/data_converter/create_gt_database.py
0 → 100644
View file @
d2b71343
# Copyright (c) OpenMMLab. All rights reserved.
import
pickle
from
os
import
path
as
osp
import
mmcv
import
numpy
as
np
from
mmcv
import
track_iter_progress
from
mmcv.ops
import
roi_align
from
pycocotools
import
mask
as
maskUtils
from
pycocotools.coco
import
COCO
from
mmdet3d.core.bbox
import
box_np_ops
as
box_np_ops
from
mmdet3d.datasets
import
build_dataset
from
mmdet.core.evaluation.bbox_overlaps
import
bbox_overlaps
def _poly2mask(mask_ann, img_h, img_w):
    """Decode a COCO-style mask annotation with ``maskUtils.decode``.

    Args:
        mask_ann (list | dict): A list (polygon parts, merged into one RLE),
            a dict whose ``'counts'`` is a list (uncompressed RLE), or
            anything else, which is passed to the decoder as is.
        img_h (int): Image height handed to ``maskUtils.frPyObjects``.
        img_w (int): Image width handed to ``maskUtils.frPyObjects``.

    Returns:
        The result of ``maskUtils.decode`` for the resolved RLE.
    """
    is_polygon = isinstance(mask_ann, list)
    if is_polygon or isinstance(mask_ann['counts'], list):
        encoded = maskUtils.frPyObjects(mask_ann, img_h, img_w)
        # polygon -- a single object might consist of multiple parts,
        # so all parts are merged into one mask rle code
        rle = maskUtils.merge(encoded) if is_polygon else encoded
    else:
        # already an rle
        rle = mask_ann
    return maskUtils.decode(rle)
def _parse_coco_ann_info(ann_info):
    """Split COCO annotations into kept boxes, crowd boxes and masks.

    Annotations flagged ``ignore`` or with ``area <= 0`` are skipped.
    Boxes are converted from ``[x, y, w, h]`` to ``[x1, y1, x2, y2]``.
    ``iscrowd`` annotations only contribute to ``bboxes_ignore``; all
    others contribute a box and their ``segmentation``.

    Args:
        ann_info (list[dict]): Annotations with ``bbox``, ``area`` and,
            for non-crowd entries, ``segmentation``.

    Returns:
        dict: ``bboxes`` (float32 array, shape (n, 4)), ``bboxes_ignore``
        (float32 array, shape (k, 4)) and ``masks`` (list of the
        ``segmentation`` entries, aligned with ``bboxes``).
    """
    gt_bboxes = []
    gt_bboxes_ignore = []
    gt_masks_ann = []

    for ann in ann_info:
        if ann.get('ignore', False):
            continue
        x1, y1, w, h = ann['bbox']
        if ann['area'] <= 0:
            continue
        bbox = [x1, y1, x1 + w, y1 + h]
        if ann.get('iscrowd', False):
            gt_bboxes_ignore.append(bbox)
        else:
            gt_bboxes.append(bbox)
            gt_masks_ann.append(ann['segmentation'])

    # Empty lists become (0, 4) arrays so callers can rely on the shape.
    if gt_bboxes:
        gt_bboxes = np.array(gt_bboxes, dtype=np.float32)
    else:
        gt_bboxes = np.zeros((0, 4), dtype=np.float32)

    if gt_bboxes_ignore:
        gt_bboxes_ignore = np.array(gt_bboxes_ignore, dtype=np.float32)
    else:
        gt_bboxes_ignore = np.zeros((0, 4), dtype=np.float32)

    return dict(
        bboxes=gt_bboxes, bboxes_ignore=gt_bboxes_ignore, masks=gt_masks_ann)
def crop_image_patch_v2(pos_proposals, pos_assigned_gt_inds, gt_masks):
    """Resample the assigned masks inside each proposal with ``roi_align``.

    Args:
        pos_proposals (torch.Tensor): Proposal boxes; each row gets its own
            row index prepended to form the rois.
            # NOTE(review): presumably (N, 4) boxes - confirm with callers.
        pos_assigned_gt_inds (torch.Tensor): Indices selecting, along dim 0
            of ``gt_masks``, the mask that belongs to each proposal.
        gt_masks (numpy.ndarray): Masks, converted with
            ``torch.from_numpy``.

    Returns:
        torch.Tensor: ``roi_align`` output for a 28x28 output size with
        dim 1 squeezed.
    """
    import torch
    from torch.nn.modules.utils import _pair

    device = pos_proposals.device
    out_size = _pair(28)

    # One roi per proposal: (row index, box...)
    row_ids = torch.arange(pos_proposals.size(0), device=device)
    row_ids = row_ids.to(dtype=pos_proposals.dtype).unsqueeze(1)
    rois = torch.cat([row_ids, pos_proposals], dim=1).to(device=device)

    masks_th = torch.from_numpy(gt_masks).to(device)
    masks_th = masks_th.index_select(0, pos_assigned_gt_inds)
    masks_th = masks_th.to(dtype=rois.dtype)

    # Use RoIAlign could apparently accelerate the training (~0.1s/iter)
    aligned = roi_align(masks_th, rois, out_size[::-1], 1.0, 0, True)
    return aligned.squeeze(1)
def crop_image_patch(pos_proposals, gt_masks, pos_assigned_gt_inds, org_img):
    """Crop a masked image patch and a mask patch for every proposal.

    Args:
        pos_proposals (numpy.ndarray): Boxes as rows ``(x1, y1, x2, y2)``;
            values are truncated to int32 before cropping.
        gt_masks (Sequence[numpy.ndarray]): 2-D masks.
        pos_assigned_gt_inds (Sequence[int]): For proposal ``i``, the index
            of its mask in ``gt_masks``.
        org_img (numpy.ndarray): Image that is multiplied by the mask
            (broadcast over the last axis) before cropping.

    Returns:
        tuple[list, list]: ``(img_patches, masks)``, one entry per proposal.
        Each crop spans ``x2 - x1 + 1`` by ``y2 - y1 + 1`` pixels, with a
        minimum of one pixel per side.
    """
    img_patches = []
    masks = []
    for idx, proposal in enumerate(pos_proposals):
        mask = gt_masks[pos_assigned_gt_inds[idx]]
        x1, y1, x2, y2 = proposal.astype(np.int32)
        # Inclusive box extent, never smaller than a single pixel.
        x_stop = x1 + np.maximum(x2 - x1 + 1, 1)
        y_stop = y1 + np.maximum(y2 - y1 + 1, 1)

        masks.append(mask[y1:y_stop, x1:x_stop])
        masked_img = mask[..., None] * org_img
        img_patches.append(masked_img[y1:y_stop, x1:x_stop])
    return img_patches, masks
def create_groundtruth_database(dataset_class_name,
                                data_path,
                                info_prefix,
                                info_path=None,
                                mask_anno_path=None,
                                used_classes=None,
                                database_save_path=None,
                                db_info_save_path=None,
                                relative_path=True,
                                add_rgb=False,
                                lidar_only=False,
                                bev_only=False,
                                coors_range=None,
                                with_mask=False):
    """Given the raw data, generate the ground truth database.

    For every sample, the points inside each 3D box are written to
    ``<database_save_path>/<sample_idx>_<name>_<i>.bin`` (relative to the
    box centre) and one record per object is collected into a dict keyed by
    class name, which is pickled to ``db_info_save_path``.

    Args:
        dataset_class_name (str): Name of the input dataset.
        data_path (str): Path of the data.
        info_prefix (str): Prefix of the info file.
        info_path (str, optional): Path of the info file.
            Default: None.
        mask_anno_path (str, optional): Path of the mask_anno, joined onto
            ``data_path``. Only read when ``with_mask`` is set.
            Default: None.
        used_classes (list[str], optional): Classes have been used. When
            given, records are only kept for these classes (the point files
            are still written for every object).
            Default: None.
        database_save_path (str, optional): Path to save database.
            Default: None, i.e. ``<data_path>/<info_prefix>_gt_database``.
        db_info_save_path (str, optional): Path to save db_info.
            Default: None, i.e.
            ``<data_path>/<info_prefix>_dbinfos_train.pkl``.
        relative_path (bool, optional): Whether to use relative path.
            Default: True. Not read by this function.
        add_rgb (bool, optional): Not read by this function.
        lidar_only (bool, optional): Not read by this function.
        bev_only (bool, optional): Not read by this function.
        coors_range (optional): Not read by this function.
        with_mask (bool, optional): Whether to use mask.
            Default: False.
    """
    print(f'Create GT Database of {dataset_class_name}')
    dataset_cfg = dict(
        type=dataset_class_name, data_root=data_path, ann_file=info_path)
    if dataset_class_name == 'KittiDataset':
        file_client_args = dict(backend='disk')
        dataset_cfg.update(
            test_mode=False,
            split='training',
            modality=dict(
                use_lidar=True,
                use_depth=False,
                use_lidar_intensity=True,
                use_camera=with_mask,
            ),
            pipeline=[
                dict(
                    type='LoadPointsFromFile',
                    coord_type='LIDAR',
                    load_dim=4,
                    use_dim=4,
                    file_client_args=file_client_args),
                dict(
                    type='LoadAnnotations3D',
                    with_bbox_3d=True,
                    with_label_3d=True,
                    file_client_args=file_client_args)
            ])

    elif dataset_class_name == 'NuScenesDataset':
        dataset_cfg.update(
            use_valid_flag=True,
            pipeline=[
                dict(
                    type='LoadPointsFromFile',
                    coord_type='LIDAR',
                    load_dim=5,
                    use_dim=5),
                dict(
                    type='LoadPointsFromMultiSweeps',
                    sweeps_num=10,
                    use_dim=[0, 1, 2, 3, 4],
                    pad_empty_sweeps=True,
                    remove_close=True),
                dict(
                    type='LoadAnnotations3D',
                    with_bbox_3d=True,
                    with_label_3d=True)
            ])

    elif dataset_class_name == 'WaymoDataset':
        file_client_args = dict(backend='disk')
        dataset_cfg.update(
            test_mode=False,
            split='training',
            modality=dict(
                use_lidar=True,
                use_depth=False,
                use_lidar_intensity=True,
                use_camera=False,
            ),
            pipeline=[
                dict(
                    type='LoadPointsFromFile',
                    coord_type='LIDAR',
                    load_dim=6,
                    use_dim=6,
                    file_client_args=file_client_args),
                dict(
                    type='LoadAnnotations3D',
                    with_bbox_3d=True,
                    with_label_3d=True,
                    file_client_args=file_client_args)
            ])

    dataset = build_dataset(dataset_cfg)

    if database_save_path is None:
        database_save_path = osp.join(data_path, f'{info_prefix}_gt_database')
    if db_info_save_path is None:
        db_info_save_path = osp.join(data_path,
                                     f'{info_prefix}_dbinfos_train.pkl')
    mmcv.mkdir_or_exist(database_save_path)
    all_db_infos = dict()
    if with_mask:
        # Map image file names to COCO image ids for the mask lookup.
        coco = COCO(osp.join(data_path, mask_anno_path))
        imgIds = coco.getImgIds()
        file2id = dict()
        for i in imgIds:
            info = coco.loadImgs([i])[0]
            file2id.update({info['file_name']: i})

    # Group ids are remapped to consecutive integers across all samples.
    group_counter = 0
    for j in track_iter_progress(list(range(len(dataset)))):
        input_dict = dataset.get_data_info(j)
        dataset.pre_pipeline(input_dict)
        example = dataset.pipeline(input_dict)
        annos = example['ann_info']
        image_idx = example['sample_idx']
        points = example['points'].tensor.numpy()
        gt_boxes_3d = annos['gt_bboxes_3d'].tensor.numpy()
        names = annos['gt_names']
        group_dict = dict()
        if 'group_ids' in annos:
            group_ids = annos['group_ids']
        else:
            group_ids = np.arange(gt_boxes_3d.shape[0], dtype=np.int64)
        difficulty = np.zeros(gt_boxes_3d.shape[0], dtype=np.int32)
        if 'difficulty' in annos:
            difficulty = annos['difficulty']

        num_obj = gt_boxes_3d.shape[0]
        point_indices = box_np_ops.points_in_rbbox(points, gt_boxes_3d)

        if with_mask:
            # prepare masks
            gt_boxes = annos['gt_bboxes']
            img_path = osp.split(example['img_info']['filename'])[-1]
            if img_path not in file2id.keys():
                print(f'skip image {img_path} for empty mask')
                continue
            img_id = file2id[img_path]
            kins_annIds = coco.getAnnIds(imgIds=img_id)
            kins_raw_info = coco.loadAnns(kins_annIds)
            kins_ann_info = _parse_coco_ann_info(kins_raw_info)
            h, w = annos['img_shape'][:2]
            gt_masks = [
                _poly2mask(mask, h, w) for mask in kins_ann_info['masks']
            ]
            # get mask inds based on iou mapping
            bbox_iou = bbox_overlaps(kins_ann_info['bboxes'], gt_boxes)
            mask_inds = bbox_iou.argmax(axis=0)
            valid_inds = (bbox_iou.max(axis=0) > 0.5)

            # mask the image
            object_img_patches, object_masks = crop_image_patch(
                gt_boxes, gt_masks, mask_inds, annos['img'])

        for i in range(num_obj):
            filename = f'{image_idx}_{names[i]}_{i}.bin'
            abs_filepath = osp.join(database_save_path, filename)
            rel_filepath = osp.join(f'{info_prefix}_gt_database', filename)

            # save point clouds and image patches for each object
            gt_points = points[point_indices[:, i]]
            gt_points[:, :3] -= gt_boxes_3d[i, :3]

            if with_mask:
                if object_masks[i].sum() == 0 or not valid_inds[i]:
                    # Skip object for empty or invalid mask
                    continue
                img_patch_path = abs_filepath + '.png'
                mask_patch_path = abs_filepath + '.mask.png'
                mmcv.imwrite(object_img_patches[i], img_patch_path)
                mmcv.imwrite(object_masks[i], mask_patch_path)

            # Raw array bytes are written, so the file is opened in binary
            # mode (the original used text mode 'w').
            with open(abs_filepath, 'wb') as f:
                gt_points.tofile(f)

            if (used_classes is None) or names[i] in used_classes:
                db_info = {
                    'name': names[i],
                    'path': rel_filepath,
                    'image_idx': image_idx,
                    'gt_idx': i,
                    'box3d_lidar': gt_boxes_3d[i],
                    'num_points_in_gt': gt_points.shape[0],
                    'difficulty': difficulty[i],
                }
                local_group_id = group_ids[i]
                if local_group_id not in group_dict:
                    group_dict[local_group_id] = group_counter
                    group_counter += 1
                db_info['group_id'] = group_dict[local_group_id]
                if 'score' in annos:
                    db_info['score'] = annos['score'][i]
                if with_mask:
                    db_info.update({'box2d_camera': gt_boxes[i]})
                all_db_infos.setdefault(names[i], []).append(db_info)

    for k, v in all_db_infos.items():
        print(f'load {len(v)} {k} database infos')

    with open(db_info_save_path, 'wb') as f:
        pickle.dump(all_db_infos, f)
class
GTDatabaseCreater
:
"""Given the raw data, generate the ground truth database. This is the
parallel version. For serialized version, please refer to
`create_groundtruth_database`
Args:
dataset_class_name (str): Name of the input dataset.
data_path (str): Path of the data.
info_prefix (str): Prefix of the info file.
info_path (str, optional): Path of the info file.
Default: None.
mask_anno_path (str, optional): Path of the mask_anno.
Default: None.
used_classes (list[str], optional): Classes have been used.
Default: None.
database_save_path (str, optional): Path to save database.
Default: None.
db_info_save_path (str, optional): Path to save db_info.
Default: None.
relative_path (bool, optional): Whether to use relative path.
Default: True.
with_mask (bool, optional): Whether to use mask.
Default: False.
num_worker (int, optional): the number of parallel workers to use.
Default: 8.
"""
def __init__(self,
             dataset_class_name,
             data_path,
             info_prefix,
             info_path=None,
             mask_anno_path=None,
             used_classes=None,
             database_save_path=None,
             db_info_save_path=None,
             relative_path=True,
             add_rgb=False,
             lidar_only=False,
             bev_only=False,
             coors_range=None,
             with_mask=False,
             num_worker=8) -> None:
    """Store the configuration; no dataset or file is touched here."""
    self.dataset_class_name = dataset_class_name
    self.data_path = data_path
    self.info_prefix = info_prefix
    self.info_path = info_path
    self.mask_anno_path = mask_anno_path
    self.used_classes = used_classes
    # The two save paths may stay None here; create() fills in defaults
    # derived from data_path and info_prefix.
    self.database_save_path = database_save_path
    self.db_info_save_path = db_info_save_path
    # NOTE(review): relative_path, add_rgb, lidar_only, bev_only and
    # coors_range are stored but not read by create_single or by the part
    # of create() visible in this chunk.
    self.relative_path = relative_path
    self.add_rgb = add_rgb
    self.lidar_only = lidar_only
    self.bev_only = bev_only
    self.coors_range = coors_range
    self.with_mask = with_mask
    self.num_worker = num_worker
    # Set by create() to the built dataset's pipeline before
    # create_single is called.
    self.pipeline = None
def create_single(self, input_dict):
    """Create the database entries of a single sample.

    Runs ``self.pipeline`` on ``input_dict``, writes the points inside each
    3D box to ``<self.database_save_path>/<sample_idx>_<name>_<i>.bin``
    (relative to the box centre) and collects one record per object.

    Args:
        input_dict (dict): Data info of one sample, already passed through
            the dataset's ``pre_pipeline``.

    Returns:
        dict[str, list[dict]]: Records grouped by class name. ``group_id``
        values are only unique within this sample. The dict is empty when
        ``self.with_mask`` is set and the sample's image has no entry in
        ``self.file2id``.
    """
    group_counter = 0
    single_db_infos = dict()
    example = self.pipeline(input_dict)
    annos = example['ann_info']
    image_idx = example['sample_idx']
    points = example['points'].tensor.numpy()
    gt_boxes_3d = annos['gt_bboxes_3d'].tensor.numpy()
    names = annos['gt_names']
    group_dict = dict()
    if 'group_ids' in annos:
        group_ids = annos['group_ids']
    else:
        group_ids = np.arange(gt_boxes_3d.shape[0], dtype=np.int64)
    difficulty = np.zeros(gt_boxes_3d.shape[0], dtype=np.int32)
    if 'difficulty' in annos:
        difficulty = annos['difficulty']

    num_obj = gt_boxes_3d.shape[0]
    point_indices = box_np_ops.points_in_rbbox(points, gt_boxes_3d)

    if self.with_mask:
        # prepare masks
        gt_boxes = annos['gt_bboxes']
        img_path = osp.split(example['img_info']['filename'])[-1]
        if img_path not in self.file2id.keys():
            print(f'skip image {img_path} for empty mask')
            return single_db_infos
        img_id = self.file2id[img_path]
        kins_annIds = self.coco.getAnnIds(imgIds=img_id)
        kins_raw_info = self.coco.loadAnns(kins_annIds)
        kins_ann_info = _parse_coco_ann_info(kins_raw_info)
        h, w = annos['img_shape'][:2]
        gt_masks = [
            _poly2mask(mask, h, w) for mask in kins_ann_info['masks']
        ]
        # get mask inds based on iou mapping
        bbox_iou = bbox_overlaps(kins_ann_info['bboxes'], gt_boxes)
        mask_inds = bbox_iou.argmax(axis=0)
        valid_inds = (bbox_iou.max(axis=0) > 0.5)

        # mask the image
        object_img_patches, object_masks = crop_image_patch(
            gt_boxes, gt_masks, mask_inds, annos['img'])

    for i in range(num_obj):
        filename = f'{image_idx}_{names[i]}_{i}.bin'
        abs_filepath = osp.join(self.database_save_path, filename)
        rel_filepath = osp.join(f'{self.info_prefix}_gt_database', filename)

        # save point clouds and image patches for each object
        gt_points = points[point_indices[:, i]]
        gt_points[:, :3] -= gt_boxes_3d[i, :3]

        if self.with_mask:
            if object_masks[i].sum() == 0 or not valid_inds[i]:
                # Skip object for empty or invalid mask
                continue
            img_patch_path = abs_filepath + '.png'
            mask_patch_path = abs_filepath + '.mask.png'
            mmcv.imwrite(object_img_patches[i], img_patch_path)
            mmcv.imwrite(object_masks[i], mask_patch_path)

        # Raw array bytes are written, so the file is opened in binary
        # mode (the original used text mode 'w').
        with open(abs_filepath, 'wb') as f:
            gt_points.tofile(f)

        if (self.used_classes is None) or names[i] in self.used_classes:
            db_info = {
                'name': names[i],
                'path': rel_filepath,
                'image_idx': image_idx,
                'gt_idx': i,
                'box3d_lidar': gt_boxes_3d[i],
                'num_points_in_gt': gt_points.shape[0],
                'difficulty': difficulty[i],
            }
            local_group_id = group_ids[i]
            if local_group_id not in group_dict:
                group_dict[local_group_id] = group_counter
                group_counter += 1
            db_info['group_id'] = group_dict[local_group_id]
            if 'score' in annos:
                db_info['score'] = annos['score'][i]
            if self.with_mask:
                db_info.update({'box2d_camera': gt_boxes[i]})
            single_db_infos.setdefault(names[i], []).append(db_info)

    return single_db_infos
def create(self):
    """Build the ground-truth database and write its info pickle.

    A dataset is built from a configuration chosen by
    ``self.dataset_class_name``, every sample is passed through
    ``self.create_single`` in parallel, the per-sample group ids are shifted
    so they are unique across the whole dataset, and the merged result is
    pickled to ``self.db_info_save_path``.
    """
    print(f'Create GT Database of {self.dataset_class_name}')
    class_name = self.dataset_class_name
    dataset_cfg = dict(
        type=class_name, data_root=self.data_path, ann_file=self.info_path)

    if class_name == 'KittiDataset':
        file_client_args = dict(backend='disk')
        kitti_pipeline = [
            dict(
                type='LoadPointsFromFile',
                coord_type='LIDAR',
                load_dim=4,
                use_dim=4,
                file_client_args=file_client_args),
            dict(
                type='LoadAnnotations3D',
                with_bbox_3d=True,
                with_label_3d=True,
                file_client_args=file_client_args),
        ]
        dataset_cfg.update(
            test_mode=False,
            split='training',
            modality=dict(
                use_lidar=True,
                use_depth=False,
                use_lidar_intensity=True,
                use_camera=self.with_mask,
            ),
            pipeline=kitti_pipeline)
    elif class_name == 'NuScenesDataset':
        nus_pipeline = [
            dict(
                type='LoadPointsFromFile',
                coord_type='LIDAR',
                load_dim=5,
                use_dim=5),
            dict(
                type='LoadPointsFromMultiSweeps',
                sweeps_num=10,
                use_dim=[0, 1, 2, 3, 4],
                pad_empty_sweeps=True,
                remove_close=True),
            dict(
                type='LoadAnnotations3D',
                with_bbox_3d=True,
                with_label_3d=True),
        ]
        dataset_cfg.update(use_valid_flag=True, pipeline=nus_pipeline)
    elif class_name == 'WaymoDataset':
        file_client_args = dict(backend='disk')
        waymo_pipeline = [
            dict(
                type='LoadPointsFromFile',
                coord_type='LIDAR',
                load_dim=6,
                use_dim=6,
                file_client_args=file_client_args),
            dict(
                type='LoadAnnotations3D',
                with_bbox_3d=True,
                with_label_3d=True,
                file_client_args=file_client_args),
        ]
        dataset_cfg.update(
            test_mode=False,
            split='training',
            modality=dict(
                use_lidar=True,
                use_depth=False,
                use_lidar_intensity=True,
                use_camera=False,
            ),
            pipeline=waymo_pipeline)

    dataset = build_dataset(dataset_cfg)
    self.pipeline = dataset.pipeline

    # Fall back to locations derived from the data root and the info prefix.
    if self.database_save_path is None:
        self.database_save_path = osp.join(
            self.data_path, f'{self.info_prefix}_gt_database')
    if self.db_info_save_path is None:
        self.db_info_save_path = osp.join(
            self.data_path, f'{self.info_prefix}_dbinfos_train.pkl')
    mmcv.mkdir_or_exist(self.database_save_path)

    if self.with_mask:
        # Map each image file name of the mask annotation to its COCO id.
        self.coco = COCO(osp.join(self.data_path, self.mask_anno_path))
        self.file2id = {}
        for img_id in self.coco.getImgIds():
            img_info = self.coco.loadImgs([img_id])[0]
            self.file2id[img_info['file_name']] = img_id

    num_samples = len(dataset)

    def iter_inputs():
        # Lazily yield the pre-processed input dict of every sample.
        for sample_idx in range(num_samples):
            input_dict = dataset.get_data_info(sample_idx)
            dataset.pre_pipeline(input_dict)
            yield input_dict

    multi_db_infos = mmcv.track_parallel_progress(
        self.create_single, (iter_inputs(), num_samples), self.num_worker)

    print('Make global unique group id')
    offset = 0
    all_db_infos = {}
    for per_sample in track_iter_progress(multi_db_infos):
        highest = -1
        for cls_key, cls_infos in per_sample.items():
            for entry in cls_infos:
                # Track the largest local id before shifting it.
                highest = max(highest, entry['group_id'])
                entry['group_id'] += offset
            all_db_infos.setdefault(cls_key, []).extend(cls_infos)
        offset += highest + 1

    for k, v in all_db_infos.items():
        print(f'load {len(v)} {k} database infos')

    with open(self.db_info_save_path, 'wb') as f:
        pickle.dump(all_db_infos, f)
docker-hub/FlashOCC/Flashocc/tools/data_converter/indoor_converter.py
0 → 100644
View file @
d2b71343
# Copyright (c) OpenMMLab. All rights reserved.
import
os
import
mmcv
import
numpy
as
np
from
tools.data_converter.s3dis_data_utils
import
S3DISData
,
S3DISSegData
from
tools.data_converter.scannet_data_utils
import
ScanNetData
,
ScanNetSegData
from
tools.data_converter.sunrgbd_data_utils
import
SUNRGBDData
def create_indoor_info_file(data_path,
                            pkl_prefix='sunrgbd',
                            save_path=None,
                            workers=4,
                            **kwargs):
    """Generate and save the info pickles of an indoor dataset.

    For SUN RGB-D and ScanNet, train/val (and ScanNet test) detection infos
    are dumped; for ScanNet and S3DIS the segmentation helper data is
    generated as well.

    Args:
        data_path (str): Root directory of the raw data. Must exist.
        pkl_prefix (str, optional): One of 'sunrgbd', 'scannet' or 's3dis'.
            Also used as the prefix of the written files.
            Default: 'sunrgbd'.
        save_path (str, optional): Output directory. Must exist. Falls back
            to ``data_path`` when None. Default: None.
        workers (int, optional): Forwarded as ``num_workers`` to
            ``get_infos``. Default: 4.
        kwargs (dict): Optional ``use_v1`` (SUN RGB-D) and ``num_points``.
    """
    assert os.path.exists(data_path)
    assert pkl_prefix in ['sunrgbd', 'scannet', 's3dis'], \
        f'unsupported indoor dataset {pkl_prefix}'
    if save_path is None:
        save_path = data_path
    assert os.path.exists(save_path)

    def inverse_log_weight(x):
        # Same weighting as the pointnet2 ScanNet reference implementation.
        return 1.0 / np.log(1.2 + x)

    # Detection infos (also consumed by the segmentation step for ScanNet).
    if pkl_prefix in ['sunrgbd', 'scannet']:
        train_filename = os.path.join(save_path,
                                      f'{pkl_prefix}_infos_train.pkl')
        val_filename = os.path.join(save_path, f'{pkl_prefix}_infos_val.pkl')
        if pkl_prefix == 'sunrgbd':
            # SUN RGB-D only has a train/val split.
            num_points = kwargs.get('num_points', -1)
            use_v1 = kwargs.get('use_v1', False)
            train_dataset = SUNRGBDData(
                root_path=data_path,
                split='train',
                use_v1=use_v1,
                num_points=num_points)
            val_dataset = SUNRGBDData(
                root_path=data_path,
                split='val',
                use_v1=use_v1,
                num_points=num_points)
        else:
            # ScanNet additionally has a test split.
            train_dataset = ScanNetData(root_path=data_path, split='train')
            val_dataset = ScanNetData(root_path=data_path, split='val')
            test_dataset = ScanNetData(root_path=data_path, split='test')
            test_filename = os.path.join(save_path,
                                         f'{pkl_prefix}_infos_test.pkl')

        infos_train = train_dataset.get_infos(
            num_workers=workers, has_label=True)
        mmcv.dump(infos_train, train_filename, 'pkl')
        print(f'{pkl_prefix} info train file is saved to {train_filename}')

        infos_val = val_dataset.get_infos(num_workers=workers, has_label=True)
        mmcv.dump(infos_val, val_filename, 'pkl')
        print(f'{pkl_prefix} info val file is saved to {val_filename}')

    if pkl_prefix == 'scannet':
        infos_test = test_dataset.get_infos(
            num_workers=workers, has_label=False)
        mmcv.dump(infos_test, test_filename, 'pkl')
        print(f'{pkl_prefix} info test file is saved to {test_filename}')

    # Segmentation helper data: re-sampled scene indexes and label weights.
    if pkl_prefix == 'scannet':
        num_points = kwargs.get('num_points', 8192)
        train_dataset = ScanNetSegData(
            data_root=data_path,
            ann_file=train_filename,
            split='train',
            num_points=num_points,
            label_weight_func=inverse_log_weight)
        # TODO: do we need to generate on val set?
        val_dataset = ScanNetSegData(
            data_root=data_path,
            ann_file=val_filename,
            split='val',
            num_points=num_points,
            label_weight_func=inverse_log_weight)
        # The test split needs no segmentation infos.
        train_dataset.get_seg_infos()
        val_dataset.get_seg_infos()
    elif pkl_prefix == 's3dis':
        # S3DIS has no fixed split, so one info file is written per area.
        for area_id in [1, 2, 3, 4, 5, 6]:
            split = f'Area_{area_id}'
            dataset = S3DISData(root_path=data_path, split=split)
            info = dataset.get_infos(num_workers=workers, has_label=True)
            filename = os.path.join(save_path,
                                    f'{pkl_prefix}_infos_{split}.pkl')
            mmcv.dump(info, filename, 'pkl')
            print(f'{pkl_prefix} info {split} file is saved to {filename}')
            num_points = kwargs.get('num_points', 4096)
            seg_dataset = S3DISSegData(
                data_root=data_path,
                ann_file=filename,
                split=split,
                num_points=num_points,
                label_weight_func=inverse_log_weight)
            seg_dataset.get_seg_infos()
docker-hub/FlashOCC/Flashocc/tools/data_converter/kitti_converter.py
0 → 100644
View file @
d2b71343
# Copyright (c) OpenMMLab. All rights reserved.
from
collections
import
OrderedDict
from
pathlib
import
Path
import
mmcv
import
numpy
as
np
from
nuscenes.utils.geometry_utils
import
view_points
from
mmdet3d.core.bbox
import
box_np_ops
,
points_cam2img
from
.kitti_data_utils
import
WaymoInfoGatherer
,
get_kitti_image_info
from
.nuscenes_converter
import
post_process_coords
# KITTI classes kept in the exported 2D annotations. A class's position in
# this tuple is used as its category id (looked up via ``.index``).
kitti_categories = ('Pedestrian', 'Cyclist', 'Car')
def convert_to_kitti_info_version2(info):
    """Upgrade a flat (v1) KITTI info dict to the nested v2 layout in place.

    Nothing happens when ``info`` already has all of the 'image', 'calib'
    and 'point_cloud' entries. Otherwise these three entries are rebuilt
    from the flat v1 keys.

    Args:
        info (dict): Info of the input kitti data. After conversion it
            contains:

            - image (dict): image info
            - calib (dict): calibration info
            - point_cloud (dict): point cloud info
    """
    v2_sections = ('image', 'calib', 'point_cloud')
    if all(section in info for section in v2_sections):
        return

    image_section = {
        'image_shape': info['img_shape'],
        'image_idx': info['image_idx'],
        'image_path': info['img_path'],
    }
    info['image'] = image_section

    calib_section = {
        'R0_rect': info['calib/R0_rect'],
        'Tr_velo_to_cam': info['calib/Tr_velo_to_cam'],
        'P2': info['calib/P2'],
    }
    info['calib'] = calib_section

    info['point_cloud'] = {'velodyne_path': info['velodyne_path']}
def _read_imageset_file(path):
    """Read the sample indices listed in an ImageSets split file.

    Each non-blank line is parsed as one integer index. Blank or
    whitespace-only lines (for example a trailing empty line) are skipped
    instead of making ``int()`` fail.

    Args:
        path (str): Path of the split file, one index per line.

    Returns:
        list[int]: Indices in file order.

    Raises:
        ValueError: If a non-blank line is not a valid integer.
    """
    with open(path, 'r') as f:
        return [int(line) for line in f if line.strip()]
class _NumPointsInGTCalculater:
    """Count the points inside every ground-truth box, in parallel.

    This is the multi-worker counterpart of `_calculate_num_points_in_gt`.

    Args:
        data_path (str): Path of the data.
        relative_path (bool): Whether the velodyne paths stored in the infos
            are relative to ``data_path``.
        remove_outside (bool, optional): Whether to remove points which are
            outside of image. Default: True.
        num_features (int, optional): Number of features per point.
            Default: 4.
        num_worker (int, optional): the number of parallel workers to use.
            Default: 8.
    """

    def __init__(self,
                 data_path,
                 relative_path,
                 remove_outside=True,
                 num_features=4,
                 num_worker=8) -> None:
        self.data_path = data_path
        self.relative_path = relative_path
        self.remove_outside = remove_outside
        self.num_features = num_features
        self.num_worker = num_worker

    def calculate_single(self, info):
        """Fill ``info['annos']['num_points_in_gt']`` for one sample.

        Args:
            info (dict): Sample info with 'point_cloud', 'image', 'calib'
                and 'annos' entries.

        Returns:
            dict: The same ``info`` object, updated in place.
        """
        calib = info['calib']
        velodyne_path = info['point_cloud']['velodyne_path']
        image_info = info['image']
        if self.relative_path:
            velodyne_path = str(Path(self.data_path) / velodyne_path)

        raw = np.fromfile(velodyne_path, dtype=np.float32, count=-1)
        points_v = raw.reshape([-1, self.num_features])

        rect = calib['R0_rect']
        Trv2c = calib['Tr_velo_to_cam']
        P2 = calib['P2']
        if self.remove_outside:
            points_v = box_np_ops.remove_outside_points(
                points_v, rect, Trv2c, P2, image_info['image_shape'])

        annos = info['annos']
        # Only the leading ``num_obj`` entries are used as boxes below.
        num_obj = sum(1 for label in annos['name'] if label != 'DontCare')
        loc = annos['location'][:num_obj]
        dims = annos['dimensions'][:num_obj]
        rots = annos['rotation_y'][:num_obj]
        boxes_camera = np.concatenate([loc, dims, rots[..., np.newaxis]],
                                      axis=1)
        boxes_lidar = box_np_ops.box_camera_to_lidar(boxes_camera, rect,
                                                     Trv2c)
        inside = box_np_ops.points_in_rbbox(points_v[:, :3], boxes_lidar)
        counts = inside.sum(0)

        # Remaining entries get -1 as their point count.
        num_ignored = len(annos['dimensions']) - num_obj
        padded = np.concatenate([counts, np.full([num_ignored], -1.0)])
        annos['num_points_in_gt'] = padded.astype(np.int32)
        return info

    def calculate(self, infos):
        """Run `calculate_single` over ``infos`` and store results back.

        Args:
            infos (list[dict]): Sample infos; each slot is overwritten with
                the processed info returned by the workers.
        """
        processed = mmcv.track_parallel_progress(self.calculate_single, infos,
                                                 self.num_worker)
        for position, updated in enumerate(processed):
            infos[position] = updated
def _calculate_num_points_in_gt(data_path,
                                infos,
                                relative_path,
                                remove_outside=True,
                                num_features=4):
    """Fill ``annos['num_points_in_gt']`` of every info, one after another.

    Args:
        data_path (str): Path of the data.
        infos (list[dict]): Sample infos; their 'annos' are updated in place.
        relative_path (bool): Whether the stored velodyne paths are relative
            to ``data_path``.
        remove_outside (bool, optional): Whether to remove points which are
            outside of image before counting. Default: True.
        num_features (int, optional): Number of features per point.
            Default: 4.
    """
    for info in mmcv.track_iter_progress(infos):
        calib = info['calib']
        image_info = info['image']
        velodyne_path = info['point_cloud']['velodyne_path']
        if relative_path:
            velodyne_path = str(Path(data_path) / velodyne_path)

        raw = np.fromfile(velodyne_path, dtype=np.float32, count=-1)
        points_v = raw.reshape([-1, num_features])

        rect = calib['R0_rect']
        Trv2c = calib['Tr_velo_to_cam']
        P2 = calib['P2']
        if remove_outside:
            points_v = box_np_ops.remove_outside_points(
                points_v, rect, Trv2c, P2, image_info['image_shape'])

        annos = info['annos']
        # Only the leading ``num_obj`` entries are used as boxes below.
        num_obj = sum(1 for label in annos['name'] if label != 'DontCare')
        loc = annos['location'][:num_obj]
        dims = annos['dimensions'][:num_obj]
        rots = annos['rotation_y'][:num_obj]
        boxes_camera = np.concatenate([loc, dims, rots[..., np.newaxis]],
                                      axis=1)
        boxes_lidar = box_np_ops.box_camera_to_lidar(boxes_camera, rect,
                                                     Trv2c)
        inside = box_np_ops.points_in_rbbox(points_v[:, :3], boxes_lidar)
        counts = inside.sum(0)

        # Remaining entries get -1 as their point count.
        num_ignored = len(annos['dimensions']) - num_obj
        padded = np.concatenate([counts, np.full([num_ignored], -1.0)])
        annos['num_points_in_gt'] = padded.astype(np.int32)
def create_kitti_info_file(data_path,
                           pkl_prefix='kitti',
                           with_plane=False,
                           save_path=None,
                           relative_path=True):
    """Create the train/val/trainval/test info pickles of KITTI.

    The split indices are read from ``<data_path>/ImageSets``. Point counts
    per ground-truth box are computed for the train and val splits.

    Args:
        data_path (str): Path of the data root.
        pkl_prefix (str, optional): Prefix of the info file to be generated.
            Default: 'kitti'.
        with_plane (bool, optional): Whether to use plane information.
            Default: False.
        save_path (str, optional): Directory to save the info files. Falls
            back to ``data_path`` when None. Default: None.
        relative_path (bool, optional): Whether to use relative path.
            Default: True.
    """
    imageset_folder = Path(data_path) / 'ImageSets'
    train_img_ids, val_img_ids, test_img_ids = (
        _read_imageset_file(str(imageset_folder / split_file))
        for split_file in ('train.txt', 'val.txt', 'test.txt'))

    print('Generate info. this may take several minutes.')
    save_path = Path(data_path) if save_path is None else Path(save_path)

    # Options shared by the two labelled splits.
    labelled_kwargs = dict(
        training=True,
        velodyne=True,
        calib=True,
        with_plane=with_plane,
        relative_path=relative_path)

    kitti_infos_train = get_kitti_image_info(
        data_path, image_ids=train_img_ids, **labelled_kwargs)
    _calculate_num_points_in_gt(data_path, kitti_infos_train, relative_path)
    filename = save_path / f'{pkl_prefix}_infos_train.pkl'
    print(f'Kitti info train file is saved to {filename}')
    mmcv.dump(kitti_infos_train, filename)

    kitti_infos_val = get_kitti_image_info(
        data_path, image_ids=val_img_ids, **labelled_kwargs)
    _calculate_num_points_in_gt(data_path, kitti_infos_val, relative_path)
    filename = save_path / f'{pkl_prefix}_infos_val.pkl'
    print(f'Kitti info val file is saved to {filename}')
    mmcv.dump(kitti_infos_val, filename)

    filename = save_path / f'{pkl_prefix}_infos_trainval.pkl'
    print(f'Kitti info trainval file is saved to {filename}')
    mmcv.dump(kitti_infos_train + kitti_infos_val, filename)

    # The test split carries no labels, so no point counts are computed.
    kitti_infos_test = get_kitti_image_info(
        data_path,
        training=False,
        label_info=False,
        velodyne=True,
        calib=True,
        with_plane=False,
        image_ids=test_img_ids,
        relative_path=relative_path)
    filename = save_path / f'{pkl_prefix}_infos_test.pkl'
    print(f'Kitti info test file is saved to {filename}')
    mmcv.dump(kitti_infos_test, filename)
def create_waymo_info_file(data_path,
                           pkl_prefix='waymo',
                           save_path=None,
                           relative_path=True,
                           max_sweeps=5,
                           workers=8):
    """Create the train/val/trainval/test info pickles of Waymo.

    The split indices are read from ``<data_path>/ImageSets``. Point counts
    per ground-truth box are computed for the train and val splits.

    Args:
        data_path (str): Path of the data root.
        pkl_prefix (str, optional): Prefix of the info file to be generated.
            Default: 'waymo'.
        save_path (str, optional): Directory to save the info files. Falls
            back to ``data_path`` when None. Default: None.
        relative_path (bool, optional): Whether to use relative path.
            Default: True.
        max_sweeps (int, optional): Max sweeps before the detection frame
            to be used. Default: 5.
        workers (int, optional): Passed as ``num_worker`` to the info
            gatherers and the point counter. Default: 8.
    """
    imageset_folder = Path(data_path) / 'ImageSets'
    train_img_ids, val_img_ids, test_img_ids = (
        _read_imageset_file(str(imageset_folder / split_file))
        for split_file in ('train.txt', 'val.txt', 'test.txt'))

    print('Generate info. this may take several minutes.')
    save_path = Path(data_path) if save_path is None else Path(save_path)

    # Options shared by the labelled and unlabelled gatherers.
    gather_kwargs = dict(
        velodyne=True,
        calib=True,
        pose=True,
        relative_path=relative_path,
        max_sweeps=max_sweeps,
        num_worker=workers)
    waymo_infos_gatherer_trainval = WaymoInfoGatherer(
        data_path, training=True, **gather_kwargs)
    waymo_infos_gatherer_test = WaymoInfoGatherer(
        data_path, training=False, label_info=False, **gather_kwargs)
    num_points_in_gt_calculater = _NumPointsInGTCalculater(
        data_path,
        relative_path,
        num_features=6,
        remove_outside=False,
        num_worker=workers)

    waymo_infos_train = waymo_infos_gatherer_trainval.gather(train_img_ids)
    num_points_in_gt_calculater.calculate(waymo_infos_train)
    filename = save_path / f'{pkl_prefix}_infos_train.pkl'
    print(f'Waymo info train file is saved to {filename}')
    mmcv.dump(waymo_infos_train, filename)

    waymo_infos_val = waymo_infos_gatherer_trainval.gather(val_img_ids)
    num_points_in_gt_calculater.calculate(waymo_infos_val)
    filename = save_path / f'{pkl_prefix}_infos_val.pkl'
    print(f'Waymo info val file is saved to {filename}')
    mmcv.dump(waymo_infos_val, filename)

    filename = save_path / f'{pkl_prefix}_infos_trainval.pkl'
    print(f'Waymo info trainval file is saved to {filename}')
    mmcv.dump(waymo_infos_train + waymo_infos_val, filename)

    waymo_infos_test = waymo_infos_gatherer_test.gather(test_img_ids)
    filename = save_path / f'{pkl_prefix}_infos_test.pkl'
    print(f'Waymo info test file is saved to {filename}')
    mmcv.dump(waymo_infos_test, filename)
def _create_reduced_point_cloud(data_path,
                                info_path,
                                save_path=None,
                                back=False,
                                num_features=4,
                                front_camera_id=2):
    """Create reduced point clouds for given info.

    For every sample in the info file the point cloud is loaded, points
    outside the chosen camera image are removed, and the result is written
    as raw float32 data.

    Args:
        data_path (str): Path of original data.
        info_path (str): Path of data info.
        save_path (str, optional): Directory to save reduced point cloud
            data. When None, a sibling directory of the velodyne folder
            named ``<velodyne folder>_reduced`` is used (and created if
            missing). Default: None.
        back (bool, optional): Whether to flip the points to back. The
            output file name then gets a ``_back`` suffix.
            Default: False.
        num_features (int, optional): Number of point features. Default: 4.
        front_camera_id (int, optional): The referenced/front camera ID.
            Default: 2.
    """
    kitti_infos = mmcv.load(info_path)

    for info in mmcv.track_iter_progress(kitti_infos):
        pc_info = info['point_cloud']
        image_info = info['image']
        calib = info['calib']

        v_path = Path(data_path) / pc_info['velodyne_path']
        points_v = np.fromfile(
            str(v_path), dtype=np.float32,
            count=-1).reshape([-1, num_features])
        rect = calib['R0_rect']
        if front_camera_id == 2:
            P2 = calib['P2']
        else:
            P2 = calib[f'P{str(front_camera_id)}']
        Trv2c = calib['Tr_velo_to_cam']

        if back:
            # Negate the first coordinate so the filter below is applied to
            # the flipped cloud.
            points_v[:, 0] = -points_v[:, 0]
        points_v = box_np_ops.remove_outside_points(points_v, rect, Trv2c, P2,
                                                    image_info['image_shape'])

        if save_path is None:
            save_dir = v_path.parent.parent / (v_path.parent.stem + '_reduced')
            # exist_ok avoids the check-then-create race of the old code.
            save_dir.mkdir(exist_ok=True)
            out_file = save_dir / v_path.name
        else:
            out_file = Path(save_path) / v_path.name

        # Work on a str from here on: ``Path += str`` raises TypeError,
        # which used to break ``back=True`` when ``save_path`` was None.
        save_filename = str(out_file)
        if back:
            save_filename += '_back'

        # The dump is raw binary data, so open the file in binary mode.
        with open(save_filename, 'wb') as f:
            points_v.tofile(f)
def create_reduced_point_cloud(data_path,
                               pkl_prefix,
                               train_info_path=None,
                               val_info_path=None,
                               test_info_path=None,
                               save_path=None,
                               with_back=False):
    """Create reduced point clouds for training/validation/testing.

    Args:
        data_path (str): Path of original data.
        pkl_prefix (str): Prefix of info files, used to derive any info path
            that is not given.
        train_info_path (str, optional): Path of training set info.
            Default: None.
        val_info_path (str, optional): Path of validation set info.
            Default: None.
        test_info_path (str, optional): Path of test set info.
            Default: None.
        save_path (str, optional): Path to save reduced point cloud data.
            Default: None.
        with_back (bool, optional): Whether to additionally create the
            flipped ("back") clouds for all three splits.
            Default: False.
    """
    root = Path(data_path)
    if train_info_path is None:
        train_info_path = root / f'{pkl_prefix}_infos_train.pkl'
    if val_info_path is None:
        val_info_path = root / f'{pkl_prefix}_infos_val.pkl'
    if test_info_path is None:
        test_info_path = root / f'{pkl_prefix}_infos_test.pkl'

    jobs = (
        ('create reduced point cloud for training set', train_info_path),
        ('create reduced point cloud for validation set', val_info_path),
        ('create reduced point cloud for testing set', test_info_path),
    )
    for message, split_info_path in jobs:
        print(message)
        _create_reduced_point_cloud(data_path, split_info_path, save_path)

    if not with_back:
        return
    for _, split_info_path in jobs:
        _create_reduced_point_cloud(
            data_path, split_info_path, save_path, back=True)
def export_2d_annotation(root_path, info_path, mono3d=True):
    """Export 2d annotation from the info file and raw data.

    The result is a COCO-style dict written next to the info file as
    ``<info_path without its last 4 characters>[_mono3d].coco.json``.

    Args:
        root_path (str): Root path of the raw data.
        info_path (str): Path of the info file.
        mono3d (bool, optional): Whether to export mono3d annotation.
            Default: True.
    """
    from os import path as osp

    kitti_infos = mmcv.load(info_path)
    cat2Ids = []
    for cat_name in kitti_categories:
        cat2Ids.append(
            dict(id=kitti_categories.index(cat_name), name=cat_name))

    coco_2d_dict = dict(annotations=[], images=[], categories=cat2Ids)
    next_ann_id = 0
    for info in mmcv.track_iter_progress(kitti_infos):
        coco_infos = get_2d_boxes(info, occluded=[0, 1, 2, 3], mono3d=mono3d)

        image_meta = info['image']
        calib = info['calib']
        # The image is read only to obtain its size.
        image = mmcv.imread(osp.join(root_path, image_meta['image_path']))
        height, width, _ = image.shape
        coco_2d_dict['images'].append(
            dict(
                file_name=image_meta['image_path'],
                id=image_meta['image_idx'],
                Tri2v=calib['Tr_imu_to_velo'],
                Trv2c=calib['Tr_velo_to_cam'],
                rect=calib['R0_rect'],
                cam_intrinsic=calib['P2'],
                width=width,
                height=height))

        for coco_info in coco_infos:
            if coco_info is None:
                continue
            # add an empty key for coco format
            coco_info['segmentation'] = []
            coco_info['id'] = next_ann_id
            next_ann_id += 1
            coco_2d_dict['annotations'].append(coco_info)

    stem = info_path[:-4]
    json_prefix = f'{stem}_mono3d' if mono3d else f'{stem}'
    mmcv.dump(coco_2d_dict, f'{json_prefix}.coco.json')
def get_2d_boxes(info, occluded, mono3d=True):
    """Get the 2D annotation records for a given info.

    Note that ``info['annos']`` is filtered in place to the requested
    occlusion states.

    Args:
        info: Information of the given sample data.
        occluded: Integer (0, 1, 2, 3) indicating occlusion state:
            0 = fully visible, 1 = partly occluded, 2 = largely occluded,
            3 = unknown, -1 = DontCare
        mono3d (bool): Whether to get boxes with mono3d annotation.

    Return:
        list[dict]: List of 2D annotation record that belongs to the input
            sample. Entries may be None when ``generate_record`` rejects an
            annotation.
    """
    camera_intrinsic = info['calib']['P2']
    records = []

    # Infos without annotations (test split) yield no records.
    if 'annos' not in info:
        return records

    # Keep only annotations whose occlusion state was requested.
    ann_dicts = info['annos']
    keep = [(state in occluded) for state in ann_dicts['occluded']]
    for field, values in ann_dicts.items():
        ann_dicts[field] = values[keep]

    # Turn the dict of arrays into one dict per annotation.
    ann_recs = [{field: values[row]
                 for field, values in ann_dicts.items()}
                for row in range(len(ann_dicts['occluded']))]

    # Shift from origin [0.5, 1.0, 0.5] to origin [0.5, 0.5, 0.5].
    center_shift = np.array([0.5, 0.5, 0.5]) - np.array([0.5, 1.0, 0.5])

    for ann_idx, ann_rec in enumerate(ann_recs):
        image_idx = info['image']['image_idx']
        ann_rec['sample_annotation_token'] = f"{image_idx}.{ann_idx}"
        ann_rec['sample_data_token'] = image_idx
        sample_data_token = image_idx

        dim = ann_rec['dimensions'][np.newaxis, :]
        rot = ann_rec['rotation_y'][np.newaxis, np.newaxis]
        loc = ann_rec['location'][np.newaxis, :] + dim * center_shift

        calib = info['calib']
        offset = (calib['P2'][0, 3] - calib['P0'][0, 3]) / calib['P2'][0, 0]
        loc_3d = np.copy(loc)
        loc_3d[0, 0] += offset
        gt_bbox_3d = np.concatenate([loc, dim, rot],
                                    axis=1).astype(np.float32)

        # Drop corners that are not in front of the calibrated sensor.
        corners_3d = box_np_ops.center_to_corner_box3d(
            gt_bbox_3d[:, :3],
            gt_bbox_3d[:, 3:6],
            gt_bbox_3d[:, 6], [0.5, 0.5, 0.5],
            axis=1)
        corners_3d = corners_3d[0].T  # (1, 8, 3) -> (3, 8)
        in_front = np.argwhere(corners_3d[2, :] > 0).flatten()
        corners_3d = corners_3d[:, in_front]

        # Project the remaining corners to the image plane.
        projected = view_points(corners_3d, camera_intrinsic, True)
        corner_coords = projected.T[:, :2].tolist()

        # Skip if the convex hull of the re-projected corners
        # does not intersect the image canvas.
        final_coords = post_process_coords(corner_coords)
        if final_coords is None:
            continue
        min_x, min_y, max_x, max_y = final_coords

        # Generate dictionary record to be included in the .json file.
        repro_rec = generate_record(ann_rec, min_x, min_y, max_x, max_y,
                                    sample_data_token,
                                    info['image']['image_path'])

        # If mono3d=True, add 3D annotations in camera coordinates
        if mono3d and (repro_rec is not None):
            cam3d = np.concatenate([loc_3d, dim, rot], axis=1)
            repro_rec['bbox_cam3d'] = cam3d.astype(
                np.float32).squeeze().tolist()
            repro_rec['velo_cam3d'] = -1  # no velocity in KITTI

            center3d = np.array(loc).reshape([1, 3])
            center2d = points_cam2img(
                center3d, camera_intrinsic, with_depth=True)
            repro_rec['center2d'] = center2d.squeeze().tolist()
            # normalized center2D + depth
            # samples with depth < 0 will be removed
            if repro_rec['center2d'][2] <= 0:
                continue

            repro_rec['attribute_name'] = -1  # no attribute in KITTI
            repro_rec['attribute_id'] = -1

        records.append(repro_rec)

    return records
def generate_record(ann_rec, x1, y1, x2, y2, sample_data_token, filename):
    """Generate one 2D annotation record given various information on top of
    the 2D bounding box coordinates.

    Args:
        ann_rec (dict): Original 3d annotation record.
        x1 (float): Minimum value of the x coordinate.
        y1 (float): Minimum value of the y coordinate.
        x2 (float): Maximum value of the x coordinate.
        y2 (float): Maximum value of the y coordinate.
        sample_data_token (str): Sample data token.
        filename (str):The corresponding image file where the annotation
            is present.

    Returns:
        dict | None: A sample 2D annotation record, or None when the
            annotation's name is not in ``kitti_categories``.

            - file_name (str): file name
            - image_id (str): sample data token
            - area (float): 2d box area
            - category_name (str): category name
            - category_id (int): category id
            - bbox (list[float]): left x, top y, x_size, y_size of 2d box
            - iscrowd (int): whether the area is crowd
    """
    key_mapping = {
        'name': 'category_name',
        'num_points_in_gt': 'num_lidar_pts',
        'sample_annotation_token': 'sample_annotation_token',
        'sample_data_token': 'sample_data_token',
    }

    # Intermediate record with the annotation fields under their new names.
    renamed = OrderedDict(sample_data_token=sample_data_token)
    renamed.update((key_mapping[field], value)
                   for field, value in ann_rec.items()
                   if field in key_mapping)
    renamed['bbox_corners'] = [x1, y1, x2, y2]
    renamed['filename'] = filename

    area = (y2 - y1) * (x2 - x1)

    cat_name = renamed['category_name']
    if cat_name not in kitti_categories:
        return None

    return dict(
        file_name=filename,
        image_id=sample_data_token,
        area=area,
        category_name=cat_name,
        category_id=kitti_categories.index(cat_name),
        bbox=[x1, y1, x2 - x1, y2 - y1],
        iscrowd=0)
docker-hub/FlashOCC/Flashocc/tools/data_converter/kitti_data_utils.py
0 → 100644
View file @
d2b71343
# Copyright (c) OpenMMLab. All rights reserved.
from
collections
import
OrderedDict
from
concurrent
import
futures
as
futures
from
os
import
path
as
osp
from
pathlib
import
Path
import
mmcv
import
numpy
as
np
from
PIL
import
Image
from
skimage
import
io
def get_image_index_str(img_idx, use_prefix_id=False):
    """Format a sample index as a zero-padded string.

    Args:
        img_idx (int): Sample index.
        use_prefix_id (bool, optional): Pad to 7 digits instead of 6.
            Default: False.

    Returns:
        str: The zero-padded index.
    """
    width_spec = '07d' if use_prefix_id else '06d'
    return format(img_idx, width_spec)
def get_kitti_info_path(idx,
                        prefix,
                        info_type='image_2',
                        file_tail='.png',
                        training=True,
                        relative_path=True,
                        exist_check=True,
                        use_prefix_id=False):
    """Build the path of one per-sample file in a KITTI-style layout.

    The path has the form ``<training|testing>/<info_type>/<index><tail>``.

    Args:
        idx (int): Sample index.
        prefix (str): Dataset root directory.
        info_type (str, optional): Sub-folder name. Default: 'image_2'.
        file_tail (str, optional): File extension. Default: '.png'.
        training (bool, optional): Use the 'training' folder, otherwise
            'testing'. Default: True.
        relative_path (bool, optional): Return the path relative to
            ``prefix`` instead of joined with it. Default: True.
        exist_check (bool, optional): Raise if the file is missing.
            Default: True.
        use_prefix_id (bool, optional): Use the 7-digit index format.
            Default: False.

    Returns:
        str: The requested path.

    Raises:
        ValueError: If ``exist_check`` is set and the file does not exist.
    """
    file_name = get_image_index_str(idx, use_prefix_id) + file_tail
    root = Path(prefix)
    split_dir = 'training' if training else 'testing'
    file_path = Path(split_dir) / info_type / file_name
    full_path = root / file_path
    if exist_check and not full_path.exists():
        raise ValueError('file not exist: {}'.format(file_path))
    return str(file_path) if relative_path else str(full_path)
def get_image_path(idx,
                   prefix,
                   training=True,
                   relative_path=True,
                   exist_check=True,
                   info_type='image_2',
                   use_prefix_id=False):
    """Return the '.png' image path of sample ``idx``.

    Thin wrapper around `get_kitti_info_path`; ``info_type`` selects the
    image sub-folder.
    """
    return get_kitti_info_path(
        idx,
        prefix,
        info_type=info_type,
        file_tail='.png',
        training=training,
        relative_path=relative_path,
        exist_check=exist_check,
        use_prefix_id=use_prefix_id)
def get_label_path(idx,
                   prefix,
                   training=True,
                   relative_path=True,
                   exist_check=True,
                   info_type='label_2',
                   use_prefix_id=False):
    """Return the '.txt' label path of sample ``idx``.

    Thin wrapper around `get_kitti_info_path`; ``info_type`` selects the
    label sub-folder.
    """
    return get_kitti_info_path(
        idx,
        prefix,
        info_type=info_type,
        file_tail='.txt',
        training=training,
        relative_path=relative_path,
        exist_check=exist_check,
        use_prefix_id=use_prefix_id)
def get_plane_path(idx,
                   prefix,
                   training=True,
                   relative_path=True,
                   exist_check=True,
                   info_type='planes',
                   use_prefix_id=False):
    """Return the '.txt' plane path of sample ``idx``.

    Thin wrapper around `get_kitti_info_path`; ``info_type`` selects the
    plane sub-folder.
    """
    return get_kitti_info_path(
        idx,
        prefix,
        info_type=info_type,
        file_tail='.txt',
        training=training,
        relative_path=relative_path,
        exist_check=exist_check,
        use_prefix_id=use_prefix_id)
def get_velodyne_path(idx,
                      prefix,
                      training=True,
                      relative_path=True,
                      exist_check=True,
                      use_prefix_id=False):
    """Return the '.bin' path of sample ``idx`` in the 'velodyne' folder.

    Thin wrapper around `get_kitti_info_path`.
    """
    return get_kitti_info_path(
        idx,
        prefix,
        info_type='velodyne',
        file_tail='.bin',
        training=training,
        relative_path=relative_path,
        exist_check=exist_check,
        use_prefix_id=use_prefix_id)
def get_calib_path(idx,
                   prefix,
                   training=True,
                   relative_path=True,
                   exist_check=True,
                   use_prefix_id=False):
    """Return the '.txt' path of sample ``idx`` in the 'calib' folder.

    Thin wrapper around `get_kitti_info_path`.
    """
    return get_kitti_info_path(
        idx,
        prefix,
        info_type='calib',
        file_tail='.txt',
        training=training,
        relative_path=relative_path,
        exist_check=exist_check,
        use_prefix_id=use_prefix_id)
def get_pose_path(idx,
                  prefix,
                  training=True,
                  relative_path=True,
                  exist_check=True,
                  use_prefix_id=False):
    """Return the '.txt' path of sample ``idx`` in the 'pose' folder.

    Thin wrapper around `get_kitti_info_path`.
    """
    return get_kitti_info_path(
        idx,
        prefix,
        info_type='pose',
        file_tail='.txt',
        training=training,
        relative_path=relative_path,
        exist_check=exist_check,
        use_prefix_id=use_prefix_id)
def get_timestamp_path(idx,
                       prefix,
                       training=True,
                       relative_path=True,
                       exist_check=True,
                       use_prefix_id=False):
    """Return the '.txt' path of sample ``idx`` in the 'timestamp' folder.

    Thin wrapper around `get_kitti_info_path`.
    """
    return get_kitti_info_path(
        idx,
        prefix,
        info_type='timestamp',
        file_tail='.txt',
        training=training,
        relative_path=relative_path,
        exist_check=exist_check,
        use_prefix_id=use_prefix_id)
def get_label_anno(label_path):
    """Parse a KITTI-format label file into a dict of numpy arrays.

    Each non-blank line describes one object as whitespace-separated
    fields. Blank lines are ignored, and fields may be separated by any
    amount of whitespace.

    Args:
        label_path (str): Path of the label text file.

    Returns:
        dict: Annotation arrays, one entry per object line:

            - name: first field of each line
            - truncated, occluded, alpha: fields 1, 2 and 3
            - bbox: fields 4-7, shape (N, 4)
            - dimensions: fields 8-10 reordered with ``[2, 0, 1]``
              (hwl -> lhw), shape (N, 3)
            - location: fields 11-13, shape (N, 3)
            - rotation_y: field 14
            - score: field 15 when the first line has 16 fields,
              zeros otherwise
            - index: 0..num_objects-1 followed by -1 for the remaining
              entries, where num_objects counts names other than 'DontCare'
            - group_ids: ``arange(N)``
    """
    with open(label_path, 'r') as f:
        # Skip blank lines: they would otherwise become rows without
        # fields and fail with IndexError below.
        content = [line.split() for line in f if line.strip()]

    def float_block(start, stop, width):
        # Columns [start, stop) of every row as a float array (N, width).
        return np.array([[float(field) for field in row[start:stop]]
                         for row in content]).reshape(-1, width)

    annotations = {}
    num_objects = sum(1 for row in content if row[0] != 'DontCare')
    annotations['name'] = np.array([row[0] for row in content])
    num_gt = len(annotations['name'])
    annotations['truncated'] = np.array([float(row[1]) for row in content])
    annotations['occluded'] = np.array([int(row[2]) for row in content])
    annotations['alpha'] = np.array([float(row[3]) for row in content])
    annotations['bbox'] = float_block(4, 8, 4)
    # dimensions will convert hwl format to standard lhw(camera) format.
    annotations['dimensions'] = float_block(8, 11, 3)[:, [2, 0, 1]]
    annotations['location'] = float_block(11, 14, 3)
    annotations['rotation_y'] = np.array(
        [float(row[14]) for row in content]).reshape(-1)

    if len(content) != 0 and len(content[0]) == 16:  # have score
        annotations['score'] = np.array([float(row[15]) for row in content])
    else:
        annotations['score'] = np.zeros((annotations['bbox'].shape[0], ))

    index = list(range(num_objects)) + [-1] * (num_gt - num_objects)
    annotations['index'] = np.array(index, dtype=np.int32)
    annotations['group_ids'] = np.arange(num_gt, dtype=np.int32)
    return annotations
def
_extend_matrix
(
mat
):
mat
=
np
.
concatenate
([
mat
,
np
.
array
([[
0.
,
0.
,
0.
,
1.
]])],
axis
=
0
)
return
mat
def get_kitti_image_info(path,
                         training=True,
                         label_info=True,
                         velodyne=False,
                         calib=False,
                         with_plane=False,
                         image_ids=7481,
                         extend_matrix=True,
                         num_worker=8,
                         relative_path=True,
                         with_imageshape=True):
    """Collect per-frame info dicts for a KITTI-layout dataset.

    One dict is built per entry of ``image_ids`` using a thread pool of
    ``num_worker`` threads. When ``image_ids`` is not a list it is treated
    as a count and expanded to ``list(range(image_ids))``.

    KITTI annotation format version 2:
    {
        [optional]points: [N, 3+] point cloud
        [optional, for kitti]image: {
            image_idx: ...
            image_path: ...
            image_shape: ...
        }
        point_cloud: {
            num_features: 4
            velodyne_path: ...
        }
        [optional, for kitti]calib: {
            R0_rect: ...
            Tr_velo_to_cam: ...
            P2: ...
        }
        annos: {
            location: [num_gt, 3] array
            dimensions: [num_gt, 3] array
            rotation_y: [num_gt] angle array
            name: [num_gt] ground truth name array
            [optional]difficulty: kitti difficulty
            [optional]group_ids: used for multi-part object
        }
    }

    Returns:
        list[dict]: One info dict per requested index, in input order.
    """
    data_root = Path(path)
    if not isinstance(image_ids, list):
        image_ids = list(range(image_ids))

    def _row_to_matrix(row, stop, shape):
        # A calibration row is "<tag> v1 v2 ..."; drop the tag and keep the
        # values up to (not including) position ``stop``.
        values = [float(token) for token in row.split(' ')[1:stop]]
        return np.array(values).reshape(shape)

    def _under_root(maybe_relative):
        return str(data_root / maybe_relative)

    def _collect_one(idx):
        frame = {}
        point_cloud = {'num_features': 4}
        calibration = {}
        image = {'image_idx': idx}
        annos = None

        if velodyne:
            point_cloud['velodyne_path'] = get_velodyne_path(
                idx, path, training, relative_path)
        image['image_path'] = get_image_path(idx, path, training,
                                             relative_path)
        if with_imageshape:
            image_file = image['image_path']
            if relative_path:
                image_file = _under_root(image_file)
            image['image_shape'] = np.array(
                io.imread(image_file).shape[:2], dtype=np.int32)
        if label_info:
            label_file = get_label_path(idx, path, training, relative_path)
            if relative_path:
                label_file = _under_root(label_file)
            annos = get_label_anno(label_file)
        frame['image'] = image
        frame['point_cloud'] = point_cloud

        if calib:
            calib_file = get_calib_path(
                idx, path, training, relative_path=False)
            with open(calib_file, 'r') as f:
                rows = f.readlines()
            # Rows 0-3 hold the four 3x4 projection matrices P0..P3.
            projections = [
                _row_to_matrix(rows[k], 13, [3, 4]) for k in range(4)
            ]
            if extend_matrix:
                projections = [_extend_matrix(p) for p in projections]
            rect = _row_to_matrix(rows[4], 10, [3, 3])
            if extend_matrix:
                # Embed the 3x3 rectification in a 4x4 with a trailing 1.
                padded = np.zeros([4, 4], dtype=rect.dtype)
                padded[3, 3] = 1.
                padded[:3, :3] = rect
                rect = padded
            velo_to_cam = _row_to_matrix(rows[5], 13, [3, 4])
            imu_to_velo = _row_to_matrix(rows[6], 13, [3, 4])
            if extend_matrix:
                velo_to_cam = _extend_matrix(velo_to_cam)
                imu_to_velo = _extend_matrix(imu_to_velo)
            for k, projection in enumerate(projections):
                calibration[f'P{k}'] = projection
            calibration['R0_rect'] = rect
            calibration['Tr_velo_to_cam'] = velo_to_cam
            calibration['Tr_imu_to_velo'] = imu_to_velo
            frame['calib'] = calibration

        if with_plane:
            plane_file = get_plane_path(idx, path, training, relative_path)
            if relative_path:
                plane_file = _under_root(plane_file)
            plane_rows = mmcv.list_from_file(plane_file)
            # The fourth line of the plane file carries the coefficients.
            frame['plane'] = np.array(
                [float(v) for v in plane_rows[3].split()])

        if annos is not None:
            frame['annos'] = annos
            add_difficulty_to_annos(frame)
        return frame

    with futures.ThreadPoolExecutor(num_worker) as executor:
        image_infos = executor.map(_collect_one, image_ids)
    return list(image_infos)
class WaymoInfoGatherer:
    """
    Parallel version of waymo dataset information gathering.
    Waymo annotation format version like KITTI:
    {
        [optional]points: [N, 3+] point cloud
        [optional, for kitti]image: {
            image_idx: ...
            image_path: ...
            image_shape: ...
        }
        point_cloud: {
            num_features: 6
            velodyne_path: ...
        }
        [optional, for kitti]calib: {
            R0_rect: ...
            Tr_velo_to_cam0: ...
            P0: ...
        }
        annos: {
            location: [num_gt, 3] array
            dimensions: [num_gt, 3] array
            rotation_y: [num_gt] angle array
            name: [num_gt] ground truth name array
            [optional]difficulty: kitti difficulty
            [optional]group_ids: used for multi-part object
        }
    }
    """

    def __init__(self,
                 path,
                 training=True,
                 label_info=True,
                 velodyne=False,
                 calib=False,
                 pose=False,
                 extend_matrix=True,
                 num_worker=8,
                 relative_path=True,
                 with_imageshape=True,
                 max_sweeps=5) -> None:
        """Store the gathering options; no I/O happens here."""
        self.path = path
        self.training = training
        self.label_info = label_info
        self.velodyne = velodyne
        self.calib = calib
        self.pose = pose
        self.extend_matrix = extend_matrix
        self.num_worker = num_worker
        self.relative_path = relative_path
        self.with_imageshape = with_imageshape
        self.max_sweeps = max_sweeps

    def gather_single(self, idx):
        """Build the info dict of the frame ``idx``.

        Depending on the flags given at construction, the dict receives the
        point cloud path and timestamp, image path and shape, label
        annotations, calibration matrices and pose. Up to ``max_sweeps``
        preceding frames (``idx - 1``, ``idx - 2``, ...) are appended under
        ``'sweeps'``; collection stops at the first one whose velodyne file
        does not exist.

        Returns:
            dict: The assembled frame information.
        """
        root_path = Path(self.path)
        info = {}
        pc_info = {'num_features': 6}
        calib_info = {}
        image_info = {'image_idx': idx}
        annotations = None

        def _parse_row(row, stop, shape):
            # A calibration row is "<tag> v1 v2 ..."; skip the tag.
            return np.array([float(v) for v in row.split(' ')[1:stop]
                             ]).reshape(shape)

        if self.velodyne:
            pc_info['velodyne_path'] = get_velodyne_path(
                idx,
                self.path,
                self.training,
                self.relative_path,
                use_prefix_id=True)
            with open(
                    get_timestamp_path(
                        idx,
                        self.path,
                        self.training,
                        relative_path=False,
                        use_prefix_id=True)) as f:
                info['timestamp'] = np.int64(f.read())
        image_info['image_path'] = get_image_path(
            idx,
            self.path,
            self.training,
            self.relative_path,
            info_type='image_0',
            use_prefix_id=True)
        if self.with_imageshape:
            img_path = image_info['image_path']
            if self.relative_path:
                img_path = str(root_path / img_path)
            # io using PIL is significantly faster than skimage
            # The context manager closes the file handle right away; a bare
            # Image.open(...).size would keep it open until garbage
            # collection, which adds up over many frames per worker.
            with Image.open(img_path) as img:
                w, h = img.size
            image_info['image_shape'] = np.array((h, w), dtype=np.int32)
        if self.label_info:
            label_path = get_label_path(
                idx,
                self.path,
                self.training,
                self.relative_path,
                info_type='label_all',
                use_prefix_id=True)
            if self.relative_path:
                label_path = str(root_path / label_path)
            annotations = get_label_anno(label_path)
        info['image'] = image_info
        info['point_cloud'] = pc_info
        if self.calib:
            calib_path = get_calib_path(
                idx,
                self.path,
                self.training,
                relative_path=False,
                use_prefix_id=True)
            with open(calib_path, 'r') as f:
                lines = f.readlines()
            # Rows 0-4 hold the five 3x4 projection matrices P0..P4.
            P0 = _parse_row(lines[0], 13, [3, 4])
            P1 = _parse_row(lines[1], 13, [3, 4])
            P2 = _parse_row(lines[2], 13, [3, 4])
            P3 = _parse_row(lines[3], 13, [3, 4])
            P4 = _parse_row(lines[4], 13, [3, 4])
            if self.extend_matrix:
                P0 = _extend_matrix(P0)
                P1 = _extend_matrix(P1)
                P2 = _extend_matrix(P2)
                P3 = _extend_matrix(P3)
                P4 = _extend_matrix(P4)
            R0_rect = _parse_row(lines[5], 10, [3, 3])
            if self.extend_matrix:
                rect_4x4 = np.zeros([4, 4], dtype=R0_rect.dtype)
                rect_4x4[3, 3] = 1.
                rect_4x4[:3, :3] = R0_rect
            else:
                rect_4x4 = R0_rect

            Tr_velo_to_cam = _parse_row(lines[6], 13, [3, 4])
            if self.extend_matrix:
                Tr_velo_to_cam = _extend_matrix(Tr_velo_to_cam)
            calib_info['P0'] = P0
            calib_info['P1'] = P1
            calib_info['P2'] = P2
            calib_info['P3'] = P3
            calib_info['P4'] = P4
            calib_info['R0_rect'] = rect_4x4
            calib_info['Tr_velo_to_cam'] = Tr_velo_to_cam
            info['calib'] = calib_info

        if self.pose:
            pose_path = get_pose_path(
                idx,
                self.path,
                self.training,
                relative_path=False,
                use_prefix_id=True)
            info['pose'] = np.loadtxt(pose_path)

        if annotations is not None:
            info['annos'] = annotations
            # The 16th label column parsed as 'score' is stored under
            # 'camera_id' for this dataset.
            info['annos']['camera_id'] = info['annos'].pop('score')
            add_difficulty_to_annos(info)

        sweeps = []
        prev_idx = idx
        while len(sweeps) < self.max_sweeps:
            prev_info = {}
            prev_idx -= 1
            prev_info['velodyne_path'] = get_velodyne_path(
                prev_idx,
                self.path,
                self.training,
                self.relative_path,
                exist_check=False,
                use_prefix_id=True)
            if_prev_exists = osp.exists(
                Path(self.path) / prev_info['velodyne_path'])
            if if_prev_exists:
                with open(
                        get_timestamp_path(
                            prev_idx,
                            self.path,
                            self.training,
                            relative_path=False,
                            use_prefix_id=True)) as f:
                    prev_info['timestamp'] = np.int64(f.read())
                prev_pose_path = get_pose_path(
                    prev_idx,
                    self.path,
                    self.training,
                    relative_path=False,
                    use_prefix_id=True)
                prev_info['pose'] = np.loadtxt(prev_pose_path)
                sweeps.append(prev_info)
            else:
                break
        info['sweeps'] = sweeps

        return info

    def gather(self, image_ids):
        """Run ``gather_single`` over ``image_ids`` with ``num_worker`` workers.

        A non-list ``image_ids`` is treated as a count and expanded to
        ``list(range(image_ids))``.
        """
        if not isinstance(image_ids, list):
            image_ids = list(range(image_ids))
        image_infos = mmcv.track_parallel_progress(self.gather_single,
                                                   image_ids,
                                                   self.num_worker)
        return list(image_infos)
def kitti_anno_to_label_file(annos, folder):
    """Write one KITTI result text file per annotation dict into ``folder``.

    Each entry of ``annos`` produces ``<image index string>.txt`` containing
    one line per box, formatted by ``kitti_result_line`` and joined with
    newlines (no trailing newline).
    """
    out_dir = Path(folder)
    per_box_fields = ('name', 'alpha', 'bbox', 'location', 'dimensions',
                      'rotation_y', 'score')
    for anno in annos:
        image_idx = anno['metadata']['image_idx']
        num_boxes = anno['bbox'].shape[0]
        rendered = [
            kitti_result_line({key: anno[key][j]
                               for key in per_box_fields})
            for j in range(num_boxes)
        ]
        target = out_dir / f'{get_image_index_str(image_idx)}.txt'
        text = '\n'.join(rendered)
        with open(target, 'w') as f:
            f.write(text)
def add_difficulty_to_annos(info):
    """Assign a KITTI difficulty level to every annotation in ``info``.

    The level is derived from the 2D box height (``bbox[:, 3] - bbox[:, 1]``),
    the occlusion flag and the truncation value, using one threshold triple
    per level (easy, moderate, hard).

    Args:
        info (dict): Must contain ``info['annos']`` with the keys
            ``dimensions``, ``bbox``, ``occluded`` and ``truncated``.
            ``info['annos']['difficulty']`` is written in place as an
            int32 array.

    Returns:
        list[int]: Per-object difficulty: 0 (easy), 1 (moderate), 2 (hard)
            or -1 when the object fails all three levels.
    """
    min_height = [40, 25, 25]  # minimum height for evaluated groundtruth/detections
    max_occlusion = [
        0, 1, 2
    ]  # maximum occlusion level of the groundtruth used for evaluation
    max_trunc = [
        0.15, 0.3, 0.5
    ]  # maximum truncation level of the groundtruth used for evaluation
    annos = info['annos']
    dims = annos['dimensions']  # lhw format
    bbox = annos['bbox']
    height = bbox[:, 3] - bbox[:, 1]
    occlusion = annos['occluded']
    truncation = annos['truncated']
    num_boxes = len(dims)

    # ``np.bool`` was only an alias of the builtin and has been removed from
    # NumPy (1.24+); the builtin ``bool`` yields the same dtype everywhere.
    easy_mask = np.ones((num_boxes, ), dtype=bool)
    moderate_mask = np.ones((num_boxes, ), dtype=bool)
    hard_mask = np.ones((num_boxes, ), dtype=bool)
    level_masks = (easy_mask, moderate_mask, hard_mask)

    for i, (h, o, t) in enumerate(zip(height, occlusion, truncation)):
        for level, mask in enumerate(level_masks):
            if (o > max_occlusion[level] or h <= min_height[level]
                    or t > max_trunc[level]):
                mask[i] = False

    # The thresholds only loosen from easy to hard, so XOR of neighbouring
    # masks picks the objects that first qualify at the looser level.
    is_easy = easy_mask
    is_moderate = np.logical_xor(easy_mask, moderate_mask)
    is_hard = np.logical_xor(hard_mask, moderate_mask)

    diff = []
    for i in range(num_boxes):
        if is_easy[i]:
            diff.append(0)
        elif is_moderate[i]:
            diff.append(1)
        elif is_hard[i]:
            diff.append(2)
        else:
            diff.append(-1)
    annos['difficulty'] = np.array(diff, np.int32)
    return diff
def kitti_result_line(result_dict, precision=4):
    """Render one detection as a space-separated KITTI result line.

    Fields are emitted in the fixed order name, truncated, occluded, alpha,
    bbox, dimensions, location, rotation_y, score. A field absent from
    ``result_dict`` (or given as ``None``) falls back to its default;
    ``name`` and ``bbox`` have no default.

    Args:
        result_dict (dict): Field name -> value for one object.
        precision (int): Number of decimals used for float fields.

    Returns:
        str: The formatted line.

    Raises:
        ValueError: If a field without default is explicitly ``None``.
        KeyError: If ``result_dict`` contains a field name that is not one
            of the nine known fields.
    """
    float_fmt = '{{:.{}f}}'.format(precision)
    defaults = OrderedDict([
        ('name', None),
        ('truncated', -1),
        ('occluded', -1),
        ('alpha', -10),
        ('bbox', None),
        ('dimensions', [-1, -1, -1]),
        ('location', [-1000, -1000, -1000]),
        ('rotation_y', -10),
        ('score', 0.0),
    ])
    scalar_fields = ('truncated', 'alpha', 'rotation_y', 'score')
    vector_fields = ('bbox', 'dimensions', 'location')

    # Start from "nothing supplied" and overlay what the caller passed.
    merged = OrderedDict((field, None) for field in defaults)
    for field, value in result_dict.items():
        if defaults[field] is None and value is None:
            raise ValueError('you must specify a value for {}'.format(field))
        merged[field] = value

    tokens = []
    for field, value in merged.items():
        missing = value is None
        if field == 'name':
            tokens.append(value)
        elif field in scalar_fields:
            tokens.append(
                str(defaults[field]) if missing else float_fmt.format(value))
        elif field == 'occluded':
            tokens.append(
                str(defaults[field]) if missing else '{}'.format(value))
        elif field in vector_fields:
            if missing:
                tokens += [str(v) for v in defaults[field]]
            else:
                tokens += [float_fmt.format(v) for v in value]
        else:
            raise ValueError('unknown key. supported key:{}'.format(
                merged.keys()))
    return ' '.join(tokens)
docker-hub/FlashOCC/Flashocc/tools/data_converter/lyft_converter.py
0 → 100644
View file @
d2b71343
# Copyright (c) OpenMMLab. All rights reserved.
import
os
from
logging
import
warning
from
os
import
path
as
osp
import
mmcv
import
numpy
as
np
from
lyft_dataset_sdk.lyftdataset
import
LyftDataset
as
Lyft
from
pyquaternion
import
Quaternion
from
mmdet3d.datasets
import
LyftDataset
from
.nuscenes_converter
import
(
get_2d_boxes
,
get_available_scenes
,
obtain_sensor2top
)
# Category names of the Lyft dataset; a category's position in this tuple is
# used as its numeric id when exporting COCO-style annotations.
lyft_categories = (
    'car',
    'truck',
    'bus',
    'emergency_vehicle',
    'other_vehicle',
    'motorcycle',
    'bicycle',
    'pedestrian',
    'animal',
)
def create_lyft_infos(root_path,
                      info_prefix,
                      version='v1.01-train',
                      max_sweeps=10):
    """Create info file of lyft dataset.

    Given the raw data, generate its related info file in pkl format.
    For the train version two files are dumped under ``root_path``
    (``<info_prefix>_infos_train.pkl`` and ``<info_prefix>_infos_val.pkl``);
    for the test version a single ``<info_prefix>_infos_test.pkl``.

    Args:
        root_path (str): Path of the data root.
        info_prefix (str): Prefix of the info file to be generated.
        version (str, optional): Version of the data.
            Default: 'v1.01-train'.
        max_sweeps (int, optional): Max number of sweeps.
            Default: 10.
    """
    lyft = Lyft(
        data_path=osp.join(root_path, version),
        json_path=osp.join(root_path, version, version),
        verbose=True)
    available_vers = ['v1.01-train', 'v1.01-test']
    assert version in available_vers
    if version == 'v1.01-test':
        train_scenes = mmcv.list_from_file('data/lyft/test.txt')
        val_scenes = []
    elif version == 'v1.01-train':
        train_scenes = mmcv.list_from_file('data/lyft/train.txt')
        val_scenes = mmcv.list_from_file('data/lyft/val.txt')
    else:
        raise ValueError('unknown')

    # filter existing scenes.
    available_scenes = get_available_scenes(lyft)
    scene_names = [scene['name'] for scene in available_scenes]

    def _tokens_for(names):
        # Keep only the names present on disk and map each to its token.
        return {
            available_scenes[scene_names.index(name)]['token']
            for name in names if name in scene_names
        }

    train_scenes = _tokens_for(train_scenes)
    val_scenes = _tokens_for(val_scenes)

    test = 'test' in version
    if test:
        print(f'test scene: {len(train_scenes)}')
    else:
        print(f'train scene: {len(train_scenes)}, \
                val scene: {len(val_scenes)}')
    train_lyft_infos, val_lyft_infos = _fill_trainval_infos(
        lyft, train_scenes, val_scenes, test, max_sweeps=max_sweeps)

    metadata = dict(version=version)
    if test:
        print(f'test sample: {len(train_lyft_infos)}')
        mmcv.dump(
            dict(infos=train_lyft_infos, metadata=metadata),
            osp.join(root_path, f'{info_prefix}_infos_test.pkl'))
        return

    print(f'train sample: {len(train_lyft_infos)}, \
                val sample: {len(val_lyft_infos)}')
    mmcv.dump(
        dict(infos=train_lyft_infos, metadata=metadata),
        osp.join(root_path, f'{info_prefix}_infos_train.pkl'))
    mmcv.dump(
        dict(infos=val_lyft_infos, metadata=metadata),
        osp.join(root_path, f'{info_prefix}_infos_val.pkl'))
def _fill_trainval_infos(lyft,
                         train_scenes,
                         val_scenes,
                         test=False,
                         max_sweeps=10):
    """Generate the train/val infos from the raw data.

    Every sample of ``lyft.sample`` becomes one info dict holding the lidar
    path, sensor/ego poses, the six camera entries, up to ``max_sweeps``
    previous lidar sweeps and, unless ``test`` is set, the ground-truth
    boxes. A sample goes to the training list when its scene token is in
    ``train_scenes`` and to the validation list otherwise.

    Args:
        lyft (:obj:`LyftDataset`): Dataset class in the Lyft dataset.
        train_scenes (list[str]): Basic information of training scenes.
        val_scenes (list[str]): Basic information of validation scenes.
        test (bool, optional): Whether use the test mode. In the test mode, no
            annotations can be accessed. Default: False.
        max_sweeps (int, optional): Max number of sweeps. Default: 10.

    Returns:
        tuple[list[dict]]: Information of training set and
            validation set that will be saved to the info file.
    """
    # obtain 6 image's information per frame
    camera_types = [
        'CAM_FRONT',
        'CAM_FRONT_RIGHT',
        'CAM_FRONT_LEFT',
        'CAM_BACK',
        'CAM_BACK_LEFT',
        'CAM_BACK_RIGHT',
    ]
    train_lyft_infos, val_lyft_infos = [], []

    for sample in mmcv.track_iter_progress(lyft.sample):
        lidar_token = sample['data']['LIDAR_TOP']
        sd_rec = lyft.get('sample_data', lidar_token)
        cs_record = lyft.get('calibrated_sensor',
                             sd_rec['calibrated_sensor_token'])
        pose_record = lyft.get('ego_pose', sd_rec['ego_pose_token'])

        abs_lidar_path, boxes, _ = lyft.get_sample_data(lidar_token)
        # nuScenes devkit returns more convenient relative paths while
        # lyft devkit returns absolute paths; cut everything up to and
        # including the current working directory to get a relative path.
        lidar_path = str(abs_lidar_path).split(f'{os.getcwd()}/')[-1]
        mmcv.check_file_exist(lidar_path)

        info = {
            'lidar_path': lidar_path,
            'token': sample['token'],
            'sweeps': [],
            'cams': dict(),
            'lidar2ego_translation': cs_record['translation'],
            'lidar2ego_rotation': cs_record['rotation'],
            'ego2global_translation': pose_record['translation'],
            'ego2global_rotation': pose_record['rotation'],
            'timestamp': sample['timestamp'],
        }

        l2e_t = info['lidar2ego_translation']
        e2g_t = info['ego2global_translation']
        l2e_r_mat = Quaternion(info['lidar2ego_rotation']).rotation_matrix
        e2g_r_mat = Quaternion(info['ego2global_rotation']).rotation_matrix

        for cam in camera_types:
            cam_token = sample['data'][cam]
            _, _, cam_intrinsic = lyft.get_sample_data(cam_token)
            cam_info = obtain_sensor2top(lyft, cam_token, l2e_t, l2e_r_mat,
                                         e2g_t, e2g_r_mat, cam)
            cam_info.update(cam_intrinsic=cam_intrinsic)
            info['cams'][cam] = cam_info

        # obtain sweeps for a single key-frame by walking the 'prev' links
        sd_rec = lyft.get('sample_data', lidar_token)
        sweeps = []
        while len(sweeps) < max_sweeps and sd_rec['prev'] != '':
            sweeps.append(
                obtain_sensor2top(lyft, sd_rec['prev'], l2e_t, l2e_r_mat,
                                  e2g_t, e2g_r_mat, 'lidar'))
            sd_rec = lyft.get('sample_data', sd_rec['prev'])
        info['sweeps'] = sweeps

        # obtain annotation
        if not test:
            annotations = [
                lyft.get('sample_annotation', token)
                for token in sample['anns']
            ]
            locs = np.array([box.center for box in boxes]).reshape(-1, 3)
            dims = np.array([box.wlh for box in boxes]).reshape(-1, 3)
            rots = np.array([
                box.orientation.yaw_pitch_roll[0] for box in boxes
            ]).reshape(-1, 1)

            raw_names = [box.name for box in boxes]
            name_map = LyftDataset.NameMapping
            names = np.array([
                name_map[raw] if raw in name_map else raw
                for raw in raw_names
            ])

            # we need to convert box size to
            # the format of our lidar coordinate system
            # which is x_size, y_size, z_size (corresponding to l, w, h)
            gt_boxes = np.concatenate([locs, dims[:, [1, 0, 2]], rots],
                                      axis=1)
            assert len(gt_boxes) == len(
                annotations), f'{len(gt_boxes)}, {len(annotations)}'
            info['gt_boxes'] = gt_boxes
            info['gt_names'] = names
            info['num_lidar_pts'] = np.array(
                [anno['num_lidar_pts'] for anno in annotations])
            info['num_radar_pts'] = np.array(
                [anno['num_radar_pts'] for anno in annotations])

        target = (
            train_lyft_infos
            if sample['scene_token'] in train_scenes else val_lyft_infos)
        target.append(info)

    return train_lyft_infos, val_lyft_infos
def export_2d_annotation(root_path, info_path, version):
    """Export 2d annotation from the info file and raw data.

    The result is written next to the info file as
    ``<info_path without its last 4 characters>.coco.json``.

    Args:
        root_path (str): Root path of the raw data.
        info_path (str): Path of the info file.
        version (str): Dataset version.
    """
    # The module-level name ``warning`` is ``logging.warning``, a plain
    # function without a ``.warn`` attribute, so calling ``warning.warn``
    # raised AttributeError before any work was done. Use the standard
    # ``warnings`` module instead.
    import warnings
    warnings.warn('DeprecationWarning: 2D annotations are not used on the '
                  'Lyft dataset. The function export_2d_annotation will be '
                  'deprecated.')
    # get bbox annotations for camera
    camera_types = [
        'CAM_FRONT',
        'CAM_FRONT_RIGHT',
        'CAM_FRONT_LEFT',
        'CAM_BACK',
        'CAM_BACK_LEFT',
        'CAM_BACK_RIGHT',
    ]
    lyft_infos = mmcv.load(info_path)['infos']
    lyft = Lyft(
        data_path=osp.join(root_path, version),
        json_path=osp.join(root_path, version, version),
        verbose=True)
    # A category's id is its position in ``lyft_categories``.
    cat2Ids = [
        dict(id=cat_id, name=cat_name)
        for cat_id, cat_name in enumerate(lyft_categories)
    ]
    coco_ann_id = 0
    coco_2d_dict = dict(annotations=[], images=[], categories=cat2Ids)
    for info in mmcv.track_iter_progress(lyft_infos):
        for cam in camera_types:
            cam_info = info['cams'][cam]
            coco_infos = get_2d_boxes(
                lyft,
                cam_info['sample_data_token'],
                visibilities=['', '1', '2', '3', '4'])
            # Only the image size is needed for the COCO image record.
            (height, width, _) = mmcv.imread(cam_info['data_path']).shape
            coco_2d_dict['images'].append(
                dict(
                    file_name=cam_info['data_path'],
                    id=cam_info['sample_data_token'],
                    width=width,
                    height=height))
            for coco_info in coco_infos:
                if coco_info is None:
                    continue
                # add an empty key for coco format
                coco_info['segmentation'] = []
                coco_info['id'] = coco_ann_id
                coco_2d_dict['annotations'].append(coco_info)
                coco_ann_id += 1
    mmcv.dump(coco_2d_dict, f'{info_path[:-4]}.coco.json')
docker-hub/FlashOCC/Flashocc/tools/data_converter/lyft_data_fixer.py
0 → 100644
View file @
d2b71343
# Copyright (c) OpenMMLab. All rights reserved.
import
argparse
import
os
import
numpy
as
np
def fix_lyft(root_folder='./data/lyft', version='v1.01'):
    """Pad one known lidar file of the Lyft train split in place.

    The file ``lidar/host-a011_lidar1_1233090652702363606.bin`` under
    ``<root_folder>/<version>-train`` is read as float32 values. If they
    cannot be reshaped into rows of 5, the values 100.0 and 1.0 are
    appended and the file is overwritten; otherwise it is left untouched.

    Args:
        root_folder (str): Root path of the Lyft dataset.
        version (str): Dataset version prefix (without ``-train``).
    """
    # refer to https://www.kaggle.com/c/3d-object-detection-for-autonomous-vehicles/discussion/110000  # noqa
    train_root = os.path.join(root_folder, f'{version}-train')
    lidar_path = os.path.join(
        train_root, 'lidar/host-a011_lidar1_1233090652702363606.bin')
    assert os.path.isfile(lidar_path), f'Please download the complete Lyft ' \
        f'dataset and make sure {lidar_path} is present.'
    points = np.fromfile(lidar_path, dtype=np.float32, count=-1)
    try:
        # Only used as a divisibility probe; the reshaped view is discarded.
        points.reshape([-1, 5])
        print(f'This fix is not required for version {version}.')
    except ValueError:
        padding = np.array([100.0, 1.0], dtype='float32')
        np.concatenate([points, padding]).tofile(lidar_path)
        print(f'Appended 100.0 and 1.0 to the end of {lidar_path}.')
def parse_args(argv=None):
    """Parse the command-line options of the Lyft dataset fixer.

    Parsing lives in a function (instead of running at module level) so
    that importing this module neither reads ``sys.argv`` nor exits on
    arguments that belong to another program.

    Args:
        argv (list[str], optional): Arguments to parse. ``None`` makes
            argparse fall back to ``sys.argv[1:]``.

    Returns:
        argparse.Namespace: With attributes ``root_folder`` and ``version``.
    """
    parser = argparse.ArgumentParser(description='Lyft dataset fixer arg parser')
    parser.add_argument(
        '--root-folder',
        type=str,
        default='./data/lyft',
        help='specify the root path of Lyft dataset')
    parser.add_argument(
        '--version',
        type=str,
        default='v1.01',
        help='specify Lyft dataset version')
    return parser.parse_args(argv)


if __name__ == '__main__':
    args = parse_args()
    fix_lyft(root_folder=args.root_folder, version=args.version)
docker-hub/FlashOCC/Flashocc/tools/data_converter/nuimage_converter.py
0 → 100644
View file @
d2b71343
# Copyright (c) OpenMMLab. All rights reserved.
import
argparse
import
base64
from
os
import
path
as
osp
import
mmcv
import
numpy
as
np
from
nuimages
import
NuImages
from
nuimages.utils.utils
import
mask_decode
,
name_to_index_mapping
# Detection class names; a class's position in this tuple is used as its
# category id in the exported COCO json.
nus_categories = (
    'car',
    'truck',
    'trailer',
    'bus',
    'construction_vehicle',
    'bicycle',
    'motorcycle',
    'pedestrian',
    'traffic_cone',
    'barrier',
)

# Raw dataset category name -> detection class name. Raw categories that do
# not appear here produce no instance annotation.
NAME_MAPPING = dict([
    ('movable_object.barrier', 'barrier'),
    ('vehicle.bicycle', 'bicycle'),
    ('vehicle.bus.bendy', 'bus'),
    ('vehicle.bus.rigid', 'bus'),
    ('vehicle.car', 'car'),
    ('vehicle.construction', 'construction_vehicle'),
    ('vehicle.motorcycle', 'motorcycle'),
    ('human.pedestrian.adult', 'pedestrian'),
    ('human.pedestrian.child', 'pedestrian'),
    ('human.pedestrian.construction_worker', 'pedestrian'),
    ('human.pedestrian.police_officer', 'pedestrian'),
    ('movable_object.trafficcone', 'traffic_cone'),
    ('vehicle.trailer', 'trailer'),
    ('vehicle.truck', 'truck'),
])
def parse_args():
    """Parse the converter's command-line options from ``sys.argv``.

    Returns:
        argparse.Namespace: With attributes ``data_root``, ``version``
            (a list, one or more values), ``out_dir``, ``nproc`` and
            ``extra_tag``.
    """
    option_table = (
        ('--data-root',
         dict(
             type=str,
             default='./data/nuimages',
             help='specify the root path of dataset')),
        ('--version',
         dict(
             type=str,
             nargs='+',
             default=['v1.0-mini'],
             required=False,
             help='specify the dataset version')),
        ('--out-dir',
         dict(
             type=str,
             default='./data/nuimages/annotations/',
             required=False,
             help='path to save the exported json')),
        ('--nproc',
         dict(
             type=int,
             default=4,
             required=False,
             help='workers to process semantic masks')),
        ('--extra-tag', dict(type=str, default='nuimages')),
    )
    cli = argparse.ArgumentParser(description='Data converter arg parser')
    for flag, options in option_table:
        cli.add_argument(flag, **options)
    return cli.parse_args()
def get_img_annos(nuim, img_info, cat2id, out_dir, data_root, seg_root):
    """Build the instance annotations and semantic mask of one image.

    The semantic mask is painted first from the surface annotations and
    then from the object annotations (sorted by token), and is written to
    ``seg_root`` under the image's file name with ``'jpg'`` replaced by
    ``'png'``.

    Args:
        nuim (obj:`NuImages`): NuImages dataset object
        img_info (dict): Meta information of img; the keys ``token``,
            ``id``, ``width``, ``height`` and ``file_name`` are read.
        cat2id (dict): Detection class name -> category id.
        out_dir (str): Not used by this function.
        data_root (str): Not used by this function.
        seg_root (str): Directory the semantic mask is saved under.

    Returns:
        tuple: ``(annotations, max_id)`` where ``annotations`` is a list of
            COCO-style dicts (without ``id``) for the objects whose category
            is in ``NAME_MAPPING``, and ``max_id`` is the largest value in
            the semantic mask.
    """
    sd_token = img_info['token']
    image_id = img_info['id']
    name_to_index = name_to_index_mapping(nuim.category)

    # Get image data.
    semseg_mask = np.zeros(
        (img_info['height'], img_info['width'])).astype('uint8')

    # Load and draw stuff / surface regions.
    for surface in nuim.surface_ann:
        if surface['sample_data_token'] != sd_token:
            continue
        category_name = nuim.get('category',
                                 surface['category_token'])['name']
        if surface['mask'] is None:
            continue
        decoded = mask_decode(surface['mask'])
        # Draw mask for semantic segmentation.
        semseg_mask[decoded == 1] = name_to_index[category_name]

    # Load object instances.
    # Sort by token to ensure that objects always appear in the
    # instance mask in the same order.
    object_anns = [
        obj for obj in nuim.object_ann if obj['sample_data_token'] == sd_token
    ]
    object_anns.sort(key=lambda obj: obj['token'])

    # Draw object instances.
    annotations = []
    for obj in object_anns:
        category_name = nuim.get('category', obj['category_token'])['name']
        if obj['mask'] is None:
            continue
        decoded = mask_decode(obj['mask'])

        # Draw masks for semantic segmentation and instance segmentation.
        semseg_mask[decoded == 1] = name_to_index[category_name]

        if category_name not in NAME_MAPPING:
            continue
        cat_id = cat2id[NAME_MAPPING[category_name]]
        x_min, y_min, x_max, y_max = obj['bbox']
        # encode calibrated instance mask
        mask_anno = dict()
        mask_anno['counts'] = base64.b64decode(
            obj['mask']['counts']).decode()
        mask_anno['size'] = obj['mask']['size']

        box_w, box_h = x_max - x_min, y_max - y_min
        annotations.append(
            dict(
                image_id=image_id,
                category_id=cat_id,
                bbox=[x_min, y_min, box_w, box_h],
                area=box_w * box_h,
                segmentation=mask_anno,
                iscrowd=0))

    # after process, save semantic masks
    seg_filename = osp.join(seg_root,
                            img_info['file_name'].replace('jpg', 'png'))
    mmcv.imwrite(semseg_mask, seg_filename)
    return annotations, np.max(semseg_mask)
def export_nuim_to_coco(nuim, data_root, out_dir, extra_tag, version, nproc):
    """Export the key-frame annotations of a NuImages split to COCO json.

    Semantic masks are written below ``<out_dir>/semantic_masks`` and the
    COCO file to ``<out_dir>/<extra_tag>_<version>.json``.

    Args:
        nuim (obj:`NuImages`): NuImages dataset object.
        data_root (str): Root path of the dataset; a ``calibrated``
            sub-directory is created in it.
        out_dir (str): Output directory, with or without trailing slash.
        extra_tag (str): Prefix of the output json file name.
        version (str): Dataset version, used in the json file name.
        nproc (int): Number of workers; values above 1 process the images
            in parallel.
    """
    print('Process category information')
    categories = [
        dict(id=nus_categories.index(cat_name), name=cat_name)
        for cat_name in nus_categories
    ]
    cat2id = {k_v['name']: k_v['id'] for k_v in categories}

    images = []
    print('Process image meta information...')
    for sample_info in mmcv.track_iter_progress(nuim.sample_data):
        if sample_info['is_key_frame']:
            img_idx = len(images)
            images.append(
                dict(
                    id=img_idx,
                    token=sample_info['token'],
                    file_name=sample_info['filename'],
                    width=sample_info['width'],
                    height=sample_info['height']))

    # Join instead of plain string concatenation so that an ``out_dir``
    # given without a trailing slash does not end up as
    # ``<out_dir>semantic_masks``.
    seg_root = osp.join(out_dir, 'semantic_masks')
    mmcv.mkdir_or_exist(seg_root)
    mmcv.mkdir_or_exist(osp.join(data_root, 'calibrated'))

    # NOTE(review): the worker is declared global presumably so that the
    # parallel backend can pickle it by name -- confirm before changing.
    global process_img_anno

    def process_img_anno(img_info):
        single_img_annos, max_cls_id = get_img_annos(nuim, img_info, cat2id,
                                                     out_dir, data_root,
                                                     seg_root)
        return single_img_annos, max_cls_id

    print('Process img annotations...')
    if nproc > 1:
        outputs = mmcv.track_parallel_progress(
            process_img_anno, images, nproc=nproc)
    else:
        outputs = [
            process_img_anno(img_info)
            for img_info in mmcv.track_iter_progress(images)
        ]

    # Determine the index of object annotation
    print('Process annotation information...')
    annotations = []
    max_cls_ids = []
    for single_img_annos, max_cls_id in outputs:
        max_cls_ids.append(max_cls_id)
        for img_anno in single_img_annos:
            # Annotation ids are assigned globally, in processing order.
            img_anno.update(id=len(annotations))
            annotations.append(img_anno)

    # ``default`` keeps a split without key frames from crashing here.
    max_cls_id = max(max_cls_ids, default=0)
    print(f'Max ID of class in the semantic map: {max_cls_id}')

    coco_format_json = dict(
        images=images, annotations=annotations, categories=categories)

    mmcv.mkdir_or_exist(out_dir)
    out_file = osp.join(out_dir, f'{extra_tag}_{version}.json')
    print(f'Annotation dumped to {out_file}')
    mmcv.dump(coco_format_json, out_file)
def main():
    """Export every dataset version requested on the command line."""
    cli = parse_args()
    for version in cli.version:
        dataset = NuImages(
            dataroot=cli.data_root, version=version, verbose=True, lazy=True)
        export_nuim_to_coco(dataset, cli.data_root, cli.out_dir,
                            cli.extra_tag, version, cli.nproc)


if __name__ == '__main__':
    main()
docker-hub/FlashOCC/Flashocc/tools/data_converter/nuscenes_converter.py
0 → 100644
View file @
d2b71343
# Copyright (c) OpenMMLab. All rights reserved.
import
os
from
collections
import
OrderedDict
from
os
import
path
as
osp
from
typing
import
List
,
Tuple
,
Union
import
mmcv
import
numpy
as
np
from
nuscenes.nuscenes
import
NuScenes
from
nuscenes.utils.geometry_utils
import
view_points
from
pyquaternion
import
Quaternion
from
shapely.geometry
import
MultiPoint
,
box
from
mmdet3d.core.bbox
import
points_cam2img
from
mmdet3d.datasets
import
NuScenesDataset
# Category names of the exported 2D annotations. The position of a name in
# this tuple is used as its integer category id (via ``.index``), so the
# order must stay stable.
nus_categories = ('car', 'truck', 'trailer', 'bus', 'construction_vehicle',
                  'bicycle', 'motorcycle', 'pedestrian', 'traffic_cone',
                  'barrier')

# Attribute names. The position of a name in this tuple is stored as
# ``attribute_id`` by ``get_2d_boxes``; 'None' is used for annotations that
# carry no attribute token.
nus_attributes = ('cycle.with_rider', 'cycle.without_rider',
                  'pedestrian.moving', 'pedestrian.standing',
                  'pedestrian.sitting_lying_down', 'vehicle.moving',
                  'vehicle.parked', 'vehicle.stopped', 'None')
def create_nuscenes_infos(root_path,
                          info_prefix,
                          version='v1.0-trainval',
                          max_sweeps=10):
    """Create the info files of the nuScenes dataset.

    Loads the raw data, collects per-sample information and dumps it in pkl
    format next to the data: ``<prefix>_infos_test.pkl`` for a test version,
    otherwise ``<prefix>_infos_train.pkl`` and ``<prefix>_infos_val.pkl``.

    Args:
        root_path (str): Path of the data root.
        info_prefix (str): Prefix of the info file to be generated.
        version (str, optional): Version of the data.
            Default: 'v1.0-trainval'.
        max_sweeps (int, optional): Max number of sweeps.
            Default: 10.
    """
    from nuscenes.nuscenes import NuScenes
    nusc = NuScenes(version=version, dataroot=root_path, verbose=True)
    from nuscenes.utils import splits

    available_vers = ['v1.0-trainval', 'v1.0-test', 'v1.0-mini']
    assert version in available_vers
    # Names of the `splits` attributes holding the (train, val) scene names
    # of each version; None means the version has no validation scenes.
    split_attrs = {
        'v1.0-trainval': ('train', 'val'),
        'v1.0-test': ('test', None),
        'v1.0-mini': ('mini_train', 'mini_val'),
    }
    if version not in split_attrs:
        raise ValueError('unknown')
    train_attr, val_attr = split_attrs[version]
    train_scenes = getattr(splits, train_attr)
    val_scenes = [] if val_attr is None else getattr(splits, val_attr)

    # filter existing scenes.
    available_scenes = get_available_scenes(nusc)
    available_scene_names = [scene['name'] for scene in available_scenes]

    def _tokens_of(scene_names):
        # Keep the names that are available and map them to scene tokens.
        kept = [name for name in scene_names if name in available_scene_names]
        return {
            available_scenes[available_scene_names.index(name)]['token']
            for name in kept
        }

    train_scenes = _tokens_of(train_scenes)
    val_scenes = _tokens_of(val_scenes)

    test = 'test' in version
    if test:
        print('test scene: {}'.format(len(train_scenes)))
    else:
        print('train scene: {}, val scene: {}'.format(
            len(train_scenes), len(val_scenes)))
    train_nusc_infos, val_nusc_infos = _fill_trainval_infos(
        nusc, train_scenes, val_scenes, test, max_sweeps=max_sweeps)

    metadata = dict(version=version)
    data = dict(infos=train_nusc_infos, metadata=metadata)
    if test:
        print('test sample: {}'.format(len(train_nusc_infos)))
        mmcv.dump(data,
                  osp.join(root_path, '{}_infos_test.pkl'.format(info_prefix)))
        return

    print('train sample: {}, val sample: {}'.format(
        len(train_nusc_infos), len(val_nusc_infos)))
    mmcv.dump(data,
              osp.join(root_path, '{}_infos_train.pkl'.format(info_prefix)))
    # The same container is reused for the validation split.
    data['infos'] = val_nusc_infos
    mmcv.dump(data,
              osp.join(root_path, '{}_infos_val.pkl'.format(info_prefix)))
def get_available_scenes(nusc):
    """Get available scenes from the input nuscenes class.

    Given the raw data, get the information of available scenes for
    further info generation. A scene is treated as available when the
    LIDAR_TOP file of its first sample exists on disk.

    Args:
        nusc (class): Dataset class in the nuScenes dataset.

    Returns:
        available_scenes (list[dict]): List of basic information for the
            available scenes.
    """
    available_scenes = []
    print('total scene num: {}'.format(len(nusc.scene)))
    cwd = os.getcwd()
    for scene in nusc.scene:
        scene_rec = nusc.get('scene', scene['token'])
        sample_rec = nusc.get('sample', scene_rec['first_sample_token'])
        sd_rec = nusc.get('sample_data', sample_rec['data']['LIDAR_TOP'])
        lidar_path, _, _ = nusc.get_sample_data(sd_rec['token'])
        lidar_path = str(lidar_path)
        if cwd in lidar_path:
            # path from lyftdataset is absolute path
            lidar_path = lidar_path.split(f'{cwd}/')[-1]
            # relative path
        # The check must look at the file system: the previously used
        # mmcv.is_filepath only tests whether its argument is a str/Path,
        # which is always true here, so no scene was ever filtered out.
        if not osp.isfile(lidar_path):
            continue
        available_scenes.append(scene)
    print('exist scene num: {}'.format(len(available_scenes)))
    return available_scenes
def _fill_trainval_infos(nusc,
                         train_scenes,
                         val_scenes,
                         test=False,
                         max_sweeps=10):
    """Generate the train/val infos from the raw data.

    Every sample of ``nusc.sample`` is turned into one info dict holding the
    lidar path, calibration, the six camera infos, up to ``max_sweeps``
    previous lidar sweeps and, unless ``test`` is set, the ground-truth
    boxes. A sample goes to the training list when its scene token is in
    ``train_scenes`` and to the validation list otherwise.

    Args:
        nusc (:obj:`NuScenes`): Dataset class in the nuScenes dataset.
        train_scenes (list[str]): Basic information of training scenes.
        val_scenes (list[str]): Basic information of validation scenes.
        test (bool, optional): Whether use the test mode. In test mode, no
            annotations can be accessed. Default: False.
        max_sweeps (int, optional): Max number of sweeps. Default: 10.

    Returns:
        tuple[list[dict]]: Information of training set and validation set
            that will be saved to the info file.
    """
    # obtain 6 image's information per frame
    camera_types = [
        'CAM_FRONT',
        'CAM_FRONT_RIGHT',
        'CAM_FRONT_LEFT',
        'CAM_BACK',
        'CAM_BACK_LEFT',
        'CAM_BACK_RIGHT',
    ]
    train_nusc_infos, val_nusc_infos = [], []

    for sample in mmcv.track_iter_progress(nusc.sample):
        lidar_token = sample['data']['LIDAR_TOP']
        lidar_rec = nusc.get('sample_data', lidar_token)
        cs_record = nusc.get('calibrated_sensor',
                             lidar_rec['calibrated_sensor_token'])
        pose_record = nusc.get('ego_pose', lidar_rec['ego_pose_token'])
        lidar_path, boxes, _ = nusc.get_sample_data(lidar_token)

        mmcv.check_file_exist(lidar_path)

        l2e_t = cs_record['translation']
        l2e_r = cs_record['rotation']
        e2g_t = pose_record['translation']
        e2g_r = pose_record['rotation']
        info = {
            'lidar_path': lidar_path,
            'token': sample['token'],
            'sweeps': [],
            'cams': dict(),
            'lidar2ego_translation': l2e_t,
            'lidar2ego_rotation': l2e_r,
            'ego2global_translation': e2g_t,
            'ego2global_rotation': e2g_r,
            'timestamp': sample['timestamp'],
        }
        l2e_r_mat = Quaternion(l2e_r).rotation_matrix
        e2g_r_mat = Quaternion(e2g_r).rotation_matrix

        for cam in camera_types:
            cam_token = sample['data'][cam]
            _, _, cam_intrinsic = nusc.get_sample_data(cam_token)
            cam_info = obtain_sensor2top(nusc, cam_token, l2e_t, l2e_r_mat,
                                         e2g_t, e2g_r_mat, cam)
            cam_info['cam_intrinsic'] = cam_intrinsic
            info['cams'][cam] = cam_info

        # obtain sweeps for a single key-frame: follow the 'prev' links of
        # the lidar record until the chain ends or max_sweeps is reached
        sweeps = []
        sd_rec = lidar_rec
        while len(sweeps) < max_sweeps and sd_rec['prev'] != '':
            prev_token = sd_rec['prev']
            sweeps.append(
                obtain_sensor2top(nusc, prev_token, l2e_t, l2e_r_mat, e2g_t,
                                  e2g_r_mat, 'lidar'))
            sd_rec = nusc.get('sample_data', prev_token)
        info['sweeps'] = sweeps

        # obtain annotation
        if not test:
            ann_tokens = sample['anns']
            annotations = [
                nusc.get('sample_annotation', token) for token in ann_tokens
            ]
            locs = np.array([b.center for b in boxes]).reshape(-1, 3)
            dims = np.array([b.wlh for b in boxes]).reshape(-1, 3)
            rots = np.array([b.orientation.yaw_pitch_roll[0]
                             for b in boxes]).reshape(-1, 1)
            velocity = np.array(
                [nusc.box_velocity(token)[:2] for token in ann_tokens])
            valid_flag = np.array(
                [(anno['num_lidar_pts'] + anno['num_radar_pts']) > 0
                 for anno in annotations],
                dtype=bool).reshape(-1)

            # convert velo from global to lidar
            inv_e2g_r_t = np.linalg.inv(e2g_r_mat).T
            inv_l2e_r_t = np.linalg.inv(l2e_r_mat).T
            for i in range(len(boxes)):
                velo = np.array([*velocity[i], 0.0])
                velo = velo @ inv_e2g_r_t @ inv_l2e_r_t
                velocity[i] = velo[:2]

            # raw category names are replaced by their mapped name when the
            # dataset class defines one
            name_mapping = NuScenesDataset.NameMapping
            names = np.array([
                name_mapping[b.name] if b.name in name_mapping else b.name
                for b in boxes
            ])
            # we need to convert box size to
            # the format of our lidar coordinate system
            # which is x_size, y_size, z_size (corresponding to l, w, h)
            gt_boxes = np.concatenate([locs, dims[:, [1, 0, 2]], rots], axis=1)
            assert len(gt_boxes) == len(
                annotations), f'{len(gt_boxes)}, {len(annotations)}'
            info['gt_boxes'] = gt_boxes
            info['gt_names'] = names
            info['gt_velocity'] = velocity.reshape(-1, 2)
            info['num_lidar_pts'] = np.array(
                [a['num_lidar_pts'] for a in annotations])
            info['num_radar_pts'] = np.array(
                [a['num_radar_pts'] for a in annotations])
            info['valid_flag'] = valid_flag

        target = (
            train_nusc_infos
            if sample['scene_token'] in train_scenes else val_nusc_infos)
        target.append(info)

    return train_nusc_infos, val_nusc_infos
def obtain_sensor2top(nusc,
                      sensor_token,
                      l2e_t,
                      l2e_r_mat,
                      e2g_t,
                      e2g_r_mat,
                      sensor_type='lidar'):
    """Obtain the info with RT matrix from a general sensor to Top LiDAR.

    Args:
        nusc (class): Dataset class in the nuScenes dataset.
        sensor_token (str): Sample data token corresponding to the
            specific sensor type.
        l2e_t (np.ndarray): Translation from lidar to ego in shape (1, 3).
        l2e_r_mat (np.ndarray): Rotation matrix from lidar to ego
            in shape (3, 3).
        e2g_t (np.ndarray): Translation from ego to global in shape (1, 3).
        e2g_r_mat (np.ndarray): Rotation matrix from ego to global
            in shape (3, 3).
        sensor_type (str, optional): Sensor to calibrate. Default: 'lidar'.

    Returns:
        sweep (dict): Sweep information after transformation. Besides the
            raw calibration / pose records it carries
            'sensor2lidar_rotation' and 'sensor2lidar_translation'.
    """
    sd_rec = nusc.get('sample_data', sensor_token)
    cs_record = nusc.get('calibrated_sensor',
                         sd_rec['calibrated_sensor_token'])
    pose_record = nusc.get('ego_pose', sd_rec['ego_pose_token'])

    data_path = str(nusc.get_sample_data_path(sd_rec['token']))
    cwd = os.getcwd()
    if cwd in data_path:
        # path from lyftdataset is absolute path
        data_path = data_path.split(f'{cwd}/')[-1]  # relative path

    l2e_t_s = cs_record['translation']
    l2e_r_s = cs_record['rotation']
    e2g_t_s = pose_record['translation']
    e2g_r_s = pose_record['rotation']
    sweep = {
        'data_path': data_path,
        'type': sensor_type,
        'sample_data_token': sd_rec['token'],
        'sensor2ego_translation': l2e_t_s,
        'sensor2ego_rotation': l2e_r_s,
        'ego2global_translation': e2g_t_s,
        'ego2global_rotation': e2g_r_s,
        'timestamp': sd_rec['timestamp']
    }

    # obtain the RT from sensor to Top LiDAR
    # sweep->ego->global->ego'->lidar
    l2e_r_s_mat = Quaternion(l2e_r_s).rotation_matrix
    e2g_r_s_mat = Quaternion(e2g_r_s).rotation_matrix
    # Row-vector form of global->ego' and global->ego'->lidar for the
    # reference (top lidar) frame.
    ego2lidar = np.linalg.inv(l2e_r_mat).T
    global2lidar = np.linalg.inv(e2g_r_mat).T @ ego2lidar

    R = (l2e_r_s_mat.T @ e2g_r_s_mat.T) @ global2lidar
    T = (l2e_t_s @ e2g_r_s_mat.T + e2g_t_s) @ global2lidar
    T -= e2g_t @ global2lidar + l2e_t @ ego2lidar

    sweep['sensor2lidar_rotation'] = R.T  # points @ R.T + T
    sweep['sensor2lidar_translation'] = T
    return sweep
def export_2d_annotation(root_path, info_path, version, mono3d=True):
    """Export 2d annotation from the info file and raw data.

    The result is dumped in COCO layout next to the info file, as
    ``<info_path without its last 4 characters>[_mono3d].coco.json``.

    Args:
        root_path (str): Root path of the raw data.
        info_path (str): Path of the info file.
        version (str): Dataset version.
        mono3d (bool, optional): Whether to export mono3d annotation.
            Default: True.
    """
    # get bbox annotations for camera
    camera_types = [
        'CAM_FRONT',
        'CAM_FRONT_RIGHT',
        'CAM_FRONT_LEFT',
        'CAM_BACK',
        'CAM_BACK_LEFT',
        'CAM_BACK_RIGHT',
    ]
    nusc_infos = mmcv.load(info_path)['infos']
    nusc = NuScenes(version=version, dataroot=root_path, verbose=True)

    # A category's id is its position in nus_categories.
    categories = [
        dict(id=cat_id, name=cat_name)
        for cat_id, cat_name in enumerate(nus_categories)
    ]
    annotations = []
    images = []
    coco_2d_dict = dict(
        annotations=annotations, images=images, categories=categories)

    for info in mmcv.track_iter_progress(nusc_infos):
        for cam in camera_types:
            cam_info = info['cams'][cam]
            coco_infos = get_2d_boxes(
                nusc,
                cam_info['sample_data_token'],
                visibilities=['', '1', '2', '3', '4'],
                mono3d=mono3d)
            # the image is read only to obtain its size
            height, width, _ = mmcv.imread(cam_info['data_path']).shape
            images.append(
                dict(
                    file_name=cam_info['data_path'].split('data/nuscenes/')
                    [-1],
                    id=cam_info['sample_data_token'],
                    token=info['token'],
                    cam2ego_rotation=cam_info['sensor2ego_rotation'],
                    cam2ego_translation=cam_info['sensor2ego_translation'],
                    ego2global_rotation=info['ego2global_rotation'],
                    ego2global_translation=info['ego2global_translation'],
                    cam_intrinsic=cam_info['cam_intrinsic'],
                    width=width,
                    height=height))
            for coco_info in coco_infos:
                if coco_info is None:
                    continue
                # add an empty key for coco format
                coco_info['segmentation'] = []
                # annotation ids are consecutive, starting at 0
                coco_info['id'] = len(annotations)
                annotations.append(coco_info)

    json_prefix = (
        f'{info_path[:-4]}_mono3d' if mono3d else f'{info_path[:-4]}')
    mmcv.dump(coco_2d_dict, f'{json_prefix}.coco.json')
def get_2d_boxes(nusc,
                 sample_data_token: str,
                 visibilities: List[str],
                 mono3d=True):
    """Get the 2D annotation records for a given `sample_data_token`.

    Args:
        nusc: nuScenes dataset handle used for all record lookups.
        sample_data_token (str): Sample data token belonging to a camera
            keyframe.
        visibilities (list[str]): Visibility filter.
        mono3d (bool): Whether to get boxes with mono3d annotation.

    Return:
        list[dict]: List of 2D annotation record that belongs to the input
            `sample_data_token`. An entry is None when `generate_record`
            rejected the annotation.

    Raises:
        ValueError: If the sample data is not a keyframe.
    """
    # Get the sample data and the sample corresponding to that sample data.
    sd_rec = nusc.get('sample_data', sample_data_token)

    assert sd_rec[
        'sensor_modality'] == 'camera', 'Error: get_2d_boxes only works' \
        ' for camera sample_data!'
    if not sd_rec['is_key_frame']:
        raise ValueError(
            'The 2D re-projections are available only for keyframes.')

    s_rec = nusc.get('sample', sd_rec['sample_token'])

    # Get the calibrated sensor and ego pose
    # record to get the transformation matrices.
    cs_rec = nusc.get('calibrated_sensor', sd_rec['calibrated_sensor_token'])
    pose_rec = nusc.get('ego_pose', sd_rec['ego_pose_token'])
    camera_intrinsic = np.array(cs_rec['camera_intrinsic'])

    # Get all the annotation with the specified visibilities.
    all_recs = [
        nusc.get('sample_annotation', token) for token in s_rec['anns']
    ]
    visible_recs = [
        rec for rec in all_recs if rec['visibility_token'] in visibilities
    ]

    repro_recs = []
    for ann_rec in visible_recs:
        # Augment sample_annotation with token information.
        ann_rec['sample_annotation_token'] = ann_rec['token']
        ann_rec['sample_data_token'] = sample_data_token

        # Get the box in global coordinates.
        box = nusc.get_box(ann_rec['token'])

        # Move them to the ego-pose frame.
        box.translate(-np.array(pose_rec['translation']))
        box.rotate(Quaternion(pose_rec['rotation']).inverse)

        # Move them to the calibrated sensor frame.
        box.translate(-np.array(cs_rec['translation']))
        box.rotate(Quaternion(cs_rec['rotation']).inverse)

        # Filter out the corners that are not in front of the calibrated
        # sensor.
        corners_3d = box.corners()
        in_front = np.argwhere(corners_3d[2, :] > 0).flatten()
        corners_3d = corners_3d[:, in_front]

        # Project 3d box to 2d.
        projected = view_points(corners_3d, camera_intrinsic, True)
        corner_coords = projected.T[:, :2].tolist()

        # Keep only corners that fall within the image.
        final_coords = post_process_coords(corner_coords)

        # Skip if the convex hull of the re-projected corners
        # does not intersect the image canvas.
        if final_coords is None:
            continue
        min_x, min_y, max_x, max_y = final_coords

        # Generate dictionary record to be included in the .json file.
        repro_rec = generate_record(ann_rec, min_x, min_y, max_x, max_y,
                                    sample_data_token, sd_rec['filename'])

        # If mono3d=True, add 3D annotations in camera coordinates
        if mono3d and (repro_rec is not None):
            loc = box.center.tolist()

            # convert wlh to our lhw
            dim = box.wlh[[1, 2, 0]].tolist()

            # convert the rot to our cam coordinate
            rot = [-box.orientation.yaw_pitch_roll[0]]

            global_velo2d = nusc.box_velocity(box.token)[:2]
            global_velo3d = np.array([*global_velo2d, 0.0])
            e2g_r_mat = Quaternion(pose_rec['rotation']).rotation_matrix
            c2e_r_mat = Quaternion(cs_rec['rotation']).rotation_matrix
            cam_velo3d = global_velo3d @ np.linalg.inv(
                e2g_r_mat).T @ np.linalg.inv(c2e_r_mat).T
            # keep components 0 and 2 of the camera-frame velocity
            velo = cam_velo3d[0::2].tolist()

            repro_rec['bbox_cam3d'] = loc + dim + rot
            repro_rec['velo_cam3d'] = velo

            center3d = np.array(loc).reshape([1, 3])
            center2d = points_cam2img(
                center3d, camera_intrinsic, with_depth=True)
            repro_rec['center2d'] = center2d.squeeze().tolist()
            # normalized center2D + depth
            # samples whose center depth is not positive are removed
            if repro_rec['center2d'][2] <= 0:
                continue

            attr_tokens = nusc.get('sample_annotation',
                                   box.token)['attribute_tokens']
            if len(attr_tokens) == 0:
                attr_name = 'None'
            else:
                attr_name = nusc.get('attribute', attr_tokens[0])['name']
            repro_rec['attribute_name'] = attr_name
            repro_rec['attribute_id'] = nus_attributes.index(attr_name)

        repro_recs.append(repro_rec)

    return repro_recs
def post_process_coords(
    corner_coords: List, imsize: Tuple[int, int] = (1600, 900)
) -> Union[Tuple[float, float, float, float], None]:
    """Get the intersection of the convex hull of the reprojected bbox corners
    and the image canvas, return None if no intersection.

    Args:
        corner_coords (list[int]): Corner coordinates of reprojected
            bounding box.
        imsize (tuple[int]): Size of the image canvas.

    Return:
        tuple [float]: (min_x, min_y, max_x, max_y) of the intersection of
            the convex hull of the 2D box corners and the image canvas, or
            None when the two do not intersect.
    """
    polygon_from_2d_box = MultiPoint(corner_coords).convex_hull
    img_canvas = box(0, 0, imsize[0], imsize[1])

    if not polygon_from_2d_box.intersects(img_canvas):
        return None

    img_intersection = polygon_from_2d_box.intersection(img_canvas)
    # The intersection is not always a polygon: with fewer than three
    # projected corners, or a hull that only touches the canvas border, it
    # is a point or a line, which has no `.exterior`. `.bounds` is available
    # on every geometry and equals the min/max of the exterior coordinates
    # for a polygon.
    min_x, min_y, max_x, max_y = img_intersection.bounds
    return min_x, min_y, max_x, max_y
def generate_record(ann_rec: dict, x1: float, y1: float, x2: float, y2: float,
                    sample_data_token: str, filename: str) -> OrderedDict:
    """Generate one 2D annotation record given various information on top of
    the 2D bounding box coordinates.

    Args:
        ann_rec (dict): Original 3d annotation record.
        x1 (float): Minimum value of the x coordinate.
        y1 (float): Minimum value of the y coordinate.
        x2 (float): Maximum value of the x coordinate.
        y2 (float): Maximum value of the y coordinate.
        sample_data_token (str): Sample data token.
        filename (str):The corresponding image file where the annotation
            is present.

    Returns:
        dict: A sample 2D annotation record, or None when the annotation's
            category has no entry in ``NuScenesDataset.NameMapping``.
            - file_name (str): file name
            - image_id (str): sample data token
            - area (float): 2d box area
            - category_name (str): category name
            - category_id (int): category id
            - bbox (list[float]): left x, top y, dx, dy of 2d box
            - iscrowd (int): whether the area is crowd
    """
    relevant_keys = [
        'attribute_tokens',
        'category_name',
        'instance_token',
        'next',
        'num_lidar_pts',
        'num_radar_pts',
        'prev',
        'sample_annotation_token',
        'sample_data_token',
        'visibility_token',
    ]

    # Intermediate record with the subset of annotation fields of interest;
    # only its category name is consumed below.
    repro_rec = OrderedDict()
    repro_rec['sample_data_token'] = sample_data_token
    repro_rec.update(
        (key, value) for key, value in ann_rec.items()
        if key in relevant_keys)
    repro_rec['bbox_corners'] = [x1, y1, x2, y2]
    repro_rec['filename'] = filename

    area = (y2 - y1) * (x2 - x1)

    raw_category = repro_rec['category_name']
    if raw_category not in NuScenesDataset.NameMapping:
        return None
    cat_name = NuScenesDataset.NameMapping[raw_category]

    return dict(
        file_name=filename,
        image_id=sample_data_token,
        area=area,
        category_name=cat_name,
        category_id=nus_categories.index(cat_name),
        bbox=[x1, y1, x2 - x1, y2 - y1],
        iscrowd=0)
Prev
1
…
7
8
9
10
11
12
13
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment