Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
TS-MODELS-OPT
training
Autonomous-Driving-models
Commits
d2b71343
Commit
d2b71343
authored
Apr 08, 2026
by
雍大凯
Browse files
add code
parent
69e57885
Changes
259
Hide whitespace changes
Inline
Side-by-side
Showing
19 changed files
with
5208 additions
and
2 deletions
+5208
-2
docker-hub/FlashOCC/Flashocc/tools/slurm_test.sh
docker-hub/FlashOCC/Flashocc/tools/slurm_test.sh
+24
-0
docker-hub/FlashOCC/Flashocc/tools/slurm_train.sh
docker-hub/FlashOCC/Flashocc/tools/slurm_train.sh
+24
-0
docker-hub/FlashOCC/Flashocc/tools/test.py
docker-hub/FlashOCC/Flashocc/tools/test.py
+290
-0
docker-hub/FlashOCC/Flashocc/tools/train.py
docker-hub/FlashOCC/Flashocc/tools/train.py
+290
-0
docker-hub/FlashOCC/Flashocc/tools/update_data_coords.py
docker-hub/FlashOCC/Flashocc/tools/update_data_coords.py
+168
-0
docker-hub/FlashOCC/Flashocc/tools/update_data_coords.sh
docker-hub/FlashOCC/Flashocc/tools/update_data_coords.sh
+22
-0
docker-hub/FlashOCC/Flashocc/tools/vis_occ.py
docker-hub/FlashOCC/Flashocc/tools/vis_occ.py
+316
-0
docker-hub/FlashOCC/Flashocc/work_dirs/flashocc-r50/20260403_162421.log
...shOCC/Flashocc/work_dirs/flashocc-r50/20260403_162421.log
+1715
-0
docker-hub/FlashOCC/Flashocc/work_dirs/flashocc-r50/20260403_162421.log.json
.../Flashocc/work_dirs/flashocc-r50/20260403_162421.log.json
+1
-0
docker-hub/FlashOCC/Flashocc/work_dirs/flashocc-r50/20260403_162651.log
...shOCC/Flashocc/work_dirs/flashocc-r50/20260403_162651.log
+1725
-0
docker-hub/FlashOCC/Flashocc/work_dirs/flashocc-r50/20260403_162651.log.json
.../Flashocc/work_dirs/flashocc-r50/20260403_162651.log.json
+11
-0
docker-hub/FlashOCC/Flashocc/work_dirs/flashocc-r50/flashocc-r50.py
.../FlashOCC/Flashocc/work_dirs/flashocc-r50/flashocc-r50.py
+617
-0
docker-hub/FlashOCC/Flashocc/work_dirs/flashocc-r50/tf_logs/events.out.tfevents.1775204673.bw61.849.0
...occ-r50/tf_logs/events.out.tfevents.1775204673.bw61.849.0
+0
-0
docker-hub/FlashOCC/Flashocc/work_dirs/flashocc-r50/tf_logs/events.out.tfevents.1775204818.bw61.20636.0
...c-r50/tf_logs/events.out.tfevents.1775204818.bw61.20636.0
+0
-0
docker-hub/MapTRv2/MapTR
docker-hub/MapTRv2/MapTR
+1
-0
docker-hub/PointPillars/pointpillars/mmdetection3d
docker-hub/PointPillars/pointpillars/mmdetection3d
+1
-0
docker-hub/Sparse4D/Sparse4D
docker-hub/Sparse4D/Sparse4D
+1
-0
docker-hub/qwen2.5-vl/readme.md
docker-hub/qwen2.5-vl/readme.md
+1
-1
docker-hub/wan2.1/readme.md
docker-hub/wan2.1/readme.md
+1
-1
No files found.
docker-hub/FlashOCC/Flashocc/tools/slurm_test.sh
0 → 100755
View file @
d2b71343
#!/usr/bin/env bash
# Launch tools/test.py on a Slurm cluster through srun.
#
# Usage:
#   [GPUS=8] [GPUS_PER_NODE=8] [CPUS_PER_TASK=5] [SRUN_ARGS="..."] \
#       slurm_test.sh PARTITION JOB_NAME CONFIG CHECKPOINT [PY_ARGS...]

set -x

PARTITION=$1
JOB_NAME=$2
CONFIG=$3
CHECKPOINT=$4
GPUS=${GPUS:-8}
GPUS_PER_NODE=${GPUS_PER_NODE:-8}
CPUS_PER_TASK=${CPUS_PER_TASK:-5}
# Keep the extra arguments in an array so that values containing spaces
# reach test.py as single arguments.
PY_ARGS=("${@:5}")
SRUN_ARGS=${SRUN_ARGS:-""}

# SRUN_ARGS is intentionally left unquoted: it is a whitespace-separated list
# of additional srun options supplied through the environment.
PYTHONPATH="$(dirname "$0")/..:${PYTHONPATH}" \
srun -p "${PARTITION}" \
    --job-name="${JOB_NAME}" \
    --gres=gpu:"${GPUS_PER_NODE}" \
    --ntasks="${GPUS}" \
    --ntasks-per-node="${GPUS_PER_NODE}" \
    --cpus-per-task="${CPUS_PER_TASK}" \
    --kill-on-bad-exit=1 \
    ${SRUN_ARGS} \
    python -u tools/test.py "${CONFIG}" "${CHECKPOINT}" --launcher="slurm" "${PY_ARGS[@]}"
docker-hub/FlashOCC/Flashocc/tools/slurm_train.sh
0 → 100755
View file @
d2b71343
#!/usr/bin/env bash
# Launch tools/train.py on a Slurm cluster through srun.
#
# Usage:
#   [GPUS=8] [GPUS_PER_NODE=8] [CPUS_PER_TASK=5] [SRUN_ARGS="..."] \
#       slurm_train.sh PARTITION JOB_NAME CONFIG WORK_DIR [PY_ARGS...]

set -x

PARTITION=$1
JOB_NAME=$2
CONFIG=$3
WORK_DIR=$4
GPUS=${GPUS:-8}
GPUS_PER_NODE=${GPUS_PER_NODE:-8}
CPUS_PER_TASK=${CPUS_PER_TASK:-5}
SRUN_ARGS=${SRUN_ARGS:-""}
# Keep the extra arguments in an array so that values containing spaces
# reach train.py as single arguments.
PY_ARGS=("${@:5}")

# SRUN_ARGS is intentionally left unquoted: it is a whitespace-separated list
# of additional srun options supplied through the environment.
PYTHONPATH="$(dirname "$0")/..:${PYTHONPATH}" \
srun -p "${PARTITION}" \
    --job-name="${JOB_NAME}" \
    --gres=gpu:"${GPUS_PER_NODE}" \
    --ntasks="${GPUS}" \
    --ntasks-per-node="${GPUS_PER_NODE}" \
    --cpus-per-task="${CPUS_PER_TASK}" \
    --kill-on-bad-exit=1 \
    ${SRUN_ARGS} \
    python -u tools/train.py "${CONFIG}" --work-dir="${WORK_DIR}" --launcher="slurm" "${PY_ARGS[@]}"
docker-hub/FlashOCC/Flashocc/tools/test.py
0 → 100644
View file @
d2b71343
# Copyright (c) OpenMMLab. All rights reserved.
import
argparse
import
os
import
warnings
import
mmcv
import
torch
from
mmcv
import
Config
,
DictAction
from
mmcv.cnn
import
fuse_conv_bn
from
mmcv.parallel
import
MMDataParallel
,
MMDistributedDataParallel
from
mmcv.runner
import
(
get_dist_info
,
init_dist
,
load_checkpoint
,
wrap_fp16_model
)
import
mmdet
from
mmdet3d.apis
import
single_gpu_test
from
mmdet3d.datasets
import
build_dataloader
,
build_dataset
from
mmdet3d.models
import
build_model
from
mmdet.apis
import
multi_gpu_test
,
set_random_seed
from
mmdet.datasets
import
replace_ImageToTensor
try:
    # Prefer the mmdet implementation when the installed release ships it.
    # Probing the import avoids comparing version strings lexicographically,
    # which mis-orders releases (e.g. '2.3.0' > '2.23.0' is True for str).
    from mmdet.utils import setup_multi_processes
except ImportError:
    # Older mmdet releases: fall back to the mmdet3d copy.
    from mmdet3d.utils import setup_multi_processes
try
:
# If mmdet version > 2.23.0, compat_cfg would be imported and
# used from mmdet instead of mmdet3d.
from
mmdet.utils
import
compat_cfg
except
ImportError
:
from
mmdet3d.utils
import
compat_cfg
def parse_args():
    """Parse the command line of the test script.

    Besides building the ``argparse`` namespace this function:

    * exports ``LOCAL_RANK`` to the environment (from ``--local_rank``) when
      it is not already set;
    * maps the deprecated ``--options`` onto ``eval_options``.

    Returns:
        argparse.Namespace: the parsed arguments.

    Raises:
        ValueError: if both ``--options`` and ``--eval-options`` are given.
    """
    parser = argparse.ArgumentParser(
        description='MMDet test (and eval) a model')
    parser.add_argument('config', help='test config file path')
    parser.add_argument('checkpoint', help='checkpoint file')
    parser.add_argument('--out', help='output result file in pickle format')
    parser.add_argument(
        '--fuse-conv-bn',
        action='store_true',
        help='Whether to fuse conv and bn, this will slightly increase '
        'the inference speed')
    parser.add_argument(
        '--gpu-ids',
        type=int,
        nargs='+',
        help='(Deprecated, please use --gpu-id) ids of gpus to use '
        '(only applicable to non-distributed testing)')
    parser.add_argument(
        '--gpu-id',
        type=int,
        default=0,
        help='id of gpu to use '
        '(only applicable to non-distributed testing)')
    parser.add_argument(
        '--format-only',
        action='store_true',
        help='Format the output results without perform evaluation. It is '
        'useful when you want to format the result to a specific format and '
        'submit it to the test server')
    parser.add_argument(
        '--eval',
        type=str,
        nargs='+',
        help='evaluation metrics, which depends on the dataset, e.g., "bbox",'
        ' "segm", "proposal" for COCO, and "mAP", "recall" for PASCAL VOC')
    parser.add_argument('--show', action='store_true', help='show results')
    parser.add_argument(
        '--show-dir', help='directory where results will be saved')
    parser.add_argument(
        '--gpu-collect',
        action='store_true',
        help='whether to use gpu to collect results.')
    parser.add_argument(
        '--no-aavt',
        action='store_true',
        help='Do not align after view transformer.')
    parser.add_argument(
        '--tmpdir',
        help='tmp directory used for collecting results from multiple '
        'workers, available when gpu-collect is not specified')
    parser.add_argument('--seed', type=int, default=0, help='random seed')
    parser.add_argument(
        '--deterministic',
        action='store_true',
        help='whether to set deterministic options for CUDNN backend.')
    parser.add_argument(
        '--cfg-options',
        nargs='+',
        action=DictAction,
        help='override some settings in the used config, the key-value pair '
        'in xxx=yyy format will be merged into config file. If the value to '
        'be overwritten is a list, it should be like key="[a,b]" or key=a,b '
        'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" '
        'Note that the quotation marks are necessary and that no white space '
        'is allowed.')
    parser.add_argument(
        '--options',
        nargs='+',
        action=DictAction,
        help='custom options for evaluation, the key-value pair in xxx=yyy '
        'format will be kwargs for dataset.evaluate() function (deprecate), '
        'change to --eval-options instead.')
    parser.add_argument(
        '--eval-options',
        nargs='+',
        action=DictAction,
        help='custom options for evaluation, the key-value pair in xxx=yyy '
        'format will be kwargs for dataset.evaluate() function')
    parser.add_argument(
        '--launcher',
        choices=['none', 'pytorch', 'slurm', 'mpi'],
        default='none',
        help='job launcher')
    parser.add_argument('--local_rank', type=int, default=0)
    args = parser.parse_args()

    # Only fill LOCAL_RANK in when the launcher has not already exported it.
    if 'LOCAL_RANK' not in os.environ:
        os.environ['LOCAL_RANK'] = str(args.local_rank)

    if args.options and args.eval_options:
        raise ValueError(
            '--options and --eval-options cannot be both specified, '
            '--options is deprecated in favor of --eval-options')
    if args.options:
        warnings.warn('--options is deprecated in favor of --eval-options')
        args.eval_options = args.options
    return args
def main():
    """Run inference with a checkpoint and optionally dump/format/evaluate.

    Steps, in order: validate the CLI arguments, load and patch the config,
    import the optional plugin package, initialise the distributed
    environment, build the test dataset/dataloader and the model, run
    single- or multi-GPU testing, and finally (on rank 0 only) write the
    results, format them and/or evaluate them.

    Raises:
        AssertionError: if none of --out/--eval/--format-only/--show/
            --show-dir is given.
        ValueError: if --eval and --format-only are combined, or if --out
            does not end with ``.pkl``/``.pickle``.
    """
    args = parse_args()

    assert args.out or args.eval or args.format_only or args.show \
        or args.show_dir, \
        ('Please specify at least one operation (save/eval/format/show the '
         'results / save the results) with the argument "--out", "--eval"'
         ', "--format-only", "--show" or "--show-dir"')

    if args.eval and args.format_only:
        raise ValueError('--eval and --format_only cannot be both specified')

    if args.out is not None and not args.out.endswith(('.pkl', '.pickle')):
        raise ValueError('The output file must be a pkl file.')

    cfg = Config.fromfile(args.config)
    # command-line overrides take precedence over the config file
    if args.cfg_options is not None:
        cfg.merge_from_dict(args.cfg_options)

    cfg = compat_cfg(cfg)

    # set multi-process settings
    setup_multi_processes(cfg)

    # import modules from plugin/xx, registry will be updated
    if hasattr(cfg, 'plugin'):
        if cfg.plugin:
            import importlib
            if hasattr(cfg, 'plugin_dir'):
                # turn the directory part of plugin_dir into a dotted
                # module path ('a/b/c.py' -> 'a.b') and import it
                plugin_dir = cfg.plugin_dir
                _module_dir = os.path.dirname(plugin_dir)
                _module_dir = _module_dir.split('/')
                _module_path = _module_dir[0]

                for m in _module_dir[1:]:
                    _module_path = _module_path + '.' + m
                print(_module_path)
                plg_lib = importlib.import_module(_module_path)
            else:
                # import dir is the dirpath for the config file
                _module_dir = os.path.dirname(args.config)
                _module_dir = _module_dir.split('/')
                _module_path = _module_dir[0]
                for m in _module_dir[1:]:
                    _module_path = _module_path + '.' + m
                plg_lib = importlib.import_module(_module_path)

    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True

    # weights come from the checkpoint given on the command line
    cfg.model.pretrained = None

    if args.gpu_ids is not None:
        # only the first id is kept
        cfg.gpu_ids = args.gpu_ids[0:1]
        warnings.warn('`--gpu-ids` is deprecated, please use `--gpu-id`. '
                      'Because we only support single GPU mode in '
                      'non-distributed testing. Use the first GPU '
                      'in `gpu_ids` now.')
    else:
        cfg.gpu_ids = [args.gpu_id]

    # init distributed env first, since logger depends on the dist info.
    if args.launcher == 'none':
        distributed = False
    else:
        distributed = True
        init_dist(args.launcher, **cfg.dist_params)

    # defaults; entries of cfg.data.test_dataloader override them below
    test_dataloader_default_args = dict(
        samples_per_gpu=1, workers_per_gpu=2, dist=distributed, shuffle=False)

    # in case the test dataset is concatenated
    if isinstance(cfg.data.test, dict):
        cfg.data.test.test_mode = True
        if cfg.data.test_dataloader.get('samples_per_gpu', 1) > 1:
            # Replace 'ImageToTensor' to 'DefaultFormatBundle'
            cfg.data.test.pipeline = replace_ImageToTensor(
                cfg.data.test.pipeline)
    elif isinstance(cfg.data.test, list):
        for ds_cfg in cfg.data.test:
            ds_cfg.test_mode = True
        if cfg.data.test_dataloader.get('samples_per_gpu', 1) > 1:
            for ds_cfg in cfg.data.test:
                ds_cfg.pipeline = replace_ImageToTensor(ds_cfg.pipeline)

    test_loader_cfg = {
        **test_dataloader_default_args,
        **cfg.data.get('test_dataloader', {})
    }

    # set random seeds
    # NOTE(review): --seed defaults to 0, so this branch is always taken
    # unless the default is changed.
    if args.seed is not None:
        set_random_seed(args.seed, deterministic=args.deterministic)

    # build the dataloader
    dataset = build_dataset(cfg.data.test)
    data_loader = build_dataloader(dataset, **test_loader_cfg)

    # build the model and load checkpoint
    if not args.no_aavt:
        # enabled for model types whose name contains '4D' unless --no-aavt
        if '4D' in cfg.model.type:
            cfg.model.align_after_view_transfromation = True
    cfg.model.train_cfg = None
    model = build_model(cfg.model, test_cfg=cfg.get('test_cfg'))
    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is not None:
        wrap_fp16_model(model)
    checkpoint = load_checkpoint(model, args.checkpoint, map_location='cpu')
    if args.fuse_conv_bn:
        model = fuse_conv_bn(model)
    # old versions did not save class info in checkpoints, this workaround is
    # for backward compatibility
    if 'CLASSES' in checkpoint.get('meta', {}):
        model.CLASSES = checkpoint['meta']['CLASSES']
    else:
        model.CLASSES = dataset.CLASSES
    # palette for visualization in segmentation tasks
    if 'PALETTE' in checkpoint.get('meta', {}):
        model.PALETTE = checkpoint['meta']['PALETTE']
    elif hasattr(dataset, 'PALETTE'):
        # segmentation dataset has `PALETTE` attribute
        model.PALETTE = dataset.PALETTE

    if not distributed:
        model = MMDataParallel(model, device_ids=cfg.gpu_ids)
        outputs = single_gpu_test(model, data_loader, args.show, args.show_dir)
    else:
        model = MMDistributedDataParallel(
            model.cuda(),
            device_ids=[torch.cuda.current_device()],
            broadcast_buffers=False)
        outputs = multi_gpu_test(model, data_loader, args.tmpdir,
                                 args.gpu_collect)

    # only rank 0 writes, formats and evaluates the collected results
    rank, _ = get_dist_info()
    if rank == 0:
        if args.out:
            print(f'\nwriting results to {args.out}')
            mmcv.dump(outputs, args.out)
        kwargs = {} if args.eval_options is None else args.eval_options
        if args.format_only:
            dataset.format_results(outputs, **kwargs)
        if args.eval:
            eval_kwargs = cfg.get('evaluation', {}).copy()
            # hard-code way to remove EvalHook args
            for key in [
                    'interval', 'tmpdir', 'start', 'gpu_collect', 'save_best',
                    'rule'
            ]:
                eval_kwargs.pop(key, None)
            eval_kwargs.update(dict(metric=args.eval, **kwargs))
            print(dataset.evaluate(outputs, **eval_kwargs))


if __name__ == '__main__':
    main()
docker-hub/FlashOCC/Flashocc/tools/train.py
0 → 100644
View file @
d2b71343
# Copyright (c) OpenMMLab. All rights reserved.
from
__future__
import
division
import
argparse
import
copy
import
os
import
time
import
warnings
from
os
import
path
as
osp
import
mmcv
import
torch
import
torch.distributed
as
dist
from
mmcv
import
Config
,
DictAction
from
mmcv.runner
import
get_dist_info
,
init_dist
from
mmdet
import
__version__
as
mmdet_version
from
mmdet3d
import
__version__
as
mmdet3d_version
from
mmdet3d.apis
import
init_random_seed
,
train_model
from
mmdet3d.datasets
import
build_dataset
from
mmdet3d.models
import
build_model
from
mmdet3d.utils
import
collect_env
,
get_root_logger
from
mmdet.apis
import
set_random_seed
from
mmseg
import
__version__
as
mmseg_version
# Module-level side effect: flips a TorchDynamo configuration flag at import
# time.
# NOTE(review): presumably this lets torch.compile trace ops that return
# Python scalars (e.g. ``Tensor.item()``) instead of breaking the graph;
# ``torch._dynamo`` is an underscore-prefixed module, so confirm it exists in
# the PyTorch version this image pins.
torch._dynamo.config.capture_scalar_outputs = True
try
:
# If mmdet version > 2.20.0, setup_multi_processes would be imported and
# used from mmdet instead of mmdet3d.
from
mmdet.utils
import
setup_multi_processes
except
ImportError
:
from
mmdet3d.utils
import
setup_multi_processes
def parse_args():
    """Parse the command line of the training script.

    Besides building the ``argparse`` namespace this function:

    * exports ``LOCAL_RANK`` to the environment (from ``--local_rank``) when
      it is not already set;
    * maps the deprecated ``--options`` onto ``cfg_options``.

    Returns:
        argparse.Namespace: the parsed arguments.

    Raises:
        ValueError: if both ``--options`` and ``--cfg-options`` are given.
    """
    parser = argparse.ArgumentParser(description='Train a detector')
    parser.add_argument('config', help='train config file path')
    parser.add_argument('--work-dir', help='the dir to save logs and models')
    parser.add_argument(
        '--resume-from', help='the checkpoint file to resume from')
    parser.add_argument(
        '--auto-resume',
        action='store_true',
        help='resume from the latest checkpoint automatically')
    parser.add_argument(
        '--validate',
        action='store_true',
        help='whether to evaluate the checkpoint during training')
    # --gpus, --gpu-ids and --gpu-id are mutually exclusive
    group_gpus = parser.add_mutually_exclusive_group()
    group_gpus.add_argument(
        '--gpus',
        type=int,
        help='(Deprecated, please use --gpu-id) number of gpus to use '
        '(only applicable to non-distributed training)')
    group_gpus.add_argument(
        '--gpu-ids',
        type=int,
        nargs='+',
        help='(Deprecated, please use --gpu-id) ids of gpus to use '
        '(only applicable to non-distributed training)')
    group_gpus.add_argument(
        '--gpu-id',
        type=int,
        default=0,
        help='id of gpu to use '
        '(only applicable to non-distributed training)')
    parser.add_argument('--seed', type=int, default=0, help='random seed')
    parser.add_argument(
        '--diff-seed',
        action='store_true',
        help='Whether or not set different seeds for different ranks')
    parser.add_argument(
        '--deterministic',
        action='store_true',
        help='whether to set deterministic options for CUDNN backend.')
    parser.add_argument(
        '--options',
        nargs='+',
        action=DictAction,
        help='override some settings in the used config, the key-value pair '
        'in xxx=yyy format will be merged into config file (deprecate), '
        'change to --cfg-options instead.')
    parser.add_argument(
        '--cfg-options',
        nargs='+',
        action=DictAction,
        help='override some settings in the used config, the key-value pair '
        'in xxx=yyy format will be merged into config file. If the value to '
        'be overwritten is a list, it should be like key="[a,b]" or key=a,b '
        'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" '
        'Note that the quotation marks are necessary and that no white space '
        'is allowed.')
    parser.add_argument(
        '--launcher',
        choices=['none', 'pytorch', 'slurm', 'mpi'],
        default='none',
        help='job launcher')
    parser.add_argument('--local_rank', type=int, default=0)
    parser.add_argument(
        '--autoscale-lr',
        action='store_true',
        help='automatically scale lr with the number of gpus')
    args = parser.parse_args()

    # Only fill LOCAL_RANK in when the launcher has not already exported it.
    if 'LOCAL_RANK' not in os.environ:
        os.environ['LOCAL_RANK'] = str(args.local_rank)

    if args.options and args.cfg_options:
        raise ValueError(
            '--options and --cfg-options cannot be both specified, '
            '--options is deprecated in favor of --cfg-options')
    if args.options:
        warnings.warn('--options is deprecated in favor of --cfg-options')
        args.cfg_options = args.options

    return args
def main():
    """Train a model described by a config file.

    Steps, in order: load and patch the config, import the optional plugin
    package, resolve the work dir / resume / GPU settings, initialise the
    distributed environment, create the work dir and logger, seed the RNGs,
    build the model and dataset(s), and hand everything to ``train_model``.
    """
    args = parse_args()

    cfg = Config.fromfile(args.config)
    # command-line overrides take precedence over the config file
    if args.cfg_options is not None:
        cfg.merge_from_dict(args.cfg_options)

    # set multi-process settings
    setup_multi_processes(cfg)

    # import modules from plugin/xx, registry will be updated
    if hasattr(cfg, 'plugin'):
        if cfg.plugin:
            import importlib
            if hasattr(cfg, 'plugin_dir'):
                # 'a/b/c.py' -> 'a.b'
                _module_path = '.'.join(
                    os.path.dirname(cfg.plugin_dir).split('/'))
                print(_module_path)
            else:
                # import dir is the dirpath for the config file
                _module_path = '.'.join(
                    os.path.dirname(args.config).split('/'))
            importlib.import_module(_module_path)

    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True

    # work_dir is determined in this priority: CLI > segment in file > filename
    if args.work_dir is not None:
        # update configs according to CLI args if args.work_dir is not None
        cfg.work_dir = args.work_dir
    elif cfg.get('work_dir', None) is None:
        # use config filename as default work_dir if cfg.work_dir is None
        cfg.work_dir = osp.join('./work_dirs',
                                osp.splitext(osp.basename(args.config))[0])
    if args.resume_from is not None:
        cfg.resume_from = args.resume_from

    if args.auto_resume:
        cfg.auto_resume = args.auto_resume
        warnings.warn('`--auto-resume` is only supported when mmdet '
                      'version >= 2.20.0 for 3D detection model or '
                      'mmsegmentation version >= 0.21.0 for 3D '
                      'segmentation model')

    if args.gpus is not None:
        cfg.gpu_ids = range(1)
        warnings.warn('`--gpus` is deprecated because we only support '
                      'single GPU mode in non-distributed training. '
                      'Use `gpus=1` now.')
    if args.gpu_ids is not None:
        cfg.gpu_ids = args.gpu_ids[0:1]
        warnings.warn('`--gpu-ids` is deprecated, please use `--gpu-id`. '
                      'Because we only support single GPU mode in '
                      'non-distributed training. Use the first GPU '
                      'in `gpu_ids` now.')
    if args.gpus is None and args.gpu_ids is None:
        cfg.gpu_ids = [args.gpu_id]

    if args.autoscale_lr:
        # apply the linear scaling rule (https://arxiv.org/abs/1706.02677)
        # NOTE(review): this runs before gpu_ids is re-set to the world size
        # below, so len(cfg.gpu_ids) is 1 here even in distributed runs --
        # confirm whether that is intended before relying on --autoscale-lr.
        cfg.optimizer['lr'] = cfg.optimizer['lr'] * len(cfg.gpu_ids) / 8

    # init distributed env first, since logger depends on the dist info.
    if args.launcher == 'none':
        distributed = False
    else:
        distributed = True
        init_dist(args.launcher, **cfg.dist_params)
        # re-set gpu_ids with distributed training mode
        _, world_size = get_dist_info()
        cfg.gpu_ids = range(world_size)

    # create work_dir
    mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))
    # dump config
    cfg.dump(osp.join(cfg.work_dir, osp.basename(args.config)))
    # init the logger before other steps
    timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    log_file = osp.join(cfg.work_dir, f'{timestamp}.log')
    # specify logger name, if we still use 'mmdet', the output info will be
    # filtered and won't be saved in the log_file
    # TODO: ugly workaround to judge whether we are training det or seg model
    if cfg.model.type in ['EncoderDecoder3D']:
        logger_name = 'mmseg'
    else:
        logger_name = 'mmdet'
    logger = get_root_logger(
        log_file=log_file, log_level=cfg.log_level, name=logger_name)

    # init the meta dict to record some important information such as
    # environment info and seed, which will be logged
    meta = dict()
    # log env info
    env_info_dict = collect_env()
    env_info = '\n'.join([(f'{k}: {v}') for k, v in env_info_dict.items()])
    dash_line = '-' * 60 + '\n'
    logger.info('Environment info:\n' + dash_line + env_info + '\n' +
                dash_line)
    meta['env_info'] = env_info
    meta['config'] = cfg.pretty_text

    # log some basic info
    logger.info(f'Distributed training: {distributed}')
    logger.info(f'Config:\n{cfg.pretty_text}')

    # set random seeds
    seed = init_random_seed(args.seed)
    if args.diff_seed:
        # get_dist_info() reports rank 0 when no process group has been
        # initialised, so --diff-seed no longer fails in non-distributed runs
        # (dist.get_rank() raises there).
        rank, _ = get_dist_info()
        seed = seed + rank
    logger.info(f'Set random seed to {seed}, '
                f'deterministic: {args.deterministic}')
    set_random_seed(seed, deterministic=args.deterministic)
    cfg.seed = seed
    meta['seed'] = seed
    meta['exp_name'] = osp.basename(args.config)

    model = build_model(
        cfg.model,
        train_cfg=cfg.get('train_cfg'),
        test_cfg=cfg.get('test_cfg'))
    model.init_weights()

    logger.info(f'Model:\n{model}')
    datasets = [build_dataset(cfg.data.train)]
    if len(cfg.workflow) == 2:
        val_dataset = copy.deepcopy(cfg.data.val)
        # in case we use a dataset wrapper
        if 'dataset' in cfg.data.train:
            val_dataset.pipeline = cfg.data.train.dataset.pipeline
        else:
            val_dataset.pipeline = cfg.data.train.pipeline
        # set test_mode=False here in deep copied config
        # which do not affect AP/AR calculation later
        # refer to https://mmdetection3d.readthedocs.io/en/latest/tutorials/customize_runtime.html#customize-workflow  # noqa
        val_dataset.test_mode = False
        datasets.append(build_dataset(val_dataset))
    if cfg.checkpoint_config is not None:
        # save mmdet version, config file content and class names in
        # checkpoints as meta data
        cfg.checkpoint_config.meta = dict(
            mmdet_version=mmdet_version,
            mmseg_version=mmseg_version,
            mmdet3d_version=mmdet3d_version,
            config=cfg.pretty_text,
            CLASSES=datasets[0].CLASSES,
            PALETTE=datasets[0].PALETTE  # for segmentors
            if hasattr(datasets[0], 'PALETTE') else None)
    # add an attribute for visualization convenience
    model.CLASSES = datasets[0].CLASSES
    train_model(
        model,
        datasets,
        cfg,
        distributed=distributed,
        validate=args.validate,
        timestamp=timestamp,
        meta=meta)


if __name__ == '__main__':
    torch.multiprocessing.set_start_method('fork')
    main()
docker-hub/FlashOCC/Flashocc/tools/update_data_coords.py
0 → 100644
View file @
d2b71343
import
argparse
import
time
from
os
import
path
as
osp
import
mmcv
import
numpy
as
np
from
mmdet3d.core.bbox
import
limit_period
def update_sunrgbd_infos(root_dir, out_dir, pkl_files):
    """Negate the yaw stored in SUN RGB-D info pickles.

    For every entry whose ``annos`` contain ``rotation_y``, both
    ``rotation_y`` and the last column of ``gt_boxes_upright_depth`` are
    negated in place. Each file is read from ``root_dir`` and written under
    the same name to ``out_dir``.

    Args:
        root_dir: directory holding the input pickles.
        out_dir: directory receiving the updated pickles; a warning is
            printed (followed by a 3 s pause) when it equals ``root_dir``.
        pkl_files: iterable of pickle file names to process.
    """
    print(f'{pkl_files} will be modified because '
          'of the refactor of the Depth coordinate system.')
    overwriting = root_dir == out_dir
    if overwriting:
        print(f'Warning, you are overwriting the original data under {root_dir}.')
        time.sleep(3)

    for pkl_file in pkl_files:
        in_path = osp.join(root_dir, pkl_file)
        print(f'Reading from input file: {in_path}.')
        infos = mmcv.load(in_path)
        print('Start updating:')
        for info in mmcv.track_iter_progress(infos):
            annos = info['annos']
            if 'rotation_y' not in annos:
                continue
            annos['rotation_y'] = -annos['rotation_y']
            upright = annos['gt_boxes_upright_depth']
            upright[:, -1:] = -upright[:, -1:]

        out_path = osp.join(out_dir, pkl_file)
        print(f'Writing to output file: {out_path}.')
        mmcv.dump(infos, out_path, 'pkl')
def update_outdoor_dbinfos(root_dir, out_dir, pkl_files):
    """Rewrite ``box3d_lidar`` of every dbinfo sample in place.

    For each sample, elements 3 and 4 of ``box3d_lidar`` are swapped and
    element 6 becomes ``limit_period(-old - pi / 2, period=2 * pi)``. Each
    file is read from ``root_dir`` and written under the same name to
    ``out_dir``.

    Args:
        root_dir: directory holding the input pickles.
        out_dir: directory receiving the updated pickles; a warning is
            printed (followed by a 3 s pause) when it equals ``root_dir``.
        pkl_files: iterable of pickle file names to process.
    """
    print(f'{pkl_files} will be modified because '
          'of the refactor of the LIDAR coordinate system.')
    overwriting = root_dir == out_dir
    if overwriting:
        print(f'Warning, you are overwriting the original data under {root_dir}.')
        time.sleep(3)

    for pkl_file in pkl_files:
        in_path = osp.join(root_dir, pkl_file)
        print(f'Reading from input file: {in_path}.')
        dbinfos = mmcv.load(in_path)
        print('Start updating:')
        for cls_name, samples in dbinfos.items():
            print(f'Updating samples of class {cls_name}:')
            for sample in mmcv.track_iter_progress(samples):
                box = sample['box3d_lidar']
                before = box.copy()

                # swap l, w (or dx, dy)
                box[3] = before[4]
                box[4] = before[3]

                # change yaw; stored first, then wrapped, exactly as before
                box[6] = -before[6] - np.pi / 2
                box[6] = limit_period(box[6], period=np.pi * 2)

        out_path = osp.join(out_dir, pkl_file)
        print(f'Writing to output file: {out_path}.')
        mmcv.dump(dbinfos, out_path, 'pkl')
def update_nuscenes_or_lyft_infos(root_dir, out_dir, pkl_files):
    """Rewrite ``gt_boxes`` of every entry under ``infos`` in place.

    For each entry, columns 3 and 4 of ``gt_boxes`` are swapped and column 6
    becomes ``limit_period(-old - pi / 2, period=2 * pi)``. Each file is read
    from ``root_dir`` and written under the same name to ``out_dir``.

    Args:
        root_dir: directory holding the input pickles.
        out_dir: directory receiving the updated pickles; a warning is
            printed (followed by a 3 s pause) when it equals ``root_dir``.
        pkl_files: iterable of pickle file names to process.
    """
    print(f'{pkl_files} will be modified because '
          'of the refactor of the LIDAR coordinate system.')
    overwriting = root_dir == out_dir
    if overwriting:
        print(f'Warning, you are overwriting the original data under {root_dir}.')
        time.sleep(3)

    for pkl_file in pkl_files:
        in_path = osp.join(root_dir, pkl_file)
        print(f'Reading from input file: {in_path}.')
        data = mmcv.load(in_path)
        print('Start updating:')
        for info in mmcv.track_iter_progress(data['infos']):
            gt_boxes = info['gt_boxes']
            before = gt_boxes.copy()

            # swap l, w (or dx, dy)
            gt_boxes[:, 3] = before[:, 4]
            gt_boxes[:, 4] = before[:, 3]

            # change yaw; stored first, then wrapped, exactly as before
            gt_boxes[:, 6] = -before[:, 6] - np.pi / 2
            gt_boxes[:, 6] = limit_period(gt_boxes[:, 6], period=np.pi * 2)

        out_path = osp.join(out_dir, pkl_file)
        print(f'Writing to output file: {out_path}.')
        mmcv.dump(data, out_path, 'pkl')
# Command-line interface. The parser is built and evaluated at module level,
# so the arguments are parsed as soon as this file is executed or imported.
parser = argparse.ArgumentParser(
    description='Arg parser for data coords '
    'update due to coords sys refactor.')
parser.add_argument('dataset', metavar='kitti', help='name of the dataset')
parser.add_argument(
    '--root-dir',
    type=str,
    default='./data/kitti',
    help='specify the root dir of dataset')
parser.add_argument(
    '--version',
    type=str,
    default='v1.0',
    required=False,
    help='specify the dataset version, no need for kitti')
parser.add_argument(
    '--out-dir',
    type=str,
    default=None,
    required=False,
    help='name of info pkl')
args = parser.parse_args()

if __name__ == '__main__':
    # Without --out-dir the files are updated in place.
    if args.out_dir is None:
        args.out_dir = args.root_dir
    io_dirs = dict(root_dir=args.root_dir, out_dir=args.out_dir)

    if args.dataset in ('kitti', 'waymo'):
        # infos are in CAM coord sys (unchanged)
        # dbinfos are in LIDAR coord sys (changed)
        # so we only update dbinfos
        pkl_files = [f'{args.dataset}_dbinfos_train.pkl']
        update_outdoor_dbinfos(pkl_files=pkl_files, **io_dirs)
    elif args.dataset == 'nuscenes':
        # nuScenes infos is in LIDAR coord sys (changed)
        # nuScenes dbinfos is in LIDAR coord sys (changed)
        # so we update both infos and dbinfos
        is_mini = args.version == 'v1.0-mini'
        train_infos = ('nuscenes_infos_train_tiny.pkl'
                       if is_mini else 'nuscenes_infos_train.pkl')
        pkl_files = ['nuscenes_infos_val.pkl', train_infos]
        update_nuscenes_or_lyft_infos(pkl_files=pkl_files, **io_dirs)
        if not is_mini:
            pkl_files = ['nuscenes_dbinfos_train.pkl']
            update_outdoor_dbinfos(pkl_files=pkl_files, **io_dirs)
    elif args.dataset == 'lyft':
        # Lyft infos is in LIDAR coord sys (changed)
        # Lyft has no dbinfos
        # so we update infos
        pkl_files = ['lyft_infos_train.pkl', 'lyft_infos_val.pkl']
        update_nuscenes_or_lyft_infos(pkl_files=pkl_files, **io_dirs)
    elif args.dataset == 'sunrgbd':
        # SUNRGBD infos is in DEPTH coord sys (changed)
        # and bbox is with yaw
        # so we update infos
        pkl_files = ['sunrgbd_infos_train.pkl', 'sunrgbd_infos_val.pkl']
        update_sunrgbd_infos(pkl_files=pkl_files, **io_dirs)
    # Nothing to do for the remaining names:
    #   'scannet' - infos are in DEPTH coord sys (changed) but bbox is
    #               without yaw, so ScanNet is unaffected
    #   's3dis'   - segmentation datasets are not affected
docker-hub/FlashOCC/Flashocc/tools/update_data_coords.sh
0 → 100644
View file @
d2b71343
#!/usr/bin/env bash
# Run tools/update_data_coords.py for one dataset through srun, updating the
# files under ./data/<DATASET> in place.
#
# Usage:
#   [GPUS=1] [GPUS_PER_NODE=1] [SRUN_ARGS="..."] \
#       update_data_coords.sh PARTITION DATASET

set -x
export PYTHONPATH="$(pwd):${PYTHONPATH}"

PARTITION=$1
DATASET=$2
GPUS=${GPUS:-1}
GPUS_PER_NODE=${GPUS_PER_NODE:-1}
SRUN_ARGS=${SRUN_ARGS:-""}
JOB_NAME=update_data_coords

# SRUN_ARGS is intentionally left unquoted: it is a whitespace-separated list
# of additional srun options supplied through the environment.
srun -p "${PARTITION}" \
    --job-name="${JOB_NAME}" \
    --gres=gpu:"${GPUS_PER_NODE}" \
    --ntasks="${GPUS}" \
    --ntasks-per-node="${GPUS_PER_NODE}" \
    --kill-on-bad-exit=1 \
    ${SRUN_ARGS} \
    python -u tools/update_data_coords.py "${DATASET}" \
        --root-dir "./data/${DATASET}" \
        --out-dir "./data/${DATASET}"
docker-hub/FlashOCC/Flashocc/tools/vis_occ.py
0 → 100644
View file @
d2b71343
import
os
import
cv2
import
logging
import
argparse
import
importlib
import
torch
import
numpy
as
np
from
tqdm
import
tqdm
from
mmcv
import
Config
,
DictAction
from
mmdet.apis
import
set_random_seed
from
mmdet3d.datasets
import
build_dataset
,
build_dataloader
from
mmcv.parallel
import
MMDataParallel
from
mmcv.runner
import
load_checkpoint
from
mmdet3d.models
import
build_model
import
mmdet
from
mmcv.runner
import
(
get_dist_info
,
init_dist
,
load_checkpoint
,
wrap_fp16_model
)
import
sys
# Put the current working directory first on the module search path.
# NOTE(review): presumably so that project packages resolve when the script
# is launched from the repository root -- confirm.
sys.path.insert(0, os.getcwd())
try:
    # Prefer the mmdet implementation when the installed release ships it.
    # Probing the import avoids comparing version strings lexicographically,
    # which mis-orders releases (e.g. '2.3.0' > '2.23.0' is True for str).
    from mmdet.utils import setup_multi_processes
except ImportError:
    # Older mmdet releases: fall back to the mmdet3d copy.
    from mmdet3d.utils import setup_multi_processes
try
:
# If mmdet version > 2.23.0, compat_cfg would be imported and
# used from mmdet instead of mmdet3d.
from
mmdet.utils
import
compat_cfg
except
ImportError
:
from
mmdet3d.utils
import
compat_cfg
# Occupancy class names; the list index is the class id. The last entry
# ('free') is the id occ2img() treats as empty space.
occ_class_names = [
    'others', 'barrier', 'bicycle', 'bus', 'car', 'construction_vehicle',
    'motorcycle', 'pedestrian', 'traffic_cone', 'trailer', 'truck',
    'driveable_surface', 'other_flat', 'sidewalk', 'terrain', 'manmade',
    'vegetation', 'free'
]

# One 4-component uint8 colour per class, in the same order as
# occ_class_names. occ2img() only uses the first three components.
color_map = np.array([
    [0, 0, 0, 255],        # others
    [255, 120, 50, 255],   # barrier              orangey
    [255, 192, 203, 255],  # bicycle              pink
    [255, 255, 0, 255],    # bus                  yellow
    [0, 150, 245, 255],    # car                  blue
    [0, 255, 255, 255],    # construction_vehicle cyan
    [200, 180, 0, 255],    # motorcycle           dark orange
    [255, 0, 0, 255],      # pedestrian           red
    [255, 240, 150, 255],  # traffic_cone         light yellow
    [135, 60, 0, 255],     # trailer              brown
    [160, 32, 240, 255],   # truck                purple
    [255, 0, 255, 255],    # driveable_surface    dark pink
    [175, 0, 75, 255],     # other_flat           dark red
    [75, 0, 75, 255],      # sidewalk             dark purple
    [150, 240, 80, 255],   # terrain              light green
    [230, 230, 250, 255],  # manmade              white
    [0, 175, 0, 255],      # vegetation           green
    [255, 255, 255, 255],  # free                 white
], dtype=np.uint8)
# # from matplotlib import colors
# # hex_code_list = [
# # '#000000', '#D3D3D3', '#BC8F8F', '#F08080', '#A52A2A', '#FF0000', '#FFA07A', '#A0522D', '#FFE4C4', '#FFE4B5', \
# # '#DAA520', '#FFD700', '#F0E68C', '#BDB76B', '#808000', '#FFFF00', '#9ACD32', '#7FFF00', '#8FBC8F', '#90EE90', \
# # '#32CD32', '#008000', '#00FF00', '#00FA9A', '#7FFFD4', '#48D1CC', '#2F4F4F', '#ADD8E6', '#87CEFA', '#DC143C', \
# # '#696969', '#9370DB', '#8A2BE2', '#9400D3', '#DDA0DD', '#FF00FF', '#C71585', '#DB7093', '#FFB6C1', '#bf9b0c', \
# # '#01889f', '#bb3f3f', '#1805db', '#48c072', '#fffd37', '#c44240', '#6140ef', '#ceaefa', '#04f489', '#c6f808', \
# # '#507b9c', '#cffdbc', '#ac7e04', '#01386a', '#ffb7ce', '#ffd1df', '#D2691E', '#FFDAB9', '#a55af4', '#95d0fc', \
# # ]
# # hex_code_list = np.array(hex_code_list).reshape(6,10).transpose(1,0).reshape(-1)
# # pano_color_map = np.array([[int(value * 255) for value in colors.hex2color(hex_code)] for hex_code in hex_code_list], dtype=np.uint8)
import
matplotlib.pyplot
as
plt
from
scipy.ndimage
import
rotate
def draw_fig(tensor, name='tensor_image_colored_no_white.png'):
    """Save a 2-D map as a borderless 'viridis' image.

    The leading axis is squeezed away, the map is rotated by -90 degrees
    (keeping its shape) and flipped along axis 1 before being drawn.

    Args:
        tensor: array-like whose axis 0 has length 1.
            NOTE(review): presumably a (1, H, W) numpy array -- confirm
            against the callers.
        name: path of the image file to write.
    """
    tensor = tensor.squeeze(0)
    tensor = rotate(tensor, -90, reshape=False)
    tensor = np.flip(tensor, axis=1)

    fig, ax = plt.subplots(figsize=(4, 4))
    try:
        ax.imshow(tensor, cmap='viridis')
        ax.axis('off')
        fig.savefig(name, bbox_inches='tight', pad_inches=0)
    finally:
        # plt.clf() only clears the figure; closing it releases it so that
        # repeated calls do not accumulate open figures.
        plt.close(fig)
def generate_rgb_color(number):
    """Split ``number`` into three base-256 digits, least significant first.

    Args:
        number: integer to decompose.

    Returns:
        list: ``[red, green, blue]`` where red is ``number % 256``, green is
        ``(number // 256) % 256`` and blue is ``(number // 65536) % 256``.
    """
    channels = [number % 256]
    channels.extend((number // divisor) % 256 for divisor in (256, 65536))
    return channels
# Palette used for panoptic instance ids: 256 random integers below 2**24,
# each unpacked into an [R, G, B] row by generate_rgb_color. The numbers come
# from NumPy's global RNG when this module is loaded.
# NOTE(review): that happens before main() calls set_random_seed, so the
# palette presumably differs from run to run -- confirm whether that matters.
pano_color_map = np.array(
    list(map(generate_rgb_color, np.random.randint(0, 65536 * 256, 256))))

# Semantic class ids whose pixels occ2img recolours per instance when it is
# called with is_pano=True.
inst_class_ids = [
    2, 3, 4, 5, 6, 7, 9, 10,
]
def occ2img(semantics=None, is_pano=False, panoptics=None):
    """Flatten a 3-D label grid into an 800x800 colour image.

    The grid is collapsed along its last axis: slices are visited in index
    order and every non-free voxel overwrites the pixel below it, so the
    non-free voxel with the highest index on that axis decides the pixel.
    NOTE(review): the last axis is presumably height, giving a top-down
    view -- confirm against the dataset layout.

    Args:
        semantics: Array-like of shape (H, W, D) holding class ids. The id
            ``len(occ_class_names) - 1`` is treated as free space.
        is_pano: When True, pixels whose class is listed in
            ``inst_class_ids`` are recoloured by instance id.
        panoptics: Array-like of shape (H, W, D) holding instance ids; only
            read when ``is_pano`` is True.

    Returns:
        The 3-channel image produced by ``cv2.resize`` with
        ``dsize=(800, 800)``.
    """
    # Accept anything NumPy can view as an array so that all masking below
    # is plain NumPy indexing.
    semantics = np.asarray(semantics)
    H, W, D = semantics.shape

    free_id = len(occ_class_names) - 1
    semantics_2d = np.full([H, W], free_id, dtype=np.int32)

    for i in range(D):
        semantics_i = semantics[..., i]
        non_free_mask = semantics_i != free_id
        semantics_2d[non_free_mask] = semantics_i[non_free_mask]

    # Fancy indexing returns a fresh array, so writing into viz below does
    # not touch color_map. Only the first three channels are kept.
    viz = color_map[semantics_2d][..., :3]

    if is_pano:
        panoptics = np.asarray(panoptics)
        panoptics_2d = np.zeros([H, W], dtype=np.int32)
        for i in range(D):
            non_free_mask = semantics[..., i] != free_id
            panoptics_2d[non_free_mask] = panoptics[..., i][non_free_mask]

        # np.isin replaces the former `.astype(np.bool)` mask: the np.bool
        # alias was removed in NumPy 1.24 and raised AttributeError there.
        inst_mask = np.isin(semantics_2d, inst_class_ids)

        # Wrap ids onto the palette length. Ids already inside the palette
        # keep their colour; larger ids no longer raise IndexError.
        viz_pano = pano_color_map[panoptics_2d % len(pano_color_map)]
        viz[inst_mask, :] = viz_pano[inst_mask, :]

    viz = cv2.resize(viz, dsize=(800, 800))
    return viz
def main():
    """Run a checkpoint over the test split and write occupancy renderings.

    For every sample the model prediction is rendered with ``occ2img`` and
    saved into ``--viz-dir`` as ``NNNN-sem.jpg`` and ``NNNN-inst.jpg``.
    Optional flags additionally save the camera mosaic
    (``--surround-view-img``) and ground-truth renderings
    (``--draw-sem-gt`` / ``--draw-pano-gt``).

    Requires a CUDA device; the model is wrapped for GPU 0.
    """
    parser = argparse.ArgumentParser(description='Validate a detector')
    parser.add_argument('--config', required=True)
    parser.add_argument('--weights', required=True)
    parser.add_argument('--viz-dir', required=True)
    parser.add_argument('--override', nargs='+', action=DictAction)
    parser.add_argument(
        '--launcher',
        choices=['none', 'pytorch', 'slurm', 'mpi'],
        default='none',
        help='job launcher')
    parser.add_argument('--draw-sem-gt', action='store_true')
    parser.add_argument('--draw-pano-gt', action='store_true')
    parser.add_argument('--surround-view-img', action='store_true')
    # NOTE(review): parsed but not read anywhere in this function.
    parser.add_argument('--surround-pano-gt', action='store_true')
    args = parser.parse_args()

    # parse configs
    cfgs = Config.fromfile(args.config)
    if args.override is not None:
        cfgs.merge_from_dict(args.override)

    cfgs = compat_cfg(cfgs)

    # set multi-process settings
    setup_multi_processes(cfgs)

    # import modules from plugin/xx, registry will be updated
    if hasattr(cfgs, 'plugin') and cfgs.plugin:
        import importlib
        if hasattr(cfgs, 'plugin_dir'):
            module_dir = os.path.dirname(cfgs.plugin_dir)
            module_path = '.'.join(module_dir.split('/'))
            print(module_path)
        else:
            # import dir is the dirpath for the config file
            module_dir = os.path.dirname(args.config)
            module_path = '.'.join(module_dir.split('/'))
        # Imported for its side effect only (see the comment above).
        importlib.import_module(module_path)

    # To visualise val-mini instead, point the annotation file at it, e.g.
    # cfgs.data.val.ann_file = cfgs.data.val.ann_file.replace('val', 'val_mini')

    # Silence MMCV: pre-register WARNING-level loggers under the names it uses.
    from mmcv.utils.logging import logger_initialized
    logger_initialized['root'] = logging.Logger(__name__, logging.WARNING)
    logger_initialized['mmcv'] = logging.Logger(__name__, logging.WARNING)

    # you need one GPU
    assert torch.cuda.is_available()

    logging.info('Using GPU: %s' % torch.cuda.get_device_name(0))

    # random seed
    logging.info('Setting random seed: 0')
    set_random_seed(0, deterministic=True)

    logging.info('Loading validation set from %s' % cfgs.data.val.data_root)

    # init distributed env first, since logger depends on the dist info.
    if args.launcher == 'none':
        distributed = False
    else:
        distributed = True
        init_dist(args.launcher, **cfgs.dist_params)

    test_dataloader_default_args = dict(
        samples_per_gpu=1, workers_per_gpu=2, dist=distributed, shuffle=False)

    test_loader_cfg = {
        **test_dataloader_default_args,
        **cfgs.data.get('test_dataloader', {})
    }
    dataset = build_dataset(cfgs.data.test)
    # Force two workers even when the config's test_dataloader asks otherwise.
    test_loader_cfg['workers_per_gpu'] = 2
    val_loader = build_dataloader(dataset, **test_loader_cfg)

    logging.info('Creating model: %s' % cfgs.model.type)
    model = build_model(cfgs.model)
    model.cuda()
    model = MMDataParallel(model, [0])
    model.eval()

    logging.info('Loading checkpoint from %s' % args.weights)
    load_checkpoint(
        model, args.weights, map_location='cuda', strict=True,
        logger=logging.Logger(__name__, logging.ERROR)
    )

    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(args.viz_dir, exist_ok=True)

    # Wrapping the loader (not the enumerate object) lets tqdm see its length.
    for i, data in enumerate(tqdm(val_loader)):
        with torch.no_grad():
            occ_pred = model(return_loss=False, rescale=True, **data)[0]

            # Disabled debug dump of intermediate feature maps.
            # NOTE(review): the original indentation was lost; this block is
            # assumed to end after the height map.
            if False:
                occ_bev_feature = occ_pred['occ_bev_feature']
                outs = occ_pred['outs']
                tensor = occ_bev_feature.max(dim=1)[0].cpu()
                draw_fig(tensor, name=os.path.join(args.viz_dir, '%04d-occ_bev_feature.jpg' % i))
                print(os.path.join(args.viz_dir, '%04d-occ_bev_feature.jpg' % i))

                tensor = outs[0][0]['heatmap'].sigmoid().sum(dim=1)[0].cpu()
                draw_fig(tensor, name=os.path.join(args.viz_dir, '%04d-heatmap.jpg' % i))
                print(os.path.join(args.viz_dir, '%04d-heatmap.jpg' % i))

                tensor = outs[0][0]['reg'][0, 0].cpu()
                tensor = outs[0][0]['reg'][0, 1].cpu()
                tensor = ((outs[0][0]['reg'][0, 0] ** 2 + outs[0][0]['reg'][0, 1] ** 2) ** 0.5).unsqueeze(dim=0).cpu()
                draw_fig(tensor, name=os.path.join(args.viz_dir, '%04d-reg.jpg' % i))
                print(os.path.join(args.viz_dir, '%04d-reg.jpg' % i))

                tensor = outs[0][0]['height'][0, 0].cpu()
                draw_fig(tensor, name=os.path.join(args.viz_dir, '%04d-height.jpg' % i))
                print(os.path.join(args.viz_dir, '%04d-height.jpg' % i))

            # occ2img output is reversed along the channel axis before
            # writing (OpenCV stores images as BGR).
            sem_pred = occ_pred['pred_occ']
            sem_path = os.path.join(args.viz_dir, '%04d-sem.jpg' % i)
            cv2.imwrite(sem_path, occ2img(semantics=sem_pred.cpu())[..., ::-1])
            print(sem_path)

            inst_pred = occ_pred['pano_inst']
            inst_path = os.path.join(args.viz_dir, '%04d-inst.jpg' % i)
            cv2.imwrite(
                inst_path,
                occ2img(semantics=sem_pred.cpu(), is_pano=True,
                        panoptics=inst_pred.cpu())[..., ::-1])
            print(inst_path)

        if args.surround_view_img:
            # NOTE(review): the [::9] stride presumably picks one frame per
            # camera out of a stacked temporal input -- confirm.
            img = data['img_inputs'][0][0][0][::9].cpu().numpy()
            mean = np.array([123.675, 116.28, 103.53], dtype=np.float32).reshape(1, 3, 1, 1)
            std = np.array([58.395, 57.12, 57.375], dtype=np.float32).reshape(1, 3, 1, 1)
            # Undo the input normalisation and move channels last.
            img = img * std + mean
            img = img.astype(np.uint8).transpose(0, 2, 3, 1)
            # 2x3 mosaic: images 0-2 on the top row, 3-5 on the bottom row.
            up = np.concatenate([img[0, ...], img[1, ...], img[2, ...]], 1)
            down = np.concatenate([img[3, ...], img[4, ...], img[5, ...]], 1)
            out = np.concatenate([up, down], 0)
            rgb_path = os.path.join(args.viz_dir, '%04d-rgb.jpg' % i)
            cv2.imwrite(rgb_path, out)
            print(rgb_path)

        if args.draw_sem_gt or args.draw_pano_gt:
            gt_dir = val_loader.dataset.data_infos[i]['occ_path'].replace(
                'data/nuscenes/gts/', 'data/nuscenes/occ3d_panoptic/')
            # NpzFile holds an open file handle; close it once the arrays
            # have been read.
            with np.load(os.path.join(gt_dir, 'labels.npz')) as occ_gt:
                pano_gt = occ_gt['instances']
                sem_gt = occ_gt['semantics']

            if args.draw_sem_gt:
                # sem_gt is a NumPy array here, so it is passed as-is; the
                # previous `sem_gt.cpu()` raised AttributeError.
                cv2.imwrite(
                    os.path.join(args.viz_dir, '%04d-sem-gt.jpg' % i),
                    occ2img(semantics=sem_gt)[..., ::-1])

            if args.draw_pano_gt:
                pano_gt_path = os.path.join(args.viz_dir, '%04d-pano-gt.jpg' % i)
                cv2.imwrite(
                    pano_gt_path,
                    occ2img(semantics=sem_gt, is_pano=True,
                            panoptics=pano_gt)[..., ::-1])
                print(pano_gt_path)
# Run the visualisation only when this file is executed as a script.
if __name__ == '__main__':
    main()
'''
Example usage (the parser defines --draw-sem-gt and --draw-pano-gt; there is no --draw-gt flag):

exp_name=flashoccv2-r50-depth-tiny-pano
python tools/vis_occ.py --config projects/configs/flashoccv2/${exp_name}.py --weights work_dirs/${exp_name}/epoch_24_ema.pth --viz-dir vis/${exp_name} --draw-sem-gt

exp_name=flashoccv2-r50-depth4d-longterm8f-pano
python tools/vis_occ.py --config projects/configs/flashoccv2/${exp_name}.py --weights work_dirs/${exp_name}/epoch_24_ema.pth --viz-dir vis/${exp_name} --draw-pano-gt #--draw-sem-gt
'''
\ No newline at end of file
docker-hub/FlashOCC/Flashocc/work_dirs/flashocc-r50/20260403_162421.log
0 → 100644
View file @
d2b71343
2026
-
04
-
03
16
:
24
:
21
,
502
-
mmdet
-
INFO
-
Environment
info
:
------------------------------------------------------------
sys
.
platform
:
linux
Python
:
3.10.12
(
main
,
Aug
15
2025
,
14
:
32
:
43
)
[
GCC
11.4.0
]
CUDA
available
:
True
GPU
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
:
BW1000_H
CUDA_HOME
:
/
opt
/
dtk
NVCC
:
Not
Available
GCC
:
x86_64
-
linux
-
gnu
-
gcc
(
Ubuntu
11.4.0
-
1u
buntu1
~
22.04
)
11.4.0
PyTorch
:
2.5.1
PyTorch
compiling
details
:
PyTorch
built
with
:
-
GCC
10.3
-
C
++
Version
:
201703
-
Intel
(
R
)
Math
Kernel
Library
Version
2020.0.4
Product
Build
20200917
for
Intel
(
R
)
64
architecture
applications
-
OpenMP
201511
(
a
.
k
.
a
.
OpenMP
4.5
)
-
LAPACK
is
enabled
(
usually
provided
by
MKL
)
-
NNPACK
is
enabled
-
CPU
capability
usage
:
AVX512
-
HIP
Runtime
6.3.25521
-
MIOpen
2.18.0
-
Magma
2.8.0
-
Build
settings
:
BLAS_INFO
=
mkl
,
BUILD_TYPE
=
Release
,
CXX_COMPILER
=/
opt
/
rh
/
gcc
-
toolset
-
10
/
root
/
usr
/
bin
/
c
++,
CXX_FLAGS
=
-
D_GLIBCXX_USE_CXX11_ABI
=
1
-
fvisibility
-
inlines
-
hidden
-
DUSE_PTHREADPOOL
-
DNDEBUG
-
DUSE_KINETO
-
DLIBKINETO_NOCUPTI
-
DLIBKINETO_NOXPUPTI
=
ON
-
DUSE_FBGEMM
-
DUSE_PYTORCH_QNNPACK
-
DUSE_XNNPACK
-
DSYMBOLICATE_MOBILE_DEBUG_HANDLE
-
O2
-
fPIC
-
Wall
-
Wextra
-
Werror
=
return
-
type
-
Werror
=
non
-
virtual
-
dtor
-
Werror
=
bool
-
operation
-
Wnarrowing
-
Wno
-
missing
-
field
-
initializers
-
Wno
-
type
-
limits
-
Wno
-
array
-
bounds
-
Wno
-
unknown
-
pragmas
-
Wno
-
unused
-
parameter
-
Wno
-
strict
-
overflow
-
Wno
-
strict
-
aliasing
-
Wno
-
stringop
-
overflow
-
Wsuggest
-
override
-
Wno
-
psabi
-
Wno
-
error
=
old
-
style
-
cast
-
Wno
-
missing
-
braces
-
fdiagnostics
-
color
=
always
-
faligned
-
new
-
Wno
-
unused
-
but
-
set
-
variable
-
Wno
-
maybe
-
uninitialized
-
fno
-
math
-
errno
-
fno
-
trapping
-
math
-
Werror
=
format
-
Wno
-
stringop
-
overflow
,
FORCE_FALLBACK_CUDA_MPI
=
1
,
LAPACK_INFO
=
mkl
,
PERF_WITH_AVX
=
1
,
PERF_WITH_AVX2
=
1
,
TORCH_VERSION
=
2.5.1
,
USE_CUDA
=
0
,
USE_CUDNN
=
OFF
,
USE_CUSPARSELT
=
OFF
,
USE_EXCEPTION_PTR
=
1
,
USE_GFLAGS
=
1
,
USE_GLOG
=
1
,
USE_GLOO
=
1
,
USE_MKL
=
ON
,
USE_MKLDNN
=
0
,
USE_MPI
=
1
,
USE_NCCL
=
1
,
USE_NNPACK
=
ON
,
USE_OPENMP
=
1
,
USE_ROCM
=
ON
,
USE_ROCM_KERNEL_ASSERT
=
OFF
,
TorchVision
:
0.20.1
OpenCV
:
4.12.0
MMCV
:
1.6.1
MMCV
Compiler
:
GCC
10.3
MMCV
CUDA
Compiler
:
rocm
not
available
MMDetection
:
2.25.1
MMSegmentation
:
0.25.0
MMDetection3D
:
1.0.0
rc4
+
spconv2
.0
:
False
------------------------------------------------------------
2026
-
04
-
03
16
:
24
:
22
,
070
-
mmdet
-
INFO
-
Distributed
training
:
True
2026
-
04
-
03
16
:
24
:
22
,
636
-
mmdet
-
INFO
-
Config
:
point_cloud_range
=
[-
51.2
,
-
51.2
,
-
5.0
,
51.2
,
51.2
,
3.0
]
class_names
=
[
'car'
,
'truck'
,
'construction_vehicle'
,
'bus'
,
'trailer'
,
'barrier'
,
'motorcycle'
,
'bicycle'
,
'pedestrian'
,
'traffic_cone'
]
dataset_type
=
'NuScenesDatasetOccpancy'
data_root
=
'data/nuscenes/'
input_modality
=
dict
(
use_lidar
=
False
,
use_camera
=
True
,
use_radar
=
False
,
use_map
=
False
,
use_external
=
False
)
file_client_args
=
dict
(
backend
=
'disk'
)
train_pipeline
=
[
dict
(
type
=
'PrepareImageInputs'
,
is_train
=
True
,
data_config
=
dict
(
cams
=[
'CAM_FRONT_LEFT'
,
'CAM_FRONT'
,
'CAM_FRONT_RIGHT'
,
'CAM_BACK_LEFT'
,
'CAM_BACK'
,
'CAM_BACK_RIGHT'
],
Ncams
=
6
,
input_size
=(
256
,
704
),
src_size
=(
900
,
1600
),
resize
=(-
0.06
,
0.11
),
rot
=(-
5.4
,
5.4
),
flip
=
True
,
crop_h
=(
0.0
,
0.0
),
resize_test
=
0.0
),
sequential
=
False
),
dict
(
type
=
'LoadAnnotationsBEVDepth'
,
bda_aug_conf
=
dict
(
rot_lim
=(-
0.0
,
0.0
),
scale_lim
=(
1.0
,
1.0
),
flip_dx_ratio
=
0.5
,
flip_dy_ratio
=
0.5
),
classes
=[
'car'
,
'truck'
,
'construction_vehicle'
,
'bus'
,
'trailer'
,
'barrier'
,
'motorcycle'
,
'bicycle'
,
'pedestrian'
,
'traffic_cone'
],
is_train
=
True
),
dict
(
type
=
'LoadOccGTFromFile'
),
dict
(
type
=
'LoadPointsFromFile'
,
coord_type
=
'LIDAR'
,
load_dim
=
5
,
use_dim
=
5
,
file_client_args
=
dict
(
backend
=
'disk'
)),
dict
(
type
=
'PointToMultiViewDepth'
,
downsample
=
1
,
grid_config
=
dict
(
x
=[-
40
,
40
,
0.4
],
y
=[-
40
,
40
,
0.4
],
z
=[-
1
,
5.4
,
6.4
],
depth
=[
1.0
,
45.0
,
0.5
])),
dict
(
type
=
'DefaultFormatBundle3D'
,
class_names
=[
'car'
,
'truck'
,
'construction_vehicle'
,
'bus'
,
'trailer'
,
'barrier'
,
'motorcycle'
,
'bicycle'
,
'pedestrian'
,
'traffic_cone'
]),
dict
(
type
=
'Collect3D'
,
keys
=[
'img_inputs'
,
'gt_depth'
,
'voxel_semantics'
,
'mask_lidar'
,
'mask_camera'
])
]
test_pipeline
=
[
dict
(
type
=
'PrepareImageInputs'
,
data_config
=
dict
(
cams
=[
'CAM_FRONT_LEFT'
,
'CAM_FRONT'
,
'CAM_FRONT_RIGHT'
,
'CAM_BACK_LEFT'
,
'CAM_BACK'
,
'CAM_BACK_RIGHT'
],
Ncams
=
6
,
input_size
=(
256
,
704
),
src_size
=(
900
,
1600
),
resize
=(-
0.06
,
0.11
),
rot
=(-
5.4
,
5.4
),
flip
=
True
,
crop_h
=(
0.0
,
0.0
),
resize_test
=
0.0
),
sequential
=
False
),
dict
(
type
=
'LoadAnnotationsBEVDepth'
,
bda_aug_conf
=
dict
(
rot_lim
=(-
0.0
,
0.0
),
scale_lim
=(
1.0
,
1.0
),
flip_dx_ratio
=
0.5
,
flip_dy_ratio
=
0.5
),
classes
=[
'car'
,
'truck'
,
'construction_vehicle'
,
'bus'
,
'trailer'
,
'barrier'
,
'motorcycle'
,
'bicycle'
,
'pedestrian'
,
'traffic_cone'
],
is_train
=
False
),
dict
(
type
=
'LoadPointsFromFile'
,
coord_type
=
'LIDAR'
,
load_dim
=
5
,
use_dim
=
5
,
file_client_args
=
dict
(
backend
=
'disk'
)),
dict
(
type
=
'MultiScaleFlipAug3D'
,
img_scale
=(
1333
,
800
),
pts_scale_ratio
=
1
,
flip
=
False
,
transforms
=[
dict
(
type
=
'DefaultFormatBundle3D'
,
class_names
=[
'car'
,
'truck'
,
'construction_vehicle'
,
'bus'
,
'trailer'
,
'barrier'
,
'motorcycle'
,
'bicycle'
,
'pedestrian'
,
'traffic_cone'
],
with_label
=
False
),
dict
(
type
=
'Collect3D'
,
keys
=[
'points'
,
'img_inputs'
])
])
]
eval_pipeline
=
[
dict
(
type
=
'LoadPointsFromFile'
,
coord_type
=
'LIDAR'
,
load_dim
=
5
,
use_dim
=
5
,
file_client_args
=
dict
(
backend
=
'disk'
)),
dict
(
type
=
'LoadPointsFromMultiSweeps'
,
sweeps_num
=
10
,
file_client_args
=
dict
(
backend
=
'disk'
)),
dict
(
type
=
'DefaultFormatBundle3D'
,
class_names
=[
'car'
,
'truck'
,
'trailer'
,
'bus'
,
'construction_vehicle'
,
'bicycle'
,
'motorcycle'
,
'pedestrian'
,
'traffic_cone'
,
'barrier'
],
with_label
=
False
),
dict
(
type
=
'Collect3D'
,
keys
=[
'points'
])
]
data
=
dict
(
samples_per_gpu
=
24
,
workers_per_gpu
=
24
,
train
=
dict
(
type
=
'NuScenesDatasetOccpancy'
,
data_root
=
'data/nuscenes/'
,
ann_file
=
'data/nuscenes/bevdetv2-nuscenes_infos_train.pkl'
,
pipeline
=[
dict
(
type
=
'PrepareImageInputs'
,
is_train
=
True
,
data_config
=
dict
(
cams
=[
'CAM_FRONT_LEFT'
,
'CAM_FRONT'
,
'CAM_FRONT_RIGHT'
,
'CAM_BACK_LEFT'
,
'CAM_BACK'
,
'CAM_BACK_RIGHT'
],
Ncams
=
6
,
input_size
=(
256
,
704
),
src_size
=(
900
,
1600
),
resize
=(-
0.06
,
0.11
),
rot
=(-
5.4
,
5.4
),
flip
=
True
,
crop_h
=(
0.0
,
0.0
),
resize_test
=
0.0
),
sequential
=
False
),
dict
(
type
=
'LoadAnnotationsBEVDepth'
,
bda_aug_conf
=
dict
(
rot_lim
=(-
0.0
,
0.0
),
scale_lim
=(
1.0
,
1.0
),
flip_dx_ratio
=
0.5
,
flip_dy_ratio
=
0.5
),
classes
=[
'car'
,
'truck'
,
'construction_vehicle'
,
'bus'
,
'trailer'
,
'barrier'
,
'motorcycle'
,
'bicycle'
,
'pedestrian'
,
'traffic_cone'
],
is_train
=
True
),
dict
(
type
=
'LoadOccGTFromFile'
),
dict
(
type
=
'LoadPointsFromFile'
,
coord_type
=
'LIDAR'
,
load_dim
=
5
,
use_dim
=
5
,
file_client_args
=
dict
(
backend
=
'disk'
)),
dict
(
type
=
'PointToMultiViewDepth'
,
downsample
=
1
,
grid_config
=
dict
(
x
=[-
40
,
40
,
0.4
],
y
=[-
40
,
40
,
0.4
],
z
=[-
1
,
5.4
,
6.4
],
depth
=[
1.0
,
45.0
,
0.5
])),
dict
(
type
=
'DefaultFormatBundle3D'
,
class_names
=[
'car'
,
'truck'
,
'construction_vehicle'
,
'bus'
,
'trailer'
,
'barrier'
,
'motorcycle'
,
'bicycle'
,
'pedestrian'
,
'traffic_cone'
]),
dict
(
type
=
'Collect3D'
,
keys
=[
'img_inputs'
,
'gt_depth'
,
'voxel_semantics'
,
'mask_lidar'
,
'mask_camera'
])
],
classes
=[
'car'
,
'truck'
,
'construction_vehicle'
,
'bus'
,
'trailer'
,
'barrier'
,
'motorcycle'
,
'bicycle'
,
'pedestrian'
,
'traffic_cone'
],
modality
=
dict
(
use_lidar
=
False
,
use_camera
=
True
,
use_radar
=
False
,
use_map
=
False
,
use_external
=
False
),
test_mode
=
False
,
box_type_3d
=
'LiDAR'
,
use_valid_flag
=
True
,
stereo
=
False
,
filter_empty_gt
=
False
,
img_info_prototype
=
'bevdet'
),
val
=
dict
(
type
=
'NuScenesDatasetOccpancy'
,
data_root
=
'data/nuscenes/'
,
ann_file
=
'data/nuscenes/bevdetv2-nuscenes_infos_val.pkl'
,
pipeline
=[
dict
(
type
=
'PrepareImageInputs'
,
data_config
=
dict
(
cams
=[
'CAM_FRONT_LEFT'
,
'CAM_FRONT'
,
'CAM_FRONT_RIGHT'
,
'CAM_BACK_LEFT'
,
'CAM_BACK'
,
'CAM_BACK_RIGHT'
],
Ncams
=
6
,
input_size
=(
256
,
704
),
src_size
=(
900
,
1600
),
resize
=(-
0.06
,
0.11
),
rot
=(-
5.4
,
5.4
),
flip
=
True
,
crop_h
=(
0.0
,
0.0
),
resize_test
=
0.0
),
sequential
=
False
),
dict
(
type
=
'LoadAnnotationsBEVDepth'
,
bda_aug_conf
=
dict
(
rot_lim
=(-
0.0
,
0.0
),
scale_lim
=(
1.0
,
1.0
),
flip_dx_ratio
=
0.5
,
flip_dy_ratio
=
0.5
),
classes
=[
'car'
,
'truck'
,
'construction_vehicle'
,
'bus'
,
'trailer'
,
'barrier'
,
'motorcycle'
,
'bicycle'
,
'pedestrian'
,
'traffic_cone'
],
is_train
=
False
),
dict
(
type
=
'LoadPointsFromFile'
,
coord_type
=
'LIDAR'
,
load_dim
=
5
,
use_dim
=
5
,
file_client_args
=
dict
(
backend
=
'disk'
)),
dict
(
type
=
'MultiScaleFlipAug3D'
,
img_scale
=(
1333
,
800
),
pts_scale_ratio
=
1
,
flip
=
False
,
transforms
=[
dict
(
type
=
'DefaultFormatBundle3D'
,
class_names
=[
'car'
,
'truck'
,
'construction_vehicle'
,
'bus'
,
'trailer'
,
'barrier'
,
'motorcycle'
,
'bicycle'
,
'pedestrian'
,
'traffic_cone'
],
with_label
=
False
),
dict
(
type
=
'Collect3D'
,
keys
=[
'points'
,
'img_inputs'
])
])
],
classes
=[
'car'
,
'truck'
,
'construction_vehicle'
,
'bus'
,
'trailer'
,
'barrier'
,
'motorcycle'
,
'bicycle'
,
'pedestrian'
,
'traffic_cone'
],
modality
=
dict
(
use_lidar
=
False
,
use_camera
=
True
,
use_radar
=
False
,
use_map
=
False
,
use_external
=
False
),
test_mode
=
True
,
box_type_3d
=
'LiDAR'
,
stereo
=
False
,
filter_empty_gt
=
False
,
img_info_prototype
=
'bevdet'
),
test
=
dict
(
type
=
'NuScenesDatasetOccpancy'
,
data_root
=
'data/nuscenes/'
,
ann_file
=
'data/nuscenes/bevdetv2-nuscenes_infos_val.pkl'
,
pipeline
=[
dict
(
type
=
'PrepareImageInputs'
,
data_config
=
dict
(
cams
=[
'CAM_FRONT_LEFT'
,
'CAM_FRONT'
,
'CAM_FRONT_RIGHT'
,
'CAM_BACK_LEFT'
,
'CAM_BACK'
,
'CAM_BACK_RIGHT'
],
Ncams
=
6
,
input_size
=(
256
,
704
),
src_size
=(
900
,
1600
),
resize
=(-
0.06
,
0.11
),
rot
=(-
5.4
,
5.4
),
flip
=
True
,
crop_h
=(
0.0
,
0.0
),
resize_test
=
0.0
),
sequential
=
False
),
dict
(
type
=
'LoadAnnotationsBEVDepth'
,
bda_aug_conf
=
dict
(
rot_lim
=(-
0.0
,
0.0
),
scale_lim
=(
1.0
,
1.0
),
flip_dx_ratio
=
0.5
,
flip_dy_ratio
=
0.5
),
classes
=[
'car'
,
'truck'
,
'construction_vehicle'
,
'bus'
,
'trailer'
,
'barrier'
,
'motorcycle'
,
'bicycle'
,
'pedestrian'
,
'traffic_cone'
],
is_train
=
False
),
dict
(
type
=
'LoadPointsFromFile'
,
coord_type
=
'LIDAR'
,
load_dim
=
5
,
use_dim
=
5
,
file_client_args
=
dict
(
backend
=
'disk'
)),
dict
(
type
=
'MultiScaleFlipAug3D'
,
img_scale
=(
1333
,
800
),
pts_scale_ratio
=
1
,
flip
=
False
,
transforms
=[
dict
(
type
=
'DefaultFormatBundle3D'
,
class_names
=[
'car'
,
'truck'
,
'construction_vehicle'
,
'bus'
,
'trailer'
,
'barrier'
,
'motorcycle'
,
'bicycle'
,
'pedestrian'
,
'traffic_cone'
],
with_label
=
False
),
dict
(
type
=
'Collect3D'
,
keys
=[
'points'
,
'img_inputs'
])
])
],
classes
=[
'car'
,
'truck'
,
'construction_vehicle'
,
'bus'
,
'trailer'
,
'barrier'
,
'motorcycle'
,
'bicycle'
,
'pedestrian'
,
'traffic_cone'
],
modality
=
dict
(
use_lidar
=
False
,
use_camera
=
True
,
use_radar
=
False
,
use_map
=
False
,
use_external
=
False
),
test_mode
=
True
,
box_type_3d
=
'LiDAR'
,
stereo
=
False
,
filter_empty_gt
=
False
,
img_info_prototype
=
'bevdet'
))
evaluation
=
dict
(
interval
=
1
,
pipeline
=[
dict
(
type
=
'PrepareImageInputs'
,
data_config
=
dict
(
cams
=[
'CAM_FRONT_LEFT'
,
'CAM_FRONT'
,
'CAM_FRONT_RIGHT'
,
'CAM_BACK_LEFT'
,
'CAM_BACK'
,
'CAM_BACK_RIGHT'
],
Ncams
=
6
,
input_size
=(
256
,
704
),
src_size
=(
900
,
1600
),
resize
=(-
0.06
,
0.11
),
rot
=(-
5.4
,
5.4
),
flip
=
True
,
crop_h
=(
0.0
,
0.0
),
resize_test
=
0.0
),
sequential
=
False
),
dict
(
type
=
'LoadAnnotationsBEVDepth'
,
bda_aug_conf
=
dict
(
rot_lim
=(-
0.0
,
0.0
),
scale_lim
=(
1.0
,
1.0
),
flip_dx_ratio
=
0.5
,
flip_dy_ratio
=
0.5
),
classes
=[
'car'
,
'truck'
,
'construction_vehicle'
,
'bus'
,
'trailer'
,
'barrier'
,
'motorcycle'
,
'bicycle'
,
'pedestrian'
,
'traffic_cone'
],
is_train
=
False
),
dict
(
type
=
'LoadPointsFromFile'
,
coord_type
=
'LIDAR'
,
load_dim
=
5
,
use_dim
=
5
,
file_client_args
=
dict
(
backend
=
'disk'
)),
dict
(
type
=
'MultiScaleFlipAug3D'
,
img_scale
=(
1333
,
800
),
pts_scale_ratio
=
1
,
flip
=
False
,
transforms
=[
dict
(
type
=
'DefaultFormatBundle3D'
,
class_names
=[
'car'
,
'truck'
,
'construction_vehicle'
,
'bus'
,
'trailer'
,
'barrier'
,
'motorcycle'
,
'bicycle'
,
'pedestrian'
,
'traffic_cone'
],
with_label
=
False
),
dict
(
type
=
'Collect3D'
,
keys
=[
'points'
,
'img_inputs'
])
])
],
start
=
20
)
checkpoint_config
=
dict
(
interval
=
1
,
max_keep_ckpts
=
5
)
log_config
=
dict
(
interval
=
1
,
hooks
=[
dict
(
type
=
'TextLoggerHook'
),
dict
(
type
=
'TensorboardLoggerHook'
)])
dist_params
=
dict
(
backend
=
'nccl'
)
log_level
=
'INFO'
work_dir
=
'./work_dirs/flashocc-r50'
load_from
=
'ckpts/bevdet-r50-cbgs.pth'
resume_from
=
None
workflow
=
[(
'train'
,
1
)]
opencv_num_threads
=
0
mp_start_method
=
'fork'
plugin
=
True
plugin_dir
=
'projects/mmdet3d_plugin/'
data_config
=
dict
(
cams
=[
'CAM_FRONT_LEFT'
,
'CAM_FRONT'
,
'CAM_FRONT_RIGHT'
,
'CAM_BACK_LEFT'
,
'CAM_BACK'
,
'CAM_BACK_RIGHT'
],
Ncams
=
6
,
input_size
=(
256
,
704
),
src_size
=(
900
,
1600
),
resize
=(-
0.06
,
0.11
),
rot
=(-
5.4
,
5.4
),
flip
=
True
,
crop_h
=(
0.0
,
0.0
),
resize_test
=
0.0
)
grid_config
=
dict
(
x
=[-
40
,
40
,
0.4
],
y
=[-
40
,
40
,
0.4
],
z
=[-
1
,
5.4
,
6.4
],
depth
=[
1.0
,
45.0
,
0.5
])
voxel_size
=
[
0.1
,
0.1
,
0.2
]
numC_Trans
=
64
model
=
dict
(
type
=
'BEVDetOCC'
,
img_backbone
=
dict
(
type
=
'ResNet'
,
depth
=
50
,
num_stages
=
4
,
out_indices
=(
2
,
3
),
frozen_stages
=-
1
,
norm_cfg
=
dict
(
type
=
'BN'
,
requires_grad
=
True
),
norm_eval
=
False
,
with_cp
=
True
,
style
=
'pytorch'
),
img_neck
=
dict
(
type
=
'CustomFPN'
,
in_channels
=[
1024
,
2048
],
out_channels
=
256
,
num_outs
=
1
,
start_level
=
0
,
out_ids
=[
0
]),
img_view_transformer
=
dict
(
type
=
'LSSViewTransformer'
,
grid_config
=
dict
(
x
=[-
40
,
40
,
0.4
],
y
=[-
40
,
40
,
0.4
],
z
=[-
1
,
5.4
,
6.4
],
depth
=[
1.0
,
45.0
,
0.5
]),
input_size
=(
256
,
704
),
in_channels
=
256
,
out_channels
=
64
,
sid
=
False
,
collapse_z
=
True
,
downsample
=
16
),
img_bev_encoder_backbone
=
dict
(
type
=
'CustomResNet'
,
numC_input
=
64
,
num_channels
=[
128
,
256
,
512
]),
img_bev_encoder_neck
=
dict
(
type
=
'FPN_LSS'
,
in_channels
=
640
,
out_channels
=
256
),
occ_head
=
dict
(
type
=
'BEVOCCHead2D'
,
in_dim
=
256
,
out_dim
=
256
,
Dz
=
16
,
use_mask
=
True
,
num_classes
=
18
,
use_predicter
=
True
,
class_balance
=
False
,
loss_occ
=
dict
(
type
=
'CrossEntropyLoss'
,
use_sigmoid
=
False
,
ignore_index
=
255
,
loss_weight
=
1.0
)))
bda_aug_conf
=
dict
(
rot_lim
=(-
0.0
,
0.0
),
scale_lim
=(
1.0
,
1.0
),
flip_dx_ratio
=
0.5
,
flip_dy_ratio
=
0.5
)
share_data_config
=
dict
(
type
=
'NuScenesDatasetOccpancy'
,
data_root
=
'data/nuscenes/'
,
classes
=[
'car'
,
'truck'
,
'construction_vehicle'
,
'bus'
,
'trailer'
,
'barrier'
,
'motorcycle'
,
'bicycle'
,
'pedestrian'
,
'traffic_cone'
],
modality
=
dict
(
use_lidar
=
False
,
use_camera
=
True
,
use_radar
=
False
,
use_map
=
False
,
use_external
=
False
),
stereo
=
False
,
filter_empty_gt
=
False
,
img_info_prototype
=
'bevdet'
)
test_data_config
=
dict
(
pipeline
=[
dict
(
type
=
'PrepareImageInputs'
,
data_config
=
dict
(
cams
=[
'CAM_FRONT_LEFT'
,
'CAM_FRONT'
,
'CAM_FRONT_RIGHT'
,
'CAM_BACK_LEFT'
,
'CAM_BACK'
,
'CAM_BACK_RIGHT'
],
Ncams
=
6
,
input_size
=(
256
,
704
),
src_size
=(
900
,
1600
),
resize
=(-
0.06
,
0.11
),
rot
=(-
5.4
,
5.4
),
flip
=
True
,
crop_h
=(
0.0
,
0.0
),
resize_test
=
0.0
),
sequential
=
False
),
dict
(
type
=
'LoadAnnotationsBEVDepth'
,
bda_aug_conf
=
dict
(
rot_lim
=(-
0.0
,
0.0
),
scale_lim
=(
1.0
,
1.0
),
flip_dx_ratio
=
0.5
,
flip_dy_ratio
=
0.5
),
classes
=[
'car'
,
'truck'
,
'construction_vehicle'
,
'bus'
,
'trailer'
,
'barrier'
,
'motorcycle'
,
'bicycle'
,
'pedestrian'
,
'traffic_cone'
],
is_train
=
False
),
dict
(
type
=
'LoadPointsFromFile'
,
coord_type
=
'LIDAR'
,
load_dim
=
5
,
use_dim
=
5
,
file_client_args
=
dict
(
backend
=
'disk'
)),
dict
(
type
=
'MultiScaleFlipAug3D'
,
img_scale
=(
1333
,
800
),
pts_scale_ratio
=
1
,
flip
=
False
,
transforms
=[
dict
(
type
=
'DefaultFormatBundle3D'
,
class_names
=[
'car'
,
'truck'
,
'construction_vehicle'
,
'bus'
,
'trailer'
,
'barrier'
,
'motorcycle'
,
'bicycle'
,
'pedestrian'
,
'traffic_cone'
],
with_label
=
False
),
dict
(
type
=
'Collect3D'
,
keys
=[
'points'
,
'img_inputs'
])
])
],
ann_file
=
'data/nuscenes/bevdetv2-nuscenes_infos_val.pkl'
,
type
=
'NuScenesDatasetOccpancy'
,
data_root
=
'data/nuscenes/'
,
classes
=[
'car'
,
'truck'
,
'construction_vehicle'
,
'bus'
,
'trailer'
,
'barrier'
,
'motorcycle'
,
'bicycle'
,
'pedestrian'
,
'traffic_cone'
],
modality
=
dict
(
use_lidar
=
False
,
use_camera
=
True
,
use_radar
=
False
,
use_map
=
False
,
use_external
=
False
),
stereo
=
False
,
filter_empty_gt
=
False
,
img_info_prototype
=
'bevdet'
)
key
=
'test'
optimizer
=
dict
(
type
=
'AdamW'
,
lr
=
0.0001
,
weight_decay
=
0.01
)
optimizer_config
=
dict
(
grad_clip
=
dict
(
max_norm
=
5
,
norm_type
=
2
))
lr_config
=
dict
(
policy
=
'step'
,
warmup
=
'linear'
,
warmup_iters
=
200
,
warmup_ratio
=
0.001
,
step
=[
24
])
runner
=
dict
(
type
=
'EpochBasedRunner'
,
max_epochs
=
24
)
custom_hooks
=
[
dict
(
type
=
'MEGVIIEMAHook'
,
init_updates
=
10560
,
priority
=
'NORMAL'
)
]
gpu_ids
=
range
(
0
,
8
)
2026
-
04
-
03
16
:
24
:
22
,
636
-
mmdet
-
INFO
-
Set
random
seed
to
0
,
deterministic
:
False
2026
-
04
-
03
16
:
24
:
22
,
888
-
mmdet
-
INFO
-
initialize
ResNet
with
init_cfg
[{
'type'
:
'Kaiming'
,
'layer'
:
'Conv2d'
},
{
'type'
:
'Constant'
,
'val'
:
1
,
'layer'
:
[
'_BatchNorm'
,
'GroupNorm'
]}]
2026
-
04
-
03
16
:
24
:
22
,
995
-
mmdet
-
INFO
-
initialize
Bottleneck
with
init_cfg
{
'type'
:
'Constant'
,
'val'
:
0
,
'override'
:
{
'name'
:
'norm3'
}}
2026
-
04
-
03
16
:
24
:
22
,
995
-
mmdet
-
INFO
-
initialize
Bottleneck
with
init_cfg
{
'type'
:
'Constant'
,
'val'
:
0
,
'override'
:
{
'name'
:
'norm3'
}}
2026
-
04
-
03
16
:
24
:
22
,
996
-
mmdet
-
INFO
-
initialize
Bottleneck
with
init_cfg
{
'type'
:
'Constant'
,
'val'
:
0
,
'override'
:
{
'name'
:
'norm3'
}}
2026
-
04
-
03
16
:
24
:
22
,
996
-
mmdet
-
INFO
-
initialize
Bottleneck
with
init_cfg
{
'type'
:
'Constant'
,
'val'
:
0
,
'override'
:
{
'name'
:
'norm3'
}}
2026
-
04
-
03
16
:
24
:
22
,
997
-
mmdet
-
INFO
-
initialize
Bottleneck
with
init_cfg
{
'type'
:
'Constant'
,
'val'
:
0
,
'override'
:
{
'name'
:
'norm3'
}}
2026
-
04
-
03
16
:
24
:
22
,
997
-
mmdet
-
INFO
-
initialize
Bottleneck
with
init_cfg
{
'type'
:
'Constant'
,
'val'
:
0
,
'override'
:
{
'name'
:
'norm3'
}}
2026
-
04
-
03
16
:
24
:
22
,
998
-
mmdet
-
INFO
-
initialize
Bottleneck
with
init_cfg
{
'type'
:
'Constant'
,
'val'
:
0
,
'override'
:
{
'name'
:
'norm3'
}}
2026
-
04
-
03
16
:
24
:
22
,
999
-
mmdet
-
INFO
-
initialize
Bottleneck
with
init_cfg
{
'type'
:
'Constant'
,
'val'
:
0
,
'override'
:
{
'name'
:
'norm3'
}}
2026
-
04
-
03
16
:
24
:
23
,
000
-
mmdet
-
INFO
-
initialize
Bottleneck
with
init_cfg
{
'type'
:
'Constant'
,
'val'
:
0
,
'override'
:
{
'name'
:
'norm3'
}}
2026
-
04
-
03
16
:
24
:
23
,
000
-
mmdet
-
INFO
-
initialize
Bottleneck
with
init_cfg
{
'type'
:
'Constant'
,
'val'
:
0
,
'override'
:
{
'name'
:
'norm3'
}}
2026
-
04
-
03
16
:
24
:
23
,
001
-
mmdet
-
INFO
-
initialize
Bottleneck
with
init_cfg
{
'type'
:
'Constant'
,
'val'
:
0
,
'override'
:
{
'name'
:
'norm3'
}}
2026
-
04
-
03
16
:
24
:
23
,
002
-
mmdet
-
INFO
-
initialize
Bottleneck
with
init_cfg
{
'type'
:
'Constant'
,
'val'
:
0
,
'override'
:
{
'name'
:
'norm3'
}}
2026
-
04
-
03
16
:
24
:
23
,
003
-
mmdet
-
INFO
-
initialize
Bottleneck
with
init_cfg
{
'type'
:
'Constant'
,
'val'
:
0
,
'override'
:
{
'name'
:
'norm3'
}}
2026
-
04
-
03
16
:
24
:
23
,
005
-
mmdet
-
INFO
-
initialize
Bottleneck
with
init_cfg
{
'type'
:
'Constant'
,
'val'
:
0
,
'override'
:
{
'name'
:
'norm3'
}}
2026
-
04
-
03
16
:
24
:
23
,
008
-
mmdet
-
INFO
-
initialize
Bottleneck
with
init_cfg
{
'type'
:
'Constant'
,
'val'
:
0
,
'override'
:
{
'name'
:
'norm3'
}}
2026
-
04
-
03
16
:
24
:
23
,
010
-
mmdet
-
INFO
-
initialize
Bottleneck
with
init_cfg
{
'type'
:
'Constant'
,
'val'
:
0
,
'override'
:
{
'name'
:
'norm3'
}}
2026
-
04
-
03
16
:
24
:
23
,
021
-
mmdet
-
INFO
-
initialize
CustomFPN
with
init_cfg
{
'type'
:
'Xavier'
,
'layer'
:
'Conv2d'
,
'distribution'
:
'uniform'
}
Name
of
parameter
-
Initialization
information
img_backbone
.
conv1
.
weight
-
torch
.
Size
([
64
,
3
,
7
,
7
]):
KaimingInit
:
a
=
0
,
mode
=
fan_out
,
nonlinearity
=
relu
,
distribution
=
normal
,
bias
=
0
img_backbone
.
bn1
.
weight
-
torch
.
Size
([
64
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
bn1
.
bias
-
torch
.
Size
([
64
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer1
.0
.
conv1
.
weight
-
torch
.
Size
([
64
,
64
,
1
,
1
]):
KaimingInit
:
a
=
0
,
mode
=
fan_out
,
nonlinearity
=
relu
,
distribution
=
normal
,
bias
=
0
img_backbone
.
layer1
.0
.
bn1
.
weight
-
torch
.
Size
([
64
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer1
.0
.
bn1
.
bias
-
torch
.
Size
([
64
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer1
.0
.
conv2
.
weight
-
torch
.
Size
([
64
,
64
,
3
,
3
]):
KaimingInit
:
a
=
0
,
mode
=
fan_out
,
nonlinearity
=
relu
,
distribution
=
normal
,
bias
=
0
img_backbone
.
layer1
.0
.
bn2
.
weight
-
torch
.
Size
([
64
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer1
.0
.
bn2
.
bias
-
torch
.
Size
([
64
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer1
.0
.
conv3
.
weight
-
torch
.
Size
([
256
,
64
,
1
,
1
]):
KaimingInit
:
a
=
0
,
mode
=
fan_out
,
nonlinearity
=
relu
,
distribution
=
normal
,
bias
=
0
img_backbone
.
layer1
.0
.
bn3
.
weight
-
torch
.
Size
([
256
]):
ConstantInit
:
val
=
0
,
bias
=
0
img_backbone
.
layer1
.0
.
bn3
.
bias
-
torch
.
Size
([
256
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer1
.0
.
downsample
.0
.
weight
-
torch
.
Size
([
256
,
64
,
1
,
1
]):
KaimingInit
:
a
=
0
,
mode
=
fan_out
,
nonlinearity
=
relu
,
distribution
=
normal
,
bias
=
0
img_backbone
.
layer1
.0
.
downsample
.1
.
weight
-
torch
.
Size
([
256
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer1
.0
.
downsample
.1
.
bias
-
torch
.
Size
([
256
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer1
.1
.
conv1
.
weight
-
torch
.
Size
([
64
,
256
,
1
,
1
]):
KaimingInit
:
a
=
0
,
mode
=
fan_out
,
nonlinearity
=
relu
,
distribution
=
normal
,
bias
=
0
img_backbone
.
layer1
.1
.
bn1
.
weight
-
torch
.
Size
([
64
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer1
.1
.
bn1
.
bias
-
torch
.
Size
([
64
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer1
.1
.
conv2
.
weight
-
torch
.
Size
([
64
,
64
,
3
,
3
]):
KaimingInit
:
a
=
0
,
mode
=
fan_out
,
nonlinearity
=
relu
,
distribution
=
normal
,
bias
=
0
img_backbone
.
layer1
.1
.
bn2
.
weight
-
torch
.
Size
([
64
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer1
.1
.
bn2
.
bias
-
torch
.
Size
([
64
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer1
.1
.
conv3
.
weight
-
torch
.
Size
([
256
,
64
,
1
,
1
]):
KaimingInit
:
a
=
0
,
mode
=
fan_out
,
nonlinearity
=
relu
,
distribution
=
normal
,
bias
=
0
img_backbone
.
layer1
.1
.
bn3
.
weight
-
torch
.
Size
([
256
]):
ConstantInit
:
val
=
0
,
bias
=
0
img_backbone
.
layer1
.1
.
bn3
.
bias
-
torch
.
Size
([
256
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer1
.2
.
conv1
.
weight
-
torch
.
Size
([
64
,
256
,
1
,
1
]):
KaimingInit
:
a
=
0
,
mode
=
fan_out
,
nonlinearity
=
relu
,
distribution
=
normal
,
bias
=
0
img_backbone
.
layer1
.2
.
bn1
.
weight
-
torch
.
Size
([
64
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer1
.2
.
bn1
.
bias
-
torch
.
Size
([
64
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer1
.2
.
conv2
.
weight
-
torch
.
Size
([
64
,
64
,
3
,
3
]):
KaimingInit
:
a
=
0
,
mode
=
fan_out
,
nonlinearity
=
relu
,
distribution
=
normal
,
bias
=
0
img_backbone
.
layer1
.2
.
bn2
.
weight
-
torch
.
Size
([
64
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer1
.2
.
bn2
.
bias
-
torch
.
Size
([
64
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer1
.2
.
conv3
.
weight
-
torch
.
Size
([
256
,
64
,
1
,
1
]):
KaimingInit
:
a
=
0
,
mode
=
fan_out
,
nonlinearity
=
relu
,
distribution
=
normal
,
bias
=
0
img_backbone
.
layer1
.2
.
bn3
.
weight
-
torch
.
Size
([
256
]):
ConstantInit
:
val
=
0
,
bias
=
0
img_backbone
.
layer1
.2
.
bn3
.
bias
-
torch
.
Size
([
256
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer2
.0
.
conv1
.
weight
-
torch
.
Size
([
128
,
256
,
1
,
1
]):
KaimingInit
:
a
=
0
,
mode
=
fan_out
,
nonlinearity
=
relu
,
distribution
=
normal
,
bias
=
0
img_backbone
.
layer2
.0
.
bn1
.
weight
-
torch
.
Size
([
128
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer2
.0
.
bn1
.
bias
-
torch
.
Size
([
128
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer2
.0
.
conv2
.
weight
-
torch
.
Size
([
128
,
128
,
3
,
3
]):
KaimingInit
:
a
=
0
,
mode
=
fan_out
,
nonlinearity
=
relu
,
distribution
=
normal
,
bias
=
0
img_backbone
.
layer2
.0
.
bn2
.
weight
-
torch
.
Size
([
128
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer2
.0
.
bn2
.
bias
-
torch
.
Size
([
128
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer2
.0
.
conv3
.
weight
-
torch
.
Size
([
512
,
128
,
1
,
1
]):
KaimingInit
:
a
=
0
,
mode
=
fan_out
,
nonlinearity
=
relu
,
distribution
=
normal
,
bias
=
0
img_backbone
.
layer2
.0
.
bn3
.
weight
-
torch
.
Size
([
512
]):
ConstantInit
:
val
=
0
,
bias
=
0
img_backbone
.
layer2
.0
.
bn3
.
bias
-
torch
.
Size
([
512
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer2
.0
.
downsample
.0
.
weight
-
torch
.
Size
([
512
,
256
,
1
,
1
]):
KaimingInit
:
a
=
0
,
mode
=
fan_out
,
nonlinearity
=
relu
,
distribution
=
normal
,
bias
=
0
img_backbone
.
layer2
.0
.
downsample
.1
.
weight
-
torch
.
Size
([
512
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer2
.0
.
downsample
.1
.
bias
-
torch
.
Size
([
512
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer2
.1
.
conv1
.
weight
-
torch
.
Size
([
128
,
512
,
1
,
1
]):
KaimingInit
:
a
=
0
,
mode
=
fan_out
,
nonlinearity
=
relu
,
distribution
=
normal
,
bias
=
0
img_backbone
.
layer2
.1
.
bn1
.
weight
-
torch
.
Size
([
128
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer2
.1
.
bn1
.
bias
-
torch
.
Size
([
128
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer2
.1
.
conv2
.
weight
-
torch
.
Size
([
128
,
128
,
3
,
3
]):
KaimingInit
:
a
=
0
,
mode
=
fan_out
,
nonlinearity
=
relu
,
distribution
=
normal
,
bias
=
0
img_backbone
.
layer2
.1
.
bn2
.
weight
-
torch
.
Size
([
128
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer2
.1
.
bn2
.
bias
-
torch
.
Size
([
128
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer2
.1
.
conv3
.
weight
-
torch
.
Size
([
512
,
128
,
1
,
1
]):
KaimingInit
:
a
=
0
,
mode
=
fan_out
,
nonlinearity
=
relu
,
distribution
=
normal
,
bias
=
0
img_backbone
.
layer2
.1
.
bn3
.
weight
-
torch
.
Size
([
512
]):
ConstantInit
:
val
=
0
,
bias
=
0
img_backbone
.
layer2
.1
.
bn3
.
bias
-
torch
.
Size
([
512
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer2
.2
.
conv1
.
weight
-
torch
.
Size
([
128
,
512
,
1
,
1
]):
KaimingInit
:
a
=
0
,
mode
=
fan_out
,
nonlinearity
=
relu
,
distribution
=
normal
,
bias
=
0
img_backbone
.
layer2
.2
.
bn1
.
weight
-
torch
.
Size
([
128
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer2
.2
.
bn1
.
bias
-
torch
.
Size
([
128
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer2
.2
.
conv2
.
weight
-
torch
.
Size
([
128
,
128
,
3
,
3
]):
KaimingInit
:
a
=
0
,
mode
=
fan_out
,
nonlinearity
=
relu
,
distribution
=
normal
,
bias
=
0
img_backbone
.
layer2
.2
.
bn2
.
weight
-
torch
.
Size
([
128
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer2
.2
.
bn2
.
bias
-
torch
.
Size
([
128
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer2
.2
.
conv3
.
weight
-
torch
.
Size
([
512
,
128
,
1
,
1
]):
KaimingInit
:
a
=
0
,
mode
=
fan_out
,
nonlinearity
=
relu
,
distribution
=
normal
,
bias
=
0
img_backbone
.
layer2
.2
.
bn3
.
weight
-
torch
.
Size
([
512
]):
ConstantInit
:
val
=
0
,
bias
=
0
img_backbone
.
layer2
.2
.
bn3
.
bias
-
torch
.
Size
([
512
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer2
.3
.
conv1
.
weight
-
torch
.
Size
([
128
,
512
,
1
,
1
]):
KaimingInit
:
a
=
0
,
mode
=
fan_out
,
nonlinearity
=
relu
,
distribution
=
normal
,
bias
=
0
img_backbone
.
layer2
.3
.
bn1
.
weight
-
torch
.
Size
([
128
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer2
.3
.
bn1
.
bias
-
torch
.
Size
([
128
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer2
.3
.
conv2
.
weight
-
torch
.
Size
([
128
,
128
,
3
,
3
]):
KaimingInit
:
a
=
0
,
mode
=
fan_out
,
nonlinearity
=
relu
,
distribution
=
normal
,
bias
=
0
img_backbone
.
layer2
.3
.
bn2
.
weight
-
torch
.
Size
([
128
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer2
.3
.
bn2
.
bias
-
torch
.
Size
([
128
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer2
.3
.
conv3
.
weight
-
torch
.
Size
([
512
,
128
,
1
,
1
]):
KaimingInit
:
a
=
0
,
mode
=
fan_out
,
nonlinearity
=
relu
,
distribution
=
normal
,
bias
=
0
img_backbone
.
layer2
.3
.
bn3
.
weight
-
torch
.
Size
([
512
]):
ConstantInit
:
val
=
0
,
bias
=
0
img_backbone
.
layer2
.3
.
bn3
.
bias
-
torch
.
Size
([
512
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer3
.0
.
conv1
.
weight
-
torch
.
Size
([
256
,
512
,
1
,
1
]):
KaimingInit
:
a
=
0
,
mode
=
fan_out
,
nonlinearity
=
relu
,
distribution
=
normal
,
bias
=
0
img_backbone
.
layer3
.0
.
bn1
.
weight
-
torch
.
Size
([
256
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer3
.0
.
bn1
.
bias
-
torch
.
Size
([
256
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer3
.0
.
conv2
.
weight
-
torch
.
Size
([
256
,
256
,
3
,
3
]):
KaimingInit
:
a
=
0
,
mode
=
fan_out
,
nonlinearity
=
relu
,
distribution
=
normal
,
bias
=
0
img_backbone
.
layer3
.0
.
bn2
.
weight
-
torch
.
Size
([
256
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer3
.0
.
bn2
.
bias
-
torch
.
Size
([
256
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer3
.0
.
conv3
.
weight
-
torch
.
Size
([
1024
,
256
,
1
,
1
]):
KaimingInit
:
a
=
0
,
mode
=
fan_out
,
nonlinearity
=
relu
,
distribution
=
normal
,
bias
=
0
img_backbone
.
layer3
.0
.
bn3
.
weight
-
torch
.
Size
([
1024
]):
ConstantInit
:
val
=
0
,
bias
=
0
img_backbone
.
layer3
.0
.
bn3
.
bias
-
torch
.
Size
([
1024
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer3
.0
.
downsample
.0
.
weight
-
torch
.
Size
([
1024
,
512
,
1
,
1
]):
KaimingInit
:
a
=
0
,
mode
=
fan_out
,
nonlinearity
=
relu
,
distribution
=
normal
,
bias
=
0
img_backbone
.
layer3
.0
.
downsample
.1
.
weight
-
torch
.
Size
([
1024
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer3
.0
.
downsample
.1
.
bias
-
torch
.
Size
([
1024
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer3
.1
.
conv1
.
weight
-
torch
.
Size
([
256
,
1024
,
1
,
1
]):
KaimingInit
:
a
=
0
,
mode
=
fan_out
,
nonlinearity
=
relu
,
distribution
=
normal
,
bias
=
0
img_backbone
.
layer3
.1
.
bn1
.
weight
-
torch
.
Size
([
256
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer3
.1
.
bn1
.
bias
-
torch
.
Size
([
256
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer3
.1
.
conv2
.
weight
-
torch
.
Size
([
256
,
256
,
3
,
3
]):
KaimingInit
:
a
=
0
,
mode
=
fan_out
,
nonlinearity
=
relu
,
distribution
=
normal
,
bias
=
0
img_backbone
.
layer3
.1
.
bn2
.
weight
-
torch
.
Size
([
256
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer3
.1
.
bn2
.
bias
-
torch
.
Size
([
256
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer3
.1
.
conv3
.
weight
-
torch
.
Size
([
1024
,
256
,
1
,
1
]):
KaimingInit
:
a
=
0
,
mode
=
fan_out
,
nonlinearity
=
relu
,
distribution
=
normal
,
bias
=
0
img_backbone
.
layer3
.1
.
bn3
.
weight
-
torch
.
Size
([
1024
]):
ConstantInit
:
val
=
0
,
bias
=
0
img_backbone
.
layer3
.1
.
bn3
.
bias
-
torch
.
Size
([
1024
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer3
.2
.
conv1
.
weight
-
torch
.
Size
([
256
,
1024
,
1
,
1
]):
KaimingInit
:
a
=
0
,
mode
=
fan_out
,
nonlinearity
=
relu
,
distribution
=
normal
,
bias
=
0
img_backbone
.
layer3
.2
.
bn1
.
weight
-
torch
.
Size
([
256
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer3
.2
.
bn1
.
bias
-
torch
.
Size
([
256
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer3
.2
.
conv2
.
weight
-
torch
.
Size
([
256
,
256
,
3
,
3
]):
KaimingInit
:
a
=
0
,
mode
=
fan_out
,
nonlinearity
=
relu
,
distribution
=
normal
,
bias
=
0
img_backbone
.
layer3
.2
.
bn2
.
weight
-
torch
.
Size
([
256
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer3
.2
.
bn2
.
bias
-
torch
.
Size
([
256
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer3
.2
.
conv3
.
weight
-
torch
.
Size
([
1024
,
256
,
1
,
1
]):
KaimingInit
:
a
=
0
,
mode
=
fan_out
,
nonlinearity
=
relu
,
distribution
=
normal
,
bias
=
0
img_backbone
.
layer3
.2
.
bn3
.
weight
-
torch
.
Size
([
1024
]):
ConstantInit
:
val
=
0
,
bias
=
0
img_backbone
.
layer3
.2
.
bn3
.
bias
-
torch
.
Size
([
1024
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer3
.3
.
conv1
.
weight
-
torch
.
Size
([
256
,
1024
,
1
,
1
]):
KaimingInit
:
a
=
0
,
mode
=
fan_out
,
nonlinearity
=
relu
,
distribution
=
normal
,
bias
=
0
img_backbone
.
layer3
.3
.
bn1
.
weight
-
torch
.
Size
([
256
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer3
.3
.
bn1
.
bias
-
torch
.
Size
([
256
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer3
.3
.
conv2
.
weight
-
torch
.
Size
([
256
,
256
,
3
,
3
]):
KaimingInit
:
a
=
0
,
mode
=
fan_out
,
nonlinearity
=
relu
,
distribution
=
normal
,
bias
=
0
img_backbone
.
layer3
.3
.
bn2
.
weight
-
torch
.
Size
([
256
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer3
.3
.
bn2
.
bias
-
torch
.
Size
([
256
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer3
.3
.
conv3
.
weight
-
torch
.
Size
([
1024
,
256
,
1
,
1
]):
KaimingInit
:
a
=
0
,
mode
=
fan_out
,
nonlinearity
=
relu
,
distribution
=
normal
,
bias
=
0
img_backbone
.
layer3
.3
.
bn3
.
weight
-
torch
.
Size
([
1024
]):
ConstantInit
:
val
=
0
,
bias
=
0
img_backbone
.
layer3
.3
.
bn3
.
bias
-
torch
.
Size
([
1024
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer3
.4
.
conv1
.
weight
-
torch
.
Size
([
256
,
1024
,
1
,
1
]):
KaimingInit
:
a
=
0
,
mode
=
fan_out
,
nonlinearity
=
relu
,
distribution
=
normal
,
bias
=
0
img_backbone
.
layer3
.4
.
bn1
.
weight
-
torch
.
Size
([
256
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer3
.4
.
bn1
.
bias
-
torch
.
Size
([
256
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer3
.4
.
conv2
.
weight
-
torch
.
Size
([
256
,
256
,
3
,
3
]):
KaimingInit
:
a
=
0
,
mode
=
fan_out
,
nonlinearity
=
relu
,
distribution
=
normal
,
bias
=
0
img_backbone
.
layer3
.4
.
bn2
.
weight
-
torch
.
Size
([
256
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer3
.4
.
bn2
.
bias
-
torch
.
Size
([
256
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer3
.4
.
conv3
.
weight
-
torch
.
Size
([
1024
,
256
,
1
,
1
]):
KaimingInit
:
a
=
0
,
mode
=
fan_out
,
nonlinearity
=
relu
,
distribution
=
normal
,
bias
=
0
img_backbone
.
layer3
.4
.
bn3
.
weight
-
torch
.
Size
([
1024
]):
ConstantInit
:
val
=
0
,
bias
=
0
img_backbone
.
layer3
.4
.
bn3
.
bias
-
torch
.
Size
([
1024
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer3
.5
.
conv1
.
weight
-
torch
.
Size
([
256
,
1024
,
1
,
1
]):
KaimingInit
:
a
=
0
,
mode
=
fan_out
,
nonlinearity
=
relu
,
distribution
=
normal
,
bias
=
0
img_backbone
.
layer3
.5
.
bn1
.
weight
-
torch
.
Size
([
256
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer3
.5
.
bn1
.
bias
-
torch
.
Size
([
256
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer3
.5
.
conv2
.
weight
-
torch
.
Size
([
256
,
256
,
3
,
3
]):
KaimingInit
:
a
=
0
,
mode
=
fan_out
,
nonlinearity
=
relu
,
distribution
=
normal
,
bias
=
0
img_backbone
.
layer3
.5
.
bn2
.
weight
-
torch
.
Size
([
256
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer3
.5
.
bn2
.
bias
-
torch
.
Size
([
256
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer3
.5
.
conv3
.
weight
-
torch
.
Size
([
1024
,
256
,
1
,
1
]):
KaimingInit
:
a
=
0
,
mode
=
fan_out
,
nonlinearity
=
relu
,
distribution
=
normal
,
bias
=
0
img_backbone
.
layer3
.5
.
bn3
.
weight
-
torch
.
Size
([
1024
]):
ConstantInit
:
val
=
0
,
bias
=
0
img_backbone
.
layer3
.5
.
bn3
.
bias
-
torch
.
Size
([
1024
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer4
.0
.
conv1
.
weight
-
torch
.
Size
([
512
,
1024
,
1
,
1
]):
KaimingInit
:
a
=
0
,
mode
=
fan_out
,
nonlinearity
=
relu
,
distribution
=
normal
,
bias
=
0
img_backbone
.
layer4
.0
.
bn1
.
weight
-
torch
.
Size
([
512
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer4
.0
.
bn1
.
bias
-
torch
.
Size
([
512
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer4
.0
.
conv2
.
weight
-
torch
.
Size
([
512
,
512
,
3
,
3
]):
KaimingInit
:
a
=
0
,
mode
=
fan_out
,
nonlinearity
=
relu
,
distribution
=
normal
,
bias
=
0
img_backbone
.
layer4
.0
.
bn2
.
weight
-
torch
.
Size
([
512
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer4
.0
.
bn2
.
bias
-
torch
.
Size
([
512
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer4
.0
.
conv3
.
weight
-
torch
.
Size
([
2048
,
512
,
1
,
1
]):
KaimingInit
:
a
=
0
,
mode
=
fan_out
,
nonlinearity
=
relu
,
distribution
=
normal
,
bias
=
0
img_backbone
.
layer4
.0
.
bn3
.
weight
-
torch
.
Size
([
2048
]):
ConstantInit
:
val
=
0
,
bias
=
0
img_backbone
.
layer4
.0
.
bn3
.
bias
-
torch
.
Size
([
2048
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer4
.0
.
downsample
.0
.
weight
-
torch
.
Size
([
2048
,
1024
,
1
,
1
]):
KaimingInit
:
a
=
0
,
mode
=
fan_out
,
nonlinearity
=
relu
,
distribution
=
normal
,
bias
=
0
img_backbone
.
layer4
.0
.
downsample
.1
.
weight
-
torch
.
Size
([
2048
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer4
.0
.
downsample
.1
.
bias
-
torch
.
Size
([
2048
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer4
.1
.
conv1
.
weight
-
torch
.
Size
([
512
,
2048
,
1
,
1
]):
KaimingInit
:
a
=
0
,
mode
=
fan_out
,
nonlinearity
=
relu
,
distribution
=
normal
,
bias
=
0
img_backbone
.
layer4
.1
.
bn1
.
weight
-
torch
.
Size
([
512
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer4
.1
.
bn1
.
bias
-
torch
.
Size
([
512
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer4
.1
.
conv2
.
weight
-
torch
.
Size
([
512
,
512
,
3
,
3
]):
KaimingInit
:
a
=
0
,
mode
=
fan_out
,
nonlinearity
=
relu
,
distribution
=
normal
,
bias
=
0
img_backbone
.
layer4
.1
.
bn2
.
weight
-
torch
.
Size
([
512
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer4
.1
.
bn2
.
bias
-
torch
.
Size
([
512
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer4
.1
.
conv3
.
weight
-
torch
.
Size
([
2048
,
512
,
1
,
1
]):
KaimingInit
:
a
=
0
,
mode
=
fan_out
,
nonlinearity
=
relu
,
distribution
=
normal
,
bias
=
0
img_backbone
.
layer4
.1
.
bn3
.
weight
-
torch
.
Size
([
2048
]):
ConstantInit
:
val
=
0
,
bias
=
0
img_backbone
.
layer4
.1
.
bn3
.
bias
-
torch
.
Size
([
2048
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer4
.2
.
conv1
.
weight
-
torch
.
Size
([
512
,
2048
,
1
,
1
]):
KaimingInit
:
a
=
0
,
mode
=
fan_out
,
nonlinearity
=
relu
,
distribution
=
normal
,
bias
=
0
img_backbone
.
layer4
.2
.
bn1
.
weight
-
torch
.
Size
([
512
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer4
.2
.
bn1
.
bias
-
torch
.
Size
([
512
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer4
.2
.
conv2
.
weight
-
torch
.
Size
([
512
,
512
,
3
,
3
]):
KaimingInit
:
a
=
0
,
mode
=
fan_out
,
nonlinearity
=
relu
,
distribution
=
normal
,
bias
=
0
img_backbone
.
layer4
.2
.
bn2
.
weight
-
torch
.
Size
([
512
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer4
.2
.
bn2
.
bias
-
torch
.
Size
([
512
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer4
.2
.
conv3
.
weight
-
torch
.
Size
([
2048
,
512
,
1
,
1
]):
KaimingInit
:
a
=
0
,
mode
=
fan_out
,
nonlinearity
=
relu
,
distribution
=
normal
,
bias
=
0
img_backbone
.
layer4
.2
.
bn3
.
weight
-
torch
.
Size
([
2048
]):
ConstantInit
:
val
=
0
,
bias
=
0
img_backbone
.
layer4
.2
.
bn3
.
bias
-
torch
.
Size
([
2048
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_neck
.
lateral_convs
.0
.
conv
.
weight
-
torch
.
Size
([
256
,
1024
,
1
,
1
]):
XavierInit
:
gain
=
1
,
distribution
=
uniform
,
bias
=
0
img_neck
.
lateral_convs
.0
.
conv
.
bias
-
torch
.
Size
([
256
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_neck
.
lateral_convs
.1
.
conv
.
weight
-
torch
.
Size
([
256
,
2048
,
1
,
1
]):
XavierInit
:
gain
=
1
,
distribution
=
uniform
,
bias
=
0
img_neck
.
lateral_convs
.1
.
conv
.
bias
-
torch
.
Size
([
256
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_neck
.
fpn_convs
.0
.
conv
.
weight
-
torch
.
Size
([
256
,
256
,
3
,
3
]):
XavierInit
:
gain
=
1
,
distribution
=
uniform
,
bias
=
0
img_neck
.
fpn_convs
.0
.
conv
.
bias
-
torch
.
Size
([
256
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_view_transformer
.
depth_net
.
weight
-
torch
.
Size
([
152
,
256
,
1
,
1
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_view_transformer
.
depth_net
.
bias
-
torch
.
Size
([
152
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_bev_encoder_backbone
.
layers
.0.0
.
conv1
.
weight
-
torch
.
Size
([
128
,
64
,
3
,
3
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_bev_encoder_backbone
.
layers
.0.0
.
bn1
.
weight
-
torch
.
Size
([
128
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_bev_encoder_backbone
.
layers
.0.0
.
bn1
.
bias
-
torch
.
Size
([
128
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_bev_encoder_backbone
.
layers
.0.0
.
conv2
.
weight
-
torch
.
Size
([
128
,
128
,
3
,
3
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_bev_encoder_backbone
.
layers
.0.0
.
bn2
.
weight
-
torch
.
Size
([
128
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_bev_encoder_backbone
.
layers
.0.0
.
bn2
.
bias
-
torch
.
Size
([
128
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_bev_encoder_backbone
.
layers
.0.0
.
downsample
.
weight
-
torch
.
Size
([
128
,
64
,
3
,
3
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_bev_encoder_backbone
.
layers
.0.0
.
downsample
.
bias
-
torch
.
Size
([
128
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_bev_encoder_backbone
.
layers
.0.1
.
conv1
.
weight
-
torch
.
Size
([
128
,
128
,
3
,
3
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_bev_encoder_backbone
.
layers
.0.1
.
bn1
.
weight
-
torch
.
Size
([
128
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_bev_encoder_backbone
.
layers
.0.1
.
bn1
.
bias
-
torch
.
Size
([
128
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_bev_encoder_backbone
.
layers
.0.1
.
conv2
.
weight
-
torch
.
Size
([
128
,
128
,
3
,
3
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_bev_encoder_backbone
.
layers
.0.1
.
bn2
.
weight
-
torch
.
Size
([
128
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_bev_encoder_backbone
.
layers
.0.1
.
bn2
.
bias
-
torch
.
Size
([
128
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_bev_encoder_backbone
.
layers
.1.0
.
conv1
.
weight
-
torch
.
Size
([
256
,
128
,
3
,
3
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_bev_encoder_backbone
.
layers
.1.0
.
bn1
.
weight
-
torch
.
Size
([
256
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_bev_encoder_backbone
.
layers
.1.0
.
bn1
.
bias
-
torch
.
Size
([
256
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_bev_encoder_backbone
.
layers
.1.0
.
conv2
.
weight
-
torch
.
Size
([
256
,
256
,
3
,
3
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_bev_encoder_backbone
.
layers
.1.0
.
bn2
.
weight
-
torch
.
Size
([
256
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_bev_encoder_backbone
.
layers
.1.0
.
bn2
.
bias
-
torch
.
Size
([
256
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_bev_encoder_backbone
.
layers
.1.0
.
downsample
.
weight
-
torch
.
Size
([
256
,
128
,
3
,
3
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_bev_encoder_backbone
.
layers
.1.0
.
downsample
.
bias
-
torch
.
Size
([
256
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_bev_encoder_backbone
.
layers
.1.1
.
conv1
.
weight
-
torch
.
Size
([
256
,
256
,
3
,
3
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_bev_encoder_backbone
.
layers
.1.1
.
bn1
.
weight
-
torch
.
Size
([
256
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_bev_encoder_backbone
.
layers
.1.1
.
bn1
.
bias
-
torch
.
Size
([
256
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_bev_encoder_backbone
.
layers
.1.1
.
conv2
.
weight
-
torch
.
Size
([
256
,
256
,
3
,
3
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_bev_encoder_backbone
.
layers
.1.1
.
bn2
.
weight
-
torch
.
Size
([
256
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_bev_encoder_backbone
.
layers
.1.1
.
bn2
.
bias
-
torch
.
Size
([
256
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_bev_encoder_backbone
.
layers
.2.0
.
conv1
.
weight
-
torch
.
Size
([
512
,
256
,
3
,
3
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_bev_encoder_backbone
.
layers
.2.0
.
bn1
.
weight
-
torch
.
Size
([
512
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_bev_encoder_backbone
.
layers
.2.0
.
bn1
.
bias
-
torch
.
Size
([
512
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_bev_encoder_backbone
.
layers
.2.0
.
conv2
.
weight
-
torch
.
Size
([
512
,
512
,
3
,
3
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_bev_encoder_backbone
.
layers
.2.0
.
bn2
.
weight
-
torch
.
Size
([
512
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_bev_encoder_backbone
.
layers
.2.0
.
bn2
.
bias
-
torch
.
Size
([
512
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_bev_encoder_backbone
.
layers
.2.0
.
downsample
.
weight
-
torch
.
Size
([
512
,
256
,
3
,
3
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_bev_encoder_backbone
.
layers
.2.0
.
downsample
.
bias
-
torch
.
Size
([
512
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_bev_encoder_backbone
.
layers
.2.1
.
conv1
.
weight
-
torch
.
Size
([
512
,
512
,
3
,
3
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_bev_encoder_backbone
.
layers
.2.1
.
bn1
.
weight
-
torch
.
Size
([
512
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_bev_encoder_backbone
.
layers
.2.1
.
bn1
.
bias
-
torch
.
Size
([
512
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_bev_encoder_backbone
.
layers
.2.1
.
conv2
.
weight
-
torch
.
Size
([
512
,
512
,
3
,
3
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_bev_encoder_backbone
.
layers
.2.1
.
bn2
.
weight
-
torch
.
Size
([
512
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_bev_encoder_backbone
.
layers
.2.1
.
bn2
.
bias
-
torch
.
Size
([
512
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_bev_encoder_neck
.
conv
.0
.
weight
-
torch
.
Size
([
512
,
640
,
3
,
3
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_bev_encoder_neck
.
conv
.1
.
weight
-
torch
.
Size
([
512
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_bev_encoder_neck
.
conv
.1
.
bias
-
torch
.
Size
([
512
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_bev_encoder_neck
.
conv
.3
.
weight
-
torch
.
Size
([
512
,
512
,
3
,
3
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_bev_encoder_neck
.
conv
.4
.
weight
-
torch
.
Size
([
512
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_bev_encoder_neck
.
conv
.4
.
bias
-
torch
.
Size
([
512
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_bev_encoder_neck
.
up2
.1
.
weight
-
torch
.
Size
([
256
,
512
,
3
,
3
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_bev_encoder_neck
.
up2
.2
.
weight
-
torch
.
Size
([
256
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_bev_encoder_neck
.
up2
.2
.
bias
-
torch
.
Size
([
256
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_bev_encoder_neck
.
up2
.4
.
weight
-
torch
.
Size
([
256
,
256
,
1
,
1
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_bev_encoder_neck
.
up2
.4
.
bias
-
torch
.
Size
([
256
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
occ_head
.
final_conv
.
conv
.
weight
-
torch
.
Size
([
256
,
256
,
3
,
3
]):
Initialized
by
user
-
defined
`
init_weights
`
in
ConvModule
occ_head
.
final_conv
.
conv
.
bias
-
torch
.
Size
([
256
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
occ_head
.
predicter
.0
.
weight
-
torch
.
Size
([
512
,
256
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
occ_head
.
predicter
.0
.
bias
-
torch
.
Size
([
512
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
occ_head
.
predicter
.2
.
weight
-
torch
.
Size
([
288
,
512
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
occ_head
.
predicter
.2
.
bias
-
torch
.
Size
([
288
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
2026
-
04
-
03
16
:
24
:
23
,
032
-
mmdet
-
INFO
-
Model
:
BEVDetOCC
(
(
img_backbone
):
ResNet
(
(
conv1
):
Conv2d
(
3
,
64
,
kernel_size
=(
7
,
7
),
stride
=(
2
,
2
),
padding
=(
3
,
3
),
bias
=
False
)
(
bn1
):
BatchNorm2d
(
64
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
relu
):
ReLU
(
inplace
=
True
)
(
maxpool
):
MaxPool2d
(
kernel_size
=
3
,
stride
=
2
,
padding
=
1
,
dilation
=
1
,
ceil_mode
=
False
)
(
layer1
):
ResLayer
(
(
0
):
Bottleneck
(
(
conv1
):
Conv2d
(
64
,
64
,
kernel_size
=(
1
,
1
),
stride
=(
1
,
1
),
bias
=
False
)
(
bn1
):
BatchNorm2d
(
64
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
conv2
):
Conv2d
(
64
,
64
,
kernel_size
=(
3
,
3
),
stride
=(
1
,
1
),
padding
=(
1
,
1
),
bias
=
False
)
(
bn2
):
BatchNorm2d
(
64
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
conv3
):
Conv2d
(
64
,
256
,
kernel_size
=(
1
,
1
),
stride
=(
1
,
1
),
bias
=
False
)
(
bn3
):
BatchNorm2d
(
256
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
relu
):
ReLU
(
inplace
=
True
)
(
downsample
):
Sequential
(
(
0
):
Conv2d
(
64
,
256
,
kernel_size
=(
1
,
1
),
stride
=(
1
,
1
),
bias
=
False
)
(
1
):
BatchNorm2d
(
256
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
)
)
init_cfg
={
'type'
:
'Constant'
,
'val'
:
0
,
'override'
:
{
'name'
:
'norm3'
}}
(
1
):
Bottleneck
(
(
conv1
):
Conv2d
(
256
,
64
,
kernel_size
=(
1
,
1
),
stride
=(
1
,
1
),
bias
=
False
)
(
bn1
):
BatchNorm2d
(
64
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
conv2
):
Conv2d
(
64
,
64
,
kernel_size
=(
3
,
3
),
stride
=(
1
,
1
),
padding
=(
1
,
1
),
bias
=
False
)
(
bn2
):
BatchNorm2d
(
64
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
conv3
):
Conv2d
(
64
,
256
,
kernel_size
=(
1
,
1
),
stride
=(
1
,
1
),
bias
=
False
)
(
bn3
):
BatchNorm2d
(
256
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
relu
):
ReLU
(
inplace
=
True
)
)
init_cfg
={
'type'
:
'Constant'
,
'val'
:
0
,
'override'
:
{
'name'
:
'norm3'
}}
(
2
):
Bottleneck
(
(
conv1
):
Conv2d
(
256
,
64
,
kernel_size
=(
1
,
1
),
stride
=(
1
,
1
),
bias
=
False
)
(
bn1
):
BatchNorm2d
(
64
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
conv2
):
Conv2d
(
64
,
64
,
kernel_size
=(
3
,
3
),
stride
=(
1
,
1
),
padding
=(
1
,
1
),
bias
=
False
)
(
bn2
):
BatchNorm2d
(
64
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
conv3
):
Conv2d
(
64
,
256
,
kernel_size
=(
1
,
1
),
stride
=(
1
,
1
),
bias
=
False
)
(
bn3
):
BatchNorm2d
(
256
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
relu
):
ReLU
(
inplace
=
True
)
)
init_cfg
={
'type'
:
'Constant'
,
'val'
:
0
,
'override'
:
{
'name'
:
'norm3'
}}
)
(
layer2
):
ResLayer
(
(
0
):
Bottleneck
(
(
conv1
):
Conv2d
(
256
,
128
,
kernel_size
=(
1
,
1
),
stride
=(
1
,
1
),
bias
=
False
)
(
bn1
):
BatchNorm2d
(
128
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
conv2
):
Conv2d
(
128
,
128
,
kernel_size
=(
3
,
3
),
stride
=(
2
,
2
),
padding
=(
1
,
1
),
bias
=
False
)
(
bn2
):
BatchNorm2d
(
128
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
conv3
):
Conv2d
(
128
,
512
,
kernel_size
=(
1
,
1
),
stride
=(
1
,
1
),
bias
=
False
)
(
bn3
):
BatchNorm2d
(
512
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
relu
):
ReLU
(
inplace
=
True
)
(
downsample
):
Sequential
(
(
0
):
Conv2d
(
256
,
512
,
kernel_size
=(
1
,
1
),
stride
=(
2
,
2
),
bias
=
False
)
(
1
):
BatchNorm2d
(
512
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
)
)
init_cfg
={
'type'
:
'Constant'
,
'val'
:
0
,
'override'
:
{
'name'
:
'norm3'
}}
(
1
):
Bottleneck
(
(
conv1
):
Conv2d
(
512
,
128
,
kernel_size
=(
1
,
1
),
stride
=(
1
,
1
),
bias
=
False
)
(
bn1
):
BatchNorm2d
(
128
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
conv2
):
Conv2d
(
128
,
128
,
kernel_size
=(
3
,
3
),
stride
=(
1
,
1
),
padding
=(
1
,
1
),
bias
=
False
)
(
bn2
):
BatchNorm2d
(
128
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
conv3
):
Conv2d
(
128
,
512
,
kernel_size
=(
1
,
1
),
stride
=(
1
,
1
),
bias
=
False
)
(
bn3
):
BatchNorm2d
(
512
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
relu
):
ReLU
(
inplace
=
True
)
)
init_cfg
={
'type'
:
'Constant'
,
'val'
:
0
,
'override'
:
{
'name'
:
'norm3'
}}
(
2
):
Bottleneck
(
(
conv1
):
Conv2d
(
512
,
128
,
kernel_size
=(
1
,
1
),
stride
=(
1
,
1
),
bias
=
False
)
(
bn1
):
BatchNorm2d
(
128
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
conv2
):
Conv2d
(
128
,
128
,
kernel_size
=(
3
,
3
),
stride
=(
1
,
1
),
padding
=(
1
,
1
),
bias
=
False
)
(
bn2
):
BatchNorm2d
(
128
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
conv3
):
Conv2d
(
128
,
512
,
kernel_size
=(
1
,
1
),
stride
=(
1
,
1
),
bias
=
False
)
(
bn3
):
BatchNorm2d
(
512
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
relu
):
ReLU
(
inplace
=
True
)
)
init_cfg
={
'type'
:
'Constant'
,
'val'
:
0
,
'override'
:
{
'name'
:
'norm3'
}}
(
3
):
Bottleneck
(
(
conv1
):
Conv2d
(
512
,
128
,
kernel_size
=(
1
,
1
),
stride
=(
1
,
1
),
bias
=
False
)
(
bn1
):
BatchNorm2d
(
128
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
conv2
):
Conv2d
(
128
,
128
,
kernel_size
=(
3
,
3
),
stride
=(
1
,
1
),
padding
=(
1
,
1
),
bias
=
False
)
(
bn2
):
BatchNorm2d
(
128
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
conv3
):
Conv2d
(
128
,
512
,
kernel_size
=(
1
,
1
),
stride
=(
1
,
1
),
bias
=
False
)
(
bn3
):
BatchNorm2d
(
512
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
relu
):
ReLU
(
inplace
=
True
)
)
init_cfg
={
'type'
:
'Constant'
,
'val'
:
0
,
'override'
:
{
'name'
:
'norm3'
}}
)
(
layer3
):
ResLayer
(
(
0
):
Bottleneck
(
(
conv1
):
Conv2d
(
512
,
256
,
kernel_size
=(
1
,
1
),
stride
=(
1
,
1
),
bias
=
False
)
(
bn1
):
BatchNorm2d
(
256
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
conv2
):
Conv2d
(
256
,
256
,
kernel_size
=(
3
,
3
),
stride
=(
2
,
2
),
padding
=(
1
,
1
),
bias
=
False
)
(
bn2
):
BatchNorm2d
(
256
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
conv3
):
Conv2d
(
256
,
1024
,
kernel_size
=(
1
,
1
),
stride
=(
1
,
1
),
bias
=
False
)
(
bn3
):
BatchNorm2d
(
1024
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
relu
):
ReLU
(
inplace
=
True
)
(
downsample
):
Sequential
(
(
0
):
Conv2d
(
512
,
1024
,
kernel_size
=(
1
,
1
),
stride
=(
2
,
2
),
bias
=
False
)
(
1
):
BatchNorm2d
(
1024
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
)
)
init_cfg
={
'type'
:
'Constant'
,
'val'
:
0
,
'override'
:
{
'name'
:
'norm3'
}}
(
1
):
Bottleneck
(
(
conv1
):
Conv2d
(
1024
,
256
,
kernel_size
=(
1
,
1
),
stride
=(
1
,
1
),
bias
=
False
)
(
bn1
):
BatchNorm2d
(
256
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
conv2
):
Conv2d
(
256
,
256
,
kernel_size
=(
3
,
3
),
stride
=(
1
,
1
),
padding
=(
1
,
1
),
bias
=
False
)
(
bn2
):
BatchNorm2d
(
256
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
conv3
):
Conv2d
(
256
,
1024
,
kernel_size
=(
1
,
1
),
stride
=(
1
,
1
),
bias
=
False
)
(
bn3
):
BatchNorm2d
(
1024
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
relu
):
ReLU
(
inplace
=
True
)
)
init_cfg
={
'type'
:
'Constant'
,
'val'
:
0
,
'override'
:
{
'name'
:
'norm3'
}}
(
2
):
Bottleneck
(
(
conv1
):
Conv2d
(
1024
,
256
,
kernel_size
=(
1
,
1
),
stride
=(
1
,
1
),
bias
=
False
)
(
bn1
):
BatchNorm2d
(
256
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
conv2
):
Conv2d
(
256
,
256
,
kernel_size
=(
3
,
3
),
stride
=(
1
,
1
),
padding
=(
1
,
1
),
bias
=
False
)
(
bn2
):
BatchNorm2d
(
256
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
conv3
):
Conv2d
(
256
,
1024
,
kernel_size
=(
1
,
1
),
stride
=(
1
,
1
),
bias
=
False
)
(
bn3
):
BatchNorm2d
(
1024
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
relu
):
ReLU
(
inplace
=
True
)
)
init_cfg
={
'type'
:
'Constant'
,
'val'
:
0
,
'override'
:
{
'name'
:
'norm3'
}}
(
3
):
Bottleneck
(
(
conv1
):
Conv2d
(
1024
,
256
,
kernel_size
=(
1
,
1
),
stride
=(
1
,
1
),
bias
=
False
)
(
bn1
):
BatchNorm2d
(
256
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
conv2
):
Conv2d
(
256
,
256
,
kernel_size
=(
3
,
3
),
stride
=(
1
,
1
),
padding
=(
1
,
1
),
bias
=
False
)
(
bn2
):
BatchNorm2d
(
256
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
conv3
):
Conv2d
(
256
,
1024
,
kernel_size
=(
1
,
1
),
stride
=(
1
,
1
),
bias
=
False
)
(
bn3
):
BatchNorm2d
(
1024
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
relu
):
ReLU
(
inplace
=
True
)
)
init_cfg
={
'type'
:
'Constant'
,
'val'
:
0
,
'override'
:
{
'name'
:
'norm3'
}}
(
4
):
Bottleneck
(
(
conv1
):
Conv2d
(
1024
,
256
,
kernel_size
=(
1
,
1
),
stride
=(
1
,
1
),
bias
=
False
)
(
bn1
):
BatchNorm2d
(
256
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
conv2
):
Conv2d
(
256
,
256
,
kernel_size
=(
3
,
3
),
stride
=(
1
,
1
),
padding
=(
1
,
1
),
bias
=
False
)
(
bn2
):
BatchNorm2d
(
256
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
conv3
):
Conv2d
(
256
,
1024
,
kernel_size
=(
1
,
1
),
stride
=(
1
,
1
),
bias
=
False
)
(
bn3
):
BatchNorm2d
(
1024
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
relu
):
ReLU
(
inplace
=
True
)
)
init_cfg
={
'type'
:
'Constant'
,
'val'
:
0
,
'override'
:
{
'name'
:
'norm3'
}}
(
5
):
Bottleneck
(
(
conv1
):
Conv2d
(
1024
,
256
,
kernel_size
=(
1
,
1
),
stride
=(
1
,
1
),
bias
=
False
)
(
bn1
):
BatchNorm2d
(
256
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
conv2
):
Conv2d
(
256
,
256
,
kernel_size
=(
3
,
3
),
stride
=(
1
,
1
),
padding
=(
1
,
1
),
bias
=
False
)
(
bn2
):
BatchNorm2d
(
256
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
conv3
):
Conv2d
(
256
,
1024
,
kernel_size
=(
1
,
1
),
stride
=(
1
,
1
),
bias
=
False
)
(
bn3
):
BatchNorm2d
(
1024
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
relu
):
ReLU
(
inplace
=
True
)
)
init_cfg
={
'type'
:
'Constant'
,
'val'
:
0
,
'override'
:
{
'name'
:
'norm3'
}}
)
(
layer4
):
ResLayer
(
(
0
):
Bottleneck
(
(
conv1
):
Conv2d
(
1024
,
512
,
kernel_size
=(
1
,
1
),
stride
=(
1
,
1
),
bias
=
False
)
(
bn1
):
BatchNorm2d
(
512
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
conv2
):
Conv2d
(
512
,
512
,
kernel_size
=(
3
,
3
),
stride
=(
2
,
2
),
padding
=(
1
,
1
),
bias
=
False
)
(
bn2
):
BatchNorm2d
(
512
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
conv3
):
Conv2d
(
512
,
2048
,
kernel_size
=(
1
,
1
),
stride
=(
1
,
1
),
bias
=
False
)
(
bn3
):
BatchNorm2d
(
2048
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
relu
):
ReLU
(
inplace
=
True
)
(
downsample
):
Sequential
(
(
0
):
Conv2d
(
1024
,
2048
,
kernel_size
=(
1
,
1
),
stride
=(
2
,
2
),
bias
=
False
)
(
1
):
BatchNorm2d
(
2048
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
)
)
init_cfg
={
'type'
:
'Constant'
,
'val'
:
0
,
'override'
:
{
'name'
:
'norm3'
}}
(
1
):
Bottleneck
(
(
conv1
):
Conv2d
(
2048
,
512
,
kernel_size
=(
1
,
1
),
stride
=(
1
,
1
),
bias
=
False
)
(
bn1
):
BatchNorm2d
(
512
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
conv2
):
Conv2d
(
512
,
512
,
kernel_size
=(
3
,
3
),
stride
=(
1
,
1
),
padding
=(
1
,
1
),
bias
=
False
)
(
bn2
):
BatchNorm2d
(
512
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
conv3
):
Conv2d
(
512
,
2048
,
kernel_size
=(
1
,
1
),
stride
=(
1
,
1
),
bias
=
False
)
(
bn3
):
BatchNorm2d
(
2048
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
relu
):
ReLU
(
inplace
=
True
)
)
init_cfg
={
'type'
:
'Constant'
,
'val'
:
0
,
'override'
:
{
'name'
:
'norm3'
}}
(
2
):
Bottleneck
(
(
conv1
):
Conv2d
(
2048
,
512
,
kernel_size
=(
1
,
1
),
stride
=(
1
,
1
),
bias
=
False
)
(
bn1
):
BatchNorm2d
(
512
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
conv2
):
Conv2d
(
512
,
512
,
kernel_size
=(
3
,
3
),
stride
=(
1
,
1
),
padding
=(
1
,
1
),
bias
=
False
)
(
bn2
):
BatchNorm2d
(
512
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
conv3
):
Conv2d
(
512
,
2048
,
kernel_size
=(
1
,
1
),
stride
=(
1
,
1
),
bias
=
False
)
(
bn3
):
BatchNorm2d
(
2048
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
relu
):
ReLU
(
inplace
=
True
)
)
init_cfg
={
'type'
:
'Constant'
,
'val'
:
0
,
'override'
:
{
'name'
:
'norm3'
}}
)
)
init_cfg
=[{
'type'
:
'Kaiming'
,
'layer'
:
'Conv2d'
},
{
'type'
:
'Constant'
,
'val'
:
1
,
'layer'
:
[
'_BatchNorm'
,
'GroupNorm'
]}]
(
img_neck
):
CustomFPN
(
(
lateral_convs
):
ModuleList
(
(
0
):
ConvModule
(
(
conv
):
Conv2d
(
1024
,
256
,
kernel_size
=(
1
,
1
),
stride
=(
1
,
1
))
)
(
1
):
ConvModule
(
(
conv
):
Conv2d
(
2048
,
256
,
kernel_size
=(
1
,
1
),
stride
=(
1
,
1
))
)
)
(
fpn_convs
):
ModuleList
(
(
0
):
ConvModule
(
(
conv
):
Conv2d
(
256
,
256
,
kernel_size
=(
3
,
3
),
stride
=(
1
,
1
),
padding
=(
1
,
1
))
)
)
)
init_cfg
={
'type'
:
'Xavier'
,
'layer'
:
'Conv2d'
,
'distribution'
:
'uniform'
}
(
img_view_transformer
):
LSSViewTransformer
(
(
depth_net
):
Conv2d
(
256
,
152
,
kernel_size
=(
1
,
1
),
stride
=(
1
,
1
))
)
(
img_bev_encoder_backbone
):
CustomResNet
(
(
layers
):
Sequential
(
(
0
):
Sequential
(
(
0
):
BasicBlock
(
(
conv1
):
Conv2d
(
64
,
128
,
kernel_size
=(
3
,
3
),
stride
=(
2
,
2
),
padding
=(
1
,
1
),
bias
=
False
)
(
bn1
):
BatchNorm2d
(
128
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
conv2
):
Conv2d
(
128
,
128
,
kernel_size
=(
3
,
3
),
stride
=(
1
,
1
),
padding
=(
1
,
1
),
bias
=
False
)
(
bn2
):
BatchNorm2d
(
128
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
relu
):
ReLU
(
inplace
=
True
)
(
downsample
):
Conv2d
(
64
,
128
,
kernel_size
=(
3
,
3
),
stride
=(
2
,
2
),
padding
=(
1
,
1
))
)
(
1
):
BasicBlock
(
(
conv1
):
Conv2d
(
128
,
128
,
kernel_size
=(
3
,
3
),
stride
=(
1
,
1
),
padding
=(
1
,
1
),
bias
=
False
)
(
bn1
):
BatchNorm2d
(
128
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
conv2
):
Conv2d
(
128
,
128
,
kernel_size
=(
3
,
3
),
stride
=(
1
,
1
),
padding
=(
1
,
1
),
bias
=
False
)
(
bn2
):
BatchNorm2d
(
128
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
relu
):
ReLU
(
inplace
=
True
)
)
)
(
1
):
Sequential
(
(
0
):
BasicBlock
(
(
conv1
):
Conv2d
(
128
,
256
,
kernel_size
=(
3
,
3
),
stride
=(
2
,
2
),
padding
=(
1
,
1
),
bias
=
False
)
(
bn1
):
BatchNorm2d
(
256
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
conv2
):
Conv2d
(
256
,
256
,
kernel_size
=(
3
,
3
),
stride
=(
1
,
1
),
padding
=(
1
,
1
),
bias
=
False
)
(
bn2
):
BatchNorm2d
(
256
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
relu
):
ReLU
(
inplace
=
True
)
(
downsample
):
Conv2d
(
128
,
256
,
kernel_size
=(
3
,
3
),
stride
=(
2
,
2
),
padding
=(
1
,
1
))
)
(
1
):
BasicBlock
(
(
conv1
):
Conv2d
(
256
,
256
,
kernel_size
=(
3
,
3
),
stride
=(
1
,
1
),
padding
=(
1
,
1
),
bias
=
False
)
(
bn1
):
BatchNorm2d
(
256
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
conv2
):
Conv2d
(
256
,
256
,
kernel_size
=(
3
,
3
),
stride
=(
1
,
1
),
padding
=(
1
,
1
),
bias
=
False
)
(
bn2
):
BatchNorm2d
(
256
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
relu
):
ReLU
(
inplace
=
True
)
)
)
(
2
):
Sequential
(
(
0
):
BasicBlock
(
(
conv1
):
Conv2d
(
256
,
512
,
kernel_size
=(
3
,
3
),
stride
=(
2
,
2
),
padding
=(
1
,
1
),
bias
=
False
)
(
bn1
):
BatchNorm2d
(
512
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
conv2
):
Conv2d
(
512
,
512
,
kernel_size
=(
3
,
3
),
stride
=(
1
,
1
),
padding
=(
1
,
1
),
bias
=
False
)
(
bn2
):
BatchNorm2d
(
512
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
relu
):
ReLU
(
inplace
=
True
)
(
downsample
):
Conv2d
(
256
,
512
,
kernel_size
=(
3
,
3
),
stride
=(
2
,
2
),
padding
=(
1
,
1
))
)
(
1
):
BasicBlock
(
(
conv1
):
Conv2d
(
512
,
512
,
kernel_size
=(
3
,
3
),
stride
=(
1
,
1
),
padding
=(
1
,
1
),
bias
=
False
)
(
bn1
):
BatchNorm2d
(
512
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
conv2
):
Conv2d
(
512
,
512
,
kernel_size
=(
3
,
3
),
stride
=(
1
,
1
),
padding
=(
1
,
1
),
bias
=
False
)
(
bn2
):
BatchNorm2d
(
512
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
relu
):
ReLU
(
inplace
=
True
)
)
)
)
)
(
img_bev_encoder_neck
):
FPN_LSS
(
(
up
):
Upsample
(
scale_factor
=
4.0
,
mode
=
'bilinear'
)
(
conv
):
Sequential
(
(
0
):
Conv2d
(
640
,
512
,
kernel_size
=(
3
,
3
),
stride
=(
1
,
1
),
padding
=(
1
,
1
),
bias
=
False
)
(
1
):
BatchNorm2d
(
512
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
2
):
ReLU
(
inplace
=
True
)
(
3
):
Conv2d
(
512
,
512
,
kernel_size
=(
3
,
3
),
stride
=(
1
,
1
),
padding
=(
1
,
1
),
bias
=
False
)
(
4
):
BatchNorm2d
(
512
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
5
):
ReLU
(
inplace
=
True
)
)
(
up2
):
Sequential
(
(
0
):
Upsample
(
scale_factor
=
2.0
,
mode
=
'bilinear'
)
(
1
):
Conv2d
(
512
,
256
,
kernel_size
=(
3
,
3
),
stride
=(
1
,
1
),
padding
=(
1
,
1
),
bias
=
False
)
(
2
):
BatchNorm2d
(
256
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
3
):
ReLU
(
inplace
=
True
)
(
4
):
Conv2d
(
256
,
256
,
kernel_size
=(
1
,
1
),
stride
=(
1
,
1
))
)
)
(
occ_head
):
BEVOCCHead2D
(
(
final_conv
):
ConvModule
(
(
conv
):
Conv2d
(
256
,
256
,
kernel_size
=(
3
,
3
),
stride
=(
1
,
1
),
padding
=(
1
,
1
))
(
activate
):
ReLU
(
inplace
=
True
)
)
(
predicter
):
Sequential
(
(
0
):
Linear
(
in_features
=
256
,
out_features
=
512
,
bias
=
True
)
(
1
):
Softplus
(
beta
=
1.0
,
threshold
=
20.0
)
(
2
):
Linear
(
in_features
=
512
,
out_features
=
288
,
bias
=
True
)
)
(
loss_occ
):
CrossEntropyLoss
(
avg_non_ignore
=
False
)
)
)
2026
-
04
-
03
16
:
24
:
32
,
890
-
mmdet
-
INFO
-
load
checkpoint
from
local
path
:
ckpts
/
bevdet
-
r50
-
cbgs
.
pth
2026
-
04
-
03
16
:
24
:
32
,
998
-
mmdet
-
WARNING
-
The
model
and
loaded
state
dict
do
not
match
exactly
size
mismatch
for
img_view_transformer
.
depth_net
.
weight
:
copying
a
param
with
shape
torch
.
Size
([
123
,
256
,
1
,
1
])
from
checkpoint
,
the
shape
in
current
model
is
torch
.
Size
([
152
,
256
,
1
,
1
]).
size
mismatch
for
img_view_transformer
.
depth_net
.
bias
:
copying
a
param
with
shape
torch
.
Size
([
123
])
from
checkpoint
,
the
shape
in
current
model
is
torch
.
Size
([
152
]).
unexpected
key
in
source
state_dict
:
pts_bbox_head
.
shared_conv
.
conv
.
weight
,
pts_bbox_head
.
shared_conv
.
bn
.
weight
,
pts_bbox_head
.
shared_conv
.
bn
.
bias
,
pts_bbox_head
.
shared_conv
.
bn
.
running_mean
,
pts_bbox_head
.
shared_conv
.
bn
.
running_var
,
pts_bbox_head
.
shared_conv
.
bn
.
num_batches_tracked
,
pts_bbox_head
.
task_heads
.0
.
reg
.0
.
conv
.
weight
,
pts_bbox_head
.
task_heads
.0
.
reg
.0
.
bn
.
weight
,
pts_bbox_head
.
task_heads
.0
.
reg
.0
.
bn
.
bias
,
pts_bbox_head
.
task_heads
.0
.
reg
.0
.
bn
.
running_mean
,
pts_bbox_head
.
task_heads
.0
.
reg
.0
.
bn
.
running_var
,
pts_bbox_head
.
task_heads
.0
.
reg
.0
.
bn
.
num_batches_tracked
,
pts_bbox_head
.
task_heads
.0
.
reg
.1
.
weight
,
pts_bbox_head
.
task_heads
.0
.
reg
.1
.
bias
,
pts_bbox_head
.
task_heads
.0
.
height
.0
.
conv
.
weight
,
pts_bbox_head
.
task_heads
.0
.
height
.0
.
bn
.
weight
,
pts_bbox_head
.
task_heads
.0
.
height
.0
.
bn
.
bias
,
pts_bbox_head
.
task_heads
.0
.
height
.0
.
bn
.
running_mean
,
pts_bbox_head
.
task_heads
.0
.
height
.0
.
bn
.
running_var
,
pts_bbox_head
.
task_heads
.0
.
height
.0
.
bn
.
num_batches_tracked
,
pts_bbox_head
.
task_heads
.0
.
height
.1
.
weight
,
pts_bbox_head
.
task_heads
.0
.
height
.1
.
bias
,
pts_bbox_head
.
task_heads
.0
.
dim
.0
.
conv
.
weight
,
pts_bbox_head
.
task_heads
.0
.
dim
.0
.
bn
.
weight
,
pts_bbox_head
.
task_heads
.0
.
dim
.0
.
bn
.
bias
,
pts_bbox_head
.
task_heads
.0
.
dim
.0
.
bn
.
running_mean
,
pts_bbox_head
.
task_heads
.0
.
dim
.0
.
bn
.
running_var
,
pts_bbox_head
.
task_heads
.0
.
dim
.0
.
bn
.
num_batches_tracked
,
pts_bbox_head
.
task_heads
.0
.
dim
.1
.
weight
,
pts_bbox_head
.
task_heads
.0
.
dim
.1
.
bias
,
pts_bbox_head
.
task_heads
.0
.
rot
.0
.
conv
.
weight
,
pts_bbox_head
.
task_heads
.0
.
rot
.0
.
bn
.
weight
,
pts_bbox_head
.
task_heads
.0
.
rot
.0
.
bn
.
bias
,
pts_bbox_head
.
task_heads
.0
.
rot
.0
.
bn
.
running_mean
,
pts_bbox_head
.
task_heads
.0
.
rot
.0
.
bn
.
running_var
,
pts_bbox_head
.
task_heads
.0
.
rot
.0
.
bn
.
num_batches_tracked
,
pts_bbox_head
.
task_heads
.0
.
rot
.1
.
weight
,
pts_bbox_head
.
task_heads
.0
.
rot
.1
.
bias
,
pts_bbox_head
.
task_heads
.0
.
vel
.0
.
conv
.
weight
,
pts_bbox_head
.
task_heads
.0
.
vel
.0
.
bn
.
weight
,
pts_bbox_head
.
task_heads
.0
.
vel
.0
.
bn
.
bias
,
pts_bbox_head
.
task_heads
.0
.
vel
.0
.
bn
.
running_mean
,
pts_bbox_head
.
task_heads
.0
.
vel
.0
.
bn
.
running_var
,
pts_bbox_head
.
task_heads
.0
.
vel
.0
.
bn
.
num_batches_tracked
,
pts_bbox_head
.
task_heads
.0
.
vel
.1
.
weight
,
pts_bbox_head
.
task_heads
.0
.
vel
.1
.
bias
,
pts_bbox_head
.
task_heads
.0
.
heatmap
.0
.
conv
.
weight
,
pts_bbox_head
.
task_heads
.0
.
heatmap
.0
.
bn
.
weight
,
pts_bbox_head
.
task_heads
.0
.
heatmap
.0
.
bn
.
bias
,
pts_bbox_head
.
task_heads
.0
.
heatmap
.0
.
bn
.
running_mean
,
pts_bbox_head
.
task_heads
.0
.
heatmap
.0
.
bn
.
running_var
,
pts_bbox_head
.
task_heads
.0
.
heatmap
.0
.
bn
.
num_batches_tracked
,
pts_bbox_head
.
task_heads
.0
.
heatmap
.1
.
weight
,
pts_bbox_head
.
task_heads
.0
.
heatmap
.1
.
bias
missing
keys
in
source
state_dict
:
occ_head
.
final_conv
.
conv
.
weight
,
occ_head
.
final_conv
.
conv
.
bias
,
occ_head
.
predicter
.0
.
weight
,
occ_head
.
predicter
.0
.
bias
,
occ_head
.
predicter
.2
.
weight
,
occ_head
.
predicter
.2
.
bias
2026
-
04
-
03
16
:
24
:
33
,
000
-
mmdet
-
INFO
-
Start
running
,
host
:
root
@
bw61
,
work_dir
:
/
workspace
/
Flashocc
/
work_dirs
/
flashocc
-
r50
2026
-
04
-
03
16
:
24
:
33
,
001
-
mmdet
-
INFO
-
Hooks
will
be
executed
in
the
following
order
:
before_run
:
(
VERY_HIGH
)
StepLrUpdaterHook
(
NORMAL
)
CheckpointHook
(
NORMAL
)
MEGVIIEMAHook
(
VERY_LOW
)
TextLoggerHook
(
VERY_LOW
)
TensorboardLoggerHook
--------------------
before_train_epoch
:
(
VERY_HIGH
)
StepLrUpdaterHook
(
NORMAL
)
DistSamplerSeedHook
(
LOW
)
IterTimerHook
(
VERY_LOW
)
TextLoggerHook
(
VERY_LOW
)
TensorboardLoggerHook
--------------------
before_train_iter
:
(
VERY_HIGH
)
StepLrUpdaterHook
(
LOW
)
IterTimerHook
--------------------
after_train_iter
:
(
ABOVE_NORMAL
)
OptimizerHook
(
NORMAL
)
CheckpointHook
(
NORMAL
)
MEGVIIEMAHook
(
LOW
)
IterTimerHook
(
VERY_LOW
)
TextLoggerHook
(
VERY_LOW
)
TensorboardLoggerHook
--------------------
after_train_epoch
:
(
NORMAL
)
CheckpointHook
(
NORMAL
)
MEGVIIEMAHook
(
VERY_LOW
)
TextLoggerHook
(
VERY_LOW
)
TensorboardLoggerHook
--------------------
before_val_epoch
:
(
NORMAL
)
DistSamplerSeedHook
(
LOW
)
IterTimerHook
(
VERY_LOW
)
TextLoggerHook
(
VERY_LOW
)
TensorboardLoggerHook
--------------------
before_val_iter
:
(
LOW
)
IterTimerHook
--------------------
after_val_iter
:
(
LOW
)
IterTimerHook
--------------------
after_val_epoch
:
(
VERY_LOW
)
TextLoggerHook
(
VERY_LOW
)
TensorboardLoggerHook
--------------------
after_run
:
(
VERY_LOW
)
TextLoggerHook
(
VERY_LOW
)
TensorboardLoggerHook
--------------------
2026
-
04
-
03
16
:
24
:
33
,
001
-
mmdet
-
INFO
-
workflow
:
[(
'train'
,
1
)],
max
:
24
epochs
2026
-
04
-
03
16
:
24
:
33
,
001
-
mmdet
-
INFO
-
Checkpoints
will
be
saved
to
/
workspace
/
Flashocc
/
work_dirs
/
flashocc
-
r50
by
HardDiskBackend
.
docker-hub/FlashOCC/Flashocc/work_dirs/flashocc-r50/20260403_162421.log.json
0 → 100644
View file @
d2b71343
{
"env_info"
:
"sys.platform: linux
\n
Python: 3.10.12 (main, Aug 15 2025, 14:32:43) [GCC 11.4.0]
\n
CUDA available: True
\n
GPU 0,1,2,3,4,5,6,7: BW1000_H
\n
CUDA_HOME: /opt/dtk
\n
NVCC: Not Available
\n
GCC: x86_64-linux-gnu-gcc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0
\n
PyTorch: 2.5.1
\n
PyTorch compiling details: PyTorch built with:
\n
- GCC 10.3
\n
- C++ Version: 201703
\n
- Intel(R) Math Kernel Library Version 2020.0.4 Product Build 20200917 for Intel(R) 64 architecture applications
\n
- OpenMP 201511 (a.k.a. OpenMP 4.5)
\n
- LAPACK is enabled (usually provided by MKL)
\n
- NNPACK is enabled
\n
- CPU capability usage: AVX512
\n
- HIP Runtime 6.3.25521
\n
- MIOpen 2.18.0
\n
- Magma 2.8.0
\n
- Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, CXX_COMPILER=/opt/rh/gcc-toolset-10/root/usr/bin/c++, CXX_FLAGS= -D_GLIBCXX_USE_CXX11_ABI=1 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOCUPTI -DLIBKINETO_NOXPUPTI=ON -DUSE_FBGEMM -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wsuggest-override -Wno-psabi -Wno-error=old-style-cast -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Wno-stringop-overflow, FORCE_FALLBACK_CUDA_MPI=1, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, TORCH_VERSION=2.5.1, USE_CUDA=0, USE_CUDNN=OFF, USE_CUSPARSELT=OFF, USE_EXCEPTION_PTR=1, USE_GFLAGS=1, USE_GLOG=1, USE_GLOO=1, USE_MKL=ON, USE_MKLDNN=0, USE_MPI=1, USE_NCCL=1, USE_NNPACK=ON, USE_OPENMP=1, USE_ROCM=ON, USE_ROCM_KERNEL_ASSERT=OFF,
\n\n
TorchVision: 0.20.1
\n
OpenCV: 4.12.0
\n
MMCV: 1.6.1
\n
MMCV Compiler: GCC 10.3
\n
MMCV CUDA Compiler: rocm not available
\n
MMDetection: 2.25.1
\n
MMSegmentation: 0.25.0
\n
MMDetection3D: 1.0.0rc4+
\n
spconv2.0: False"
,
"config"
:
"point_cloud_range = [-51.2, -51.2, -5.0, 51.2, 51.2, 3.0]
\n
class_names = [
\n
'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',
\n
'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
\n
]
\n
dataset_type = 'NuScenesDatasetOccpancy'
\n
data_root = 'data/nuscenes/'
\n
input_modality = dict(
\n
use_lidar=False,
\n
use_camera=True,
\n
use_radar=False,
\n
use_map=False,
\n
use_external=False)
\n
file_client_args = dict(backend='disk')
\n
train_pipeline = [
\n
dict(
\n
type='PrepareImageInputs',
\n
is_train=True,
\n
data_config=dict(
\n
cams=[
\n
'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT',
\n
'CAM_BACK_LEFT', 'CAM_BACK', 'CAM_BACK_RIGHT'
\n
],
\n
Ncams=6,
\n
input_size=(256, 704),
\n
src_size=(900, 1600),
\n
resize=(-0.06, 0.11),
\n
rot=(-5.4, 5.4),
\n
flip=True,
\n
crop_h=(0.0, 0.0),
\n
resize_test=0.0),
\n
sequential=False),
\n
dict(
\n
type='LoadAnnotationsBEVDepth',
\n
bda_aug_conf=dict(
\n
rot_lim=(-0.0, 0.0),
\n
scale_lim=(1.0, 1.0),
\n
flip_dx_ratio=0.5,
\n
flip_dy_ratio=0.5),
\n
classes=[
\n
'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
\n
'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
\n
],
\n
is_train=True),
\n
dict(type='LoadOccGTFromFile'),
\n
dict(
\n
type='LoadPointsFromFile',
\n
coord_type='LIDAR',
\n
load_dim=5,
\n
use_dim=5,
\n
file_client_args=dict(backend='disk')),
\n
dict(
\n
type='PointToMultiViewDepth',
\n
downsample=1,
\n
grid_config=dict(
\n
x=[-40, 40, 0.4],
\n
y=[-40, 40, 0.4],
\n
z=[-1, 5.4, 6.4],
\n
depth=[1.0, 45.0, 0.5])),
\n
dict(
\n
type='DefaultFormatBundle3D',
\n
class_names=[
\n
'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
\n
'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
\n
]),
\n
dict(
\n
type='Collect3D',
\n
keys=[
\n
'img_inputs', 'gt_depth', 'voxel_semantics', 'mask_lidar',
\n
'mask_camera'
\n
])
\n
]
\n
test_pipeline = [
\n
dict(
\n
type='PrepareImageInputs',
\n
data_config=dict(
\n
cams=[
\n
'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT',
\n
'CAM_BACK_LEFT', 'CAM_BACK', 'CAM_BACK_RIGHT'
\n
],
\n
Ncams=6,
\n
input_size=(256, 704),
\n
src_size=(900, 1600),
\n
resize=(-0.06, 0.11),
\n
rot=(-5.4, 5.4),
\n
flip=True,
\n
crop_h=(0.0, 0.0),
\n
resize_test=0.0),
\n
sequential=False),
\n
dict(
\n
type='LoadAnnotationsBEVDepth',
\n
bda_aug_conf=dict(
\n
rot_lim=(-0.0, 0.0),
\n
scale_lim=(1.0, 1.0),
\n
flip_dx_ratio=0.5,
\n
flip_dy_ratio=0.5),
\n
classes=[
\n
'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
\n
'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
\n
],
\n
is_train=False),
\n
dict(
\n
type='LoadPointsFromFile',
\n
coord_type='LIDAR',
\n
load_dim=5,
\n
use_dim=5,
\n
file_client_args=dict(backend='disk')),
\n
dict(
\n
type='MultiScaleFlipAug3D',
\n
img_scale=(1333, 800),
\n
pts_scale_ratio=1,
\n
flip=False,
\n
transforms=[
\n
dict(
\n
type='DefaultFormatBundle3D',
\n
class_names=[
\n
'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
\n
'barrier', 'motorcycle', 'bicycle', 'pedestrian',
\n
'traffic_cone'
\n
],
\n
with_label=False),
\n
dict(type='Collect3D', keys=['points', 'img_inputs'])
\n
])
\n
]
\n
eval_pipeline = [
\n
dict(
\n
type='LoadPointsFromFile',
\n
coord_type='LIDAR',
\n
load_dim=5,
\n
use_dim=5,
\n
file_client_args=dict(backend='disk')),
\n
dict(
\n
type='LoadPointsFromMultiSweeps',
\n
sweeps_num=10,
\n
file_client_args=dict(backend='disk')),
\n
dict(
\n
type='DefaultFormatBundle3D',
\n
class_names=[
\n
'car', 'truck', 'trailer', 'bus', 'construction_vehicle',
\n
'bicycle', 'motorcycle', 'pedestrian', 'traffic_cone', 'barrier'
\n
],
\n
with_label=False),
\n
dict(type='Collect3D', keys=['points'])
\n
]
\n
data = dict(
\n
samples_per_gpu=24,
\n
workers_per_gpu=24,
\n
train=dict(
\n
type='NuScenesDatasetOccpancy',
\n
data_root='data/nuscenes/',
\n
ann_file='data/nuscenes/bevdetv2-nuscenes_infos_train.pkl',
\n
pipeline=[
\n
dict(
\n
type='PrepareImageInputs',
\n
is_train=True,
\n
data_config=dict(
\n
cams=[
\n
'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT',
\n
'CAM_BACK_LEFT', 'CAM_BACK', 'CAM_BACK_RIGHT'
\n
],
\n
Ncams=6,
\n
input_size=(256, 704),
\n
src_size=(900, 1600),
\n
resize=(-0.06, 0.11),
\n
rot=(-5.4, 5.4),
\n
flip=True,
\n
crop_h=(0.0, 0.0),
\n
resize_test=0.0),
\n
sequential=False),
\n
dict(
\n
type='LoadAnnotationsBEVDepth',
\n
bda_aug_conf=dict(
\n
rot_lim=(-0.0, 0.0),
\n
scale_lim=(1.0, 1.0),
\n
flip_dx_ratio=0.5,
\n
flip_dy_ratio=0.5),
\n
classes=[
\n
'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
\n
'barrier', 'motorcycle', 'bicycle', 'pedestrian',
\n
'traffic_cone'
\n
],
\n
is_train=True),
\n
dict(type='LoadOccGTFromFile'),
\n
dict(
\n
type='LoadPointsFromFile',
\n
coord_type='LIDAR',
\n
load_dim=5,
\n
use_dim=5,
\n
file_client_args=dict(backend='disk')),
\n
dict(
\n
type='PointToMultiViewDepth',
\n
downsample=1,
\n
grid_config=dict(
\n
x=[-40, 40, 0.4],
\n
y=[-40, 40, 0.4],
\n
z=[-1, 5.4, 6.4],
\n
depth=[1.0, 45.0, 0.5])),
\n
dict(
\n
type='DefaultFormatBundle3D',
\n
class_names=[
\n
'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
\n
'barrier', 'motorcycle', 'bicycle', 'pedestrian',
\n
'traffic_cone'
\n
]),
\n
dict(
\n
type='Collect3D',
\n
keys=[
\n
'img_inputs', 'gt_depth', 'voxel_semantics', 'mask_lidar',
\n
'mask_camera'
\n
])
\n
],
\n
classes=[
\n
'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
\n
'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
\n
],
\n
modality=dict(
\n
use_lidar=False,
\n
use_camera=True,
\n
use_radar=False,
\n
use_map=False,
\n
use_external=False),
\n
test_mode=False,
\n
box_type_3d='LiDAR',
\n
use_valid_flag=True,
\n
stereo=False,
\n
filter_empty_gt=False,
\n
img_info_prototype='bevdet'),
\n
val=dict(
\n
type='NuScenesDatasetOccpancy',
\n
data_root='data/nuscenes/',
\n
ann_file='data/nuscenes/bevdetv2-nuscenes_infos_val.pkl',
\n
pipeline=[
\n
dict(
\n
type='PrepareImageInputs',
\n
data_config=dict(
\n
cams=[
\n
'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT',
\n
'CAM_BACK_LEFT', 'CAM_BACK', 'CAM_BACK_RIGHT'
\n
],
\n
Ncams=6,
\n
input_size=(256, 704),
\n
src_size=(900, 1600),
\n
resize=(-0.06, 0.11),
\n
rot=(-5.4, 5.4),
\n
flip=True,
\n
crop_h=(0.0, 0.0),
\n
resize_test=0.0),
\n
sequential=False),
\n
dict(
\n
type='LoadAnnotationsBEVDepth',
\n
bda_aug_conf=dict(
\n
rot_lim=(-0.0, 0.0),
\n
scale_lim=(1.0, 1.0),
\n
flip_dx_ratio=0.5,
\n
flip_dy_ratio=0.5),
\n
classes=[
\n
'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
\n
'barrier', 'motorcycle', 'bicycle', 'pedestrian',
\n
'traffic_cone'
\n
],
\n
is_train=False),
\n
dict(
\n
type='LoadPointsFromFile',
\n
coord_type='LIDAR',
\n
load_dim=5,
\n
use_dim=5,
\n
file_client_args=dict(backend='disk')),
\n
dict(
\n
type='MultiScaleFlipAug3D',
\n
img_scale=(1333, 800),
\n
pts_scale_ratio=1,
\n
flip=False,
\n
transforms=[
\n
dict(
\n
type='DefaultFormatBundle3D',
\n
class_names=[
\n
'car', 'truck', 'construction_vehicle', 'bus',
\n
'trailer', 'barrier', 'motorcycle', 'bicycle',
\n
'pedestrian', 'traffic_cone'
\n
],
\n
with_label=False),
\n
dict(type='Collect3D', keys=['points', 'img_inputs'])
\n
])
\n
],
\n
classes=[
\n
'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
\n
'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
\n
],
\n
modality=dict(
\n
use_lidar=False,
\n
use_camera=True,
\n
use_radar=False,
\n
use_map=False,
\n
use_external=False),
\n
test_mode=True,
\n
box_type_3d='LiDAR',
\n
stereo=False,
\n
filter_empty_gt=False,
\n
img_info_prototype='bevdet'),
\n
test=dict(
\n
type='NuScenesDatasetOccpancy',
\n
data_root='data/nuscenes/',
\n
ann_file='data/nuscenes/bevdetv2-nuscenes_infos_val.pkl',
\n
pipeline=[
\n
dict(
\n
type='PrepareImageInputs',
\n
data_config=dict(
\n
cams=[
\n
'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT',
\n
'CAM_BACK_LEFT', 'CAM_BACK', 'CAM_BACK_RIGHT'
\n
],
\n
Ncams=6,
\n
input_size=(256, 704),
\n
src_size=(900, 1600),
\n
resize=(-0.06, 0.11),
\n
rot=(-5.4, 5.4),
\n
flip=True,
\n
crop_h=(0.0, 0.0),
\n
resize_test=0.0),
\n
sequential=False),
\n
dict(
\n
type='LoadAnnotationsBEVDepth',
\n
bda_aug_conf=dict(
\n
rot_lim=(-0.0, 0.0),
\n
scale_lim=(1.0, 1.0),
\n
flip_dx_ratio=0.5,
\n
flip_dy_ratio=0.5),
\n
classes=[
\n
'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
\n
'barrier', 'motorcycle', 'bicycle', 'pedestrian',
\n
'traffic_cone'
\n
],
\n
is_train=False),
\n
dict(
\n
type='LoadPointsFromFile',
\n
coord_type='LIDAR',
\n
load_dim=5,
\n
use_dim=5,
\n
file_client_args=dict(backend='disk')),
\n
dict(
\n
type='MultiScaleFlipAug3D',
\n
img_scale=(1333, 800),
\n
pts_scale_ratio=1,
\n
flip=False,
\n
transforms=[
\n
dict(
\n
type='DefaultFormatBundle3D',
\n
class_names=[
\n
'car', 'truck', 'construction_vehicle', 'bus',
\n
'trailer', 'barrier', 'motorcycle', 'bicycle',
\n
'pedestrian', 'traffic_cone'
\n
],
\n
with_label=False),
\n
dict(type='Collect3D', keys=['points', 'img_inputs'])
\n
])
\n
],
\n
classes=[
\n
'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
\n
'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
\n
],
\n
modality=dict(
\n
use_lidar=False,
\n
use_camera=True,
\n
use_radar=False,
\n
use_map=False,
\n
use_external=False),
\n
test_mode=True,
\n
box_type_3d='LiDAR',
\n
stereo=False,
\n
filter_empty_gt=False,
\n
img_info_prototype='bevdet'))
\n
evaluation = dict(
\n
interval=1,
\n
pipeline=[
\n
dict(
\n
type='PrepareImageInputs',
\n
data_config=dict(
\n
cams=[
\n
'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT',
\n
'CAM_BACK_LEFT', 'CAM_BACK', 'CAM_BACK_RIGHT'
\n
],
\n
Ncams=6,
\n
input_size=(256, 704),
\n
src_size=(900, 1600),
\n
resize=(-0.06, 0.11),
\n
rot=(-5.4, 5.4),
\n
flip=True,
\n
crop_h=(0.0, 0.0),
\n
resize_test=0.0),
\n
sequential=False),
\n
dict(
\n
type='LoadAnnotationsBEVDepth',
\n
bda_aug_conf=dict(
\n
rot_lim=(-0.0, 0.0),
\n
scale_lim=(1.0, 1.0),
\n
flip_dx_ratio=0.5,
\n
flip_dy_ratio=0.5),
\n
classes=[
\n
'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
\n
'barrier', 'motorcycle', 'bicycle', 'pedestrian',
\n
'traffic_cone'
\n
],
\n
is_train=False),
\n
dict(
\n
type='LoadPointsFromFile',
\n
coord_type='LIDAR',
\n
load_dim=5,
\n
use_dim=5,
\n
file_client_args=dict(backend='disk')),
\n
dict(
\n
type='MultiScaleFlipAug3D',
\n
img_scale=(1333, 800),
\n
pts_scale_ratio=1,
\n
flip=False,
\n
transforms=[
\n
dict(
\n
type='DefaultFormatBundle3D',
\n
class_names=[
\n
'car', 'truck', 'construction_vehicle', 'bus',
\n
'trailer', 'barrier', 'motorcycle', 'bicycle',
\n
'pedestrian', 'traffic_cone'
\n
],
\n
with_label=False),
\n
dict(type='Collect3D', keys=['points', 'img_inputs'])
\n
])
\n
],
\n
start=20)
\n
checkpoint_config = dict(interval=1, max_keep_ckpts=5)
\n
log_config = dict(
\n
interval=1,
\n
hooks=[dict(type='TextLoggerHook'),
\n
dict(type='TensorboardLoggerHook')])
\n
dist_params = dict(backend='nccl')
\n
log_level = 'INFO'
\n
work_dir = './work_dirs/flashocc-r50'
\n
load_from = 'ckpts/bevdet-r50-cbgs.pth'
\n
resume_from = None
\n
workflow = [('train', 1)]
\n
opencv_num_threads = 0
\n
mp_start_method = 'fork'
\n
plugin = True
\n
plugin_dir = 'projects/mmdet3d_plugin/'
\n
data_config = dict(
\n
cams=[
\n
'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT', 'CAM_BACK_LEFT',
\n
'CAM_BACK', 'CAM_BACK_RIGHT'
\n
],
\n
Ncams=6,
\n
input_size=(256, 704),
\n
src_size=(900, 1600),
\n
resize=(-0.06, 0.11),
\n
rot=(-5.4, 5.4),
\n
flip=True,
\n
crop_h=(0.0, 0.0),
\n
resize_test=0.0)
\n
grid_config = dict(
\n
x=[-40, 40, 0.4],
\n
y=[-40, 40, 0.4],
\n
z=[-1, 5.4, 6.4],
\n
depth=[1.0, 45.0, 0.5])
\n
voxel_size = [0.1, 0.1, 0.2]
\n
numC_Trans = 64
\n
model = dict(
\n
type='BEVDetOCC',
\n
img_backbone=dict(
\n
type='ResNet',
\n
depth=50,
\n
num_stages=4,
\n
out_indices=(2, 3),
\n
frozen_stages=-1,
\n
norm_cfg=dict(type='BN', requires_grad=True),
\n
norm_eval=False,
\n
with_cp=True,
\n
style='pytorch'),
\n
img_neck=dict(
\n
type='CustomFPN',
\n
in_channels=[1024, 2048],
\n
out_channels=256,
\n
num_outs=1,
\n
start_level=0,
\n
out_ids=[0]),
\n
img_view_transformer=dict(
\n
type='LSSViewTransformer',
\n
grid_config=dict(
\n
x=[-40, 40, 0.4],
\n
y=[-40, 40, 0.4],
\n
z=[-1, 5.4, 6.4],
\n
depth=[1.0, 45.0, 0.5]),
\n
input_size=(256, 704),
\n
in_channels=256,
\n
out_channels=64,
\n
sid=False,
\n
collapse_z=True,
\n
downsample=16),
\n
img_bev_encoder_backbone=dict(
\n
type='CustomResNet', numC_input=64, num_channels=[128, 256, 512]),
\n
img_bev_encoder_neck=dict(
\n
type='FPN_LSS', in_channels=640, out_channels=256),
\n
occ_head=dict(
\n
type='BEVOCCHead2D',
\n
in_dim=256,
\n
out_dim=256,
\n
Dz=16,
\n
use_mask=True,
\n
num_classes=18,
\n
use_predicter=True,
\n
class_balance=False,
\n
loss_occ=dict(
\n
type='CrossEntropyLoss',
\n
use_sigmoid=False,
\n
ignore_index=255,
\n
loss_weight=1.0)))
\n
bda_aug_conf = dict(
\n
rot_lim=(-0.0, 0.0),
\n
scale_lim=(1.0, 1.0),
\n
flip_dx_ratio=0.5,
\n
flip_dy_ratio=0.5)
\n
share_data_config = dict(
\n
type='NuScenesDatasetOccpancy',
\n
data_root='data/nuscenes/',
\n
classes=[
\n
'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',
\n
'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
\n
],
\n
modality=dict(
\n
use_lidar=False,
\n
use_camera=True,
\n
use_radar=False,
\n
use_map=False,
\n
use_external=False),
\n
stereo=False,
\n
filter_empty_gt=False,
\n
img_info_prototype='bevdet')
\n
test_data_config = dict(
\n
pipeline=[
\n
dict(
\n
type='PrepareImageInputs',
\n
data_config=dict(
\n
cams=[
\n
'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT',
\n
'CAM_BACK_LEFT', 'CAM_BACK', 'CAM_BACK_RIGHT'
\n
],
\n
Ncams=6,
\n
input_size=(256, 704),
\n
src_size=(900, 1600),
\n
resize=(-0.06, 0.11),
\n
rot=(-5.4, 5.4),
\n
flip=True,
\n
crop_h=(0.0, 0.0),
\n
resize_test=0.0),
\n
sequential=False),
\n
dict(
\n
type='LoadAnnotationsBEVDepth',
\n
bda_aug_conf=dict(
\n
rot_lim=(-0.0, 0.0),
\n
scale_lim=(1.0, 1.0),
\n
flip_dx_ratio=0.5,
\n
flip_dy_ratio=0.5),
\n
classes=[
\n
'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
\n
'barrier', 'motorcycle', 'bicycle', 'pedestrian',
\n
'traffic_cone'
\n
],
\n
is_train=False),
\n
dict(
\n
type='LoadPointsFromFile',
\n
coord_type='LIDAR',
\n
load_dim=5,
\n
use_dim=5,
\n
file_client_args=dict(backend='disk')),
\n
dict(
\n
type='MultiScaleFlipAug3D',
\n
img_scale=(1333, 800),
\n
pts_scale_ratio=1,
\n
flip=False,
\n
transforms=[
\n
dict(
\n
type='DefaultFormatBundle3D',
\n
class_names=[
\n
'car', 'truck', 'construction_vehicle', 'bus',
\n
'trailer', 'barrier', 'motorcycle', 'bicycle',
\n
'pedestrian', 'traffic_cone'
\n
],
\n
with_label=False),
\n
dict(type='Collect3D', keys=['points', 'img_inputs'])
\n
])
\n
],
\n
ann_file='data/nuscenes/bevdetv2-nuscenes_infos_val.pkl',
\n
type='NuScenesDatasetOccpancy',
\n
data_root='data/nuscenes/',
\n
classes=[
\n
'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',
\n
'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
\n
],
\n
modality=dict(
\n
use_lidar=False,
\n
use_camera=True,
\n
use_radar=False,
\n
use_map=False,
\n
use_external=False),
\n
stereo=False,
\n
filter_empty_gt=False,
\n
img_info_prototype='bevdet')
\n
key = 'test'
\n
optimizer = dict(type='AdamW', lr=0.0001, weight_decay=0.01)
\n
optimizer_config = dict(grad_clip=dict(max_norm=5, norm_type=2))
\n
lr_config = dict(
\n
policy='step',
\n
warmup='linear',
\n
warmup_iters=200,
\n
warmup_ratio=0.001,
\n
step=[24])
\n
runner = dict(type='EpochBasedRunner', max_epochs=24)
\n
custom_hooks = [
\n
dict(type='MEGVIIEMAHook', init_updates=10560, priority='NORMAL')
\n
]
\n
gpu_ids = range(0, 8)
\n
"
,
"seed"
:
0
,
"exp_name"
:
"flashocc-r50.py"
}
docker-hub/FlashOCC/Flashocc/work_dirs/flashocc-r50/20260403_162651.log
0 → 100644
View file @
d2b71343
2026
-
04
-
03
16
:
26
:
51
,
896
-
mmdet
-
INFO
-
Environment
info
:
------------------------------------------------------------
sys
.
platform
:
linux
Python
:
3.10.12
(
main
,
Aug
15
2025
,
14
:
32
:
43
)
[
GCC
11.4.0
]
CUDA
available
:
True
GPU
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
:
BW1000_H
CUDA_HOME
:
/
opt
/
dtk
NVCC
:
Not
Available
GCC
:
x86_64
-
linux
-
gnu
-
gcc
(
Ubuntu
11.4.0
-
1u
buntu1
~
22.04
)
11.4.0
PyTorch
:
2.5.1
PyTorch
compiling
details
:
PyTorch
built
with
:
-
GCC
10.3
-
C
++
Version
:
201703
-
Intel
(
R
)
Math
Kernel
Library
Version
2020.0.4
Product
Build
20200917
for
Intel
(
R
)
64
architecture
applications
-
OpenMP
201511
(
a
.
k
.
a
.
OpenMP
4.5
)
-
LAPACK
is
enabled
(
usually
provided
by
MKL
)
-
NNPACK
is
enabled
-
CPU
capability
usage
:
AVX512
-
HIP
Runtime
6.3.25521
-
MIOpen
2.18.0
-
Magma
2.8.0
-
Build
settings
:
BLAS_INFO
=
mkl
,
BUILD_TYPE
=
Release
,
CXX_COMPILER
=/
opt
/
rh
/
gcc
-
toolset
-
10
/
root
/
usr
/
bin
/
c
++,
CXX_FLAGS
=
-
D_GLIBCXX_USE_CXX11_ABI
=
1
-
fvisibility
-
inlines
-
hidden
-
DUSE_PTHREADPOOL
-
DNDEBUG
-
DUSE_KINETO
-
DLIBKINETO_NOCUPTI
-
DLIBKINETO_NOXPUPTI
=
ON
-
DUSE_FBGEMM
-
DUSE_PYTORCH_QNNPACK
-
DUSE_XNNPACK
-
DSYMBOLICATE_MOBILE_DEBUG_HANDLE
-
O2
-
fPIC
-
Wall
-
Wextra
-
Werror
=
return
-
type
-
Werror
=
non
-
virtual
-
dtor
-
Werror
=
bool
-
operation
-
Wnarrowing
-
Wno
-
missing
-
field
-
initializers
-
Wno
-
type
-
limits
-
Wno
-
array
-
bounds
-
Wno
-
unknown
-
pragmas
-
Wno
-
unused
-
parameter
-
Wno
-
strict
-
overflow
-
Wno
-
strict
-
aliasing
-
Wno
-
stringop
-
overflow
-
Wsuggest
-
override
-
Wno
-
psabi
-
Wno
-
error
=
old
-
style
-
cast
-
Wno
-
missing
-
braces
-
fdiagnostics
-
color
=
always
-
faligned
-
new
-
Wno
-
unused
-
but
-
set
-
variable
-
Wno
-
maybe
-
uninitialized
-
fno
-
math
-
errno
-
fno
-
trapping
-
math
-
Werror
=
format
-
Wno
-
stringop
-
overflow
,
FORCE_FALLBACK_CUDA_MPI
=
1
,
LAPACK_INFO
=
mkl
,
PERF_WITH_AVX
=
1
,
PERF_WITH_AVX2
=
1
,
TORCH_VERSION
=
2.5.1
,
USE_CUDA
=
0
,
USE_CUDNN
=
OFF
,
USE_CUSPARSELT
=
OFF
,
USE_EXCEPTION_PTR
=
1
,
USE_GFLAGS
=
1
,
USE_GLOG
=
1
,
USE_GLOO
=
1
,
USE_MKL
=
ON
,
USE_MKLDNN
=
0
,
USE_MPI
=
1
,
USE_NCCL
=
1
,
USE_NNPACK
=
ON
,
USE_OPENMP
=
1
,
USE_ROCM
=
ON
,
USE_ROCM_KERNEL_ASSERT
=
OFF
,
TorchVision
:
0.20.1
OpenCV
:
4.12.0
MMCV
:
1.6.1
MMCV
Compiler
:
GCC
10.3
MMCV
CUDA
Compiler
:
rocm
not
available
MMDetection
:
2.25.1
MMSegmentation
:
0.25.0
MMDetection3D
:
1.0.0
rc4
+
spconv2
.0
:
False
------------------------------------------------------------
2026
-
04
-
03
16
:
26
:
52
,
455
-
mmdet
-
INFO
-
Distributed
training
:
True
2026
-
04
-
03
16
:
26
:
53
,
024
-
mmdet
-
INFO
-
Config
:
point_cloud_range
=
[-
51.2
,
-
51.2
,
-
5.0
,
51.2
,
51.2
,
3.0
]
class_names
=
[
'car'
,
'truck'
,
'construction_vehicle'
,
'bus'
,
'trailer'
,
'barrier'
,
'motorcycle'
,
'bicycle'
,
'pedestrian'
,
'traffic_cone'
]
dataset_type
=
'NuScenesDatasetOccpancy'
data_root
=
'data/nuscenes/'
input_modality
=
dict
(
use_lidar
=
False
,
use_camera
=
True
,
use_radar
=
False
,
use_map
=
False
,
use_external
=
False
)
file_client_args
=
dict
(
backend
=
'disk'
)
train_pipeline
=
[
dict
(
type
=
'PrepareImageInputs'
,
is_train
=
True
,
data_config
=
dict
(
cams
=[
'CAM_FRONT_LEFT'
,
'CAM_FRONT'
,
'CAM_FRONT_RIGHT'
,
'CAM_BACK_LEFT'
,
'CAM_BACK'
,
'CAM_BACK_RIGHT'
],
Ncams
=
6
,
input_size
=(
256
,
704
),
src_size
=(
900
,
1600
),
resize
=(-
0.06
,
0.11
),
rot
=(-
5.4
,
5.4
),
flip
=
True
,
crop_h
=(
0.0
,
0.0
),
resize_test
=
0.0
),
sequential
=
False
),
dict
(
type
=
'LoadAnnotationsBEVDepth'
,
bda_aug_conf
=
dict
(
rot_lim
=(-
0.0
,
0.0
),
scale_lim
=(
1.0
,
1.0
),
flip_dx_ratio
=
0.5
,
flip_dy_ratio
=
0.5
),
classes
=[
'car'
,
'truck'
,
'construction_vehicle'
,
'bus'
,
'trailer'
,
'barrier'
,
'motorcycle'
,
'bicycle'
,
'pedestrian'
,
'traffic_cone'
],
is_train
=
True
),
dict
(
type
=
'LoadOccGTFromFile'
),
dict
(
type
=
'LoadPointsFromFile'
,
coord_type
=
'LIDAR'
,
load_dim
=
5
,
use_dim
=
5
,
file_client_args
=
dict
(
backend
=
'disk'
)),
dict
(
type
=
'PointToMultiViewDepth'
,
downsample
=
1
,
grid_config
=
dict
(
x
=[-
40
,
40
,
0.4
],
y
=[-
40
,
40
,
0.4
],
z
=[-
1
,
5.4
,
6.4
],
depth
=[
1.0
,
45.0
,
0.5
])),
dict
(
type
=
'DefaultFormatBundle3D'
,
class_names
=[
'car'
,
'truck'
,
'construction_vehicle'
,
'bus'
,
'trailer'
,
'barrier'
,
'motorcycle'
,
'bicycle'
,
'pedestrian'
,
'traffic_cone'
]),
dict
(
type
=
'Collect3D'
,
keys
=[
'img_inputs'
,
'gt_depth'
,
'voxel_semantics'
,
'mask_lidar'
,
'mask_camera'
])
]
test_pipeline
=
[
dict
(
type
=
'PrepareImageInputs'
,
data_config
=
dict
(
cams
=[
'CAM_FRONT_LEFT'
,
'CAM_FRONT'
,
'CAM_FRONT_RIGHT'
,
'CAM_BACK_LEFT'
,
'CAM_BACK'
,
'CAM_BACK_RIGHT'
],
Ncams
=
6
,
input_size
=(
256
,
704
),
src_size
=(
900
,
1600
),
resize
=(-
0.06
,
0.11
),
rot
=(-
5.4
,
5.4
),
flip
=
True
,
crop_h
=(
0.0
,
0.0
),
resize_test
=
0.0
),
sequential
=
False
),
dict
(
type
=
'LoadAnnotationsBEVDepth'
,
bda_aug_conf
=
dict
(
rot_lim
=(-
0.0
,
0.0
),
scale_lim
=(
1.0
,
1.0
),
flip_dx_ratio
=
0.5
,
flip_dy_ratio
=
0.5
),
classes
=[
'car'
,
'truck'
,
'construction_vehicle'
,
'bus'
,
'trailer'
,
'barrier'
,
'motorcycle'
,
'bicycle'
,
'pedestrian'
,
'traffic_cone'
],
is_train
=
False
),
dict
(
type
=
'LoadPointsFromFile'
,
coord_type
=
'LIDAR'
,
load_dim
=
5
,
use_dim
=
5
,
file_client_args
=
dict
(
backend
=
'disk'
)),
dict
(
type
=
'MultiScaleFlipAug3D'
,
img_scale
=(
1333
,
800
),
pts_scale_ratio
=
1
,
flip
=
False
,
transforms
=[
dict
(
type
=
'DefaultFormatBundle3D'
,
class_names
=[
'car'
,
'truck'
,
'construction_vehicle'
,
'bus'
,
'trailer'
,
'barrier'
,
'motorcycle'
,
'bicycle'
,
'pedestrian'
,
'traffic_cone'
],
with_label
=
False
),
dict
(
type
=
'Collect3D'
,
keys
=[
'points'
,
'img_inputs'
])
])
]
eval_pipeline
=
[
dict
(
type
=
'LoadPointsFromFile'
,
coord_type
=
'LIDAR'
,
load_dim
=
5
,
use_dim
=
5
,
file_client_args
=
dict
(
backend
=
'disk'
)),
dict
(
type
=
'LoadPointsFromMultiSweeps'
,
sweeps_num
=
10
,
file_client_args
=
dict
(
backend
=
'disk'
)),
dict
(
type
=
'DefaultFormatBundle3D'
,
class_names
=[
'car'
,
'truck'
,
'trailer'
,
'bus'
,
'construction_vehicle'
,
'bicycle'
,
'motorcycle'
,
'pedestrian'
,
'traffic_cone'
,
'barrier'
],
with_label
=
False
),
dict
(
type
=
'Collect3D'
,
keys
=[
'points'
])
]
data
=
dict
(
samples_per_gpu
=
24
,
workers_per_gpu
=
24
,
train
=
dict
(
type
=
'NuScenesDatasetOccpancy'
,
data_root
=
'data/nuscenes/'
,
ann_file
=
'data/nuscenes/bevdetv2-nuscenes_infos_train.pkl'
,
pipeline
=[
dict
(
type
=
'PrepareImageInputs'
,
is_train
=
True
,
data_config
=
dict
(
cams
=[
'CAM_FRONT_LEFT'
,
'CAM_FRONT'
,
'CAM_FRONT_RIGHT'
,
'CAM_BACK_LEFT'
,
'CAM_BACK'
,
'CAM_BACK_RIGHT'
],
Ncams
=
6
,
input_size
=(
256
,
704
),
src_size
=(
900
,
1600
),
resize
=(-
0.06
,
0.11
),
rot
=(-
5.4
,
5.4
),
flip
=
True
,
crop_h
=(
0.0
,
0.0
),
resize_test
=
0.0
),
sequential
=
False
),
dict
(
type
=
'LoadAnnotationsBEVDepth'
,
bda_aug_conf
=
dict
(
rot_lim
=(-
0.0
,
0.0
),
scale_lim
=(
1.0
,
1.0
),
flip_dx_ratio
=
0.5
,
flip_dy_ratio
=
0.5
),
classes
=[
'car'
,
'truck'
,
'construction_vehicle'
,
'bus'
,
'trailer'
,
'barrier'
,
'motorcycle'
,
'bicycle'
,
'pedestrian'
,
'traffic_cone'
],
is_train
=
True
),
dict
(
type
=
'LoadOccGTFromFile'
),
dict
(
type
=
'LoadPointsFromFile'
,
coord_type
=
'LIDAR'
,
load_dim
=
5
,
use_dim
=
5
,
file_client_args
=
dict
(
backend
=
'disk'
)),
dict
(
type
=
'PointToMultiViewDepth'
,
downsample
=
1
,
grid_config
=
dict
(
x
=[-
40
,
40
,
0.4
],
y
=[-
40
,
40
,
0.4
],
z
=[-
1
,
5.4
,
6.4
],
depth
=[
1.0
,
45.0
,
0.5
])),
dict
(
type
=
'DefaultFormatBundle3D'
,
class_names
=[
'car'
,
'truck'
,
'construction_vehicle'
,
'bus'
,
'trailer'
,
'barrier'
,
'motorcycle'
,
'bicycle'
,
'pedestrian'
,
'traffic_cone'
]),
dict
(
type
=
'Collect3D'
,
keys
=[
'img_inputs'
,
'gt_depth'
,
'voxel_semantics'
,
'mask_lidar'
,
'mask_camera'
])
],
classes
=[
'car'
,
'truck'
,
'construction_vehicle'
,
'bus'
,
'trailer'
,
'barrier'
,
'motorcycle'
,
'bicycle'
,
'pedestrian'
,
'traffic_cone'
],
modality
=
dict
(
use_lidar
=
False
,
use_camera
=
True
,
use_radar
=
False
,
use_map
=
False
,
use_external
=
False
),
test_mode
=
False
,
box_type_3d
=
'LiDAR'
,
use_valid_flag
=
True
,
stereo
=
False
,
filter_empty_gt
=
False
,
img_info_prototype
=
'bevdet'
),
val
=
dict
(
type
=
'NuScenesDatasetOccpancy'
,
data_root
=
'data/nuscenes/'
,
ann_file
=
'data/nuscenes/bevdetv2-nuscenes_infos_val.pkl'
,
pipeline
=[
dict
(
type
=
'PrepareImageInputs'
,
data_config
=
dict
(
cams
=[
'CAM_FRONT_LEFT'
,
'CAM_FRONT'
,
'CAM_FRONT_RIGHT'
,
'CAM_BACK_LEFT'
,
'CAM_BACK'
,
'CAM_BACK_RIGHT'
],
Ncams
=
6
,
input_size
=(
256
,
704
),
src_size
=(
900
,
1600
),
resize
=(-
0.06
,
0.11
),
rot
=(-
5.4
,
5.4
),
flip
=
True
,
crop_h
=(
0.0
,
0.0
),
resize_test
=
0.0
),
sequential
=
False
),
dict
(
type
=
'LoadAnnotationsBEVDepth'
,
bda_aug_conf
=
dict
(
rot_lim
=(-
0.0
,
0.0
),
scale_lim
=(
1.0
,
1.0
),
flip_dx_ratio
=
0.5
,
flip_dy_ratio
=
0.5
),
classes
=[
'car'
,
'truck'
,
'construction_vehicle'
,
'bus'
,
'trailer'
,
'barrier'
,
'motorcycle'
,
'bicycle'
,
'pedestrian'
,
'traffic_cone'
],
is_train
=
False
),
dict
(
type
=
'LoadPointsFromFile'
,
coord_type
=
'LIDAR'
,
load_dim
=
5
,
use_dim
=
5
,
file_client_args
=
dict
(
backend
=
'disk'
)),
dict
(
type
=
'MultiScaleFlipAug3D'
,
img_scale
=(
1333
,
800
),
pts_scale_ratio
=
1
,
flip
=
False
,
transforms
=[
dict
(
type
=
'DefaultFormatBundle3D'
,
class_names
=[
'car'
,
'truck'
,
'construction_vehicle'
,
'bus'
,
'trailer'
,
'barrier'
,
'motorcycle'
,
'bicycle'
,
'pedestrian'
,
'traffic_cone'
],
with_label
=
False
),
dict
(
type
=
'Collect3D'
,
keys
=[
'points'
,
'img_inputs'
])
])
],
classes
=[
'car'
,
'truck'
,
'construction_vehicle'
,
'bus'
,
'trailer'
,
'barrier'
,
'motorcycle'
,
'bicycle'
,
'pedestrian'
,
'traffic_cone'
],
modality
=
dict
(
use_lidar
=
False
,
use_camera
=
True
,
use_radar
=
False
,
use_map
=
False
,
use_external
=
False
),
test_mode
=
True
,
box_type_3d
=
'LiDAR'
,
stereo
=
False
,
filter_empty_gt
=
False
,
img_info_prototype
=
'bevdet'
),
test
=
dict
(
type
=
'NuScenesDatasetOccpancy'
,
data_root
=
'data/nuscenes/'
,
ann_file
=
'data/nuscenes/bevdetv2-nuscenes_infos_val.pkl'
,
pipeline
=[
dict
(
type
=
'PrepareImageInputs'
,
data_config
=
dict
(
cams
=[
'CAM_FRONT_LEFT'
,
'CAM_FRONT'
,
'CAM_FRONT_RIGHT'
,
'CAM_BACK_LEFT'
,
'CAM_BACK'
,
'CAM_BACK_RIGHT'
],
Ncams
=
6
,
input_size
=(
256
,
704
),
src_size
=(
900
,
1600
),
resize
=(-
0.06
,
0.11
),
rot
=(-
5.4
,
5.4
),
flip
=
True
,
crop_h
=(
0.0
,
0.0
),
resize_test
=
0.0
),
sequential
=
False
),
dict
(
type
=
'LoadAnnotationsBEVDepth'
,
bda_aug_conf
=
dict
(
rot_lim
=(-
0.0
,
0.0
),
scale_lim
=(
1.0
,
1.0
),
flip_dx_ratio
=
0.5
,
flip_dy_ratio
=
0.5
),
classes
=[
'car'
,
'truck'
,
'construction_vehicle'
,
'bus'
,
'trailer'
,
'barrier'
,
'motorcycle'
,
'bicycle'
,
'pedestrian'
,
'traffic_cone'
],
is_train
=
False
),
dict
(
type
=
'LoadPointsFromFile'
,
coord_type
=
'LIDAR'
,
load_dim
=
5
,
use_dim
=
5
,
file_client_args
=
dict
(
backend
=
'disk'
)),
dict
(
type
=
'MultiScaleFlipAug3D'
,
img_scale
=(
1333
,
800
),
pts_scale_ratio
=
1
,
flip
=
False
,
transforms
=[
dict
(
type
=
'DefaultFormatBundle3D'
,
class_names
=[
'car'
,
'truck'
,
'construction_vehicle'
,
'bus'
,
'trailer'
,
'barrier'
,
'motorcycle'
,
'bicycle'
,
'pedestrian'
,
'traffic_cone'
],
with_label
=
False
),
dict
(
type
=
'Collect3D'
,
keys
=[
'points'
,
'img_inputs'
])
])
],
classes
=[
'car'
,
'truck'
,
'construction_vehicle'
,
'bus'
,
'trailer'
,
'barrier'
,
'motorcycle'
,
'bicycle'
,
'pedestrian'
,
'traffic_cone'
],
modality
=
dict
(
use_lidar
=
False
,
use_camera
=
True
,
use_radar
=
False
,
use_map
=
False
,
use_external
=
False
),
test_mode
=
True
,
box_type_3d
=
'LiDAR'
,
stereo
=
False
,
filter_empty_gt
=
False
,
img_info_prototype
=
'bevdet'
))
evaluation
=
dict
(
interval
=
1
,
pipeline
=[
dict
(
type
=
'PrepareImageInputs'
,
data_config
=
dict
(
cams
=[
'CAM_FRONT_LEFT'
,
'CAM_FRONT'
,
'CAM_FRONT_RIGHT'
,
'CAM_BACK_LEFT'
,
'CAM_BACK'
,
'CAM_BACK_RIGHT'
],
Ncams
=
6
,
input_size
=(
256
,
704
),
src_size
=(
900
,
1600
),
resize
=(-
0.06
,
0.11
),
rot
=(-
5.4
,
5.4
),
flip
=
True
,
crop_h
=(
0.0
,
0.0
),
resize_test
=
0.0
),
sequential
=
False
),
dict
(
type
=
'LoadAnnotationsBEVDepth'
,
bda_aug_conf
=
dict
(
rot_lim
=(-
0.0
,
0.0
),
scale_lim
=(
1.0
,
1.0
),
flip_dx_ratio
=
0.5
,
flip_dy_ratio
=
0.5
),
classes
=[
'car'
,
'truck'
,
'construction_vehicle'
,
'bus'
,
'trailer'
,
'barrier'
,
'motorcycle'
,
'bicycle'
,
'pedestrian'
,
'traffic_cone'
],
is_train
=
False
),
dict
(
type
=
'LoadPointsFromFile'
,
coord_type
=
'LIDAR'
,
load_dim
=
5
,
use_dim
=
5
,
file_client_args
=
dict
(
backend
=
'disk'
)),
dict
(
type
=
'MultiScaleFlipAug3D'
,
img_scale
=(
1333
,
800
),
pts_scale_ratio
=
1
,
flip
=
False
,
transforms
=[
dict
(
type
=
'DefaultFormatBundle3D'
,
class_names
=[
'car'
,
'truck'
,
'construction_vehicle'
,
'bus'
,
'trailer'
,
'barrier'
,
'motorcycle'
,
'bicycle'
,
'pedestrian'
,
'traffic_cone'
],
with_label
=
False
),
dict
(
type
=
'Collect3D'
,
keys
=[
'points'
,
'img_inputs'
])
])
],
start
=
20
)
checkpoint_config
=
dict
(
interval
=
1
,
max_keep_ckpts
=
5
)
log_config
=
dict
(
interval
=
1
,
hooks
=[
dict
(
type
=
'TextLoggerHook'
),
dict
(
type
=
'TensorboardLoggerHook'
)])
dist_params
=
dict
(
backend
=
'nccl'
)
log_level
=
'INFO'
work_dir
=
'./work_dirs/flashocc-r50'
load_from
=
'ckpts/bevdet-r50-cbgs.pth'
resume_from
=
None
workflow
=
[(
'train'
,
1
)]
opencv_num_threads
=
0
mp_start_method
=
'fork'
plugin
=
True
plugin_dir
=
'projects/mmdet3d_plugin/'
data_config
=
dict
(
cams
=[
'CAM_FRONT_LEFT'
,
'CAM_FRONT'
,
'CAM_FRONT_RIGHT'
,
'CAM_BACK_LEFT'
,
'CAM_BACK'
,
'CAM_BACK_RIGHT'
],
Ncams
=
6
,
input_size
=(
256
,
704
),
src_size
=(
900
,
1600
),
resize
=(-
0.06
,
0.11
),
rot
=(-
5.4
,
5.4
),
flip
=
True
,
crop_h
=(
0.0
,
0.0
),
resize_test
=
0.0
)
grid_config
=
dict
(
x
=[-
40
,
40
,
0.4
],
y
=[-
40
,
40
,
0.4
],
z
=[-
1
,
5.4
,
6.4
],
depth
=[
1.0
,
45.0
,
0.5
])
voxel_size
=
[
0.1
,
0.1
,
0.2
]
numC_Trans
=
64
model
=
dict
(
type
=
'BEVDetOCC'
,
img_backbone
=
dict
(
type
=
'ResNet'
,
depth
=
50
,
num_stages
=
4
,
out_indices
=(
2
,
3
),
frozen_stages
=-
1
,
norm_cfg
=
dict
(
type
=
'BN'
,
requires_grad
=
True
),
norm_eval
=
False
,
with_cp
=
True
,
style
=
'pytorch'
),
img_neck
=
dict
(
type
=
'CustomFPN'
,
in_channels
=[
1024
,
2048
],
out_channels
=
256
,
num_outs
=
1
,
start_level
=
0
,
out_ids
=[
0
]),
img_view_transformer
=
dict
(
type
=
'LSSViewTransformer'
,
grid_config
=
dict
(
x
=[-
40
,
40
,
0.4
],
y
=[-
40
,
40
,
0.4
],
z
=[-
1
,
5.4
,
6.4
],
depth
=[
1.0
,
45.0
,
0.5
]),
input_size
=(
256
,
704
),
in_channels
=
256
,
out_channels
=
64
,
sid
=
False
,
collapse_z
=
True
,
downsample
=
16
),
img_bev_encoder_backbone
=
dict
(
type
=
'CustomResNet'
,
numC_input
=
64
,
num_channels
=[
128
,
256
,
512
]),
img_bev_encoder_neck
=
dict
(
type
=
'FPN_LSS'
,
in_channels
=
640
,
out_channels
=
256
),
occ_head
=
dict
(
type
=
'BEVOCCHead2D'
,
in_dim
=
256
,
out_dim
=
256
,
Dz
=
16
,
use_mask
=
True
,
num_classes
=
18
,
use_predicter
=
True
,
class_balance
=
False
,
loss_occ
=
dict
(
type
=
'CrossEntropyLoss'
,
use_sigmoid
=
False
,
ignore_index
=
255
,
loss_weight
=
1.0
)))
bda_aug_conf
=
dict
(
rot_lim
=(-
0.0
,
0.0
),
scale_lim
=(
1.0
,
1.0
),
flip_dx_ratio
=
0.5
,
flip_dy_ratio
=
0.5
)
share_data_config
=
dict
(
type
=
'NuScenesDatasetOccpancy'
,
data_root
=
'data/nuscenes/'
,
classes
=[
'car'
,
'truck'
,
'construction_vehicle'
,
'bus'
,
'trailer'
,
'barrier'
,
'motorcycle'
,
'bicycle'
,
'pedestrian'
,
'traffic_cone'
],
modality
=
dict
(
use_lidar
=
False
,
use_camera
=
True
,
use_radar
=
False
,
use_map
=
False
,
use_external
=
False
),
stereo
=
False
,
filter_empty_gt
=
False
,
img_info_prototype
=
'bevdet'
)
test_data_config
=
dict
(
pipeline
=[
dict
(
type
=
'PrepareImageInputs'
,
data_config
=
dict
(
cams
=[
'CAM_FRONT_LEFT'
,
'CAM_FRONT'
,
'CAM_FRONT_RIGHT'
,
'CAM_BACK_LEFT'
,
'CAM_BACK'
,
'CAM_BACK_RIGHT'
],
Ncams
=
6
,
input_size
=(
256
,
704
),
src_size
=(
900
,
1600
),
resize
=(-
0.06
,
0.11
),
rot
=(-
5.4
,
5.4
),
flip
=
True
,
crop_h
=(
0.0
,
0.0
),
resize_test
=
0.0
),
sequential
=
False
),
dict
(
type
=
'LoadAnnotationsBEVDepth'
,
bda_aug_conf
=
dict
(
rot_lim
=(-
0.0
,
0.0
),
scale_lim
=(
1.0
,
1.0
),
flip_dx_ratio
=
0.5
,
flip_dy_ratio
=
0.5
),
classes
=[
'car'
,
'truck'
,
'construction_vehicle'
,
'bus'
,
'trailer'
,
'barrier'
,
'motorcycle'
,
'bicycle'
,
'pedestrian'
,
'traffic_cone'
],
is_train
=
False
),
dict
(
type
=
'LoadPointsFromFile'
,
coord_type
=
'LIDAR'
,
load_dim
=
5
,
use_dim
=
5
,
file_client_args
=
dict
(
backend
=
'disk'
)),
dict
(
type
=
'MultiScaleFlipAug3D'
,
img_scale
=(
1333
,
800
),
pts_scale_ratio
=
1
,
flip
=
False
,
transforms
=[
dict
(
type
=
'DefaultFormatBundle3D'
,
class_names
=[
'car'
,
'truck'
,
'construction_vehicle'
,
'bus'
,
'trailer'
,
'barrier'
,
'motorcycle'
,
'bicycle'
,
'pedestrian'
,
'traffic_cone'
],
with_label
=
False
),
dict
(
type
=
'Collect3D'
,
keys
=[
'points'
,
'img_inputs'
])
])
],
ann_file
=
'data/nuscenes/bevdetv2-nuscenes_infos_val.pkl'
,
type
=
'NuScenesDatasetOccpancy'
,
data_root
=
'data/nuscenes/'
,
classes
=[
'car'
,
'truck'
,
'construction_vehicle'
,
'bus'
,
'trailer'
,
'barrier'
,
'motorcycle'
,
'bicycle'
,
'pedestrian'
,
'traffic_cone'
],
modality
=
dict
(
use_lidar
=
False
,
use_camera
=
True
,
use_radar
=
False
,
use_map
=
False
,
use_external
=
False
),
stereo
=
False
,
filter_empty_gt
=
False
,
img_info_prototype
=
'bevdet'
)
key
=
'test'
optimizer
=
dict
(
type
=
'AdamW'
,
lr
=
0.0001
,
weight_decay
=
0.01
)
optimizer_config
=
dict
(
grad_clip
=
dict
(
max_norm
=
5
,
norm_type
=
2
))
lr_config
=
dict
(
policy
=
'step'
,
warmup
=
'linear'
,
warmup_iters
=
200
,
warmup_ratio
=
0.001
,
step
=[
24
])
runner
=
dict
(
type
=
'EpochBasedRunner'
,
max_epochs
=
24
)
custom_hooks
=
[
dict
(
type
=
'MEGVIIEMAHook'
,
init_updates
=
10560
,
priority
=
'NORMAL'
)
]
gpu_ids
=
range
(
0
,
8
)
2026
-
04
-
03
16
:
26
:
53
,
024
-
mmdet
-
INFO
-
Set
random
seed
to
0
,
deterministic
:
False
2026
-
04
-
03
16
:
26
:
53
,
275
-
mmdet
-
INFO
-
initialize
ResNet
with
init_cfg
[{
'type'
:
'Kaiming'
,
'layer'
:
'Conv2d'
},
{
'type'
:
'Constant'
,
'val'
:
1
,
'layer'
:
[
'_BatchNorm'
,
'GroupNorm'
]}]
2026
-
04
-
03
16
:
26
:
53
,
383
-
mmdet
-
INFO
-
initialize
Bottleneck
with
init_cfg
{
'type'
:
'Constant'
,
'val'
:
0
,
'override'
:
{
'name'
:
'norm3'
}}
2026
-
04
-
03
16
:
26
:
53
,
383
-
mmdet
-
INFO
-
initialize
Bottleneck
with
init_cfg
{
'type'
:
'Constant'
,
'val'
:
0
,
'override'
:
{
'name'
:
'norm3'
}}
2026
-
04
-
03
16
:
26
:
53
,
383
-
mmdet
-
INFO
-
initialize
Bottleneck
with
init_cfg
{
'type'
:
'Constant'
,
'val'
:
0
,
'override'
:
{
'name'
:
'norm3'
}}
2026
-
04
-
03
16
:
26
:
53
,
384
-
mmdet
-
INFO
-
initialize
Bottleneck
with
init_cfg
{
'type'
:
'Constant'
,
'val'
:
0
,
'override'
:
{
'name'
:
'norm3'
}}
2026
-
04
-
03
16
:
26
:
53
,
385
-
mmdet
-
INFO
-
initialize
Bottleneck
with
init_cfg
{
'type'
:
'Constant'
,
'val'
:
0
,
'override'
:
{
'name'
:
'norm3'
}}
2026
-
04
-
03
16
:
26
:
53
,
385
-
mmdet
-
INFO
-
initialize
Bottleneck
with
init_cfg
{
'type'
:
'Constant'
,
'val'
:
0
,
'override'
:
{
'name'
:
'norm3'
}}
2026
-
04
-
03
16
:
26
:
53
,
386
-
mmdet
-
INFO
-
initialize
Bottleneck
with
init_cfg
{
'type'
:
'Constant'
,
'val'
:
0
,
'override'
:
{
'name'
:
'norm3'
}}
2026
-
04
-
03
16
:
26
:
53
,
387
-
mmdet
-
INFO
-
initialize
Bottleneck
with
init_cfg
{
'type'
:
'Constant'
,
'val'
:
0
,
'override'
:
{
'name'
:
'norm3'
}}
2026
-
04
-
03
16
:
26
:
53
,
388
-
mmdet
-
INFO
-
initialize
Bottleneck
with
init_cfg
{
'type'
:
'Constant'
,
'val'
:
0
,
'override'
:
{
'name'
:
'norm3'
}}
2026
-
04
-
03
16
:
26
:
53
,
388
-
mmdet
-
INFO
-
initialize
Bottleneck
with
init_cfg
{
'type'
:
'Constant'
,
'val'
:
0
,
'override'
:
{
'name'
:
'norm3'
}}
2026
-
04
-
03
16
:
26
:
53
,
389
-
mmdet
-
INFO
-
initialize
Bottleneck
with
init_cfg
{
'type'
:
'Constant'
,
'val'
:
0
,
'override'
:
{
'name'
:
'norm3'
}}
2026
-
04
-
03
16
:
26
:
53
,
390
-
mmdet
-
INFO
-
initialize
Bottleneck
with
init_cfg
{
'type'
:
'Constant'
,
'val'
:
0
,
'override'
:
{
'name'
:
'norm3'
}}
2026
-
04
-
03
16
:
26
:
53
,
391
-
mmdet
-
INFO
-
initialize
Bottleneck
with
init_cfg
{
'type'
:
'Constant'
,
'val'
:
0
,
'override'
:
{
'name'
:
'norm3'
}}
2026
-
04
-
03
16
:
26
:
53
,
393
-
mmdet
-
INFO
-
initialize
Bottleneck
with
init_cfg
{
'type'
:
'Constant'
,
'val'
:
0
,
'override'
:
{
'name'
:
'norm3'
}}
2026
-
04
-
03
16
:
26
:
53
,
396
-
mmdet
-
INFO
-
initialize
Bottleneck
with
init_cfg
{
'type'
:
'Constant'
,
'val'
:
0
,
'override'
:
{
'name'
:
'norm3'
}}
2026
-
04
-
03
16
:
26
:
53
,
399
-
mmdet
-
INFO
-
initialize
Bottleneck
with
init_cfg
{
'type'
:
'Constant'
,
'val'
:
0
,
'override'
:
{
'name'
:
'norm3'
}}
2026
-
04
-
03
16
:
26
:
53
,
410
-
mmdet
-
INFO
-
initialize
CustomFPN
with
init_cfg
{
'type'
:
'Xavier'
,
'layer'
:
'Conv2d'
,
'distribution'
:
'uniform'
}
Name
of
parameter
-
Initialization
information
img_backbone
.
conv1
.
weight
-
torch
.
Size
([
64
,
3
,
7
,
7
]):
KaimingInit
:
a
=
0
,
mode
=
fan_out
,
nonlinearity
=
relu
,
distribution
=
normal
,
bias
=
0
img_backbone
.
bn1
.
weight
-
torch
.
Size
([
64
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
bn1
.
bias
-
torch
.
Size
([
64
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer1
.0
.
conv1
.
weight
-
torch
.
Size
([
64
,
64
,
1
,
1
]):
KaimingInit
:
a
=
0
,
mode
=
fan_out
,
nonlinearity
=
relu
,
distribution
=
normal
,
bias
=
0
img_backbone
.
layer1
.0
.
bn1
.
weight
-
torch
.
Size
([
64
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer1
.0
.
bn1
.
bias
-
torch
.
Size
([
64
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer1
.0
.
conv2
.
weight
-
torch
.
Size
([
64
,
64
,
3
,
3
]):
KaimingInit
:
a
=
0
,
mode
=
fan_out
,
nonlinearity
=
relu
,
distribution
=
normal
,
bias
=
0
img_backbone
.
layer1
.0
.
bn2
.
weight
-
torch
.
Size
([
64
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer1
.0
.
bn2
.
bias
-
torch
.
Size
([
64
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer1
.0
.
conv3
.
weight
-
torch
.
Size
([
256
,
64
,
1
,
1
]):
KaimingInit
:
a
=
0
,
mode
=
fan_out
,
nonlinearity
=
relu
,
distribution
=
normal
,
bias
=
0
img_backbone
.
layer1
.0
.
bn3
.
weight
-
torch
.
Size
([
256
]):
ConstantInit
:
val
=
0
,
bias
=
0
img_backbone
.
layer1
.0
.
bn3
.
bias
-
torch
.
Size
([
256
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer1
.0
.
downsample
.0
.
weight
-
torch
.
Size
([
256
,
64
,
1
,
1
]):
KaimingInit
:
a
=
0
,
mode
=
fan_out
,
nonlinearity
=
relu
,
distribution
=
normal
,
bias
=
0
img_backbone
.
layer1
.0
.
downsample
.1
.
weight
-
torch
.
Size
([
256
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer1
.0
.
downsample
.1
.
bias
-
torch
.
Size
([
256
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer1
.1
.
conv1
.
weight
-
torch
.
Size
([
64
,
256
,
1
,
1
]):
KaimingInit
:
a
=
0
,
mode
=
fan_out
,
nonlinearity
=
relu
,
distribution
=
normal
,
bias
=
0
img_backbone
.
layer1
.1
.
bn1
.
weight
-
torch
.
Size
([
64
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer1
.1
.
bn1
.
bias
-
torch
.
Size
([
64
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer1
.1
.
conv2
.
weight
-
torch
.
Size
([
64
,
64
,
3
,
3
]):
KaimingInit
:
a
=
0
,
mode
=
fan_out
,
nonlinearity
=
relu
,
distribution
=
normal
,
bias
=
0
img_backbone
.
layer1
.1
.
bn2
.
weight
-
torch
.
Size
([
64
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer1
.1
.
bn2
.
bias
-
torch
.
Size
([
64
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer1
.1
.
conv3
.
weight
-
torch
.
Size
([
256
,
64
,
1
,
1
]):
KaimingInit
:
a
=
0
,
mode
=
fan_out
,
nonlinearity
=
relu
,
distribution
=
normal
,
bias
=
0
img_backbone
.
layer1
.1
.
bn3
.
weight
-
torch
.
Size
([
256
]):
ConstantInit
:
val
=
0
,
bias
=
0
img_backbone
.
layer1
.1
.
bn3
.
bias
-
torch
.
Size
([
256
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer1
.2
.
conv1
.
weight
-
torch
.
Size
([
64
,
256
,
1
,
1
]):
KaimingInit
:
a
=
0
,
mode
=
fan_out
,
nonlinearity
=
relu
,
distribution
=
normal
,
bias
=
0
img_backbone
.
layer1
.2
.
bn1
.
weight
-
torch
.
Size
([
64
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer1
.2
.
bn1
.
bias
-
torch
.
Size
([
64
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer1
.2
.
conv2
.
weight
-
torch
.
Size
([
64
,
64
,
3
,
3
]):
KaimingInit
:
a
=
0
,
mode
=
fan_out
,
nonlinearity
=
relu
,
distribution
=
normal
,
bias
=
0
img_backbone
.
layer1
.2
.
bn2
.
weight
-
torch
.
Size
([
64
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer1
.2
.
bn2
.
bias
-
torch
.
Size
([
64
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer1
.2
.
conv3
.
weight
-
torch
.
Size
([
256
,
64
,
1
,
1
]):
KaimingInit
:
a
=
0
,
mode
=
fan_out
,
nonlinearity
=
relu
,
distribution
=
normal
,
bias
=
0
img_backbone
.
layer1
.2
.
bn3
.
weight
-
torch
.
Size
([
256
]):
ConstantInit
:
val
=
0
,
bias
=
0
img_backbone
.
layer1
.2
.
bn3
.
bias
-
torch
.
Size
([
256
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer2
.0
.
conv1
.
weight
-
torch
.
Size
([
128
,
256
,
1
,
1
]):
KaimingInit
:
a
=
0
,
mode
=
fan_out
,
nonlinearity
=
relu
,
distribution
=
normal
,
bias
=
0
img_backbone
.
layer2
.0
.
bn1
.
weight
-
torch
.
Size
([
128
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer2
.0
.
bn1
.
bias
-
torch
.
Size
([
128
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer2
.0
.
conv2
.
weight
-
torch
.
Size
([
128
,
128
,
3
,
3
]):
KaimingInit
:
a
=
0
,
mode
=
fan_out
,
nonlinearity
=
relu
,
distribution
=
normal
,
bias
=
0
img_backbone
.
layer2
.0
.
bn2
.
weight
-
torch
.
Size
([
128
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer2
.0
.
bn2
.
bias
-
torch
.
Size
([
128
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer2
.0
.
conv3
.
weight
-
torch
.
Size
([
512
,
128
,
1
,
1
]):
KaimingInit
:
a
=
0
,
mode
=
fan_out
,
nonlinearity
=
relu
,
distribution
=
normal
,
bias
=
0
img_backbone
.
layer2
.0
.
bn3
.
weight
-
torch
.
Size
([
512
]):
ConstantInit
:
val
=
0
,
bias
=
0
img_backbone
.
layer2
.0
.
bn3
.
bias
-
torch
.
Size
([
512
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer2
.0
.
downsample
.0
.
weight
-
torch
.
Size
([
512
,
256
,
1
,
1
]):
KaimingInit
:
a
=
0
,
mode
=
fan_out
,
nonlinearity
=
relu
,
distribution
=
normal
,
bias
=
0
img_backbone
.
layer2
.0
.
downsample
.1
.
weight
-
torch
.
Size
([
512
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer2
.0
.
downsample
.1
.
bias
-
torch
.
Size
([
512
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer2
.1
.
conv1
.
weight
-
torch
.
Size
([
128
,
512
,
1
,
1
]):
KaimingInit
:
a
=
0
,
mode
=
fan_out
,
nonlinearity
=
relu
,
distribution
=
normal
,
bias
=
0
img_backbone
.
layer2
.1
.
bn1
.
weight
-
torch
.
Size
([
128
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer2
.1
.
bn1
.
bias
-
torch
.
Size
([
128
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer2
.1
.
conv2
.
weight
-
torch
.
Size
([
128
,
128
,
3
,
3
]):
KaimingInit
:
a
=
0
,
mode
=
fan_out
,
nonlinearity
=
relu
,
distribution
=
normal
,
bias
=
0
img_backbone
.
layer2
.1
.
bn2
.
weight
-
torch
.
Size
([
128
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer2
.1
.
bn2
.
bias
-
torch
.
Size
([
128
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer2
.1
.
conv3
.
weight
-
torch
.
Size
([
512
,
128
,
1
,
1
]):
KaimingInit
:
a
=
0
,
mode
=
fan_out
,
nonlinearity
=
relu
,
distribution
=
normal
,
bias
=
0
img_backbone
.
layer2
.1
.
bn3
.
weight
-
torch
.
Size
([
512
]):
ConstantInit
:
val
=
0
,
bias
=
0
img_backbone
.
layer2
.1
.
bn3
.
bias
-
torch
.
Size
([
512
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer2
.2
.
conv1
.
weight
-
torch
.
Size
([
128
,
512
,
1
,
1
]):
KaimingInit
:
a
=
0
,
mode
=
fan_out
,
nonlinearity
=
relu
,
distribution
=
normal
,
bias
=
0
img_backbone
.
layer2
.2
.
bn1
.
weight
-
torch
.
Size
([
128
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer2
.2
.
bn1
.
bias
-
torch
.
Size
([
128
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer2
.2
.
conv2
.
weight
-
torch
.
Size
([
128
,
128
,
3
,
3
]):
KaimingInit
:
a
=
0
,
mode
=
fan_out
,
nonlinearity
=
relu
,
distribution
=
normal
,
bias
=
0
img_backbone
.
layer2
.2
.
bn2
.
weight
-
torch
.
Size
([
128
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer2
.2
.
bn2
.
bias
-
torch
.
Size
([
128
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer2
.2
.
conv3
.
weight
-
torch
.
Size
([
512
,
128
,
1
,
1
]):
KaimingInit
:
a
=
0
,
mode
=
fan_out
,
nonlinearity
=
relu
,
distribution
=
normal
,
bias
=
0
img_backbone
.
layer2
.2
.
bn3
.
weight
-
torch
.
Size
([
512
]):
ConstantInit
:
val
=
0
,
bias
=
0
img_backbone
.
layer2
.2
.
bn3
.
bias
-
torch
.
Size
([
512
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer2
.3
.
conv1
.
weight
-
torch
.
Size
([
128
,
512
,
1
,
1
]):
KaimingInit
:
a
=
0
,
mode
=
fan_out
,
nonlinearity
=
relu
,
distribution
=
normal
,
bias
=
0
img_backbone
.
layer2
.3
.
bn1
.
weight
-
torch
.
Size
([
128
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer2
.3
.
bn1
.
bias
-
torch
.
Size
([
128
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer2
.3
.
conv2
.
weight
-
torch
.
Size
([
128
,
128
,
3
,
3
]):
KaimingInit
:
a
=
0
,
mode
=
fan_out
,
nonlinearity
=
relu
,
distribution
=
normal
,
bias
=
0
img_backbone
.
layer2
.3
.
bn2
.
weight
-
torch
.
Size
([
128
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer2
.3
.
bn2
.
bias
-
torch
.
Size
([
128
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer2
.3
.
conv3
.
weight
-
torch
.
Size
([
512
,
128
,
1
,
1
]):
KaimingInit
:
a
=
0
,
mode
=
fan_out
,
nonlinearity
=
relu
,
distribution
=
normal
,
bias
=
0
img_backbone
.
layer2
.3
.
bn3
.
weight
-
torch
.
Size
([
512
]):
ConstantInit
:
val
=
0
,
bias
=
0
img_backbone
.
layer2
.3
.
bn3
.
bias
-
torch
.
Size
([
512
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer3
.0
.
conv1
.
weight
-
torch
.
Size
([
256
,
512
,
1
,
1
]):
KaimingInit
:
a
=
0
,
mode
=
fan_out
,
nonlinearity
=
relu
,
distribution
=
normal
,
bias
=
0
img_backbone
.
layer3
.0
.
bn1
.
weight
-
torch
.
Size
([
256
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer3
.0
.
bn1
.
bias
-
torch
.
Size
([
256
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer3
.0
.
conv2
.
weight
-
torch
.
Size
([
256
,
256
,
3
,
3
]):
KaimingInit
:
a
=
0
,
mode
=
fan_out
,
nonlinearity
=
relu
,
distribution
=
normal
,
bias
=
0
img_backbone
.
layer3
.0
.
bn2
.
weight
-
torch
.
Size
([
256
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer3
.0
.
bn2
.
bias
-
torch
.
Size
([
256
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer3
.0
.
conv3
.
weight
-
torch
.
Size
([
1024
,
256
,
1
,
1
]):
KaimingInit
:
a
=
0
,
mode
=
fan_out
,
nonlinearity
=
relu
,
distribution
=
normal
,
bias
=
0
img_backbone
.
layer3
.0
.
bn3
.
weight
-
torch
.
Size
([
1024
]):
ConstantInit
:
val
=
0
,
bias
=
0
img_backbone
.
layer3
.0
.
bn3
.
bias
-
torch
.
Size
([
1024
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer3
.0
.
downsample
.0
.
weight
-
torch
.
Size
([
1024
,
512
,
1
,
1
]):
KaimingInit
:
a
=
0
,
mode
=
fan_out
,
nonlinearity
=
relu
,
distribution
=
normal
,
bias
=
0
img_backbone
.
layer3
.0
.
downsample
.1
.
weight
-
torch
.
Size
([
1024
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer3
.0
.
downsample
.1
.
bias
-
torch
.
Size
([
1024
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer3
.1
.
conv1
.
weight
-
torch
.
Size
([
256
,
1024
,
1
,
1
]):
KaimingInit
:
a
=
0
,
mode
=
fan_out
,
nonlinearity
=
relu
,
distribution
=
normal
,
bias
=
0
img_backbone
.
layer3
.1
.
bn1
.
weight
-
torch
.
Size
([
256
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer3
.1
.
bn1
.
bias
-
torch
.
Size
([
256
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer3
.1
.
conv2
.
weight
-
torch
.
Size
([
256
,
256
,
3
,
3
]):
KaimingInit
:
a
=
0
,
mode
=
fan_out
,
nonlinearity
=
relu
,
distribution
=
normal
,
bias
=
0
img_backbone
.
layer3
.1
.
bn2
.
weight
-
torch
.
Size
([
256
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer3
.1
.
bn2
.
bias
-
torch
.
Size
([
256
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer3
.1
.
conv3
.
weight
-
torch
.
Size
([
1024
,
256
,
1
,
1
]):
KaimingInit
:
a
=
0
,
mode
=
fan_out
,
nonlinearity
=
relu
,
distribution
=
normal
,
bias
=
0
img_backbone
.
layer3
.1
.
bn3
.
weight
-
torch
.
Size
([
1024
]):
ConstantInit
:
val
=
0
,
bias
=
0
img_backbone
.
layer3
.1
.
bn3
.
bias
-
torch
.
Size
([
1024
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer3
.2
.
conv1
.
weight
-
torch
.
Size
([
256
,
1024
,
1
,
1
]):
KaimingInit
:
a
=
0
,
mode
=
fan_out
,
nonlinearity
=
relu
,
distribution
=
normal
,
bias
=
0
img_backbone
.
layer3
.2
.
bn1
.
weight
-
torch
.
Size
([
256
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer3
.2
.
bn1
.
bias
-
torch
.
Size
([
256
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer3
.2
.
conv2
.
weight
-
torch
.
Size
([
256
,
256
,
3
,
3
]):
KaimingInit
:
a
=
0
,
mode
=
fan_out
,
nonlinearity
=
relu
,
distribution
=
normal
,
bias
=
0
img_backbone
.
layer3
.2
.
bn2
.
weight
-
torch
.
Size
([
256
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer3
.2
.
bn2
.
bias
-
torch
.
Size
([
256
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer3
.2
.
conv3
.
weight
-
torch
.
Size
([
1024
,
256
,
1
,
1
]):
KaimingInit
:
a
=
0
,
mode
=
fan_out
,
nonlinearity
=
relu
,
distribution
=
normal
,
bias
=
0
img_backbone
.
layer3
.2
.
bn3
.
weight
-
torch
.
Size
([
1024
]):
ConstantInit
:
val
=
0
,
bias
=
0
img_backbone
.
layer3
.2
.
bn3
.
bias
-
torch
.
Size
([
1024
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer3
.3
.
conv1
.
weight
-
torch
.
Size
([
256
,
1024
,
1
,
1
]):
KaimingInit
:
a
=
0
,
mode
=
fan_out
,
nonlinearity
=
relu
,
distribution
=
normal
,
bias
=
0
img_backbone
.
layer3
.3
.
bn1
.
weight
-
torch
.
Size
([
256
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer3
.3
.
bn1
.
bias
-
torch
.
Size
([
256
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer3
.3
.
conv2
.
weight
-
torch
.
Size
([
256
,
256
,
3
,
3
]):
KaimingInit
:
a
=
0
,
mode
=
fan_out
,
nonlinearity
=
relu
,
distribution
=
normal
,
bias
=
0
img_backbone
.
layer3
.3
.
bn2
.
weight
-
torch
.
Size
([
256
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer3
.3
.
bn2
.
bias
-
torch
.
Size
([
256
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer3
.3
.
conv3
.
weight
-
torch
.
Size
([
1024
,
256
,
1
,
1
]):
KaimingInit
:
a
=
0
,
mode
=
fan_out
,
nonlinearity
=
relu
,
distribution
=
normal
,
bias
=
0
img_backbone
.
layer3
.3
.
bn3
.
weight
-
torch
.
Size
([
1024
]):
ConstantInit
:
val
=
0
,
bias
=
0
img_backbone
.
layer3
.3
.
bn3
.
bias
-
torch
.
Size
([
1024
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer3
.4
.
conv1
.
weight
-
torch
.
Size
([
256
,
1024
,
1
,
1
]):
KaimingInit
:
a
=
0
,
mode
=
fan_out
,
nonlinearity
=
relu
,
distribution
=
normal
,
bias
=
0
img_backbone
.
layer3
.4
.
bn1
.
weight
-
torch
.
Size
([
256
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer3
.4
.
bn1
.
bias
-
torch
.
Size
([
256
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer3
.4
.
conv2
.
weight
-
torch
.
Size
([
256
,
256
,
3
,
3
]):
KaimingInit
:
a
=
0
,
mode
=
fan_out
,
nonlinearity
=
relu
,
distribution
=
normal
,
bias
=
0
img_backbone
.
layer3
.4
.
bn2
.
weight
-
torch
.
Size
([
256
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer3
.4
.
bn2
.
bias
-
torch
.
Size
([
256
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer3
.4
.
conv3
.
weight
-
torch
.
Size
([
1024
,
256
,
1
,
1
]):
KaimingInit
:
a
=
0
,
mode
=
fan_out
,
nonlinearity
=
relu
,
distribution
=
normal
,
bias
=
0
img_backbone
.
layer3
.4
.
bn3
.
weight
-
torch
.
Size
([
1024
]):
ConstantInit
:
val
=
0
,
bias
=
0
img_backbone
.
layer3
.4
.
bn3
.
bias
-
torch
.
Size
([
1024
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer3
.5
.
conv1
.
weight
-
torch
.
Size
([
256
,
1024
,
1
,
1
]):
KaimingInit
:
a
=
0
,
mode
=
fan_out
,
nonlinearity
=
relu
,
distribution
=
normal
,
bias
=
0
img_backbone
.
layer3
.5
.
bn1
.
weight
-
torch
.
Size
([
256
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer3
.5
.
bn1
.
bias
-
torch
.
Size
([
256
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer3
.5
.
conv2
.
weight
-
torch
.
Size
([
256
,
256
,
3
,
3
]):
KaimingInit
:
a
=
0
,
mode
=
fan_out
,
nonlinearity
=
relu
,
distribution
=
normal
,
bias
=
0
img_backbone
.
layer3
.5
.
bn2
.
weight
-
torch
.
Size
([
256
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer3
.5
.
bn2
.
bias
-
torch
.
Size
([
256
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer3
.5
.
conv3
.
weight
-
torch
.
Size
([
1024
,
256
,
1
,
1
]):
KaimingInit
:
a
=
0
,
mode
=
fan_out
,
nonlinearity
=
relu
,
distribution
=
normal
,
bias
=
0
img_backbone
.
layer3
.5
.
bn3
.
weight
-
torch
.
Size
([
1024
]):
ConstantInit
:
val
=
0
,
bias
=
0
img_backbone
.
layer3
.5
.
bn3
.
bias
-
torch
.
Size
([
1024
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer4
.0
.
conv1
.
weight
-
torch
.
Size
([
512
,
1024
,
1
,
1
]):
KaimingInit
:
a
=
0
,
mode
=
fan_out
,
nonlinearity
=
relu
,
distribution
=
normal
,
bias
=
0
img_backbone
.
layer4
.0
.
bn1
.
weight
-
torch
.
Size
([
512
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer4
.0
.
bn1
.
bias
-
torch
.
Size
([
512
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer4
.0
.
conv2
.
weight
-
torch
.
Size
([
512
,
512
,
3
,
3
]):
KaimingInit
:
a
=
0
,
mode
=
fan_out
,
nonlinearity
=
relu
,
distribution
=
normal
,
bias
=
0
img_backbone
.
layer4
.0
.
bn2
.
weight
-
torch
.
Size
([
512
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer4
.0
.
bn2
.
bias
-
torch
.
Size
([
512
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer4
.0
.
conv3
.
weight
-
torch
.
Size
([
2048
,
512
,
1
,
1
]):
KaimingInit
:
a
=
0
,
mode
=
fan_out
,
nonlinearity
=
relu
,
distribution
=
normal
,
bias
=
0
img_backbone
.
layer4
.0
.
bn3
.
weight
-
torch
.
Size
([
2048
]):
ConstantInit
:
val
=
0
,
bias
=
0
img_backbone
.
layer4
.0
.
bn3
.
bias
-
torch
.
Size
([
2048
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer4
.0
.
downsample
.0
.
weight
-
torch
.
Size
([
2048
,
1024
,
1
,
1
]):
KaimingInit
:
a
=
0
,
mode
=
fan_out
,
nonlinearity
=
relu
,
distribution
=
normal
,
bias
=
0
img_backbone
.
layer4
.0
.
downsample
.1
.
weight
-
torch
.
Size
([
2048
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer4
.0
.
downsample
.1
.
bias
-
torch
.
Size
([
2048
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer4
.1
.
conv1
.
weight
-
torch
.
Size
([
512
,
2048
,
1
,
1
]):
KaimingInit
:
a
=
0
,
mode
=
fan_out
,
nonlinearity
=
relu
,
distribution
=
normal
,
bias
=
0
img_backbone
.
layer4
.1
.
bn1
.
weight
-
torch
.
Size
([
512
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer4
.1
.
bn1
.
bias
-
torch
.
Size
([
512
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer4
.1
.
conv2
.
weight
-
torch
.
Size
([
512
,
512
,
3
,
3
]):
KaimingInit
:
a
=
0
,
mode
=
fan_out
,
nonlinearity
=
relu
,
distribution
=
normal
,
bias
=
0
img_backbone
.
layer4
.1
.
bn2
.
weight
-
torch
.
Size
([
512
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer4
.1
.
bn2
.
bias
-
torch
.
Size
([
512
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer4
.1
.
conv3
.
weight
-
torch
.
Size
([
2048
,
512
,
1
,
1
]):
KaimingInit
:
a
=
0
,
mode
=
fan_out
,
nonlinearity
=
relu
,
distribution
=
normal
,
bias
=
0
img_backbone
.
layer4
.1
.
bn3
.
weight
-
torch
.
Size
([
2048
]):
ConstantInit
:
val
=
0
,
bias
=
0
img_backbone
.
layer4
.1
.
bn3
.
bias
-
torch
.
Size
([
2048
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer4
.2
.
conv1
.
weight
-
torch
.
Size
([
512
,
2048
,
1
,
1
]):
KaimingInit
:
a
=
0
,
mode
=
fan_out
,
nonlinearity
=
relu
,
distribution
=
normal
,
bias
=
0
img_backbone
.
layer4
.2
.
bn1
.
weight
-
torch
.
Size
([
512
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer4
.2
.
bn1
.
bias
-
torch
.
Size
([
512
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer4
.2
.
conv2
.
weight
-
torch
.
Size
([
512
,
512
,
3
,
3
]):
KaimingInit
:
a
=
0
,
mode
=
fan_out
,
nonlinearity
=
relu
,
distribution
=
normal
,
bias
=
0
img_backbone
.
layer4
.2
.
bn2
.
weight
-
torch
.
Size
([
512
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer4
.2
.
bn2
.
bias
-
torch
.
Size
([
512
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_backbone
.
layer4
.2
.
conv3
.
weight
-
torch
.
Size
([
2048
,
512
,
1
,
1
]):
KaimingInit
:
a
=
0
,
mode
=
fan_out
,
nonlinearity
=
relu
,
distribution
=
normal
,
bias
=
0
img_backbone
.
layer4
.2
.
bn3
.
weight
-
torch
.
Size
([
2048
]):
ConstantInit
:
val
=
0
,
bias
=
0
img_backbone
.
layer4
.2
.
bn3
.
bias
-
torch
.
Size
([
2048
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_neck
.
lateral_convs
.0
.
conv
.
weight
-
torch
.
Size
([
256
,
1024
,
1
,
1
]):
XavierInit
:
gain
=
1
,
distribution
=
uniform
,
bias
=
0
img_neck
.
lateral_convs
.0
.
conv
.
bias
-
torch
.
Size
([
256
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_neck
.
lateral_convs
.1
.
conv
.
weight
-
torch
.
Size
([
256
,
2048
,
1
,
1
]):
XavierInit
:
gain
=
1
,
distribution
=
uniform
,
bias
=
0
img_neck
.
lateral_convs
.1
.
conv
.
bias
-
torch
.
Size
([
256
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_neck
.
fpn_convs
.0
.
conv
.
weight
-
torch
.
Size
([
256
,
256
,
3
,
3
]):
XavierInit
:
gain
=
1
,
distribution
=
uniform
,
bias
=
0
img_neck
.
fpn_convs
.0
.
conv
.
bias
-
torch
.
Size
([
256
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_view_transformer
.
depth_net
.
weight
-
torch
.
Size
([
152
,
256
,
1
,
1
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_view_transformer
.
depth_net
.
bias
-
torch
.
Size
([
152
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_bev_encoder_backbone
.
layers
.0.0
.
conv1
.
weight
-
torch
.
Size
([
128
,
64
,
3
,
3
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_bev_encoder_backbone
.
layers
.0.0
.
bn1
.
weight
-
torch
.
Size
([
128
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_bev_encoder_backbone
.
layers
.0.0
.
bn1
.
bias
-
torch
.
Size
([
128
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_bev_encoder_backbone
.
layers
.0.0
.
conv2
.
weight
-
torch
.
Size
([
128
,
128
,
3
,
3
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_bev_encoder_backbone
.
layers
.0.0
.
bn2
.
weight
-
torch
.
Size
([
128
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_bev_encoder_backbone
.
layers
.0.0
.
bn2
.
bias
-
torch
.
Size
([
128
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_bev_encoder_backbone
.
layers
.0.0
.
downsample
.
weight
-
torch
.
Size
([
128
,
64
,
3
,
3
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_bev_encoder_backbone
.
layers
.0.0
.
downsample
.
bias
-
torch
.
Size
([
128
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_bev_encoder_backbone
.
layers
.0.1
.
conv1
.
weight
-
torch
.
Size
([
128
,
128
,
3
,
3
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_bev_encoder_backbone
.
layers
.0.1
.
bn1
.
weight
-
torch
.
Size
([
128
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_bev_encoder_backbone
.
layers
.0.1
.
bn1
.
bias
-
torch
.
Size
([
128
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_bev_encoder_backbone
.
layers
.0.1
.
conv2
.
weight
-
torch
.
Size
([
128
,
128
,
3
,
3
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_bev_encoder_backbone
.
layers
.0.1
.
bn2
.
weight
-
torch
.
Size
([
128
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_bev_encoder_backbone
.
layers
.0.1
.
bn2
.
bias
-
torch
.
Size
([
128
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_bev_encoder_backbone
.
layers
.1.0
.
conv1
.
weight
-
torch
.
Size
([
256
,
128
,
3
,
3
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_bev_encoder_backbone
.
layers
.1.0
.
bn1
.
weight
-
torch
.
Size
([
256
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_bev_encoder_backbone
.
layers
.1.0
.
bn1
.
bias
-
torch
.
Size
([
256
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_bev_encoder_backbone
.
layers
.1.0
.
conv2
.
weight
-
torch
.
Size
([
256
,
256
,
3
,
3
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_bev_encoder_backbone
.
layers
.1.0
.
bn2
.
weight
-
torch
.
Size
([
256
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_bev_encoder_backbone
.
layers
.1.0
.
bn2
.
bias
-
torch
.
Size
([
256
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_bev_encoder_backbone
.
layers
.1.0
.
downsample
.
weight
-
torch
.
Size
([
256
,
128
,
3
,
3
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_bev_encoder_backbone
.
layers
.1.0
.
downsample
.
bias
-
torch
.
Size
([
256
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_bev_encoder_backbone
.
layers
.1.1
.
conv1
.
weight
-
torch
.
Size
([
256
,
256
,
3
,
3
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_bev_encoder_backbone
.
layers
.1.1
.
bn1
.
weight
-
torch
.
Size
([
256
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_bev_encoder_backbone
.
layers
.1.1
.
bn1
.
bias
-
torch
.
Size
([
256
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_bev_encoder_backbone
.
layers
.1.1
.
conv2
.
weight
-
torch
.
Size
([
256
,
256
,
3
,
3
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_bev_encoder_backbone
.
layers
.1.1
.
bn2
.
weight
-
torch
.
Size
([
256
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_bev_encoder_backbone
.
layers
.1.1
.
bn2
.
bias
-
torch
.
Size
([
256
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_bev_encoder_backbone
.
layers
.2.0
.
conv1
.
weight
-
torch
.
Size
([
512
,
256
,
3
,
3
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_bev_encoder_backbone
.
layers
.2.0
.
bn1
.
weight
-
torch
.
Size
([
512
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_bev_encoder_backbone
.
layers
.2.0
.
bn1
.
bias
-
torch
.
Size
([
512
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_bev_encoder_backbone
.
layers
.2.0
.
conv2
.
weight
-
torch
.
Size
([
512
,
512
,
3
,
3
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_bev_encoder_backbone
.
layers
.2.0
.
bn2
.
weight
-
torch
.
Size
([
512
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_bev_encoder_backbone
.
layers
.2.0
.
bn2
.
bias
-
torch
.
Size
([
512
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_bev_encoder_backbone
.
layers
.2.0
.
downsample
.
weight
-
torch
.
Size
([
512
,
256
,
3
,
3
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_bev_encoder_backbone
.
layers
.2.0
.
downsample
.
bias
-
torch
.
Size
([
512
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_bev_encoder_backbone
.
layers
.2.1
.
conv1
.
weight
-
torch
.
Size
([
512
,
512
,
3
,
3
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_bev_encoder_backbone
.
layers
.2.1
.
bn1
.
weight
-
torch
.
Size
([
512
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_bev_encoder_backbone
.
layers
.2.1
.
bn1
.
bias
-
torch
.
Size
([
512
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_bev_encoder_backbone
.
layers
.2.1
.
conv2
.
weight
-
torch
.
Size
([
512
,
512
,
3
,
3
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_bev_encoder_backbone
.
layers
.2.1
.
bn2
.
weight
-
torch
.
Size
([
512
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_bev_encoder_backbone
.
layers
.2.1
.
bn2
.
bias
-
torch
.
Size
([
512
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_bev_encoder_neck
.
conv
.0
.
weight
-
torch
.
Size
([
512
,
640
,
3
,
3
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_bev_encoder_neck
.
conv
.1
.
weight
-
torch
.
Size
([
512
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_bev_encoder_neck
.
conv
.1
.
bias
-
torch
.
Size
([
512
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_bev_encoder_neck
.
conv
.3
.
weight
-
torch
.
Size
([
512
,
512
,
3
,
3
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_bev_encoder_neck
.
conv
.4
.
weight
-
torch
.
Size
([
512
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_bev_encoder_neck
.
conv
.4
.
bias
-
torch
.
Size
([
512
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_bev_encoder_neck
.
up2
.1
.
weight
-
torch
.
Size
([
256
,
512
,
3
,
3
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_bev_encoder_neck
.
up2
.2
.
weight
-
torch
.
Size
([
256
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_bev_encoder_neck
.
up2
.2
.
bias
-
torch
.
Size
([
256
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_bev_encoder_neck
.
up2
.4
.
weight
-
torch
.
Size
([
256
,
256
,
1
,
1
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
img_bev_encoder_neck
.
up2
.4
.
bias
-
torch
.
Size
([
256
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
occ_head
.
final_conv
.
conv
.
weight
-
torch
.
Size
([
256
,
256
,
3
,
3
]):
Initialized
by
user
-
defined
`
init_weights
`
in
ConvModule
occ_head
.
final_conv
.
conv
.
bias
-
torch
.
Size
([
256
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
occ_head
.
predicter
.0
.
weight
-
torch
.
Size
([
512
,
256
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
occ_head
.
predicter
.0
.
bias
-
torch
.
Size
([
512
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
occ_head
.
predicter
.2
.
weight
-
torch
.
Size
([
288
,
512
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
occ_head
.
predicter
.2
.
bias
-
torch
.
Size
([
288
]):
The
value
is
the
same
before
and
after
calling
`
init_weights
`
of
BEVDetOCC
2026
-
04
-
03
16
:
26
:
53
,
421
-
mmdet
-
INFO
-
Model
:
BEVDetOCC
(
(
img_backbone
):
ResNet
(
(
conv1
):
Conv2d
(
3
,
64
,
kernel_size
=(
7
,
7
),
stride
=(
2
,
2
),
padding
=(
3
,
3
),
bias
=
False
)
(
bn1
):
BatchNorm2d
(
64
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
relu
):
ReLU
(
inplace
=
True
)
(
maxpool
):
MaxPool2d
(
kernel_size
=
3
,
stride
=
2
,
padding
=
1
,
dilation
=
1
,
ceil_mode
=
False
)
(
layer1
):
ResLayer
(
(
0
):
Bottleneck
(
(
conv1
):
Conv2d
(
64
,
64
,
kernel_size
=(
1
,
1
),
stride
=(
1
,
1
),
bias
=
False
)
(
bn1
):
BatchNorm2d
(
64
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
conv2
):
Conv2d
(
64
,
64
,
kernel_size
=(
3
,
3
),
stride
=(
1
,
1
),
padding
=(
1
,
1
),
bias
=
False
)
(
bn2
):
BatchNorm2d
(
64
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
conv3
):
Conv2d
(
64
,
256
,
kernel_size
=(
1
,
1
),
stride
=(
1
,
1
),
bias
=
False
)
(
bn3
):
BatchNorm2d
(
256
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
relu
):
ReLU
(
inplace
=
True
)
(
downsample
):
Sequential
(
(
0
):
Conv2d
(
64
,
256
,
kernel_size
=(
1
,
1
),
stride
=(
1
,
1
),
bias
=
False
)
(
1
):
BatchNorm2d
(
256
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
)
)
init_cfg
={
'type'
:
'Constant'
,
'val'
:
0
,
'override'
:
{
'name'
:
'norm3'
}}
(
1
):
Bottleneck
(
(
conv1
):
Conv2d
(
256
,
64
,
kernel_size
=(
1
,
1
),
stride
=(
1
,
1
),
bias
=
False
)
(
bn1
):
BatchNorm2d
(
64
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
conv2
):
Conv2d
(
64
,
64
,
kernel_size
=(
3
,
3
),
stride
=(
1
,
1
),
padding
=(
1
,
1
),
bias
=
False
)
(
bn2
):
BatchNorm2d
(
64
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
conv3
):
Conv2d
(
64
,
256
,
kernel_size
=(
1
,
1
),
stride
=(
1
,
1
),
bias
=
False
)
(
bn3
):
BatchNorm2d
(
256
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
relu
):
ReLU
(
inplace
=
True
)
)
init_cfg
={
'type'
:
'Constant'
,
'val'
:
0
,
'override'
:
{
'name'
:
'norm3'
}}
(
2
):
Bottleneck
(
(
conv1
):
Conv2d
(
256
,
64
,
kernel_size
=(
1
,
1
),
stride
=(
1
,
1
),
bias
=
False
)
(
bn1
):
BatchNorm2d
(
64
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
conv2
):
Conv2d
(
64
,
64
,
kernel_size
=(
3
,
3
),
stride
=(
1
,
1
),
padding
=(
1
,
1
),
bias
=
False
)
(
bn2
):
BatchNorm2d
(
64
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
conv3
):
Conv2d
(
64
,
256
,
kernel_size
=(
1
,
1
),
stride
=(
1
,
1
),
bias
=
False
)
(
bn3
):
BatchNorm2d
(
256
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
relu
):
ReLU
(
inplace
=
True
)
)
init_cfg
={
'type'
:
'Constant'
,
'val'
:
0
,
'override'
:
{
'name'
:
'norm3'
}}
)
(
layer2
):
ResLayer
(
(
0
):
Bottleneck
(
(
conv1
):
Conv2d
(
256
,
128
,
kernel_size
=(
1
,
1
),
stride
=(
1
,
1
),
bias
=
False
)
(
bn1
):
BatchNorm2d
(
128
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
conv2
):
Conv2d
(
128
,
128
,
kernel_size
=(
3
,
3
),
stride
=(
2
,
2
),
padding
=(
1
,
1
),
bias
=
False
)
(
bn2
):
BatchNorm2d
(
128
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
conv3
):
Conv2d
(
128
,
512
,
kernel_size
=(
1
,
1
),
stride
=(
1
,
1
),
bias
=
False
)
(
bn3
):
BatchNorm2d
(
512
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
relu
):
ReLU
(
inplace
=
True
)
(
downsample
):
Sequential
(
(
0
):
Conv2d
(
256
,
512
,
kernel_size
=(
1
,
1
),
stride
=(
2
,
2
),
bias
=
False
)
(
1
):
BatchNorm2d
(
512
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
)
)
init_cfg
={
'type'
:
'Constant'
,
'val'
:
0
,
'override'
:
{
'name'
:
'norm3'
}}
(
1
):
Bottleneck
(
(
conv1
):
Conv2d
(
512
,
128
,
kernel_size
=(
1
,
1
),
stride
=(
1
,
1
),
bias
=
False
)
(
bn1
):
BatchNorm2d
(
128
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
conv2
):
Conv2d
(
128
,
128
,
kernel_size
=(
3
,
3
),
stride
=(
1
,
1
),
padding
=(
1
,
1
),
bias
=
False
)
(
bn2
):
BatchNorm2d
(
128
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
conv3
):
Conv2d
(
128
,
512
,
kernel_size
=(
1
,
1
),
stride
=(
1
,
1
),
bias
=
False
)
(
bn3
):
BatchNorm2d
(
512
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
relu
):
ReLU
(
inplace
=
True
)
)
init_cfg
={
'type'
:
'Constant'
,
'val'
:
0
,
'override'
:
{
'name'
:
'norm3'
}}
(
2
):
Bottleneck
(
(
conv1
):
Conv2d
(
512
,
128
,
kernel_size
=(
1
,
1
),
stride
=(
1
,
1
),
bias
=
False
)
(
bn1
):
BatchNorm2d
(
128
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
conv2
):
Conv2d
(
128
,
128
,
kernel_size
=(
3
,
3
),
stride
=(
1
,
1
),
padding
=(
1
,
1
),
bias
=
False
)
(
bn2
):
BatchNorm2d
(
128
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
conv3
):
Conv2d
(
128
,
512
,
kernel_size
=(
1
,
1
),
stride
=(
1
,
1
),
bias
=
False
)
(
bn3
):
BatchNorm2d
(
512
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
relu
):
ReLU
(
inplace
=
True
)
)
init_cfg
={
'type'
:
'Constant'
,
'val'
:
0
,
'override'
:
{
'name'
:
'norm3'
}}
(
3
):
Bottleneck
(
(
conv1
):
Conv2d
(
512
,
128
,
kernel_size
=(
1
,
1
),
stride
=(
1
,
1
),
bias
=
False
)
(
bn1
):
BatchNorm2d
(
128
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
conv2
):
Conv2d
(
128
,
128
,
kernel_size
=(
3
,
3
),
stride
=(
1
,
1
),
padding
=(
1
,
1
),
bias
=
False
)
(
bn2
):
BatchNorm2d
(
128
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
conv3
):
Conv2d
(
128
,
512
,
kernel_size
=(
1
,
1
),
stride
=(
1
,
1
),
bias
=
False
)
(
bn3
):
BatchNorm2d
(
512
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
relu
):
ReLU
(
inplace
=
True
)
)
init_cfg
={
'type'
:
'Constant'
,
'val'
:
0
,
'override'
:
{
'name'
:
'norm3'
}}
)
(
layer3
):
ResLayer
(
(
0
):
Bottleneck
(
(
conv1
):
Conv2d
(
512
,
256
,
kernel_size
=(
1
,
1
),
stride
=(
1
,
1
),
bias
=
False
)
(
bn1
):
BatchNorm2d
(
256
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
conv2
):
Conv2d
(
256
,
256
,
kernel_size
=(
3
,
3
),
stride
=(
2
,
2
),
padding
=(
1
,
1
),
bias
=
False
)
(
bn2
):
BatchNorm2d
(
256
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
conv3
):
Conv2d
(
256
,
1024
,
kernel_size
=(
1
,
1
),
stride
=(
1
,
1
),
bias
=
False
)
(
bn3
):
BatchNorm2d
(
1024
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
relu
):
ReLU
(
inplace
=
True
)
(
downsample
):
Sequential
(
(
0
):
Conv2d
(
512
,
1024
,
kernel_size
=(
1
,
1
),
stride
=(
2
,
2
),
bias
=
False
)
(
1
):
BatchNorm2d
(
1024
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
)
)
init_cfg
={
'type'
:
'Constant'
,
'val'
:
0
,
'override'
:
{
'name'
:
'norm3'
}}
(
1
):
Bottleneck
(
(
conv1
):
Conv2d
(
1024
,
256
,
kernel_size
=(
1
,
1
),
stride
=(
1
,
1
),
bias
=
False
)
(
bn1
):
BatchNorm2d
(
256
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
conv2
):
Conv2d
(
256
,
256
,
kernel_size
=(
3
,
3
),
stride
=(
1
,
1
),
padding
=(
1
,
1
),
bias
=
False
)
(
bn2
):
BatchNorm2d
(
256
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
conv3
):
Conv2d
(
256
,
1024
,
kernel_size
=(
1
,
1
),
stride
=(
1
,
1
),
bias
=
False
)
(
bn3
):
BatchNorm2d
(
1024
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
relu
):
ReLU
(
inplace
=
True
)
)
init_cfg
={
'type'
:
'Constant'
,
'val'
:
0
,
'override'
:
{
'name'
:
'norm3'
}}
(
2
):
Bottleneck
(
(
conv1
):
Conv2d
(
1024
,
256
,
kernel_size
=(
1
,
1
),
stride
=(
1
,
1
),
bias
=
False
)
(
bn1
):
BatchNorm2d
(
256
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
conv2
):
Conv2d
(
256
,
256
,
kernel_size
=(
3
,
3
),
stride
=(
1
,
1
),
padding
=(
1
,
1
),
bias
=
False
)
(
bn2
):
BatchNorm2d
(
256
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
conv3
):
Conv2d
(
256
,
1024
,
kernel_size
=(
1
,
1
),
stride
=(
1
,
1
),
bias
=
False
)
(
bn3
):
BatchNorm2d
(
1024
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
relu
):
ReLU
(
inplace
=
True
)
)
init_cfg
={
'type'
:
'Constant'
,
'val'
:
0
,
'override'
:
{
'name'
:
'norm3'
}}
(
3
):
Bottleneck
(
(
conv1
):
Conv2d
(
1024
,
256
,
kernel_size
=(
1
,
1
),
stride
=(
1
,
1
),
bias
=
False
)
(
bn1
):
BatchNorm2d
(
256
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
conv2
):
Conv2d
(
256
,
256
,
kernel_size
=(
3
,
3
),
stride
=(
1
,
1
),
padding
=(
1
,
1
),
bias
=
False
)
(
bn2
):
BatchNorm2d
(
256
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
conv3
):
Conv2d
(
256
,
1024
,
kernel_size
=(
1
,
1
),
stride
=(
1
,
1
),
bias
=
False
)
(
bn3
):
BatchNorm2d
(
1024
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
relu
):
ReLU
(
inplace
=
True
)
)
init_cfg
={
'type'
:
'Constant'
,
'val'
:
0
,
'override'
:
{
'name'
:
'norm3'
}}
(
4
):
Bottleneck
(
(
conv1
):
Conv2d
(
1024
,
256
,
kernel_size
=(
1
,
1
),
stride
=(
1
,
1
),
bias
=
False
)
(
bn1
):
BatchNorm2d
(
256
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
conv2
):
Conv2d
(
256
,
256
,
kernel_size
=(
3
,
3
),
stride
=(
1
,
1
),
padding
=(
1
,
1
),
bias
=
False
)
(
bn2
):
BatchNorm2d
(
256
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
conv3
):
Conv2d
(
256
,
1024
,
kernel_size
=(
1
,
1
),
stride
=(
1
,
1
),
bias
=
False
)
(
bn3
):
BatchNorm2d
(
1024
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
relu
):
ReLU
(
inplace
=
True
)
)
init_cfg
={
'type'
:
'Constant'
,
'val'
:
0
,
'override'
:
{
'name'
:
'norm3'
}}
(
5
):
Bottleneck
(
(
conv1
):
Conv2d
(
1024
,
256
,
kernel_size
=(
1
,
1
),
stride
=(
1
,
1
),
bias
=
False
)
(
bn1
):
BatchNorm2d
(
256
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
conv2
):
Conv2d
(
256
,
256
,
kernel_size
=(
3
,
3
),
stride
=(
1
,
1
),
padding
=(
1
,
1
),
bias
=
False
)
(
bn2
):
BatchNorm2d
(
256
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
conv3
):
Conv2d
(
256
,
1024
,
kernel_size
=(
1
,
1
),
stride
=(
1
,
1
),
bias
=
False
)
(
bn3
):
BatchNorm2d
(
1024
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
relu
):
ReLU
(
inplace
=
True
)
)
init_cfg
={
'type'
:
'Constant'
,
'val'
:
0
,
'override'
:
{
'name'
:
'norm3'
}}
)
(
layer4
):
ResLayer
(
(
0
):
Bottleneck
(
(
conv1
):
Conv2d
(
1024
,
512
,
kernel_size
=(
1
,
1
),
stride
=(
1
,
1
),
bias
=
False
)
(
bn1
):
BatchNorm2d
(
512
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
conv2
):
Conv2d
(
512
,
512
,
kernel_size
=(
3
,
3
),
stride
=(
2
,
2
),
padding
=(
1
,
1
),
bias
=
False
)
(
bn2
):
BatchNorm2d
(
512
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
conv3
):
Conv2d
(
512
,
2048
,
kernel_size
=(
1
,
1
),
stride
=(
1
,
1
),
bias
=
False
)
(
bn3
):
BatchNorm2d
(
2048
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
relu
):
ReLU
(
inplace
=
True
)
(
downsample
):
Sequential
(
(
0
):
Conv2d
(
1024
,
2048
,
kernel_size
=(
1
,
1
),
stride
=(
2
,
2
),
bias
=
False
)
(
1
):
BatchNorm2d
(
2048
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
)
)
init_cfg
={
'type'
:
'Constant'
,
'val'
:
0
,
'override'
:
{
'name'
:
'norm3'
}}
(
1
):
Bottleneck
(
(
conv1
):
Conv2d
(
2048
,
512
,
kernel_size
=(
1
,
1
),
stride
=(
1
,
1
),
bias
=
False
)
(
bn1
):
BatchNorm2d
(
512
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
conv2
):
Conv2d
(
512
,
512
,
kernel_size
=(
3
,
3
),
stride
=(
1
,
1
),
padding
=(
1
,
1
),
bias
=
False
)
(
bn2
):
BatchNorm2d
(
512
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
conv3
):
Conv2d
(
512
,
2048
,
kernel_size
=(
1
,
1
),
stride
=(
1
,
1
),
bias
=
False
)
(
bn3
):
BatchNorm2d
(
2048
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
relu
):
ReLU
(
inplace
=
True
)
)
init_cfg
={
'type'
:
'Constant'
,
'val'
:
0
,
'override'
:
{
'name'
:
'norm3'
}}
(
2
):
Bottleneck
(
(
conv1
):
Conv2d
(
2048
,
512
,
kernel_size
=(
1
,
1
),
stride
=(
1
,
1
),
bias
=
False
)
(
bn1
):
BatchNorm2d
(
512
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
conv2
):
Conv2d
(
512
,
512
,
kernel_size
=(
3
,
3
),
stride
=(
1
,
1
),
padding
=(
1
,
1
),
bias
=
False
)
(
bn2
):
BatchNorm2d
(
512
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
conv3
):
Conv2d
(
512
,
2048
,
kernel_size
=(
1
,
1
),
stride
=(
1
,
1
),
bias
=
False
)
(
bn3
):
BatchNorm2d
(
2048
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
relu
):
ReLU
(
inplace
=
True
)
)
init_cfg
={
'type'
:
'Constant'
,
'val'
:
0
,
'override'
:
{
'name'
:
'norm3'
}}
)
)
init_cfg
=[{
'type'
:
'Kaiming'
,
'layer'
:
'Conv2d'
},
{
'type'
:
'Constant'
,
'val'
:
1
,
'layer'
:
[
'_BatchNorm'
,
'GroupNorm'
]}]
(
img_neck
):
CustomFPN
(
(
lateral_convs
):
ModuleList
(
(
0
):
ConvModule
(
(
conv
):
Conv2d
(
1024
,
256
,
kernel_size
=(
1
,
1
),
stride
=(
1
,
1
))
)
(
1
):
ConvModule
(
(
conv
):
Conv2d
(
2048
,
256
,
kernel_size
=(
1
,
1
),
stride
=(
1
,
1
))
)
)
(
fpn_convs
):
ModuleList
(
(
0
):
ConvModule
(
(
conv
):
Conv2d
(
256
,
256
,
kernel_size
=(
3
,
3
),
stride
=(
1
,
1
),
padding
=(
1
,
1
))
)
)
)
init_cfg
={
'type'
:
'Xavier'
,
'layer'
:
'Conv2d'
,
'distribution'
:
'uniform'
}
(
img_view_transformer
):
LSSViewTransformer
(
(
depth_net
):
Conv2d
(
256
,
152
,
kernel_size
=(
1
,
1
),
stride
=(
1
,
1
))
)
(
img_bev_encoder_backbone
):
CustomResNet
(
(
layers
):
Sequential
(
(
0
):
Sequential
(
(
0
):
BasicBlock
(
(
conv1
):
Conv2d
(
64
,
128
,
kernel_size
=(
3
,
3
),
stride
=(
2
,
2
),
padding
=(
1
,
1
),
bias
=
False
)
(
bn1
):
BatchNorm2d
(
128
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
conv2
):
Conv2d
(
128
,
128
,
kernel_size
=(
3
,
3
),
stride
=(
1
,
1
),
padding
=(
1
,
1
),
bias
=
False
)
(
bn2
):
BatchNorm2d
(
128
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
relu
):
ReLU
(
inplace
=
True
)
(
downsample
):
Conv2d
(
64
,
128
,
kernel_size
=(
3
,
3
),
stride
=(
2
,
2
),
padding
=(
1
,
1
))
)
(
1
):
BasicBlock
(
(
conv1
):
Conv2d
(
128
,
128
,
kernel_size
=(
3
,
3
),
stride
=(
1
,
1
),
padding
=(
1
,
1
),
bias
=
False
)
(
bn1
):
BatchNorm2d
(
128
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
conv2
):
Conv2d
(
128
,
128
,
kernel_size
=(
3
,
3
),
stride
=(
1
,
1
),
padding
=(
1
,
1
),
bias
=
False
)
(
bn2
):
BatchNorm2d
(
128
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
relu
):
ReLU
(
inplace
=
True
)
)
)
(
1
):
Sequential
(
(
0
):
BasicBlock
(
(
conv1
):
Conv2d
(
128
,
256
,
kernel_size
=(
3
,
3
),
stride
=(
2
,
2
),
padding
=(
1
,
1
),
bias
=
False
)
(
bn1
):
BatchNorm2d
(
256
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
conv2
):
Conv2d
(
256
,
256
,
kernel_size
=(
3
,
3
),
stride
=(
1
,
1
),
padding
=(
1
,
1
),
bias
=
False
)
(
bn2
):
BatchNorm2d
(
256
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
relu
):
ReLU
(
inplace
=
True
)
(
downsample
):
Conv2d
(
128
,
256
,
kernel_size
=(
3
,
3
),
stride
=(
2
,
2
),
padding
=(
1
,
1
))
)
(
1
):
BasicBlock
(
(
conv1
):
Conv2d
(
256
,
256
,
kernel_size
=(
3
,
3
),
stride
=(
1
,
1
),
padding
=(
1
,
1
),
bias
=
False
)
(
bn1
):
BatchNorm2d
(
256
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
conv2
):
Conv2d
(
256
,
256
,
kernel_size
=(
3
,
3
),
stride
=(
1
,
1
),
padding
=(
1
,
1
),
bias
=
False
)
(
bn2
):
BatchNorm2d
(
256
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
relu
):
ReLU
(
inplace
=
True
)
)
)
(
2
):
Sequential
(
(
0
):
BasicBlock
(
(
conv1
):
Conv2d
(
256
,
512
,
kernel_size
=(
3
,
3
),
stride
=(
2
,
2
),
padding
=(
1
,
1
),
bias
=
False
)
(
bn1
):
BatchNorm2d
(
512
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
conv2
):
Conv2d
(
512
,
512
,
kernel_size
=(
3
,
3
),
stride
=(
1
,
1
),
padding
=(
1
,
1
),
bias
=
False
)
(
bn2
):
BatchNorm2d
(
512
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
relu
):
ReLU
(
inplace
=
True
)
(
downsample
):
Conv2d
(
256
,
512
,
kernel_size
=(
3
,
3
),
stride
=(
2
,
2
),
padding
=(
1
,
1
))
)
(
1
):
BasicBlock
(
(
conv1
):
Conv2d
(
512
,
512
,
kernel_size
=(
3
,
3
),
stride
=(
1
,
1
),
padding
=(
1
,
1
),
bias
=
False
)
(
bn1
):
BatchNorm2d
(
512
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
conv2
):
Conv2d
(
512
,
512
,
kernel_size
=(
3
,
3
),
stride
=(
1
,
1
),
padding
=(
1
,
1
),
bias
=
False
)
(
bn2
):
BatchNorm2d
(
512
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
relu
):
ReLU
(
inplace
=
True
)
)
)
)
)
(
img_bev_encoder_neck
):
FPN_LSS
(
(
up
):
Upsample
(
scale_factor
=
4.0
,
mode
=
'bilinear'
)
(
conv
):
Sequential
(
(
0
):
Conv2d
(
640
,
512
,
kernel_size
=(
3
,
3
),
stride
=(
1
,
1
),
padding
=(
1
,
1
),
bias
=
False
)
(
1
):
BatchNorm2d
(
512
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
2
):
ReLU
(
inplace
=
True
)
(
3
):
Conv2d
(
512
,
512
,
kernel_size
=(
3
,
3
),
stride
=(
1
,
1
),
padding
=(
1
,
1
),
bias
=
False
)
(
4
):
BatchNorm2d
(
512
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
5
):
ReLU
(
inplace
=
True
)
)
(
up2
):
Sequential
(
(
0
):
Upsample
(
scale_factor
=
2.0
,
mode
=
'bilinear'
)
(
1
):
Conv2d
(
512
,
256
,
kernel_size
=(
3
,
3
),
stride
=(
1
,
1
),
padding
=(
1
,
1
),
bias
=
False
)
(
2
):
BatchNorm2d
(
256
,
eps
=
1e-05
,
momentum
=
0.1
,
affine
=
True
,
track_running_stats
=
True
)
(
3
):
ReLU
(
inplace
=
True
)
(
4
):
Conv2d
(
256
,
256
,
kernel_size
=(
1
,
1
),
stride
=(
1
,
1
))
)
)
(
occ_head
):
BEVOCCHead2D
(
(
final_conv
):
ConvModule
(
(
conv
):
Conv2d
(
256
,
256
,
kernel_size
=(
3
,
3
),
stride
=(
1
,
1
),
padding
=(
1
,
1
))
(
activate
):
ReLU
(
inplace
=
True
)
)
(
predicter
):
Sequential
(
(
0
):
Linear
(
in_features
=
256
,
out_features
=
512
,
bias
=
True
)
(
1
):
Softplus
(
beta
=
1.0
,
threshold
=
20.0
)
(
2
):
Linear
(
in_features
=
512
,
out_features
=
288
,
bias
=
True
)
)
(
loss_occ
):
CrossEntropyLoss
(
avg_non_ignore
=
False
)
)
)
2026
-
04
-
03
16
:
26
:
58
,
319
-
mmdet
-
INFO
-
load
checkpoint
from
local
path
:
ckpts
/
bevdet
-
r50
-
cbgs
.
pth
2026
-
04
-
03
16
:
26
:
58
,
420
-
mmdet
-
WARNING
-
The
model
and
loaded
state
dict
do
not
match
exactly
size
mismatch
for
img_view_transformer
.
depth_net
.
weight
:
copying
a
param
with
shape
torch
.
Size
([
123
,
256
,
1
,
1
])
from
checkpoint
,
the
shape
in
current
model
is
torch
.
Size
([
152
,
256
,
1
,
1
]).
size
mismatch
for
img_view_transformer
.
depth_net
.
bias
:
copying
a
param
with
shape
torch
.
Size
([
123
])
from
checkpoint
,
the
shape
in
current
model
is
torch
.
Size
([
152
]).
unexpected
key
in
source
state_dict
:
pts_bbox_head
.
shared_conv
.
conv
.
weight
,
pts_bbox_head
.
shared_conv
.
bn
.
weight
,
pts_bbox_head
.
shared_conv
.
bn
.
bias
,
pts_bbox_head
.
shared_conv
.
bn
.
running_mean
,
pts_bbox_head
.
shared_conv
.
bn
.
running_var
,
pts_bbox_head
.
shared_conv
.
bn
.
num_batches_tracked
,
pts_bbox_head
.
task_heads
.0
.
reg
.0
.
conv
.
weight
,
pts_bbox_head
.
task_heads
.0
.
reg
.0
.
bn
.
weight
,
pts_bbox_head
.
task_heads
.0
.
reg
.0
.
bn
.
bias
,
pts_bbox_head
.
task_heads
.0
.
reg
.0
.
bn
.
running_mean
,
pts_bbox_head
.
task_heads
.0
.
reg
.0
.
bn
.
running_var
,
pts_bbox_head
.
task_heads
.0
.
reg
.0
.
bn
.
num_batches_tracked
,
pts_bbox_head
.
task_heads
.0
.
reg
.1
.
weight
,
pts_bbox_head
.
task_heads
.0
.
reg
.1
.
bias
,
pts_bbox_head
.
task_heads
.0
.
height
.0
.
conv
.
weight
,
pts_bbox_head
.
task_heads
.0
.
height
.0
.
bn
.
weight
,
pts_bbox_head
.
task_heads
.0
.
height
.0
.
bn
.
bias
,
pts_bbox_head
.
task_heads
.0
.
height
.0
.
bn
.
running_mean
,
pts_bbox_head
.
task_heads
.0
.
height
.0
.
bn
.
running_var
,
pts_bbox_head
.
task_heads
.0
.
height
.0
.
bn
.
num_batches_tracked
,
pts_bbox_head
.
task_heads
.0
.
height
.1
.
weight
,
pts_bbox_head
.
task_heads
.0
.
height
.1
.
bias
,
pts_bbox_head
.
task_heads
.0
.
dim
.0
.
conv
.
weight
,
pts_bbox_head
.
task_heads
.0
.
dim
.0
.
bn
.
weight
,
pts_bbox_head
.
task_heads
.0
.
dim
.0
.
bn
.
bias
,
pts_bbox_head
.
task_heads
.0
.
dim
.0
.
bn
.
running_mean
,
pts_bbox_head
.
task_heads
.0
.
dim
.0
.
bn
.
running_var
,
pts_bbox_head
.
task_heads
.0
.
dim
.0
.
bn
.
num_batches_tracked
,
pts_bbox_head
.
task_heads
.0
.
dim
.1
.
weight
,
pts_bbox_head
.
task_heads
.0
.
dim
.1
.
bias
,
pts_bbox_head
.
task_heads
.0
.
rot
.0
.
conv
.
weight
,
pts_bbox_head
.
task_heads
.0
.
rot
.0
.
bn
.
weight
,
pts_bbox_head
.
task_heads
.0
.
rot
.0
.
bn
.
bias
,
pts_bbox_head
.
task_heads
.0
.
rot
.0
.
bn
.
running_mean
,
pts_bbox_head
.
task_heads
.0
.
rot
.0
.
bn
.
running_var
,
pts_bbox_head
.
task_heads
.0
.
rot
.0
.
bn
.
num_batches_tracked
,
pts_bbox_head
.
task_heads
.0
.
rot
.1
.
weight
,
pts_bbox_head
.
task_heads
.0
.
rot
.1
.
bias
,
pts_bbox_head
.
task_heads
.0
.
vel
.0
.
conv
.
weight
,
pts_bbox_head
.
task_heads
.0
.
vel
.0
.
bn
.
weight
,
pts_bbox_head
.
task_heads
.0
.
vel
.0
.
bn
.
bias
,
pts_bbox_head
.
task_heads
.0
.
vel
.0
.
bn
.
running_mean
,
pts_bbox_head
.
task_heads
.0
.
vel
.0
.
bn
.
running_var
,
pts_bbox_head
.
task_heads
.0
.
vel
.0
.
bn
.
num_batches_tracked
,
pts_bbox_head
.
task_heads
.0
.
vel
.1
.
weight
,
pts_bbox_head
.
task_heads
.0
.
vel
.1
.
bias
,
pts_bbox_head
.
task_heads
.0
.
heatmap
.0
.
conv
.
weight
,
pts_bbox_head
.
task_heads
.0
.
heatmap
.0
.
bn
.
weight
,
pts_bbox_head
.
task_heads
.0
.
heatmap
.0
.
bn
.
bias
,
pts_bbox_head
.
task_heads
.0
.
heatmap
.0
.
bn
.
running_mean
,
pts_bbox_head
.
task_heads
.0
.
heatmap
.0
.
bn
.
running_var
,
pts_bbox_head
.
task_heads
.0
.
heatmap
.0
.
bn
.
num_batches_tracked
,
pts_bbox_head
.
task_heads
.0
.
heatmap
.1
.
weight
,
pts_bbox_head
.
task_heads
.0
.
heatmap
.1
.
bias
missing
keys
in
source
state_dict
:
occ_head
.
final_conv
.
conv
.
weight
,
occ_head
.
final_conv
.
conv
.
bias
,
occ_head
.
predicter
.0
.
weight
,
occ_head
.
predicter
.0
.
bias
,
occ_head
.
predicter
.2
.
weight
,
occ_head
.
predicter
.2
.
bias
2026
-
04
-
03
16
:
26
:
58
,
422
-
mmdet
-
INFO
-
Start
running
,
host
:
root
@
bw61
,
work_dir
:
/
workspace
/
Flashocc
/
work_dirs
/
flashocc
-
r50
2026
-
04
-
03
16
:
26
:
58
,
422
-
mmdet
-
INFO
-
Hooks
will
be
executed
in
the
following
order
:
before_run
:
(
VERY_HIGH
)
StepLrUpdaterHook
(
NORMAL
)
CheckpointHook
(
NORMAL
)
MEGVIIEMAHook
(
VERY_LOW
)
TextLoggerHook
(
VERY_LOW
)
TensorboardLoggerHook
--------------------
before_train_epoch
:
(
VERY_HIGH
)
StepLrUpdaterHook
(
NORMAL
)
DistSamplerSeedHook
(
LOW
)
IterTimerHook
(
VERY_LOW
)
TextLoggerHook
(
VERY_LOW
)
TensorboardLoggerHook
--------------------
before_train_iter
:
(
VERY_HIGH
)
StepLrUpdaterHook
(
LOW
)
IterTimerHook
--------------------
after_train_iter
:
(
ABOVE_NORMAL
)
OptimizerHook
(
NORMAL
)
CheckpointHook
(
NORMAL
)
MEGVIIEMAHook
(
LOW
)
IterTimerHook
(
VERY_LOW
)
TextLoggerHook
(
VERY_LOW
)
TensorboardLoggerHook
--------------------
after_train_epoch
:
(
NORMAL
)
CheckpointHook
(
NORMAL
)
MEGVIIEMAHook
(
VERY_LOW
)
TextLoggerHook
(
VERY_LOW
)
TensorboardLoggerHook
--------------------
before_val_epoch
:
(
NORMAL
)
DistSamplerSeedHook
(
LOW
)
IterTimerHook
(
VERY_LOW
)
TextLoggerHook
(
VERY_LOW
)
TensorboardLoggerHook
--------------------
before_val_iter
:
(
LOW
)
IterTimerHook
--------------------
after_val_iter
:
(
LOW
)
IterTimerHook
--------------------
after_val_epoch
:
(
VERY_LOW
)
TextLoggerHook
(
VERY_LOW
)
TensorboardLoggerHook
--------------------
after_run
:
(
VERY_LOW
)
TextLoggerHook
(
VERY_LOW
)
TensorboardLoggerHook
--------------------
2026
-
04
-
03
16
:
26
:
58
,
423
-
mmdet
-
INFO
-
workflow
:
[(
'train'
,
1
)],
max
:
24
epochs
2026
-
04
-
03
16
:
26
:
58
,
423
-
mmdet
-
INFO
-
Checkpoints
will
be
saved
to
/
workspace
/
Flashocc
/
work_dirs
/
flashocc
-
r50
by
HardDiskBackend
.
2026
-
04
-
03
16
:
37
:
58
,
297
-
mmdet
-
INFO
-
Epoch
[
1
][
1
/
147
]
lr
:
1.000e-07
,
eta
:
26
days
,
22
:
22
:
17
,
time
:
659.750
,
data_time
:
15.432
,
memory
:
32423
,
loss_occ
:
3.0086
,
loss
:
3.0086
,
grad_norm
:
3.9001
2026
-
04
-
03
16
:
38
:
03
,
001
-
mmdet
-
INFO
-
Epoch
[
1
][
2
/
147
]
lr
:
5.995e-07
,
eta
:
13
days
,
13
:
23
:
57
,
time
:
4.707
,
data_time
:
0.005
,
memory
:
32769
,
loss_occ
:
3.0129
,
loss
:
3.0129
,
grad_norm
:
3.9387
2026
-
04
-
03
16
:
38
:
04
,
134
-
mmdet
-
INFO
-
Epoch
[
1
][
3
/
147
]
lr
:
1.099e-06
,
eta
:
9
days
,
1
:
14
:
27
,
time
:
1.133
,
data_time
:
0.003
,
memory
:
32769
,
loss_occ
:
3.0150
,
loss
:
3.0150
,
grad_norm
:
3.8985
2026
-
04
-
03
16
:
38
:
05
,
261
-
mmdet
-
INFO
-
Epoch
[
1
][
4
/
147
]
lr
:
1.599e-06
,
eta
:
6
days
,
19
:
09
:
36
,
time
:
1.126
,
data_time
:
0.002
,
memory
:
32769
,
loss_occ
:
3.0085
,
loss
:
3.0085
,
grad_norm
:
3.8741
2026
-
04
-
03
16
:
38
:
06
,
388
-
mmdet
-
INFO
-
Epoch
[
1
][
5
/
147
]
lr
:
2.098e-06
,
eta
:
5
days
,
10
:
42
:
41
,
time
:
1.127
,
data_time
:
0.003
,
memory
:
32769
,
loss_occ
:
3.0044
,
loss
:
3.0044
,
grad_norm
:
3.8639
2026
-
04
-
03
16
:
38
:
07
,
513
-
mmdet
-
INFO
-
Epoch
[
1
][
6
/
147
]
lr
:
2.597e-06
,
eta
:
4
days
,
13
:
04
:
44
,
time
:
1.126
,
data_time
:
0.003
,
memory
:
32769
,
loss_occ
:
3.0085
,
loss
:
3.0085
,
grad_norm
:
3.8423
2026
-
04
-
03
16
:
38
:
08
,
643
-
mmdet
-
INFO
-
Epoch
[
1
][
7
/
147
]
lr
:
3.097e-06
,
eta
:
3
days
,
21
:
37
:
37
,
time
:
1.126
,
data_time
:
0.003
,
memory
:
32769
,
loss_occ
:
3.0085
,
loss
:
3.0085
,
grad_norm
:
3.9473
2026
-
04
-
03
16
:
38
:
09
,
771
-
mmdet
-
INFO
-
Epoch
[
1
][
8
/
147
]
lr
:
3.597e-06
,
eta
:
3
days
,
10
:
02
:
19
,
time
:
1.130
,
data_time
:
0.006
,
memory
:
32769
,
loss_occ
:
3.0038
,
loss
:
3.0038
,
grad_norm
:
3.9215
2026
-
04
-
03
16
:
38
:
10
,
898
-
mmdet
-
INFO
-
Epoch
[
1
][
9
/
147
]
lr
:
4.096e-06
,
eta
:
3
days
,
1
:
01
:
30
,
time
:
1.128
,
data_time
:
0.004
,
memory
:
32769
,
loss_occ
:
2.9969
,
loss
:
2.9969
,
grad_norm
:
3.8659
2026
-
04
-
03
16
:
38
:
12
,
026
-
mmdet
-
INFO
-
Epoch
[
1
][
10
/
147
]
lr
:
4.596e-06
,
eta
:
2
days
,
17
:
48
:
50
,
time
:
1.127
,
data_time
:
0.003
,
memory
:
32769
,
loss_occ
:
2.9957
,
loss
:
2.9957
,
grad_norm
:
3.8820
docker-hub/FlashOCC/Flashocc/work_dirs/flashocc-r50/20260403_162651.log.json
0 → 100644
View file @
d2b71343
{
"env_info"
:
"sys.platform: linux
\n
Python: 3.10.12 (main, Aug 15 2025, 14:32:43) [GCC 11.4.0]
\n
CUDA available: True
\n
GPU 0,1,2,3,4,5,6,7: BW1000_H
\n
CUDA_HOME: /opt/dtk
\n
NVCC: Not Available
\n
GCC: x86_64-linux-gnu-gcc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0
\n
PyTorch: 2.5.1
\n
PyTorch compiling details: PyTorch built with:
\n
- GCC 10.3
\n
- C++ Version: 201703
\n
- Intel(R) Math Kernel Library Version 2020.0.4 Product Build 20200917 for Intel(R) 64 architecture applications
\n
- OpenMP 201511 (a.k.a. OpenMP 4.5)
\n
- LAPACK is enabled (usually provided by MKL)
\n
- NNPACK is enabled
\n
- CPU capability usage: AVX512
\n
- HIP Runtime 6.3.25521
\n
- MIOpen 2.18.0
\n
- Magma 2.8.0
\n
- Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, CXX_COMPILER=/opt/rh/gcc-toolset-10/root/usr/bin/c++, CXX_FLAGS= -D_GLIBCXX_USE_CXX11_ABI=1 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOCUPTI -DLIBKINETO_NOXPUPTI=ON -DUSE_FBGEMM -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wsuggest-override -Wno-psabi -Wno-error=old-style-cast -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Wno-stringop-overflow, FORCE_FALLBACK_CUDA_MPI=1, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, TORCH_VERSION=2.5.1, USE_CUDA=0, USE_CUDNN=OFF, USE_CUSPARSELT=OFF, USE_EXCEPTION_PTR=1, USE_GFLAGS=1, USE_GLOG=1, USE_GLOO=1, USE_MKL=ON, USE_MKLDNN=0, USE_MPI=1, USE_NCCL=1, USE_NNPACK=ON, USE_OPENMP=1, USE_ROCM=ON, USE_ROCM_KERNEL_ASSERT=OFF,
\n\n
TorchVision: 0.20.1
\n
OpenCV: 4.12.0
\n
MMCV: 1.6.1
\n
MMCV Compiler: GCC 10.3
\n
MMCV CUDA Compiler: rocm not available
\n
MMDetection: 2.25.1
\n
MMSegmentation: 0.25.0
\n
MMDetection3D: 1.0.0rc4+
\n
spconv2.0: False"
,
"config"
:
"point_cloud_range = [-51.2, -51.2, -5.0, 51.2, 51.2, 3.0]
\n
class_names = [
\n
'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',
\n
'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
\n
]
\n
dataset_type = 'NuScenesDatasetOccpancy'
\n
data_root = 'data/nuscenes/'
\n
input_modality = dict(
\n
use_lidar=False,
\n
use_camera=True,
\n
use_radar=False,
\n
use_map=False,
\n
use_external=False)
\n
file_client_args = dict(backend='disk')
\n
train_pipeline = [
\n
dict(
\n
type='PrepareImageInputs',
\n
is_train=True,
\n
data_config=dict(
\n
cams=[
\n
'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT',
\n
'CAM_BACK_LEFT', 'CAM_BACK', 'CAM_BACK_RIGHT'
\n
],
\n
Ncams=6,
\n
input_size=(256, 704),
\n
src_size=(900, 1600),
\n
resize=(-0.06, 0.11),
\n
rot=(-5.4, 5.4),
\n
flip=True,
\n
crop_h=(0.0, 0.0),
\n
resize_test=0.0),
\n
sequential=False),
\n
dict(
\n
type='LoadAnnotationsBEVDepth',
\n
bda_aug_conf=dict(
\n
rot_lim=(-0.0, 0.0),
\n
scale_lim=(1.0, 1.0),
\n
flip_dx_ratio=0.5,
\n
flip_dy_ratio=0.5),
\n
classes=[
\n
'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
\n
'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
\n
],
\n
is_train=True),
\n
dict(type='LoadOccGTFromFile'),
\n
dict(
\n
type='LoadPointsFromFile',
\n
coord_type='LIDAR',
\n
load_dim=5,
\n
use_dim=5,
\n
file_client_args=dict(backend='disk')),
\n
dict(
\n
type='PointToMultiViewDepth',
\n
downsample=1,
\n
grid_config=dict(
\n
x=[-40, 40, 0.4],
\n
y=[-40, 40, 0.4],
\n
z=[-1, 5.4, 6.4],
\n
depth=[1.0, 45.0, 0.5])),
\n
dict(
\n
type='DefaultFormatBundle3D',
\n
class_names=[
\n
'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
\n
'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
\n
]),
\n
dict(
\n
type='Collect3D',
\n
keys=[
\n
'img_inputs', 'gt_depth', 'voxel_semantics', 'mask_lidar',
\n
'mask_camera'
\n
])
\n
]
\n
test_pipeline = [
\n
dict(
\n
type='PrepareImageInputs',
\n
data_config=dict(
\n
cams=[
\n
'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT',
\n
'CAM_BACK_LEFT', 'CAM_BACK', 'CAM_BACK_RIGHT'
\n
],
\n
Ncams=6,
\n
input_size=(256, 704),
\n
src_size=(900, 1600),
\n
resize=(-0.06, 0.11),
\n
rot=(-5.4, 5.4),
\n
flip=True,
\n
crop_h=(0.0, 0.0),
\n
resize_test=0.0),
\n
sequential=False),
\n
dict(
\n
type='LoadAnnotationsBEVDepth',
\n
bda_aug_conf=dict(
\n
rot_lim=(-0.0, 0.0),
\n
scale_lim=(1.0, 1.0),
\n
flip_dx_ratio=0.5,
\n
flip_dy_ratio=0.5),
\n
classes=[
\n
'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
\n
'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
\n
],
\n
is_train=False),
\n
dict(
\n
type='LoadPointsFromFile',
\n
coord_type='LIDAR',
\n
load_dim=5,
\n
use_dim=5,
\n
file_client_args=dict(backend='disk')),
\n
dict(
\n
type='MultiScaleFlipAug3D',
\n
img_scale=(1333, 800),
\n
pts_scale_ratio=1,
\n
flip=False,
\n
transforms=[
\n
dict(
\n
type='DefaultFormatBundle3D',
\n
class_names=[
\n
'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
\n
'barrier', 'motorcycle', 'bicycle', 'pedestrian',
\n
'traffic_cone'
\n
],
\n
with_label=False),
\n
dict(type='Collect3D', keys=['points', 'img_inputs'])
\n
])
\n
]
\n
eval_pipeline = [
\n
dict(
\n
type='LoadPointsFromFile',
\n
coord_type='LIDAR',
\n
load_dim=5,
\n
use_dim=5,
\n
file_client_args=dict(backend='disk')),
\n
dict(
\n
type='LoadPointsFromMultiSweeps',
\n
sweeps_num=10,
\n
file_client_args=dict(backend='disk')),
\n
dict(
\n
type='DefaultFormatBundle3D',
\n
class_names=[
\n
'car', 'truck', 'trailer', 'bus', 'construction_vehicle',
\n
'bicycle', 'motorcycle', 'pedestrian', 'traffic_cone', 'barrier'
\n
],
\n
with_label=False),
\n
dict(type='Collect3D', keys=['points'])
\n
]
\n
data = dict(
\n
samples_per_gpu=24,
\n
workers_per_gpu=24,
\n
train=dict(
\n
type='NuScenesDatasetOccpancy',
\n
data_root='data/nuscenes/',
\n
ann_file='data/nuscenes/bevdetv2-nuscenes_infos_train.pkl',
\n
pipeline=[
\n
dict(
\n
type='PrepareImageInputs',
\n
is_train=True,
\n
data_config=dict(
\n
cams=[
\n
'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT',
\n
'CAM_BACK_LEFT', 'CAM_BACK', 'CAM_BACK_RIGHT'
\n
],
\n
Ncams=6,
\n
input_size=(256, 704),
\n
src_size=(900, 1600),
\n
resize=(-0.06, 0.11),
\n
rot=(-5.4, 5.4),
\n
flip=True,
\n
crop_h=(0.0, 0.0),
\n
resize_test=0.0),
\n
sequential=False),
\n
dict(
\n
type='LoadAnnotationsBEVDepth',
\n
bda_aug_conf=dict(
\n
rot_lim=(-0.0, 0.0),
\n
scale_lim=(1.0, 1.0),
\n
flip_dx_ratio=0.5,
\n
flip_dy_ratio=0.5),
\n
classes=[
\n
'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
\n
'barrier', 'motorcycle', 'bicycle', 'pedestrian',
\n
'traffic_cone'
\n
],
\n
is_train=True),
\n
dict(type='LoadOccGTFromFile'),
\n
dict(
\n
type='LoadPointsFromFile',
\n
coord_type='LIDAR',
\n
load_dim=5,
\n
use_dim=5,
\n
file_client_args=dict(backend='disk')),
\n
dict(
\n
type='PointToMultiViewDepth',
\n
downsample=1,
\n
grid_config=dict(
\n
x=[-40, 40, 0.4],
\n
y=[-40, 40, 0.4],
\n
z=[-1, 5.4, 6.4],
\n
depth=[1.0, 45.0, 0.5])),
\n
dict(
\n
type='DefaultFormatBundle3D',
\n
class_names=[
\n
'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
\n
'barrier', 'motorcycle', 'bicycle', 'pedestrian',
\n
'traffic_cone'
\n
]),
\n
dict(
\n
type='Collect3D',
\n
keys=[
\n
'img_inputs', 'gt_depth', 'voxel_semantics', 'mask_lidar',
\n
'mask_camera'
\n
])
\n
],
\n
classes=[
\n
'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
\n
'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
\n
],
\n
modality=dict(
\n
use_lidar=False,
\n
use_camera=True,
\n
use_radar=False,
\n
use_map=False,
\n
use_external=False),
\n
test_mode=False,
\n
box_type_3d='LiDAR',
\n
use_valid_flag=True,
\n
stereo=False,
\n
filter_empty_gt=False,
\n
img_info_prototype='bevdet'),
\n
val=dict(
\n
type='NuScenesDatasetOccpancy',
\n
data_root='data/nuscenes/',
\n
ann_file='data/nuscenes/bevdetv2-nuscenes_infos_val.pkl',
\n
pipeline=[
\n
dict(
\n
type='PrepareImageInputs',
\n
data_config=dict(
\n
cams=[
\n
'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT',
\n
'CAM_BACK_LEFT', 'CAM_BACK', 'CAM_BACK_RIGHT'
\n
],
\n
Ncams=6,
\n
input_size=(256, 704),
\n
src_size=(900, 1600),
\n
resize=(-0.06, 0.11),
\n
rot=(-5.4, 5.4),
\n
flip=True,
\n
crop_h=(0.0, 0.0),
\n
resize_test=0.0),
\n
sequential=False),
\n
dict(
\n
type='LoadAnnotationsBEVDepth',
\n
bda_aug_conf=dict(
\n
rot_lim=(-0.0, 0.0),
\n
scale_lim=(1.0, 1.0),
\n
flip_dx_ratio=0.5,
\n
flip_dy_ratio=0.5),
\n
classes=[
\n
'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
\n
'barrier', 'motorcycle', 'bicycle', 'pedestrian',
\n
'traffic_cone'
\n
],
\n
is_train=False),
\n
dict(
\n
type='LoadPointsFromFile',
\n
coord_type='LIDAR',
\n
load_dim=5,
\n
use_dim=5,
\n
file_client_args=dict(backend='disk')),
\n
dict(
\n
type='MultiScaleFlipAug3D',
\n
img_scale=(1333, 800),
\n
pts_scale_ratio=1,
\n
flip=False,
\n
transforms=[
\n
dict(
\n
type='DefaultFormatBundle3D',
\n
class_names=[
\n
'car', 'truck', 'construction_vehicle', 'bus',
\n
'trailer', 'barrier', 'motorcycle', 'bicycle',
\n
'pedestrian', 'traffic_cone'
\n
],
\n
with_label=False),
\n
dict(type='Collect3D', keys=['points', 'img_inputs'])
\n
])
\n
],
\n
classes=[
\n
'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
\n
'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
\n
],
\n
modality=dict(
\n
use_lidar=False,
\n
use_camera=True,
\n
use_radar=False,
\n
use_map=False,
\n
use_external=False),
\n
test_mode=True,
\n
box_type_3d='LiDAR',
\n
stereo=False,
\n
filter_empty_gt=False,
\n
img_info_prototype='bevdet'),
\n
test=dict(
\n
type='NuScenesDatasetOccpancy',
\n
data_root='data/nuscenes/',
\n
ann_file='data/nuscenes/bevdetv2-nuscenes_infos_val.pkl',
\n
pipeline=[
\n
dict(
\n
type='PrepareImageInputs',
\n
data_config=dict(
\n
cams=[
\n
'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT',
\n
'CAM_BACK_LEFT', 'CAM_BACK', 'CAM_BACK_RIGHT'
\n
],
\n
Ncams=6,
\n
input_size=(256, 704),
\n
src_size=(900, 1600),
\n
resize=(-0.06, 0.11),
\n
rot=(-5.4, 5.4),
\n
flip=True,
\n
crop_h=(0.0, 0.0),
\n
resize_test=0.0),
\n
sequential=False),
\n
dict(
\n
type='LoadAnnotationsBEVDepth',
\n
bda_aug_conf=dict(
\n
rot_lim=(-0.0, 0.0),
\n
scale_lim=(1.0, 1.0),
\n
flip_dx_ratio=0.5,
\n
flip_dy_ratio=0.5),
\n
classes=[
\n
'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
\n
'barrier', 'motorcycle', 'bicycle', 'pedestrian',
\n
'traffic_cone'
\n
],
\n
is_train=False),
\n
dict(
\n
type='LoadPointsFromFile',
\n
coord_type='LIDAR',
\n
load_dim=5,
\n
use_dim=5,
\n
file_client_args=dict(backend='disk')),
\n
dict(
\n
type='MultiScaleFlipAug3D',
\n
img_scale=(1333, 800),
\n
pts_scale_ratio=1,
\n
flip=False,
\n
transforms=[
\n
dict(
\n
type='DefaultFormatBundle3D',
\n
class_names=[
\n
'car', 'truck', 'construction_vehicle', 'bus',
\n
'trailer', 'barrier', 'motorcycle', 'bicycle',
\n
'pedestrian', 'traffic_cone'
\n
],
\n
with_label=False),
\n
dict(type='Collect3D', keys=['points', 'img_inputs'])
\n
])
\n
],
\n
classes=[
\n
'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
\n
'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
\n
],
\n
modality=dict(
\n
use_lidar=False,
\n
use_camera=True,
\n
use_radar=False,
\n
use_map=False,
\n
use_external=False),
\n
test_mode=True,
\n
box_type_3d='LiDAR',
\n
stereo=False,
\n
filter_empty_gt=False,
\n
img_info_prototype='bevdet'))
\n
evaluation = dict(
\n
interval=1,
\n
pipeline=[
\n
dict(
\n
type='PrepareImageInputs',
\n
data_config=dict(
\n
cams=[
\n
'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT',
\n
'CAM_BACK_LEFT', 'CAM_BACK', 'CAM_BACK_RIGHT'
\n
],
\n
Ncams=6,
\n
input_size=(256, 704),
\n
src_size=(900, 1600),
\n
resize=(-0.06, 0.11),
\n
rot=(-5.4, 5.4),
\n
flip=True,
\n
crop_h=(0.0, 0.0),
\n
resize_test=0.0),
\n
sequential=False),
\n
dict(
\n
type='LoadAnnotationsBEVDepth',
\n
bda_aug_conf=dict(
\n
rot_lim=(-0.0, 0.0),
\n
scale_lim=(1.0, 1.0),
\n
flip_dx_ratio=0.5,
\n
flip_dy_ratio=0.5),
\n
classes=[
\n
'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
\n
'barrier', 'motorcycle', 'bicycle', 'pedestrian',
\n
'traffic_cone'
\n
],
\n
is_train=False),
\n
dict(
\n
type='LoadPointsFromFile',
\n
coord_type='LIDAR',
\n
load_dim=5,
\n
use_dim=5,
\n
file_client_args=dict(backend='disk')),
\n
dict(
\n
type='MultiScaleFlipAug3D',
\n
img_scale=(1333, 800),
\n
pts_scale_ratio=1,
\n
flip=False,
\n
transforms=[
\n
dict(
\n
type='DefaultFormatBundle3D',
\n
class_names=[
\n
'car', 'truck', 'construction_vehicle', 'bus',
\n
'trailer', 'barrier', 'motorcycle', 'bicycle',
\n
'pedestrian', 'traffic_cone'
\n
],
\n
with_label=False),
\n
dict(type='Collect3D', keys=['points', 'img_inputs'])
\n
])
\n
],
\n
start=20)
\n
checkpoint_config = dict(interval=1, max_keep_ckpts=5)
\n
log_config = dict(
\n
interval=1,
\n
hooks=[dict(type='TextLoggerHook'),
\n
dict(type='TensorboardLoggerHook')])
\n
dist_params = dict(backend='nccl')
\n
log_level = 'INFO'
\n
work_dir = './work_dirs/flashocc-r50'
\n
load_from = 'ckpts/bevdet-r50-cbgs.pth'
\n
resume_from = None
\n
workflow = [('train', 1)]
\n
opencv_num_threads = 0
\n
mp_start_method = 'fork'
\n
plugin = True
\n
plugin_dir = 'projects/mmdet3d_plugin/'
\n
data_config = dict(
\n
cams=[
\n
'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT', 'CAM_BACK_LEFT',
\n
'CAM_BACK', 'CAM_BACK_RIGHT'
\n
],
\n
Ncams=6,
\n
input_size=(256, 704),
\n
src_size=(900, 1600),
\n
resize=(-0.06, 0.11),
\n
rot=(-5.4, 5.4),
\n
flip=True,
\n
crop_h=(0.0, 0.0),
\n
resize_test=0.0)
\n
grid_config = dict(
\n
x=[-40, 40, 0.4],
\n
y=[-40, 40, 0.4],
\n
z=[-1, 5.4, 6.4],
\n
depth=[1.0, 45.0, 0.5])
\n
voxel_size = [0.1, 0.1, 0.2]
\n
numC_Trans = 64
\n
model = dict(
\n
type='BEVDetOCC',
\n
img_backbone=dict(
\n
type='ResNet',
\n
depth=50,
\n
num_stages=4,
\n
out_indices=(2, 3),
\n
frozen_stages=-1,
\n
norm_cfg=dict(type='BN', requires_grad=True),
\n
norm_eval=False,
\n
with_cp=True,
\n
style='pytorch'),
\n
img_neck=dict(
\n
type='CustomFPN',
\n
in_channels=[1024, 2048],
\n
out_channels=256,
\n
num_outs=1,
\n
start_level=0,
\n
out_ids=[0]),
\n
img_view_transformer=dict(
\n
type='LSSViewTransformer',
\n
grid_config=dict(
\n
x=[-40, 40, 0.4],
\n
y=[-40, 40, 0.4],
\n
z=[-1, 5.4, 6.4],
\n
depth=[1.0, 45.0, 0.5]),
\n
input_size=(256, 704),
\n
in_channels=256,
\n
out_channels=64,
\n
sid=False,
\n
collapse_z=True,
\n
downsample=16),
\n
img_bev_encoder_backbone=dict(
\n
type='CustomResNet', numC_input=64, num_channels=[128, 256, 512]),
\n
img_bev_encoder_neck=dict(
\n
type='FPN_LSS', in_channels=640, out_channels=256),
\n
occ_head=dict(
\n
type='BEVOCCHead2D',
\n
in_dim=256,
\n
out_dim=256,
\n
Dz=16,
\n
use_mask=True,
\n
num_classes=18,
\n
use_predicter=True,
\n
class_balance=False,
\n
loss_occ=dict(
\n
type='CrossEntropyLoss',
\n
use_sigmoid=False,
\n
ignore_index=255,
\n
loss_weight=1.0)))
\n
bda_aug_conf = dict(
\n
rot_lim=(-0.0, 0.0),
\n
scale_lim=(1.0, 1.0),
\n
flip_dx_ratio=0.5,
\n
flip_dy_ratio=0.5)
\n
share_data_config = dict(
\n
type='NuScenesDatasetOccpancy',
\n
data_root='data/nuscenes/',
\n
classes=[
\n
'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',
\n
'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
\n
],
\n
modality=dict(
\n
use_lidar=False,
\n
use_camera=True,
\n
use_radar=False,
\n
use_map=False,
\n
use_external=False),
\n
stereo=False,
\n
filter_empty_gt=False,
\n
img_info_prototype='bevdet')
\n
test_data_config = dict(
\n
pipeline=[
\n
dict(
\n
type='PrepareImageInputs',
\n
data_config=dict(
\n
cams=[
\n
'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT',
\n
'CAM_BACK_LEFT', 'CAM_BACK', 'CAM_BACK_RIGHT'
\n
],
\n
Ncams=6,
\n
input_size=(256, 704),
\n
src_size=(900, 1600),
\n
resize=(-0.06, 0.11),
\n
rot=(-5.4, 5.4),
\n
flip=True,
\n
crop_h=(0.0, 0.0),
\n
resize_test=0.0),
\n
sequential=False),
\n
dict(
\n
type='LoadAnnotationsBEVDepth',
\n
bda_aug_conf=dict(
\n
rot_lim=(-0.0, 0.0),
\n
scale_lim=(1.0, 1.0),
\n
flip_dx_ratio=0.5,
\n
flip_dy_ratio=0.5),
\n
classes=[
\n
'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
\n
'barrier', 'motorcycle', 'bicycle', 'pedestrian',
\n
'traffic_cone'
\n
],
\n
is_train=False),
\n
dict(
\n
type='LoadPointsFromFile',
\n
coord_type='LIDAR',
\n
load_dim=5,
\n
use_dim=5,
\n
file_client_args=dict(backend='disk')),
\n
dict(
\n
type='MultiScaleFlipAug3D',
\n
img_scale=(1333, 800),
\n
pts_scale_ratio=1,
\n
flip=False,
\n
transforms=[
\n
dict(
\n
type='DefaultFormatBundle3D',
\n
class_names=[
\n
'car', 'truck', 'construction_vehicle', 'bus',
\n
'trailer', 'barrier', 'motorcycle', 'bicycle',
\n
'pedestrian', 'traffic_cone'
\n
],
\n
with_label=False),
\n
dict(type='Collect3D', keys=['points', 'img_inputs'])
\n
])
\n
],
\n
ann_file='data/nuscenes/bevdetv2-nuscenes_infos_val.pkl',
\n
type='NuScenesDatasetOccpancy',
\n
data_root='data/nuscenes/',
\n
classes=[
\n
'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',
\n
'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
\n
],
\n
modality=dict(
\n
use_lidar=False,
\n
use_camera=True,
\n
use_radar=False,
\n
use_map=False,
\n
use_external=False),
\n
stereo=False,
\n
filter_empty_gt=False,
\n
img_info_prototype='bevdet')
\n
key = 'test'
\n
optimizer = dict(type='AdamW', lr=0.0001, weight_decay=0.01)
\n
optimizer_config = dict(grad_clip=dict(max_norm=5, norm_type=2))
\n
lr_config = dict(
\n
policy='step',
\n
warmup='linear',
\n
warmup_iters=200,
\n
warmup_ratio=0.001,
\n
step=[24])
\n
runner = dict(type='EpochBasedRunner', max_epochs=24)
\n
custom_hooks = [
\n
dict(type='MEGVIIEMAHook', init_updates=10560, priority='NORMAL')
\n
]
\n
gpu_ids = range(0, 8)
\n
"
,
"seed"
:
0
,
"exp_name"
:
"flashocc-r50.py"
}
{
"mode"
:
"train"
,
"epoch"
:
1
,
"iter"
:
1
,
"lr"
:
0.0
,
"memory"
:
32423
,
"data_time"
:
15.43241
,
"loss_occ"
:
3.00864
,
"loss"
:
3.00864
,
"grad_norm"
:
3.90007
,
"time"
:
659.74966
}
{
"mode"
:
"train"
,
"epoch"
:
1
,
"iter"
:
2
,
"lr"
:
0.0
,
"memory"
:
32769
,
"data_time"
:
0.00516
,
"loss_occ"
:
3.01287
,
"loss"
:
3.01287
,
"grad_norm"
:
3.9387
,
"time"
:
4.70719
}
{
"mode"
:
"train"
,
"epoch"
:
1
,
"iter"
:
3
,
"lr"
:
0.0
,
"memory"
:
32769
,
"data_time"
:
0.00255
,
"loss_occ"
:
3.015
,
"loss"
:
3.015
,
"grad_norm"
:
3.89853
,
"time"
:
1.13283
}
{
"mode"
:
"train"
,
"epoch"
:
1
,
"iter"
:
4
,
"lr"
:
0.0
,
"memory"
:
32769
,
"data_time"
:
0.00234
,
"loss_occ"
:
3.00854
,
"loss"
:
3.00854
,
"grad_norm"
:
3.87413
,
"time"
:
1.12622
}
{
"mode"
:
"train"
,
"epoch"
:
1
,
"iter"
:
5
,
"lr"
:
0.0
,
"memory"
:
32769
,
"data_time"
:
0.00279
,
"loss_occ"
:
3.0044
,
"loss"
:
3.0044
,
"grad_norm"
:
3.86394
,
"time"
:
1.12671
}
{
"mode"
:
"train"
,
"epoch"
:
1
,
"iter"
:
6
,
"lr"
:
0.0
,
"memory"
:
32769
,
"data_time"
:
0.00295
,
"loss_occ"
:
3.00848
,
"loss"
:
3.00848
,
"grad_norm"
:
3.84233
,
"time"
:
1.12617
}
{
"mode"
:
"train"
,
"epoch"
:
1
,
"iter"
:
7
,
"lr"
:
0.0
,
"memory"
:
32769
,
"data_time"
:
0.00263
,
"loss_occ"
:
3.0085
,
"loss"
:
3.0085
,
"grad_norm"
:
3.94733
,
"time"
:
1.12608
}
{
"mode"
:
"train"
,
"epoch"
:
1
,
"iter"
:
8
,
"lr"
:
0.0
,
"memory"
:
32769
,
"data_time"
:
0.00604
,
"loss_occ"
:
3.00383
,
"loss"
:
3.00383
,
"grad_norm"
:
3.9215
,
"time"
:
1.13024
}
{
"mode"
:
"train"
,
"epoch"
:
1
,
"iter"
:
9
,
"lr"
:
0.0
,
"memory"
:
32769
,
"data_time"
:
0.0036
,
"loss_occ"
:
2.99689
,
"loss"
:
2.99689
,
"grad_norm"
:
3.86593
,
"time"
:
1.1281
}
{
"mode"
:
"train"
,
"epoch"
:
1
,
"iter"
:
10
,
"lr"
:
0.0
,
"memory"
:
32769
,
"data_time"
:
0.00266
,
"loss_occ"
:
2.9957
,
"loss"
:
2.9957
,
"grad_norm"
:
3.88205
,
"time"
:
1.12727
}
docker-hub/FlashOCC/Flashocc/work_dirs/flashocc-r50/flashocc-r50.py
0 → 100644
View file @
d2b71343
point_cloud_range
=
[
-
51.2
,
-
51.2
,
-
5.0
,
51.2
,
51.2
,
3.0
]
class_names
=
[
'car'
,
'truck'
,
'construction_vehicle'
,
'bus'
,
'trailer'
,
'barrier'
,
'motorcycle'
,
'bicycle'
,
'pedestrian'
,
'traffic_cone'
]
dataset_type
=
'NuScenesDatasetOccpancy'
data_root
=
'data/nuscenes/'
input_modality
=
dict
(
use_lidar
=
False
,
use_camera
=
True
,
use_radar
=
False
,
use_map
=
False
,
use_external
=
False
)
file_client_args
=
dict
(
backend
=
'disk'
)
train_pipeline
=
[
dict
(
type
=
'PrepareImageInputs'
,
is_train
=
True
,
data_config
=
dict
(
cams
=
[
'CAM_FRONT_LEFT'
,
'CAM_FRONT'
,
'CAM_FRONT_RIGHT'
,
'CAM_BACK_LEFT'
,
'CAM_BACK'
,
'CAM_BACK_RIGHT'
],
Ncams
=
6
,
input_size
=
(
256
,
704
),
src_size
=
(
900
,
1600
),
resize
=
(
-
0.06
,
0.11
),
rot
=
(
-
5.4
,
5.4
),
flip
=
True
,
crop_h
=
(
0.0
,
0.0
),
resize_test
=
0.0
),
sequential
=
False
),
dict
(
type
=
'LoadAnnotationsBEVDepth'
,
bda_aug_conf
=
dict
(
rot_lim
=
(
-
0.0
,
0.0
),
scale_lim
=
(
1.0
,
1.0
),
flip_dx_ratio
=
0.5
,
flip_dy_ratio
=
0.5
),
classes
=
[
'car'
,
'truck'
,
'construction_vehicle'
,
'bus'
,
'trailer'
,
'barrier'
,
'motorcycle'
,
'bicycle'
,
'pedestrian'
,
'traffic_cone'
],
is_train
=
True
),
dict
(
type
=
'LoadOccGTFromFile'
),
dict
(
type
=
'LoadPointsFromFile'
,
coord_type
=
'LIDAR'
,
load_dim
=
5
,
use_dim
=
5
,
file_client_args
=
dict
(
backend
=
'disk'
)),
dict
(
type
=
'PointToMultiViewDepth'
,
downsample
=
1
,
grid_config
=
dict
(
x
=
[
-
40
,
40
,
0.4
],
y
=
[
-
40
,
40
,
0.4
],
z
=
[
-
1
,
5.4
,
6.4
],
depth
=
[
1.0
,
45.0
,
0.5
])),
dict
(
type
=
'DefaultFormatBundle3D'
,
class_names
=
[
'car'
,
'truck'
,
'construction_vehicle'
,
'bus'
,
'trailer'
,
'barrier'
,
'motorcycle'
,
'bicycle'
,
'pedestrian'
,
'traffic_cone'
]),
dict
(
type
=
'Collect3D'
,
keys
=
[
'img_inputs'
,
'gt_depth'
,
'voxel_semantics'
,
'mask_lidar'
,
'mask_camera'
])
]
test_pipeline
=
[
dict
(
type
=
'PrepareImageInputs'
,
data_config
=
dict
(
cams
=
[
'CAM_FRONT_LEFT'
,
'CAM_FRONT'
,
'CAM_FRONT_RIGHT'
,
'CAM_BACK_LEFT'
,
'CAM_BACK'
,
'CAM_BACK_RIGHT'
],
Ncams
=
6
,
input_size
=
(
256
,
704
),
src_size
=
(
900
,
1600
),
resize
=
(
-
0.06
,
0.11
),
rot
=
(
-
5.4
,
5.4
),
flip
=
True
,
crop_h
=
(
0.0
,
0.0
),
resize_test
=
0.0
),
sequential
=
False
),
dict
(
type
=
'LoadAnnotationsBEVDepth'
,
bda_aug_conf
=
dict
(
rot_lim
=
(
-
0.0
,
0.0
),
scale_lim
=
(
1.0
,
1.0
),
flip_dx_ratio
=
0.5
,
flip_dy_ratio
=
0.5
),
classes
=
[
'car'
,
'truck'
,
'construction_vehicle'
,
'bus'
,
'trailer'
,
'barrier'
,
'motorcycle'
,
'bicycle'
,
'pedestrian'
,
'traffic_cone'
],
is_train
=
False
),
dict
(
type
=
'LoadPointsFromFile'
,
coord_type
=
'LIDAR'
,
load_dim
=
5
,
use_dim
=
5
,
file_client_args
=
dict
(
backend
=
'disk'
)),
dict
(
type
=
'MultiScaleFlipAug3D'
,
img_scale
=
(
1333
,
800
),
pts_scale_ratio
=
1
,
flip
=
False
,
transforms
=
[
dict
(
type
=
'DefaultFormatBundle3D'
,
class_names
=
[
'car'
,
'truck'
,
'construction_vehicle'
,
'bus'
,
'trailer'
,
'barrier'
,
'motorcycle'
,
'bicycle'
,
'pedestrian'
,
'traffic_cone'
],
with_label
=
False
),
dict
(
type
=
'Collect3D'
,
keys
=
[
'points'
,
'img_inputs'
])
])
]
eval_pipeline
=
[
dict
(
type
=
'LoadPointsFromFile'
,
coord_type
=
'LIDAR'
,
load_dim
=
5
,
use_dim
=
5
,
file_client_args
=
dict
(
backend
=
'disk'
)),
dict
(
type
=
'LoadPointsFromMultiSweeps'
,
sweeps_num
=
10
,
file_client_args
=
dict
(
backend
=
'disk'
)),
dict
(
type
=
'DefaultFormatBundle3D'
,
class_names
=
[
'car'
,
'truck'
,
'trailer'
,
'bus'
,
'construction_vehicle'
,
'bicycle'
,
'motorcycle'
,
'pedestrian'
,
'traffic_cone'
,
'barrier'
],
with_label
=
False
),
dict
(
type
=
'Collect3D'
,
keys
=
[
'points'
])
]
data
=
dict
(
samples_per_gpu
=
24
,
workers_per_gpu
=
24
,
train
=
dict
(
type
=
'NuScenesDatasetOccpancy'
,
data_root
=
'data/nuscenes/'
,
ann_file
=
'data/nuscenes/bevdetv2-nuscenes_infos_train.pkl'
,
pipeline
=
[
dict
(
type
=
'PrepareImageInputs'
,
is_train
=
True
,
data_config
=
dict
(
cams
=
[
'CAM_FRONT_LEFT'
,
'CAM_FRONT'
,
'CAM_FRONT_RIGHT'
,
'CAM_BACK_LEFT'
,
'CAM_BACK'
,
'CAM_BACK_RIGHT'
],
Ncams
=
6
,
input_size
=
(
256
,
704
),
src_size
=
(
900
,
1600
),
resize
=
(
-
0.06
,
0.11
),
rot
=
(
-
5.4
,
5.4
),
flip
=
True
,
crop_h
=
(
0.0
,
0.0
),
resize_test
=
0.0
),
sequential
=
False
),
dict
(
type
=
'LoadAnnotationsBEVDepth'
,
bda_aug_conf
=
dict
(
rot_lim
=
(
-
0.0
,
0.0
),
scale_lim
=
(
1.0
,
1.0
),
flip_dx_ratio
=
0.5
,
flip_dy_ratio
=
0.5
),
classes
=
[
'car'
,
'truck'
,
'construction_vehicle'
,
'bus'
,
'trailer'
,
'barrier'
,
'motorcycle'
,
'bicycle'
,
'pedestrian'
,
'traffic_cone'
],
is_train
=
True
),
dict
(
type
=
'LoadOccGTFromFile'
),
dict
(
type
=
'LoadPointsFromFile'
,
coord_type
=
'LIDAR'
,
load_dim
=
5
,
use_dim
=
5
,
file_client_args
=
dict
(
backend
=
'disk'
)),
dict
(
type
=
'PointToMultiViewDepth'
,
downsample
=
1
,
grid_config
=
dict
(
x
=
[
-
40
,
40
,
0.4
],
y
=
[
-
40
,
40
,
0.4
],
z
=
[
-
1
,
5.4
,
6.4
],
depth
=
[
1.0
,
45.0
,
0.5
])),
dict
(
type
=
'DefaultFormatBundle3D'
,
class_names
=
[
'car'
,
'truck'
,
'construction_vehicle'
,
'bus'
,
'trailer'
,
'barrier'
,
'motorcycle'
,
'bicycle'
,
'pedestrian'
,
'traffic_cone'
]),
dict
(
type
=
'Collect3D'
,
keys
=
[
'img_inputs'
,
'gt_depth'
,
'voxel_semantics'
,
'mask_lidar'
,
'mask_camera'
])
],
classes
=
[
'car'
,
'truck'
,
'construction_vehicle'
,
'bus'
,
'trailer'
,
'barrier'
,
'motorcycle'
,
'bicycle'
,
'pedestrian'
,
'traffic_cone'
],
modality
=
dict
(
use_lidar
=
False
,
use_camera
=
True
,
use_radar
=
False
,
use_map
=
False
,
use_external
=
False
),
test_mode
=
False
,
box_type_3d
=
'LiDAR'
,
use_valid_flag
=
True
,
stereo
=
False
,
filter_empty_gt
=
False
,
img_info_prototype
=
'bevdet'
),
val
=
dict
(
type
=
'NuScenesDatasetOccpancy'
,
data_root
=
'data/nuscenes/'
,
ann_file
=
'data/nuscenes/bevdetv2-nuscenes_infos_val.pkl'
,
pipeline
=
[
dict
(
type
=
'PrepareImageInputs'
,
data_config
=
dict
(
cams
=
[
'CAM_FRONT_LEFT'
,
'CAM_FRONT'
,
'CAM_FRONT_RIGHT'
,
'CAM_BACK_LEFT'
,
'CAM_BACK'
,
'CAM_BACK_RIGHT'
],
Ncams
=
6
,
input_size
=
(
256
,
704
),
src_size
=
(
900
,
1600
),
resize
=
(
-
0.06
,
0.11
),
rot
=
(
-
5.4
,
5.4
),
flip
=
True
,
crop_h
=
(
0.0
,
0.0
),
resize_test
=
0.0
),
sequential
=
False
),
dict
(
type
=
'LoadAnnotationsBEVDepth'
,
bda_aug_conf
=
dict
(
rot_lim
=
(
-
0.0
,
0.0
),
scale_lim
=
(
1.0
,
1.0
),
flip_dx_ratio
=
0.5
,
flip_dy_ratio
=
0.5
),
classes
=
[
'car'
,
'truck'
,
'construction_vehicle'
,
'bus'
,
'trailer'
,
'barrier'
,
'motorcycle'
,
'bicycle'
,
'pedestrian'
,
'traffic_cone'
],
is_train
=
False
),
dict
(
type
=
'LoadPointsFromFile'
,
coord_type
=
'LIDAR'
,
load_dim
=
5
,
use_dim
=
5
,
file_client_args
=
dict
(
backend
=
'disk'
)),
dict
(
type
=
'MultiScaleFlipAug3D'
,
img_scale
=
(
1333
,
800
),
pts_scale_ratio
=
1
,
flip
=
False
,
transforms
=
[
dict
(
type
=
'DefaultFormatBundle3D'
,
class_names
=
[
'car'
,
'truck'
,
'construction_vehicle'
,
'bus'
,
'trailer'
,
'barrier'
,
'motorcycle'
,
'bicycle'
,
'pedestrian'
,
'traffic_cone'
],
with_label
=
False
),
dict
(
type
=
'Collect3D'
,
keys
=
[
'points'
,
'img_inputs'
])
])
],
classes
=
[
'car'
,
'truck'
,
'construction_vehicle'
,
'bus'
,
'trailer'
,
'barrier'
,
'motorcycle'
,
'bicycle'
,
'pedestrian'
,
'traffic_cone'
],
modality
=
dict
(
use_lidar
=
False
,
use_camera
=
True
,
use_radar
=
False
,
use_map
=
False
,
use_external
=
False
),
test_mode
=
True
,
box_type_3d
=
'LiDAR'
,
stereo
=
False
,
filter_empty_gt
=
False
,
img_info_prototype
=
'bevdet'
),
test
=
dict
(
type
=
'NuScenesDatasetOccpancy'
,
data_root
=
'data/nuscenes/'
,
ann_file
=
'data/nuscenes/bevdetv2-nuscenes_infos_val.pkl'
,
pipeline
=
[
dict
(
type
=
'PrepareImageInputs'
,
data_config
=
dict
(
cams
=
[
'CAM_FRONT_LEFT'
,
'CAM_FRONT'
,
'CAM_FRONT_RIGHT'
,
'CAM_BACK_LEFT'
,
'CAM_BACK'
,
'CAM_BACK_RIGHT'
],
Ncams
=
6
,
input_size
=
(
256
,
704
),
src_size
=
(
900
,
1600
),
resize
=
(
-
0.06
,
0.11
),
rot
=
(
-
5.4
,
5.4
),
flip
=
True
,
crop_h
=
(
0.0
,
0.0
),
resize_test
=
0.0
),
sequential
=
False
),
dict
(
type
=
'LoadAnnotationsBEVDepth'
,
bda_aug_conf
=
dict
(
rot_lim
=
(
-
0.0
,
0.0
),
scale_lim
=
(
1.0
,
1.0
),
flip_dx_ratio
=
0.5
,
flip_dy_ratio
=
0.5
),
classes
=
[
'car'
,
'truck'
,
'construction_vehicle'
,
'bus'
,
'trailer'
,
'barrier'
,
'motorcycle'
,
'bicycle'
,
'pedestrian'
,
'traffic_cone'
],
is_train
=
False
),
dict
(
type
=
'LoadPointsFromFile'
,
coord_type
=
'LIDAR'
,
load_dim
=
5
,
use_dim
=
5
,
file_client_args
=
dict
(
backend
=
'disk'
)),
dict
(
type
=
'MultiScaleFlipAug3D'
,
img_scale
=
(
1333
,
800
),
pts_scale_ratio
=
1
,
flip
=
False
,
transforms
=
[
dict
(
type
=
'DefaultFormatBundle3D'
,
class_names
=
[
'car'
,
'truck'
,
'construction_vehicle'
,
'bus'
,
'trailer'
,
'barrier'
,
'motorcycle'
,
'bicycle'
,
'pedestrian'
,
'traffic_cone'
],
with_label
=
False
),
dict
(
type
=
'Collect3D'
,
keys
=
[
'points'
,
'img_inputs'
])
])
],
classes
=
[
'car'
,
'truck'
,
'construction_vehicle'
,
'bus'
,
'trailer'
,
'barrier'
,
'motorcycle'
,
'bicycle'
,
'pedestrian'
,
'traffic_cone'
],
modality
=
dict
(
use_lidar
=
False
,
use_camera
=
True
,
use_radar
=
False
,
use_map
=
False
,
use_external
=
False
),
test_mode
=
True
,
box_type_3d
=
'LiDAR'
,
stereo
=
False
,
filter_empty_gt
=
False
,
img_info_prototype
=
'bevdet'
))
# Evaluation settings: an evaluation interval, the index at which evaluation
# starts, and the data pipeline used while evaluating.
# NOTE(review): `interval`/`start` are presumably counted in epochs — confirm
# against the evaluation hook that consumes this dict.
evaluation = dict(
    interval=1,
    pipeline=[
        # Multi-camera image inputs (six surround-view cameras).
        dict(
            type='PrepareImageInputs',
            data_config=dict(
                cams=[
                    'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT',
                    'CAM_BACK_LEFT', 'CAM_BACK', 'CAM_BACK_RIGHT',
                ],
                Ncams=6,
                input_size=(256, 704),
                src_size=(900, 1600),
                resize=(-0.06, 0.11),
                rot=(-5.4, 5.4),
                flip=True,
                crop_h=(0.0, 0.0),
                resize_test=0.0,
            ),
            sequential=False,
        ),
        # Annotation loading with the BEV augmentation config; is_train=False.
        dict(
            type='LoadAnnotationsBEVDepth',
            bda_aug_conf=dict(
                rot_lim=(-0.0, 0.0),
                scale_lim=(1.0, 1.0),
                flip_dx_ratio=0.5,
                flip_dy_ratio=0.5,
            ),
            classes=[
                'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
                'barrier', 'motorcycle', 'bicycle', 'pedestrian',
                'traffic_cone',
            ],
            is_train=False,
        ),
        dict(
            type='LoadPointsFromFile',
            coord_type='LIDAR',
            load_dim=5,
            use_dim=5,
            file_client_args=dict(backend='disk'),
        ),
        # Test-time wrapper with flipping disabled and a single scale ratio.
        dict(
            type='MultiScaleFlipAug3D',
            img_scale=(1333, 800),
            pts_scale_ratio=1,
            flip=False,
            transforms=[
                dict(
                    type='DefaultFormatBundle3D',
                    class_names=[
                        'car', 'truck', 'construction_vehicle', 'bus',
                        'trailer', 'barrier', 'motorcycle', 'bicycle',
                        'pedestrian', 'traffic_cone',
                    ],
                    with_label=False,
                ),
                dict(type='Collect3D', keys=['points', 'img_inputs']),
            ],
        ),
    ],
    start=20,
)
# Checkpointing: save interval of 1, keeping at most 5 checkpoints.
checkpoint_config = dict(interval=1, max_keep_ckpts=5)

# Logging: text and TensorBoard logger hooks, logging interval of 1.
log_config = dict(
    interval=1,
    hooks=[
        dict(type='TextLoggerHook'),
        dict(type='TensorboardLoggerHook'),
    ],
)

# Distributed backend and general runtime settings.
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/flashocc-r50'
load_from = 'ckpts/bevdet-r50-cbgs.pth'
resume_from = None
workflow = [('train', 1)]
opencv_num_threads = 0
mp_start_method = 'fork'

# Plugin switch and the directory holding the project plugin code.
plugin = True
plugin_dir = 'projects/mmdet3d_plugin/'
# Camera/image configuration: six cameras, source and network input sizes,
# and the resize / rotation / flip / crop augmentation parameters.
data_config = dict(
    cams=[
        'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT',
        'CAM_BACK_LEFT', 'CAM_BACK', 'CAM_BACK_RIGHT',
    ],
    Ncams=6,
    input_size=(256, 704),
    src_size=(900, 1600),
    resize=(-0.06, 0.11),
    rot=(-5.4, 5.4),
    flip=True,
    crop_h=(0.0, 0.0),
    resize_test=0.0,
)

# Grid definition per axis.
# NOTE(review): each triple presumably reads [lower, upper, step] — confirm
# against the view transformer that consumes it.
grid_config = dict(
    x=[-40, 40, 0.4],
    y=[-40, 40, 0.4],
    z=[-1, 5.4, 6.4],
    depth=[1.0, 45.0, 0.5],
)

voxel_size = [0.1, 0.1, 0.2]

# Same value as the view transformer's out_channels and the BEV encoder's
# numC_input in the model definition (64).
numC_Trans = 64
# Model definition: image backbone -> image neck -> view transformer ->
# BEV encoder (backbone + neck) -> occupancy head.
model = dict(
    type='BEVDetOCC',
    img_backbone=dict(
        type='ResNet',
        depth=50,
        num_stages=4,
        out_indices=(2, 3),
        frozen_stages=-1,
        norm_cfg=dict(type='BN', requires_grad=True),
        norm_eval=False,
        with_cp=True,
        style='pytorch',
    ),
    img_neck=dict(
        type='CustomFPN',
        in_channels=[1024, 2048],
        out_channels=256,
        num_outs=1,
        start_level=0,
        out_ids=[0],
    ),
    img_view_transformer=dict(
        type='LSSViewTransformer',
        grid_config=dict(
            x=[-40, 40, 0.4],
            y=[-40, 40, 0.4],
            z=[-1, 5.4, 6.4],
            depth=[1.0, 45.0, 0.5],
        ),
        input_size=(256, 704),
        in_channels=256,
        out_channels=64,
        sid=False,
        collapse_z=True,
        downsample=16,
    ),
    img_bev_encoder_backbone=dict(
        type='CustomResNet',
        numC_input=64,
        num_channels=[128, 256, 512],
    ),
    img_bev_encoder_neck=dict(
        type='FPN_LSS',
        in_channels=640,
        out_channels=256,
    ),
    occ_head=dict(
        type='BEVOCCHead2D',
        in_dim=256,
        out_dim=256,
        Dz=16,
        use_mask=True,
        num_classes=18,
        use_predicter=True,
        class_balance=False,
        loss_occ=dict(
            type='CrossEntropyLoss',
            use_sigmoid=False,
            ignore_index=255,
            loss_weight=1.0,
        ),
    ),
)
# BEV augmentation parameters: rotation and scale limits plus the two
# flip ratios.
bda_aug_conf = dict(
    rot_lim=(-0.0, 0.0),
    scale_lim=(1.0, 1.0),
    flip_dx_ratio=0.5,
    flip_dy_ratio=0.5,
)

# Dataset settings without a pipeline or annotation file; the same keys and
# values also appear inside test_data_config.
share_data_config = dict(
    type='NuScenesDatasetOccpancy',
    data_root='data/nuscenes/',
    classes=[
        'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
        'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone',
    ],
    modality=dict(
        use_lidar=False,
        use_camera=True,
        use_radar=False,
        use_map=False,
        use_external=False,
    ),
    stereo=False,
    filter_empty_gt=False,
    img_info_prototype='bevdet',
)
# Test dataset configuration: the test pipeline and validation annotation
# file, followed by the dataset type, root, classes and modality settings.
test_data_config = dict(
    pipeline=[
        dict(
            type='PrepareImageInputs',
            data_config=dict(
                cams=[
                    'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT',
                    'CAM_BACK_LEFT', 'CAM_BACK', 'CAM_BACK_RIGHT',
                ],
                Ncams=6,
                input_size=(256, 704),
                src_size=(900, 1600),
                resize=(-0.06, 0.11),
                rot=(-5.4, 5.4),
                flip=True,
                crop_h=(0.0, 0.0),
                resize_test=0.0,
            ),
            sequential=False,
        ),
        dict(
            type='LoadAnnotationsBEVDepth',
            bda_aug_conf=dict(
                rot_lim=(-0.0, 0.0),
                scale_lim=(1.0, 1.0),
                flip_dx_ratio=0.5,
                flip_dy_ratio=0.5,
            ),
            classes=[
                'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
                'barrier', 'motorcycle', 'bicycle', 'pedestrian',
                'traffic_cone',
            ],
            is_train=False,
        ),
        dict(
            type='LoadPointsFromFile',
            coord_type='LIDAR',
            load_dim=5,
            use_dim=5,
            file_client_args=dict(backend='disk'),
        ),
        dict(
            type='MultiScaleFlipAug3D',
            img_scale=(1333, 800),
            pts_scale_ratio=1,
            flip=False,
            transforms=[
                dict(
                    type='DefaultFormatBundle3D',
                    class_names=[
                        'car', 'truck', 'construction_vehicle', 'bus',
                        'trailer', 'barrier', 'motorcycle', 'bicycle',
                        'pedestrian', 'traffic_cone',
                    ],
                    with_label=False,
                ),
                dict(type='Collect3D', keys=['points', 'img_inputs']),
            ],
        ),
    ],
    ann_file='data/nuscenes/bevdetv2-nuscenes_infos_val.pkl',
    type='NuScenesDatasetOccpancy',
    data_root='data/nuscenes/',
    classes=[
        'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
        'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone',
    ],
    modality=dict(
        use_lidar=False,
        use_camera=True,
        use_radar=False,
        use_map=False,
        use_external=False,
    ),
    stereo=False,
    filter_empty_gt=False,
    img_info_prototype='bevdet',
)

# NOTE(review): plain module-level string; looks like a leftover loop variable
# from assembling the per-split dataset configs — confirm before removing.
key = 'test'
# Optimizer and gradient clipping (L2 norm capped at 5).
optimizer = dict(type='AdamW', lr=0.0001, weight_decay=0.01)
optimizer_config = dict(grad_clip=dict(max_norm=5, norm_type=2))

# Step learning-rate policy with a 200-iteration linear warmup.
# NOTE(review): the only step milestone (24) equals runner.max_epochs below,
# so the decay may never take effect during training — confirm this is intended.
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=200,
    warmup_ratio=0.001,
    step=[24],
)

runner = dict(type='EpochBasedRunner', max_epochs=24)

# Extra training hook registered at NORMAL priority.
custom_hooks = [
    dict(
        type='MEGVIIEMAHook',
        init_updates=10560,
        priority='NORMAL',
    ),
]

gpu_ids = range(0, 8)
docker-hub/FlashOCC/Flashocc/work_dirs/flashocc-r50/tf_logs/events.out.tfevents.1775204673.bw61.849.0
0 → 100644
View file @
d2b71343
File added
docker-hub/FlashOCC/Flashocc/work_dirs/flashocc-r50/tf_logs/events.out.tfevents.1775204818.bw61.20636.0
0 → 100644
View file @
d2b71343
File added
MapTR
@
e03f097a
Subproject commit e03f097abef19e1ba3fed5f471a8d80fbfa0a064
mmdetection3d
@
962f0937
Subproject commit 962f093736ffe55c089bc618842a8b8567318c8c
Sparse4D
@
c41df4bb
Subproject commit c41df4bbf7bc82490f11ff55173abfcb3fb91425
docker-hub/qwen2.5-vl/readme.md
View file @
d2b71343
...
...
@@ -10,7 +10,7 @@ git clone -b core_v0.12.0 --recurse-submodules http://10.16.6.30/dcutoolkit/deep
cd dcu_megatron-core_v0.12.0/
python setup.py install
cd
.
./llama-factory
cd ./llama-factory
pip install -r requirements.txt
```
...
...
docker-hub/wan2.1/readme.md
View file @
d2b71343
...
...
@@ -4,7 +4,7 @@
```
docker run -dit --network=host --name=wan21 --privileged --device=/dev/kfd --device=/dev/dri --ipc=host --shm-size=128G --group-add video --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -u root --ulimit stack=-1:-1 --ulimit memlock=-1:-1 -v /opt/hyhal:/opt/hyhal:ro -v /public/opendas/DL_DATA/llm-models/:/models:ro harbor.sourcefind.cn:5443/dcu/admin/base/pytorch:2.5.1-ubuntu22.04-dtk25.04.4-1230-py3.10-20260115
cd musubi-tuner
cd
./
musubi-tuner
pip install -e .
```
...
...
Prev
1
…
9
10
11
12
13
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment