Commit d2b71343 authored by 雍大凯's avatar 雍大凯
Browse files

add code

parent 69e57885
#!/usr/bin/env bash
# Launch tools/test.py through Slurm's srun.
#
# Positional arguments:
#   $1  Slurm partition
#   $2  job name
#   $3  config file passed to tools/test.py
#   $4  checkpoint file passed to tools/test.py
#   $5+ extra arguments forwarded verbatim to tools/test.py
#
# Environment overrides: GPUS (default 8), GPUS_PER_NODE (default 8),
# CPUS_PER_TASK (default 5), SRUN_ARGS (extra srun options, default empty).
set -x

PARTITION=$1
JOB_NAME=$2
CONFIG=$3
CHECKPOINT=$4
GPUS=${GPUS:-8}
GPUS_PER_NODE=${GPUS_PER_NODE:-8}
CPUS_PER_TASK=${CPUS_PER_TASK:-5}
# Keep the extra arguments in an array so that values containing spaces or
# glob characters reach python as the caller wrote them.
PY_ARGS=("${@:5}")
SRUN_ARGS=${SRUN_ARGS:-""}

# SRUN_ARGS is intentionally expanded unquoted: it carries several srun
# options in a single string and must be word-split.
# shellcheck disable=SC2086
PYTHONPATH="$(dirname "$0")/..:${PYTHONPATH}" \
srun -p "${PARTITION}" \
  --job-name="${JOB_NAME}" \
  --gres=gpu:"${GPUS_PER_NODE}" \
  --ntasks="${GPUS}" \
  --ntasks-per-node="${GPUS_PER_NODE}" \
  --cpus-per-task="${CPUS_PER_TASK}" \
  --kill-on-bad-exit=1 \
  ${SRUN_ARGS} \
  python -u tools/test.py "${CONFIG}" "${CHECKPOINT}" --launcher="slurm" "${PY_ARGS[@]}"
#!/usr/bin/env bash
# Launch tools/train.py through Slurm's srun.
#
# Positional arguments:
#   $1  Slurm partition
#   $2  job name
#   $3  config file passed to tools/train.py
#   $4  work directory (forwarded as --work-dir)
#   $5+ extra arguments forwarded verbatim to tools/train.py
#
# Environment overrides: GPUS (default 8), GPUS_PER_NODE (default 8),
# CPUS_PER_TASK (default 5), SRUN_ARGS (extra srun options, default empty).
set -x

PARTITION=$1
JOB_NAME=$2
CONFIG=$3
WORK_DIR=$4
GPUS=${GPUS:-8}
GPUS_PER_NODE=${GPUS_PER_NODE:-8}
CPUS_PER_TASK=${CPUS_PER_TASK:-5}
SRUN_ARGS=${SRUN_ARGS:-""}
# Keep the extra arguments in an array so that values containing spaces or
# glob characters reach python as the caller wrote them.
PY_ARGS=("${@:5}")

# SRUN_ARGS is intentionally expanded unquoted: it carries several srun
# options in a single string and must be word-split.
# shellcheck disable=SC2086
PYTHONPATH="$(dirname "$0")/..:${PYTHONPATH}" \
srun -p "${PARTITION}" \
  --job-name="${JOB_NAME}" \
  --gres=gpu:"${GPUS_PER_NODE}" \
  --ntasks="${GPUS}" \
  --ntasks-per-node="${GPUS_PER_NODE}" \
  --cpus-per-task="${CPUS_PER_TASK}" \
  --kill-on-bad-exit=1 \
  ${SRUN_ARGS} \
  python -u tools/train.py "${CONFIG}" --work-dir="${WORK_DIR}" --launcher="slurm" "${PY_ARGS[@]}"
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import os
import warnings
import mmcv
import torch
from mmcv import Config, DictAction
from mmcv.cnn import fuse_conv_bn
from mmcv.parallel import MMDataParallel, MMDistributedDataParallel
from mmcv.runner import (get_dist_info, init_dist, load_checkpoint,
wrap_fp16_model)
import mmdet
from mmdet3d.apis import single_gpu_test
from mmdet3d.datasets import build_dataloader, build_dataset
from mmdet3d.models import build_model
from mmdet.apis import multi_gpu_test, set_random_seed
from mmdet.datasets import replace_ImageToTensor
# Pick up setup_multi_processes / compat_cfg from mmdet when it provides them
# and fall back to the mmdet3d copies otherwise. Probing the import directly
# avoids comparing version strings, which is lexicographic for str operands
# (e.g. '2.3.0' > '2.23.0' evaluates to True).
try:
    from mmdet.utils import setup_multi_processes
except ImportError:
    from mmdet3d.utils import setup_multi_processes
try:
    from mmdet.utils import compat_cfg
except ImportError:
    from mmdet3d.utils import compat_cfg
def parse_args():
    """Parse the command line of the test/evaluation script.

    Besides building the parser this function:

    * exports ``LOCAL_RANK`` into ``os.environ`` (from ``--local_rank``) when
      the variable is not already set;
    * rejects the simultaneous use of ``--options`` and ``--eval-options``;
    * maps the deprecated ``--options`` onto ``args.eval_options`` and emits
      a warning.

    Returns:
        argparse.Namespace: the parsed arguments.

    Raises:
        ValueError: if both ``--options`` and ``--eval-options`` are given.
    """
    parser = argparse.ArgumentParser(
        description='MMDet test (and eval) a model')
    parser.add_argument('config', help='test config file path')
    parser.add_argument('checkpoint', help='checkpoint file')
    parser.add_argument('--out', help='output result file in pickle format')
    parser.add_argument(
        '--fuse-conv-bn',
        action='store_true',
        help='Whether to fuse conv and bn, this will slightly increase '
        'the inference speed')
    parser.add_argument(
        '--gpu-ids',
        type=int,
        nargs='+',
        help='(Deprecated, please use --gpu-id) ids of gpus to use '
        '(only applicable to non-distributed training)')
    parser.add_argument(
        '--gpu-id',
        type=int,
        default=0,
        help='id of gpu to use '
        '(only applicable to non-distributed testing)')
    parser.add_argument(
        '--format-only',
        action='store_true',
        help='Format the output results without perform evaluation. It is '
        'useful when you want to format the result to a specific format and '
        'submit it to the test server')
    parser.add_argument(
        '--eval',
        type=str,
        nargs='+',
        help='evaluation metrics, which depends on the dataset, e.g., "bbox",'
        ' "segm", "proposal" for COCO, and "mAP", "recall" for PASCAL VOC')
    parser.add_argument('--show', action='store_true', help='show results')
    parser.add_argument(
        '--show-dir', help='directory where results will be saved')
    parser.add_argument(
        '--gpu-collect',
        action='store_true',
        help='whether to use gpu to collect results.')
    parser.add_argument(
        '--no-aavt',
        action='store_true',
        help='Do not align after view transformer.')
    parser.add_argument(
        '--tmpdir',
        help='tmp directory used for collecting results from multiple '
        'workers, available when gpu-collect is not specified')
    parser.add_argument('--seed', type=int, default=0, help='random seed')
    parser.add_argument(
        '--deterministic',
        action='store_true',
        help='whether to set deterministic options for CUDNN backend.')
    parser.add_argument(
        '--cfg-options',
        nargs='+',
        action=DictAction,
        help='override some settings in the used config, the key-value pair '
        'in xxx=yyy format will be merged into config file. If the value to '
        'be overwritten is a list, it should be like key="[a,b]" or key=a,b '
        'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" '
        'Note that the quotation marks are necessary and that no white space '
        'is allowed.')
    parser.add_argument(
        '--options',
        nargs='+',
        action=DictAction,
        help='custom options for evaluation, the key-value pair in xxx=yyy '
        'format will be kwargs for dataset.evaluate() function (deprecate), '
        'change to --eval-options instead.')
    parser.add_argument(
        '--eval-options',
        nargs='+',
        action=DictAction,
        help='custom options for evaluation, the key-value pair in xxx=yyy '
        'format will be kwargs for dataset.evaluate() function')
    parser.add_argument(
        '--launcher',
        choices=['none', 'pytorch', 'slurm', 'mpi'],
        default='none',
        help='job launcher')
    parser.add_argument('--local_rank', type=int, default=0)
    args = parser.parse_args()

    # Only fill LOCAL_RANK in when nothing else has already exported it.
    if 'LOCAL_RANK' not in os.environ:
        os.environ['LOCAL_RANK'] = str(args.local_rank)

    if args.options and args.eval_options:
        raise ValueError(
            '--options and --eval-options cannot be both specified, '
            '--options is deprecated in favor of --eval-options')
    if args.options:
        warnings.warn('--options is deprecated in favor of --eval-options')
        args.eval_options = args.options
    return args
def main():
    """Evaluate a checkpoint on the test split described by a config file.

    Steps, in order: validate the CLI arguments, load and patch the config,
    import the optional plugin package, initialise the distributed
    environment when a launcher is selected, build the dataset/dataloader and
    the model, load the checkpoint, run single- or multi-GPU inference and,
    on rank 0, dump / format / evaluate the outputs as requested.

    Raises:
        AssertionError: if none of --out/--eval/--format-only/--show/
            --show-dir is given.
        ValueError: if --eval and --format-only are combined, or if --out
            does not end in ``.pkl``/``.pickle``.
    """
    args = parse_args()

    assert args.out or args.eval or args.format_only or args.show \
        or args.show_dir, \
        ('Please specify at least one operation (save/eval/format/show the '
         'results / save the results) with the argument "--out", "--eval"'
         ', "--format-only", "--show" or "--show-dir"')

    if args.eval and args.format_only:
        raise ValueError('--eval and --format-only cannot be both specified')

    if args.out is not None and not args.out.endswith(('.pkl', '.pickle')):
        raise ValueError('The output file must be a pkl file.')

    cfg = Config.fromfile(args.config)
    if args.cfg_options is not None:
        cfg.merge_from_dict(args.cfg_options)

    cfg = compat_cfg(cfg)

    # set multi-process settings
    setup_multi_processes(cfg)

    # import modules from plugin/xx, registry will be updated
    if getattr(cfg, 'plugin', False):
        import importlib
        if hasattr(cfg, 'plugin_dir'):
            # 'a/b/c.py' -> dirname 'a/b' -> dotted module path 'a.b'
            module_path = os.path.dirname(cfg.plugin_dir).replace('/', '.')
            print(module_path)
        else:
            # import dir is the dirpath for the config file
            module_path = os.path.dirname(args.config).replace('/', '.')
        importlib.import_module(module_path)

    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True

    cfg.model.pretrained = None

    if args.gpu_ids is not None:
        cfg.gpu_ids = args.gpu_ids[0:1]
        warnings.warn('`--gpu-ids` is deprecated, please use `--gpu-id`. '
                      'Because we only support single GPU mode in '
                      'non-distributed testing. Use the first GPU '
                      'in `gpu_ids` now.')
    else:
        cfg.gpu_ids = [args.gpu_id]

    # init distributed env first, since logger depends on the dist info.
    if args.launcher == 'none':
        distributed = False
    else:
        distributed = True
        init_dist(args.launcher, **cfg.dist_params)

    test_dataloader_default_args = dict(
        samples_per_gpu=1, workers_per_gpu=2, dist=distributed, shuffle=False)

    # Read the optional test_dataloader section once, with the same
    # tolerant lookup that is used when the loader config is assembled below.
    test_dataloader_cfg = cfg.data.get('test_dataloader', {})
    batched = test_dataloader_cfg.get('samples_per_gpu', 1) > 1

    # in case the test dataset is concatenated
    if isinstance(cfg.data.test, dict):
        cfg.data.test.test_mode = True
        if batched:
            # Replace 'ImageToTensor' to 'DefaultFormatBundle'
            cfg.data.test.pipeline = replace_ImageToTensor(
                cfg.data.test.pipeline)
    elif isinstance(cfg.data.test, list):
        for ds_cfg in cfg.data.test:
            ds_cfg.test_mode = True
        if batched:
            for ds_cfg in cfg.data.test:
                ds_cfg.pipeline = replace_ImageToTensor(ds_cfg.pipeline)

    test_loader_cfg = {
        **test_dataloader_default_args,
        **test_dataloader_cfg
    }

    # set random seeds
    if args.seed is not None:
        set_random_seed(args.seed, deterministic=args.deterministic)

    # build the dataloader
    dataset = build_dataset(cfg.data.test)
    data_loader = build_dataloader(dataset, **test_loader_cfg)

    # build the model and load checkpoint
    if not args.no_aavt:
        if '4D' in cfg.model.type:
            # The key spelling is what the config/model side reads; keep it.
            cfg.model.align_after_view_transfromation = True
    cfg.model.train_cfg = None
    model = build_model(cfg.model, test_cfg=cfg.get('test_cfg'))
    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is not None:
        wrap_fp16_model(model)
    checkpoint = load_checkpoint(model, args.checkpoint, map_location='cpu')
    if args.fuse_conv_bn:
        model = fuse_conv_bn(model)
    # old versions did not save class info in checkpoints, this workaround is
    # for backward compatibility
    if 'CLASSES' in checkpoint.get('meta', {}):
        model.CLASSES = checkpoint['meta']['CLASSES']
    else:
        model.CLASSES = dataset.CLASSES
    # palette for visualization in segmentation tasks
    if 'PALETTE' in checkpoint.get('meta', {}):
        model.PALETTE = checkpoint['meta']['PALETTE']
    elif hasattr(dataset, 'PALETTE'):
        # segmentation dataset has `PALETTE` attribute
        model.PALETTE = dataset.PALETTE

    if not distributed:
        model = MMDataParallel(model, device_ids=cfg.gpu_ids)
        outputs = single_gpu_test(model, data_loader, args.show, args.show_dir)
    else:
        model = MMDistributedDataParallel(
            model.cuda(),
            device_ids=[torch.cuda.current_device()],
            broadcast_buffers=False)
        outputs = multi_gpu_test(model, data_loader, args.tmpdir,
                                 args.gpu_collect)

    # Only rank 0 writes / formats / evaluates the collected outputs.
    rank, _ = get_dist_info()
    if rank == 0:
        if args.out:
            print(f'\nwriting results to {args.out}')
            mmcv.dump(outputs, args.out)
        kwargs = {} if args.eval_options is None else args.eval_options
        if args.format_only:
            dataset.format_results(outputs, **kwargs)
        if args.eval:
            eval_kwargs = cfg.get('evaluation', {}).copy()
            # hard-code way to remove EvalHook args
            for key in [
                    'interval', 'tmpdir', 'start', 'gpu_collect', 'save_best',
                    'rule'
            ]:
                eval_kwargs.pop(key, None)
            eval_kwargs.update(dict(metric=args.eval, **kwargs))
            print(dataset.evaluate(outputs, **eval_kwargs))


if __name__ == '__main__':
    main()
# Copyright (c) OpenMMLab. All rights reserved.
from __future__ import division
import argparse
import copy
import os
import time
import warnings
from os import path as osp
import mmcv
import torch
import torch.distributed as dist
from mmcv import Config, DictAction
from mmcv.runner import get_dist_info, init_dist
from mmdet import __version__ as mmdet_version
from mmdet3d import __version__ as mmdet3d_version
from mmdet3d.apis import init_random_seed, train_model
from mmdet3d.datasets import build_dataset
from mmdet3d.models import build_model
from mmdet3d.utils import collect_env, get_root_logger
from mmdet.apis import set_random_seed
from mmseg import __version__ as mmseg_version
# Module-level side effect: flips a flag on torch's private ``_dynamo`` config
# as soon as this file is imported/executed.
# NOTE(review): this presumably requires a PyTorch build that ships
# ``torch._dynamo`` — confirm the minimum supported torch version.
torch._dynamo.config.capture_scalar_outputs=True
try:
    # If mmdet version > 2.20.0, setup_multi_processes would be imported and
    # used from mmdet instead of mmdet3d.
    from mmdet.utils import setup_multi_processes
except ImportError:
    # mmdet does not expose the helper: fall back to the mmdet3d copy.
    from mmdet3d.utils import setup_multi_processes
def parse_args():
    """Parse the command line of the training script.

    Besides building the parser this function:

    * exports ``LOCAL_RANK`` into ``os.environ`` (from ``--local_rank``) when
      the variable is not already set;
    * rejects the simultaneous use of ``--options`` and ``--cfg-options``;
    * maps the deprecated ``--options`` onto ``args.cfg_options`` and emits a
      warning.

    ``--gpus``, ``--gpu-ids`` and ``--gpu-id`` are mutually exclusive.

    Returns:
        argparse.Namespace: the parsed arguments.

    Raises:
        ValueError: if both ``--options`` and ``--cfg-options`` are given.
    """
    parser = argparse.ArgumentParser(description='Train a detector')
    parser.add_argument('config', help='train config file path')
    parser.add_argument('--work-dir', help='the dir to save logs and models')
    parser.add_argument(
        '--resume-from', help='the checkpoint file to resume from')
    parser.add_argument(
        '--auto-resume',
        action='store_true',
        help='resume from the latest checkpoint automatically')
    # The flag enables validation (it is forwarded as ``validate=`` to
    # train_model), so the help text must not be phrased as a negation.
    parser.add_argument(
        '--validate',
        action='store_true',
        help='whether to evaluate the checkpoint during training')
    group_gpus = parser.add_mutually_exclusive_group()
    group_gpus.add_argument(
        '--gpus',
        type=int,
        help='(Deprecated, please use --gpu-id) number of gpus to use '
        '(only applicable to non-distributed training)')
    group_gpus.add_argument(
        '--gpu-ids',
        type=int,
        nargs='+',
        help='(Deprecated, please use --gpu-id) ids of gpus to use '
        '(only applicable to non-distributed training)')
    group_gpus.add_argument(
        '--gpu-id',
        type=int,
        default=0,
        help='id of gpu to use '
        '(only applicable to non-distributed training)')
    parser.add_argument('--seed', type=int, default=0, help='random seed')
    parser.add_argument(
        '--diff-seed',
        action='store_true',
        help='Whether or not set different seeds for different ranks')
    parser.add_argument(
        '--deterministic',
        action='store_true',
        help='whether to set deterministic options for CUDNN backend.')
    parser.add_argument(
        '--options',
        nargs='+',
        action=DictAction,
        help='override some settings in the used config, the key-value pair '
        'in xxx=yyy format will be merged into config file (deprecate), '
        'change to --cfg-options instead.')
    parser.add_argument(
        '--cfg-options',
        nargs='+',
        action=DictAction,
        help='override some settings in the used config, the key-value pair '
        'in xxx=yyy format will be merged into config file. If the value to '
        'be overwritten is a list, it should be like key="[a,b]" or key=a,b '
        'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" '
        'Note that the quotation marks are necessary and that no white space '
        'is allowed.')
    parser.add_argument(
        '--launcher',
        choices=['none', 'pytorch', 'slurm', 'mpi'],
        default='none',
        help='job launcher')
    parser.add_argument('--local_rank', type=int, default=0)
    parser.add_argument(
        '--autoscale-lr',
        action='store_true',
        help='automatically scale lr with the number of gpus')
    args = parser.parse_args()

    # Only fill LOCAL_RANK in when nothing else has already exported it.
    if 'LOCAL_RANK' not in os.environ:
        os.environ['LOCAL_RANK'] = str(args.local_rank)

    if args.options and args.cfg_options:
        raise ValueError(
            '--options and --cfg-options cannot be both specified, '
            '--options is deprecated in favor of --cfg-options')
    if args.options:
        warnings.warn('--options is deprecated in favor of --cfg-options')
        args.cfg_options = args.options
    return args
def main():
    """Train a model from a config file.

    Steps, in order: load and patch the config, import the optional plugin
    package, resolve the work directory and GPU selection, initialise the
    distributed environment when a launcher is selected, set up logging and
    the ``meta`` record, seed the RNGs, build the model and dataset(s), and
    hand everything to ``train_model``.
    """
    args = parse_args()

    cfg = Config.fromfile(args.config)
    if args.cfg_options is not None:
        cfg.merge_from_dict(args.cfg_options)

    # set multi-process settings
    setup_multi_processes(cfg)

    # import modules from plugin/xx, registry will be updated
    if getattr(cfg, 'plugin', False):
        import importlib
        if hasattr(cfg, 'plugin_dir'):
            # 'a/b/c.py' -> dirname 'a/b' -> dotted module path 'a.b'
            module_path = os.path.dirname(cfg.plugin_dir).replace('/', '.')
            print(module_path)
        else:
            # import dir is the dirpath for the config file
            module_path = os.path.dirname(args.config).replace('/', '.')
        importlib.import_module(module_path)

    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True

    # work_dir is determined in this priority: CLI > segment in file > filename
    if args.work_dir is not None:
        # update configs according to CLI args if args.work_dir is not None
        cfg.work_dir = args.work_dir
    elif cfg.get('work_dir', None) is None:
        # use config filename as default work_dir if cfg.work_dir is None
        cfg.work_dir = osp.join('./work_dirs',
                                osp.splitext(osp.basename(args.config))[0])
    if args.resume_from is not None:
        cfg.resume_from = args.resume_from

    if args.auto_resume:
        cfg.auto_resume = args.auto_resume
        warnings.warn('`--auto-resume` is only supported when mmdet '
                      'version >= 2.20.0 for 3D detection model or '
                      'mmsegmentation version >= 0.21.0 for 3D '
                      'segmentation model')

    if args.gpus is not None:
        cfg.gpu_ids = range(1)
        warnings.warn('`--gpus` is deprecated because we only support '
                      'single GPU mode in non-distributed training. '
                      'Use `gpus=1` now.')
    if args.gpu_ids is not None:
        cfg.gpu_ids = args.gpu_ids[0:1]
        warnings.warn('`--gpu-ids` is deprecated, please use `--gpu-id`. '
                      'Because we only support single GPU mode in '
                      'non-distributed training. Use the first GPU '
                      'in `gpu_ids` now.')
    if args.gpus is None and args.gpu_ids is None:
        cfg.gpu_ids = [args.gpu_id]

    if args.autoscale_lr:
        # apply the linear scaling rule (https://arxiv.org/abs/1706.02677)
        cfg.optimizer['lr'] = cfg.optimizer['lr'] * len(cfg.gpu_ids) / 8

    # init distributed env first, since logger depends on the dist info.
    if args.launcher == 'none':
        distributed = False
    else:
        distributed = True
        init_dist(args.launcher, **cfg.dist_params)
        # re-set gpu_ids with distributed training mode
        _, world_size = get_dist_info()
        cfg.gpu_ids = range(world_size)

    # create work_dir
    mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))
    # dump config
    cfg.dump(osp.join(cfg.work_dir, osp.basename(args.config)))
    # init the logger before other steps
    timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    log_file = osp.join(cfg.work_dir, f'{timestamp}.log')
    # specify logger name, if we still use 'mmdet', the output info will be
    # filtered and won't be saved in the log_file
    # TODO: ugly workaround to judge whether we are training det or seg model
    if cfg.model.type in ['EncoderDecoder3D']:
        logger_name = 'mmseg'
    else:
        logger_name = 'mmdet'
    logger = get_root_logger(
        log_file=log_file, log_level=cfg.log_level, name=logger_name)

    # init the meta dict to record some important information such as
    # environment info and seed, which will be logged
    meta = dict()
    # log env info
    env_info_dict = collect_env()
    env_info = '\n'.join([(f'{k}: {v}') for k, v in env_info_dict.items()])
    dash_line = '-' * 60 + '\n'
    logger.info('Environment info:\n' + dash_line + env_info + '\n' +
                dash_line)
    meta['env_info'] = env_info
    meta['config'] = cfg.pretty_text

    # log some basic info
    logger.info(f'Distributed training: {distributed}')
    logger.info(f'Config:\n{cfg.pretty_text}')

    # set random seeds
    seed = init_random_seed(args.seed)
    if args.diff_seed:
        # Take the rank from get_dist_info() rather than dist.get_rank():
        # the latter needs an initialised process group, which does not
        # exist when --launcher is 'none'.
        rank, _ = get_dist_info()
        seed = seed + rank
    logger.info(f'Set random seed to {seed}, '
                f'deterministic: {args.deterministic}')
    set_random_seed(seed, deterministic=args.deterministic)
    cfg.seed = seed
    meta['seed'] = seed
    meta['exp_name'] = osp.basename(args.config)

    model = build_model(
        cfg.model,
        train_cfg=cfg.get('train_cfg'),
        test_cfg=cfg.get('test_cfg'))
    model.init_weights()

    logger.info(f'Model:\n{model}')
    datasets = [build_dataset(cfg.data.train)]
    if len(cfg.workflow) == 2:
        val_dataset = copy.deepcopy(cfg.data.val)
        # in case we use a dataset wrapper
        if 'dataset' in cfg.data.train:
            val_dataset.pipeline = cfg.data.train.dataset.pipeline
        else:
            val_dataset.pipeline = cfg.data.train.pipeline
        # set test_mode=False here in deep copied config
        # which do not affect AP/AR calculation later
        # refer to https://mmdetection3d.readthedocs.io/en/latest/tutorials/customize_runtime.html#customize-workflow  # noqa
        val_dataset.test_mode = False
        datasets.append(build_dataset(val_dataset))
    if cfg.checkpoint_config is not None:
        # save mmdet version, config file content and class names in
        # checkpoints as meta data
        cfg.checkpoint_config.meta = dict(
            mmdet_version=mmdet_version,
            mmseg_version=mmseg_version,
            mmdet3d_version=mmdet3d_version,
            config=cfg.pretty_text,
            CLASSES=datasets[0].CLASSES,
            PALETTE=datasets[0].PALETTE  # for segmentors
            if hasattr(datasets[0], 'PALETTE') else None)
    # add an attribute for visualization convenience
    model.CLASSES = datasets[0].CLASSES
    train_model(
        model,
        datasets,
        cfg,
        distributed=distributed,
        validate=args.validate,
        timestamp=timestamp,
        meta=meta)


if __name__ == '__main__':
    # The start method is forced to 'fork' before any training work begins.
    torch.multiprocessing.set_start_method('fork')
    main()
import argparse
import time
from os import path as osp
import mmcv
import numpy as np
from mmdet3d.core.bbox import limit_period
def update_sunrgbd_infos(root_dir, out_dir, pkl_files):
    """Negate the yaw stored in SUN RGB-D info files.

    For every listed file the infos are loaded from ``root_dir``; each item
    whose ``annos`` dict contains ``rotation_y`` gets ``rotation_y`` and the
    last column of ``gt_boxes_upright_depth`` negated in place, and the
    result is written under the same file name in ``out_dir``.

    Args:
        root_dir (str): directory holding the input pkl files.
        out_dir (str): directory receiving the updated pkl files. It is
            created when missing. When equal to ``root_dir`` the inputs are
            overwritten (after a 3 second warning pause).
        pkl_files (list[str]): file names to process.
    """
    print(f'{pkl_files} will be modified because '
          f'of the refactor of the Depth coordinate system.')
    if root_dir == out_dir:
        print(f'Warning, you are overwriting '
              f'the original data under {root_dir}.')
        # Leave a moment to abort before the originals are replaced.
        time.sleep(3)
    else:
        # A fresh --out-dir would otherwise only fail at dump time.
        mmcv.mkdir_or_exist(out_dir)
    for pkl_file in pkl_files:
        in_path = osp.join(root_dir, pkl_file)
        print(f'Reading from input file: {in_path}.')
        a = mmcv.load(in_path)
        print('Start updating:')
        for item in mmcv.track_iter_progress(a):
            annos = item['annos']
            # Items without annotations carry no 'rotation_y' and are kept.
            if 'rotation_y' in annos:
                annos['rotation_y'] = -annos['rotation_y']
                annos['gt_boxes_upright_depth'][:, -1:] = \
                    -annos['gt_boxes_upright_depth'][:, -1:]

        out_path = osp.join(out_dir, pkl_file)
        print(f'Writing to output file: {out_path}.')
        mmcv.dump(a, out_path, 'pkl')
def update_outdoor_dbinfos(root_dir, out_dir, pkl_files):
    """Convert ``box3d_lidar`` entries of dbinfos files in place.

    Each file is loaded as a mapping from class name to a list of samples.
    For every sample, elements 3 and 4 of ``box3d_lidar`` are swapped and
    element 6 (yaw) is replaced by ``-yaw - pi / 2`` passed through
    ``limit_period`` with a period of ``2 * pi``. The result is written under
    the same file name in ``out_dir``.

    Args:
        root_dir (str): directory holding the input pkl files.
        out_dir (str): directory receiving the updated pkl files. It is
            created when missing. When equal to ``root_dir`` the inputs are
            overwritten (after a 3 second warning pause).
        pkl_files (list[str]): file names to process.
    """
    print(f'{pkl_files} will be modified because '
          f'of the refactor of the LIDAR coordinate system.')
    if root_dir == out_dir:
        print(f'Warning, you are overwriting '
              f'the original data under {root_dir}.')
        # Leave a moment to abort before the originals are replaced.
        time.sleep(3)
    else:
        # A fresh --out-dir would otherwise only fail at dump time.
        mmcv.mkdir_or_exist(out_dir)
    for pkl_file in pkl_files:
        in_path = osp.join(root_dir, pkl_file)
        print(f'Reading from input file: {in_path}.')
        a = mmcv.load(in_path)
        print('Start updating:')
        for k in a.keys():
            print(f'Updating samples of class {k}:')
            for item in mmcv.track_iter_progress(a[k]):
                # Work from a snapshot so the swap reads the old values.
                boxes = item['box3d_lidar'].copy()
                # swap l, w (or dx, dy)
                item['box3d_lidar'][3] = boxes[4]
                item['box3d_lidar'][4] = boxes[3]
                # change yaw
                item['box3d_lidar'][6] = -boxes[6] - np.pi / 2
                item['box3d_lidar'][6] = limit_period(
                    item['box3d_lidar'][6], period=np.pi * 2)

        out_path = osp.join(out_dir, pkl_file)
        print(f'Writing to output file: {out_path}.')
        mmcv.dump(a, out_path, 'pkl')
def update_nuscenes_or_lyft_infos(root_dir, out_dir, pkl_files):
    """Convert ``gt_boxes`` of nuScenes/Lyft info files in place.

    Each file is loaded as a dict whose ``'infos'`` entry is iterated. For
    every item, columns 3 and 4 of ``gt_boxes`` are swapped and column 6
    (yaw) is replaced by ``-yaw - pi / 2`` passed through ``limit_period``
    with a period of ``2 * pi``. The result is written under the same file
    name in ``out_dir``.

    Args:
        root_dir (str): directory holding the input pkl files.
        out_dir (str): directory receiving the updated pkl files. It is
            created when missing. When equal to ``root_dir`` the inputs are
            overwritten (after a 3 second warning pause).
        pkl_files (list[str]): file names to process.
    """
    print(f'{pkl_files} will be modified because '
          f'of the refactor of the LIDAR coordinate system.')
    if root_dir == out_dir:
        print(f'Warning, you are overwriting '
              f'the original data under {root_dir}.')
        # Leave a moment to abort before the originals are replaced.
        time.sleep(3)
    else:
        # A fresh --out-dir would otherwise only fail at dump time.
        mmcv.mkdir_or_exist(out_dir)
    for pkl_file in pkl_files:
        in_path = osp.join(root_dir, pkl_file)
        print(f'Reading from input file: {in_path}.')
        a = mmcv.load(in_path)
        print('Start updating:')
        for item in mmcv.track_iter_progress(a['infos']):
            # Work from a snapshot so the swap reads the old values.
            boxes = item['gt_boxes'].copy()
            # swap l, w (or dx, dy)
            item['gt_boxes'][:, 3] = boxes[:, 4]
            item['gt_boxes'][:, 4] = boxes[:, 3]
            # change yaw
            item['gt_boxes'][:, 6] = -boxes[:, 6] - np.pi / 2
            item['gt_boxes'][:, 6] = limit_period(
                item['gt_boxes'][:, 6], period=np.pi * 2)

        out_path = osp.join(out_dir, pkl_file)
        print(f'Writing to output file: {out_path}.')
        mmcv.dump(a, out_path, 'pkl')
# Command-line interface. Note that the arguments are parsed at module level,
# i.e. also when this file is imported rather than executed.
parser = argparse.ArgumentParser(description='Arg parser for data coords '
                                 'update due to coords sys refactor.')
parser.add_argument('dataset', metavar='kitti', help='name of the dataset')
parser.add_argument(
    '--root-dir',
    type=str,
    default='./data/kitti',
    help='specify the root dir of dataset')
parser.add_argument(
    '--version',
    type=str,
    default='v1.0',
    required=False,
    help='specify the dataset version, no need for kitti')
parser.add_argument(
    '--out-dir',
    type=str,
    default=None,
    required=False,
    help='directory to write the updated info pkl files to '
    '(defaults to --root-dir, overwriting the originals)')
args = parser.parse_args()

if __name__ == '__main__':
    # Without an explicit output directory the files are updated in place.
    if args.out_dir is None:
        args.out_dir = args.root_dir
    if args.dataset == 'kitti':
        # KITTI infos is in CAM coord sys (unchanged)
        # KITTI dbinfos is in LIDAR coord sys (changed)
        # so we only update dbinfos
        pkl_files = ['kitti_dbinfos_train.pkl']
        update_outdoor_dbinfos(
            root_dir=args.root_dir, out_dir=args.out_dir, pkl_files=pkl_files)
    elif args.dataset == 'nuscenes':
        # nuScenes infos is in LIDAR coord sys (changed)
        # nuScenes dbinfos is in LIDAR coord sys (changed)
        # so we update both infos and dbinfos
        pkl_files = ['nuscenes_infos_val.pkl']
        if args.version != 'v1.0-mini':
            pkl_files.append('nuscenes_infos_train.pkl')
        else:
            pkl_files.append('nuscenes_infos_train_tiny.pkl')
        update_nuscenes_or_lyft_infos(
            root_dir=args.root_dir, out_dir=args.out_dir, pkl_files=pkl_files)
        # dbinfos are only updated for the non-mini versions.
        if args.version != 'v1.0-mini':
            pkl_files = ['nuscenes_dbinfos_train.pkl']
            update_outdoor_dbinfos(
                root_dir=args.root_dir,
                out_dir=args.out_dir,
                pkl_files=pkl_files)
    elif args.dataset == 'lyft':
        # Lyft infos is in LIDAR coord sys (changed)
        # Lyft has no dbinfos
        # so we update infos
        pkl_files = ['lyft_infos_train.pkl', 'lyft_infos_val.pkl']
        update_nuscenes_or_lyft_infos(
            root_dir=args.root_dir, out_dir=args.out_dir, pkl_files=pkl_files)
    elif args.dataset == 'waymo':
        # Waymo infos is in CAM coord sys (unchanged)
        # Waymo dbinfos is in LIDAR coord sys (changed)
        # so we only update dbinfos
        pkl_files = ['waymo_dbinfos_train.pkl']
        update_outdoor_dbinfos(
            root_dir=args.root_dir, out_dir=args.out_dir, pkl_files=pkl_files)
    elif args.dataset == 'scannet':
        # ScanNet infos is in DEPTH coord sys (changed)
        # but bbox is without yaw
        # so ScanNet is unaffected
        pass
    elif args.dataset == 's3dis':
        # Segmentation datasets are not affected
        pass
    elif args.dataset == 'sunrgbd':
        # SUNRGBD infos is in DEPTH coord sys (changed)
        # and bbox is with yaw
        # so we update infos
        pkl_files = ['sunrgbd_infos_train.pkl', 'sunrgbd_infos_val.pkl']
        update_sunrgbd_infos(
            root_dir=args.root_dir, out_dir=args.out_dir, pkl_files=pkl_files)
    else:
        # Make a mistyped dataset name visible instead of exiting silently
        # as if the update had been applied.
        print(f'Warning, dataset {args.dataset!r} is not recognized, '
              f'nothing was updated.')
#!/usr/bin/env bash
# Run tools/update_data_coords.py for one dataset through Slurm's srun.
#
# Positional arguments:
#   $1  Slurm partition
#   $2  dataset name; also used as the ./data/<dataset> directory, which is
#       passed as both --root-dir and --out-dir (in-place update)
#
# Environment overrides: GPUS (default 1), GPUS_PER_NODE (default 1),
# SRUN_ARGS (extra srun options, default empty).
set -x

# Assign and export separately so a failing $(pwd) is not masked by export.
PYTHONPATH="$(pwd):${PYTHONPATH}"
export PYTHONPATH

PARTITION=$1
DATASET=$2
GPUS=${GPUS:-1}
GPUS_PER_NODE=${GPUS_PER_NODE:-1}
SRUN_ARGS=${SRUN_ARGS:-""}
JOB_NAME=update_data_coords

# SRUN_ARGS is intentionally expanded unquoted: it carries several srun
# options in a single string and must be word-split.
# shellcheck disable=SC2086
srun -p "${PARTITION}" \
  --job-name="${JOB_NAME}" \
  --gres=gpu:"${GPUS_PER_NODE}" \
  --ntasks="${GPUS}" \
  --ntasks-per-node="${GPUS_PER_NODE}" \
  --kill-on-bad-exit=1 \
  ${SRUN_ARGS} \
  python -u tools/update_data_coords.py "${DATASET}" \
    --root-dir "./data/${DATASET}" \
    --out-dir "./data/${DATASET}"
import os
import cv2
import logging
import argparse
import importlib
import torch
import numpy as np
from tqdm import tqdm
from mmcv import Config, DictAction
from mmdet.apis import set_random_seed
from mmdet3d.datasets import build_dataset, build_dataloader
from mmcv.parallel import MMDataParallel
from mmcv.runner import load_checkpoint
from mmdet3d.models import build_model
import mmdet
from mmcv.runner import (get_dist_info, init_dist, load_checkpoint,
wrap_fp16_model)
import sys
sys.path.insert(0, os.getcwd())
# Pick up setup_multi_processes / compat_cfg from mmdet when it provides them
# and fall back to the mmdet3d copies otherwise. Probing the import directly
# avoids comparing version strings, which is lexicographic for str operands
# (e.g. '2.3.0' > '2.23.0' evaluates to True).
try:
    from mmdet.utils import setup_multi_processes
except ImportError:
    from mmdet3d.utils import setup_multi_processes
try:
    from mmdet.utils import compat_cfg
except ImportError:
    from mmdet3d.utils import compat_cfg
# Occupancy class names, ordered by class id. occ2img treats the last entry
# ('free') as the empty-voxel label.
occ_class_names = [
    'others', 'barrier', 'bicycle', 'bus', 'car', 'construction_vehicle',
    'motorcycle', 'pedestrian', 'traffic_cone', 'trailer', 'truck',
    'driveable_surface', 'other_flat', 'sidewalk',
    'terrain', 'manmade', 'vegetation', 'free'
]
# One 4-channel uint8 colour per class, in the same order as
# occ_class_names; occ2img indexes this table with class ids and keeps only
# the first three channels.
# NOTE(review): channels are presumably R, G, B, A — confirm against how the
# rendered image is written out.
color_map = np.array([
    [0, 0, 0, 255],  # others
    [255, 120, 50, 255],  # barrier              orange
    [255, 192, 203, 255],  # bicycle              pink
    [255, 255, 0, 255],  # bus                  yellow
    [0, 150, 245, 255],  # car                  blue
    [0, 255, 255, 255],  # construction_vehicle cyan
    [200, 180, 0, 255],  # motorcycle           dark orange
    [255, 0, 0, 255],  # pedestrian           red
    [255, 240, 150, 255],  # traffic_cone         light yellow
    [135, 60, 0, 255],  # trailer              brown
    [160, 32, 240, 255],  # truck                purple
    [255, 0, 255, 255],  # driveable_surface    dark pink
    [175, 0, 75, 255],  # other_flat           dark red
    [75, 0, 75, 255],  # sidewalk             dark purple
    [150, 240, 80, 255],  # terrain              light green
    [230, 230, 250, 255],  # manmade              white
    [0, 175, 0, 255],  # vegetation           green
    [255, 255, 255, 255],  # free                 white
], dtype=np.uint8)
# # from matplotlib import colors
# # hex_code_list = [
# # '#000000', '#D3D3D3', '#BC8F8F', '#F08080', '#A52A2A', '#FF0000', '#FFA07A', '#A0522D', '#FFE4C4', '#FFE4B5', \
# # '#DAA520', '#FFD700', '#F0E68C', '#BDB76B', '#808000', '#FFFF00', '#9ACD32', '#7FFF00', '#8FBC8F', '#90EE90', \
# # '#32CD32', '#008000', '#00FF00', '#00FA9A', '#7FFFD4', '#48D1CC', '#2F4F4F', '#ADD8E6', '#87CEFA', '#DC143C', \
# # '#696969', '#9370DB', '#8A2BE2', '#9400D3', '#DDA0DD', '#FF00FF', '#C71585', '#DB7093', '#FFB6C1', '#bf9b0c', \
# # '#01889f', '#bb3f3f', '#1805db', '#48c072', '#fffd37', '#c44240', '#6140ef', '#ceaefa', '#04f489', '#c6f808', \
# # '#507b9c', '#cffdbc', '#ac7e04', '#01386a', '#ffb7ce', '#ffd1df', '#D2691E', '#FFDAB9', '#a55af4', '#95d0fc', \
# # ]
# # hex_code_list = np.array(hex_code_list).reshape(6,10).transpose(1,0).reshape(-1)
# # pano_color_map = np.array([[int(value * 255) for value in colors.hex2color(hex_code)] for hex_code in hex_code_list], dtype=np.uint8)
import matplotlib.pyplot as plt
from scipy.ndimage import rotate
def draw_fig(tensor, name='tensor_image_colored_no_white.png'):
    """Save a 2D map as a borderless, axis-free 'viridis' image.

    The input has its leading axis squeezed, is rotated by -90 degrees
    (without changing the array shape) and flipped along axis 1 before being
    drawn.

    Args:
        tensor: array-like whose axis 0 has length 1.
            NOTE(review): presumably a (1, H, W) map — confirm with callers.
        name (str): output image path.
    """
    tensor = tensor.squeeze(0)
    tensor = rotate(tensor, -90, reshape=False)
    tensor = np.flip(tensor, axis=1)

    fig, ax = plt.subplots(figsize=(4, 4))
    try:
        ax.imshow(tensor, cmap='viridis')
        ax.axis('off')
        fig.savefig(name, bbox_inches='tight', pad_inches=0)
    finally:
        # Close (not just clear) the figure: every call creates a new one,
        # and cleared-but-open figures accumulate when this runs in a loop.
        plt.close(fig)
def generate_rgb_color(number):
    """Split an integer into three base-256 digits.

    Args:
        number: integer to decompose.

    Returns:
        list: ``[red, green, blue]`` where red is the lowest base-256 digit
        of ``number``, green the next one and blue the third.
    """
    channels = [number % 256]
    for divisor in (256, 65536):
        channels.append((number // divisor) % 256)
    return channels
# Palette of 256 colours used by occ2img to colour panoptic ids. The entries
# are drawn from NumPy's global RNG when the module is loaded, so the palette
# differs between runs unless that RNG was seeded beforehand.
pano_color_map = np.array([generate_rgb_color(number) for number in np.random.randint(0, 65536*256, 256)])
# Semantic class ids that occ2img recolours with the panoptic palette. By
# position in occ_class_names these are bicycle, bus, car,
# construction_vehicle, motorcycle, pedestrian, trailer and truck.
inst_class_ids=[2, 3, 4, 5, 6, 7, 9, 10]
def occ2img(semantics=None, is_pano=False, panoptics=None):
    """Flatten an occupancy grid along its last axis into a colour image.

    The grid is scanned slice by slice along the last axis; at every (h, w)
    location the non-free label from the highest slice index wins. Labels
    are then coloured with ``color_map``. When ``is_pano`` is set, pixels
    whose semantic label is in ``inst_class_ids`` are recoloured from
    ``pano_color_map`` using the panoptic id flattened the same way.

    Args:
        semantics (np.ndarray): (H, W, D) array of class ids; the last entry
            of ``occ_class_names`` is the free label.
        is_pano (bool): whether to overlay panoptic colours.
        panoptics (np.ndarray | None): (H, W, D) array of panoptic ids,
            required when ``is_pano`` is True. Ids index ``pano_color_map``
            directly, so they must be smaller than its length.

    Returns:
        np.ndarray: the 3-channel image returned by ``cv2.resize`` with
        ``dsize=(800, 800)``.
    """
    H, W, D = semantics.shape

    free_id = len(occ_class_names) - 1
    semantics_2d = np.ones([H, W], dtype=np.int32) * free_id

    # Later slices overwrite earlier ones wherever they are occupied.
    for i in range(D):
        semantics_i = semantics[..., i]
        non_free_mask = (semantics_i != free_id)
        semantics_2d[non_free_mask] = semantics_i[non_free_mask]

    viz = color_map[semantics_2d]
    viz = viz[..., :3]

    # Use the builtin bool: the np.bool alias was removed in NumPy 1.24.
    inst_mask = np.zeros_like(semantics_2d, dtype=bool)
    for ind in inst_class_ids:
        inst_mask[semantics_2d == ind] = True

    if is_pano:
        panoptics_2d = np.ones([H, W], dtype=np.int32) * 0
        for i in range(D):
            panoptics_i = panoptics[..., i]
            semantics_i = semantics[..., i]
            non_free_mask = (semantics_i != free_id)
            panoptics_2d[non_free_mask] = panoptics_i[non_free_mask]
        viz_pano = pano_color_map[panoptics_2d]
        viz[inst_mask, :] = viz_pano[inst_mask, :]

    viz = cv2.resize(viz, dsize=(800, 800))
    return viz
def main():
    """Run a checkpoint over the test split and write occupancy visualizations.

    Command-line arguments:
        --config: path to the mmcv config file.
        --weights: checkpoint passed to ``load_checkpoint``.
        --viz-dir: output directory for the rendered JPEGs (created if absent).
        --override: optional ``key=value`` pairs merged into the config.
        --launcher: job launcher; anything other than ``none`` initializes the
            distributed environment via ``init_dist``.
        --draw-sem-gt / --draw-pano-gt: additionally render the ground-truth
            semantic / panoptic occupancy loaded from ``labels.npz``.
        --surround-view-img: additionally write a 2x3 mosaic of the input images.
        --surround-pano-gt: accepted but not read anywhere in this function.

    For every batch ``i`` the function writes ``%04d-sem.jpg`` and
    ``%04d-inst.jpg`` into ``--viz-dir`` and prints each written path.
    """
    parser = argparse.ArgumentParser(description='Validate a detector')
    parser.add_argument('--config', required=True)
    parser.add_argument('--weights', required=True)
    parser.add_argument('--viz-dir', required=True)
    parser.add_argument('--override', nargs='+', action=DictAction)
    parser.add_argument(
        '--launcher',
        choices=['none', 'pytorch', 'slurm', 'mpi'],
        default='none',
        help='job launcher')
    parser.add_argument('--draw-sem-gt', action='store_true')
    parser.add_argument('--draw-pano-gt', action='store_true')
    parser.add_argument('--surround-view-img', action='store_true')
    parser.add_argument('--surround-pano-gt', action='store_true')
    args = parser.parse_args()

    # parse configs
    cfgs = Config.fromfile(args.config)
    if args.override is not None:
        cfgs.merge_from_dict(args.override)
    cfgs = compat_cfg(cfgs)

    # set multi-process settings
    setup_multi_processes(cfgs)

    # import modules from plugin/xx, registry will be updated
    if hasattr(cfgs, 'plugin') and cfgs.plugin:
        import importlib
        if hasattr(cfgs, 'plugin_dir'):
            # 'projects/mmdet3d_plugin/' -> 'projects.mmdet3d_plugin'
            _module_path = '.'.join(
                os.path.dirname(cfgs.plugin_dir).split('/'))
            print(_module_path)
        else:
            # import dir is the dirpath for the config file
            _module_path = '.'.join(
                os.path.dirname(args.config).split('/'))
        importlib.import_module(_module_path)

    # use val-mini for visualization
    # cfgs.data.val.ann_file = cfgs.data.val.ann_file.replace('val', 'val_mini')

    # Silence mmcv: pre-register WARNING-level loggers under its logger names.
    from mmcv.utils.logging import logger_initialized
    logger_initialized['root'] = logging.Logger(__name__, logging.WARNING)
    logger_initialized['mmcv'] = logging.Logger(__name__, logging.WARNING)

    # you need one GPU
    assert torch.cuda.is_available()
    # assert torch.cuda.device_count() == 1

    # logging
    logging.info('Using GPU: %s' % torch.cuda.get_device_name(0))

    # random seed
    logging.info('Setting random seed: 0')
    set_random_seed(0, deterministic=True)

    logging.info('Loading validation set from %s' % cfgs.data.val.data_root)

    # init distributed env first, since logger depends on the dist info.
    if args.launcher == 'none':
        distributed = False
    else:
        distributed = True
        init_dist(args.launcher, **cfgs.dist_params)

    test_dataloader_default_args = dict(
        samples_per_gpu=1, workers_per_gpu=2, dist=distributed, shuffle=False)
    test_loader_cfg = {
        **test_dataloader_default_args,
        **cfgs.data.get('test_dataloader', {})
    }
    dataset = build_dataset(cfgs.data.test)
    # Always use 2 workers, regardless of what the config requested.
    test_loader_cfg['workers_per_gpu'] = 2
    val_loader = build_dataloader(dataset, **test_loader_cfg)

    logging.info('Creating model: %s' % cfgs.model.type)
    model = build_model(cfgs.model)
    model.cuda()
    model = MMDataParallel(model, [0])
    model.eval()

    logging.info('Loading checkpoint from %s' % args.weights)
    load_checkpoint(
        model, args.weights, map_location='cuda', strict=True,
        logger=logging.Logger(__name__, logging.ERROR)
    )

    # exist_ok avoids the check-then-create race when several ranks start.
    os.makedirs(args.viz_dir, exist_ok=True)

    for i, data in tqdm(enumerate(val_loader)):
        with torch.no_grad():
            occ_pred = model(return_loss=False, rescale=True, **data)[0]

        # Debug-only dumps of intermediate head outputs; disabled on purpose.
        if False:
            occ_bev_feature = occ_pred['occ_bev_feature']
            outs = occ_pred['outs']

            tensor = occ_bev_feature.max(dim=1)[0].cpu()
            out_path = os.path.join(
                args.viz_dir, '%04d-occ_bev_feature.jpg' % i)
            draw_fig(tensor, name=out_path)
            print(out_path)

            tensor = outs[0][0]['heatmap'].sigmoid().sum(dim=1)[0].cpu()
            out_path = os.path.join(args.viz_dir, '%04d-heatmap.jpg' % i)
            draw_fig(tensor, name=out_path)
            print(out_path)

            # magnitude of the two 'reg' channels
            tensor = ((outs[0][0]['reg'][0, 0]**2
                       + outs[0][0]['reg'][0, 1]**2)**0.5
                      ).unsqueeze(dim=0).cpu()
            out_path = os.path.join(args.viz_dir, '%04d-reg.jpg' % i)
            draw_fig(tensor, name=out_path)
            print(out_path)

            tensor = outs[0][0]['height'][0, 0].cpu()
            out_path = os.path.join(args.viz_dir, '%04d-height.jpg' % i)
            draw_fig(tensor, name=out_path)
            print(out_path)

        # Predicted semantic occupancy; [..., ::-1] reverses the channel axis
        # before handing the image to cv2.imwrite.
        sem_pred = occ_pred['pred_occ']
        out_path = os.path.join(args.viz_dir, '%04d-sem.jpg' % i)
        cv2.imwrite(out_path, occ2img(semantics=sem_pred.cpu())[..., ::-1])
        print(out_path)

        # Predicted panoptic (instance) occupancy.
        inst_pred = occ_pred['pano_inst']
        out_path = os.path.join(args.viz_dir, '%04d-inst.jpg' % i)
        cv2.imwrite(
            out_path,
            occ2img(semantics=sem_pred.cpu(), is_pano=True,
                    panoptics=inst_pred.cpu())[..., ::-1])
        print(out_path)

        if args.surround_view_img:
            # presumably every 9th entry is one camera's current frame —
            # TODO confirm against the dataset pipeline
            img = data['img_inputs'][0][0][0][::9].cpu().numpy()
            # Undo the per-channel normalization before casting to uint8.
            mean = np.array([123.675, 116.28, 103.53],
                            dtype=np.float32).reshape(1, 3, 1, 1)
            std = np.array([58.395, 57.12, 57.375],
                           dtype=np.float32).reshape(1, 3, 1, 1)
            img = img * std + mean
            img = img.astype(np.uint8).transpose(0, 2, 3, 1)
            # 2x3 mosaic: first three views on top, last three below.
            up = np.concatenate([img[0, ...], img[1, ...], img[2, ...]], 1)
            down = np.concatenate([img[3, ...], img[4, ...], img[5, ...]], 1)
            out = np.concatenate([up, down], 0)
            # NOTE(review): unlike the other images, no channel reversal is
            # applied here — confirm the channel order of `img_inputs`.
            out_path = os.path.join(args.viz_dir, '%04d-rgb.jpg' % i)
            cv2.imwrite(out_path, out)
            print(out_path)

        if args.draw_sem_gt or args.draw_pano_gt:
            # NOTE(review): indexing data_infos with the batch counter assumes
            # one sample per batch in dataset order — confirm for distributed
            # runs or a config that overrides samples_per_gpu.
            gt_path = os.path.join(
                val_loader.dataset.data_infos[i]['occ_path'].replace(
                    'data/nuscenes/gts/', 'data/nuscenes/occ3d_panoptic/'),
                'labels.npz')
            # Close the archive once both arrays have been read.
            with np.load(gt_path) as occ_gt:
                pano_gt = occ_gt['instances']
                sem_gt = occ_gt['semantics']

            if args.draw_sem_gt:
                # sem_gt is a NumPy array (not a torch tensor), so it is passed
                # straight to occ2img, exactly as the panoptic branch does.
                cv2.imwrite(
                    os.path.join(args.viz_dir, '%04d-sem-gt.jpg' % i),
                    occ2img(semantics=sem_gt)[..., ::-1])
            if args.draw_pano_gt:
                out_path = os.path.join(args.viz_dir, '%04d-pano-gt.jpg' % i)
                cv2.imwrite(
                    out_path,
                    occ2img(semantics=sem_gt, is_pano=True,
                            panoptics=pano_gt)[..., ::-1])
                print(out_path)


if __name__ == '__main__':
    main()
'''
exp_name=flashoccv2-r50-depth-tiny-pano
python tools/vis_occ.py --config projects/configs/flashoccv2/${exp_name}.py --weights work_dirs/${exp_name}/epoch_24_ema.pth --viz-dir vis/${exp_name} --draw-sem-gt
exp_name=flashoccv2-r50-depth4d-longterm8f-pano
python tools/vis_occ.py --config projects/configs/flashoccv2/${exp_name}.py --weights work_dirs/${exp_name}/epoch_24_ema.pth --viz-dir vis/${exp_name} --draw-pano-gt #--draw-sem-gt
'''
\ No newline at end of file
2026-04-03 16:24:21,502 - mmdet - INFO - Environment info:
------------------------------------------------------------
sys.platform: linux
Python: 3.10.12 (main, Aug 15 2025, 14:32:43) [GCC 11.4.0]
CUDA available: True
GPU 0,1,2,3,4,5,6,7: BW1000_H
CUDA_HOME: /opt/dtk
NVCC: Not Available
GCC: x86_64-linux-gnu-gcc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0
PyTorch: 2.5.1
PyTorch compiling details: PyTorch built with:
- GCC 10.3
- C++ Version: 201703
- Intel(R) Math Kernel Library Version 2020.0.4 Product Build 20200917 for Intel(R) 64 architecture applications
- OpenMP 201511 (a.k.a. OpenMP 4.5)
- LAPACK is enabled (usually provided by MKL)
- NNPACK is enabled
- CPU capability usage: AVX512
- HIP Runtime 6.3.25521
- MIOpen 2.18.0
- Magma 2.8.0
- Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, CXX_COMPILER=/opt/rh/gcc-toolset-10/root/usr/bin/c++, CXX_FLAGS= -D_GLIBCXX_USE_CXX11_ABI=1 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOCUPTI -DLIBKINETO_NOXPUPTI=ON -DUSE_FBGEMM -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wsuggest-override -Wno-psabi -Wno-error=old-style-cast -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Wno-stringop-overflow, FORCE_FALLBACK_CUDA_MPI=1, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, TORCH_VERSION=2.5.1, USE_CUDA=0, USE_CUDNN=OFF, USE_CUSPARSELT=OFF, USE_EXCEPTION_PTR=1, USE_GFLAGS=1, USE_GLOG=1, USE_GLOO=1, USE_MKL=ON, USE_MKLDNN=0, USE_MPI=1, USE_NCCL=1, USE_NNPACK=ON, USE_OPENMP=1, USE_ROCM=ON, USE_ROCM_KERNEL_ASSERT=OFF,
TorchVision: 0.20.1
OpenCV: 4.12.0
MMCV: 1.6.1
MMCV Compiler: GCC 10.3
MMCV CUDA Compiler: rocm not available
MMDetection: 2.25.1
MMSegmentation: 0.25.0
MMDetection3D: 1.0.0rc4+
spconv2.0: False
------------------------------------------------------------
2026-04-03 16:24:22,070 - mmdet - INFO - Distributed training: True
2026-04-03 16:24:22,636 - mmdet - INFO - Config:
point_cloud_range = [-51.2, -51.2, -5.0, 51.2, 51.2, 3.0]
class_names = [
'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',
'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
]
dataset_type = 'NuScenesDatasetOccpancy'
data_root = 'data/nuscenes/'
input_modality = dict(
use_lidar=False,
use_camera=True,
use_radar=False,
use_map=False,
use_external=False)
file_client_args = dict(backend='disk')
train_pipeline = [
dict(
type='PrepareImageInputs',
is_train=True,
data_config=dict(
cams=[
'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT',
'CAM_BACK_LEFT', 'CAM_BACK', 'CAM_BACK_RIGHT'
],
Ncams=6,
input_size=(256, 704),
src_size=(900, 1600),
resize=(-0.06, 0.11),
rot=(-5.4, 5.4),
flip=True,
crop_h=(0.0, 0.0),
resize_test=0.0),
sequential=False),
dict(
type='LoadAnnotationsBEVDepth',
bda_aug_conf=dict(
rot_lim=(-0.0, 0.0),
scale_lim=(1.0, 1.0),
flip_dx_ratio=0.5,
flip_dy_ratio=0.5),
classes=[
'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
],
is_train=True),
dict(type='LoadOccGTFromFile'),
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=5,
use_dim=5,
file_client_args=dict(backend='disk')),
dict(
type='PointToMultiViewDepth',
downsample=1,
grid_config=dict(
x=[-40, 40, 0.4],
y=[-40, 40, 0.4],
z=[-1, 5.4, 6.4],
depth=[1.0, 45.0, 0.5])),
dict(
type='DefaultFormatBundle3D',
class_names=[
'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
]),
dict(
type='Collect3D',
keys=[
'img_inputs', 'gt_depth', 'voxel_semantics', 'mask_lidar',
'mask_camera'
])
]
test_pipeline = [
dict(
type='PrepareImageInputs',
data_config=dict(
cams=[
'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT',
'CAM_BACK_LEFT', 'CAM_BACK', 'CAM_BACK_RIGHT'
],
Ncams=6,
input_size=(256, 704),
src_size=(900, 1600),
resize=(-0.06, 0.11),
rot=(-5.4, 5.4),
flip=True,
crop_h=(0.0, 0.0),
resize_test=0.0),
sequential=False),
dict(
type='LoadAnnotationsBEVDepth',
bda_aug_conf=dict(
rot_lim=(-0.0, 0.0),
scale_lim=(1.0, 1.0),
flip_dx_ratio=0.5,
flip_dy_ratio=0.5),
classes=[
'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
],
is_train=False),
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=5,
use_dim=5,
file_client_args=dict(backend='disk')),
dict(
type='MultiScaleFlipAug3D',
img_scale=(1333, 800),
pts_scale_ratio=1,
flip=False,
transforms=[
dict(
type='DefaultFormatBundle3D',
class_names=[
'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
'barrier', 'motorcycle', 'bicycle', 'pedestrian',
'traffic_cone'
],
with_label=False),
dict(type='Collect3D', keys=['points', 'img_inputs'])
])
]
eval_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=5,
use_dim=5,
file_client_args=dict(backend='disk')),
dict(
type='LoadPointsFromMultiSweeps',
sweeps_num=10,
file_client_args=dict(backend='disk')),
dict(
type='DefaultFormatBundle3D',
class_names=[
'car', 'truck', 'trailer', 'bus', 'construction_vehicle',
'bicycle', 'motorcycle', 'pedestrian', 'traffic_cone', 'barrier'
],
with_label=False),
dict(type='Collect3D', keys=['points'])
]
data = dict(
samples_per_gpu=24,
workers_per_gpu=24,
train=dict(
type='NuScenesDatasetOccpancy',
data_root='data/nuscenes/',
ann_file='data/nuscenes/bevdetv2-nuscenes_infos_train.pkl',
pipeline=[
dict(
type='PrepareImageInputs',
is_train=True,
data_config=dict(
cams=[
'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT',
'CAM_BACK_LEFT', 'CAM_BACK', 'CAM_BACK_RIGHT'
],
Ncams=6,
input_size=(256, 704),
src_size=(900, 1600),
resize=(-0.06, 0.11),
rot=(-5.4, 5.4),
flip=True,
crop_h=(0.0, 0.0),
resize_test=0.0),
sequential=False),
dict(
type='LoadAnnotationsBEVDepth',
bda_aug_conf=dict(
rot_lim=(-0.0, 0.0),
scale_lim=(1.0, 1.0),
flip_dx_ratio=0.5,
flip_dy_ratio=0.5),
classes=[
'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
'barrier', 'motorcycle', 'bicycle', 'pedestrian',
'traffic_cone'
],
is_train=True),
dict(type='LoadOccGTFromFile'),
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=5,
use_dim=5,
file_client_args=dict(backend='disk')),
dict(
type='PointToMultiViewDepth',
downsample=1,
grid_config=dict(
x=[-40, 40, 0.4],
y=[-40, 40, 0.4],
z=[-1, 5.4, 6.4],
depth=[1.0, 45.0, 0.5])),
dict(
type='DefaultFormatBundle3D',
class_names=[
'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
'barrier', 'motorcycle', 'bicycle', 'pedestrian',
'traffic_cone'
]),
dict(
type='Collect3D',
keys=[
'img_inputs', 'gt_depth', 'voxel_semantics', 'mask_lidar',
'mask_camera'
])
],
classes=[
'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
],
modality=dict(
use_lidar=False,
use_camera=True,
use_radar=False,
use_map=False,
use_external=False),
test_mode=False,
box_type_3d='LiDAR',
use_valid_flag=True,
stereo=False,
filter_empty_gt=False,
img_info_prototype='bevdet'),
val=dict(
type='NuScenesDatasetOccpancy',
data_root='data/nuscenes/',
ann_file='data/nuscenes/bevdetv2-nuscenes_infos_val.pkl',
pipeline=[
dict(
type='PrepareImageInputs',
data_config=dict(
cams=[
'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT',
'CAM_BACK_LEFT', 'CAM_BACK', 'CAM_BACK_RIGHT'
],
Ncams=6,
input_size=(256, 704),
src_size=(900, 1600),
resize=(-0.06, 0.11),
rot=(-5.4, 5.4),
flip=True,
crop_h=(0.0, 0.0),
resize_test=0.0),
sequential=False),
dict(
type='LoadAnnotationsBEVDepth',
bda_aug_conf=dict(
rot_lim=(-0.0, 0.0),
scale_lim=(1.0, 1.0),
flip_dx_ratio=0.5,
flip_dy_ratio=0.5),
classes=[
'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
'barrier', 'motorcycle', 'bicycle', 'pedestrian',
'traffic_cone'
],
is_train=False),
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=5,
use_dim=5,
file_client_args=dict(backend='disk')),
dict(
type='MultiScaleFlipAug3D',
img_scale=(1333, 800),
pts_scale_ratio=1,
flip=False,
transforms=[
dict(
type='DefaultFormatBundle3D',
class_names=[
'car', 'truck', 'construction_vehicle', 'bus',
'trailer', 'barrier', 'motorcycle', 'bicycle',
'pedestrian', 'traffic_cone'
],
with_label=False),
dict(type='Collect3D', keys=['points', 'img_inputs'])
])
],
classes=[
'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
],
modality=dict(
use_lidar=False,
use_camera=True,
use_radar=False,
use_map=False,
use_external=False),
test_mode=True,
box_type_3d='LiDAR',
stereo=False,
filter_empty_gt=False,
img_info_prototype='bevdet'),
test=dict(
type='NuScenesDatasetOccpancy',
data_root='data/nuscenes/',
ann_file='data/nuscenes/bevdetv2-nuscenes_infos_val.pkl',
pipeline=[
dict(
type='PrepareImageInputs',
data_config=dict(
cams=[
'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT',
'CAM_BACK_LEFT', 'CAM_BACK', 'CAM_BACK_RIGHT'
],
Ncams=6,
input_size=(256, 704),
src_size=(900, 1600),
resize=(-0.06, 0.11),
rot=(-5.4, 5.4),
flip=True,
crop_h=(0.0, 0.0),
resize_test=0.0),
sequential=False),
dict(
type='LoadAnnotationsBEVDepth',
bda_aug_conf=dict(
rot_lim=(-0.0, 0.0),
scale_lim=(1.0, 1.0),
flip_dx_ratio=0.5,
flip_dy_ratio=0.5),
classes=[
'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
'barrier', 'motorcycle', 'bicycle', 'pedestrian',
'traffic_cone'
],
is_train=False),
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=5,
use_dim=5,
file_client_args=dict(backend='disk')),
dict(
type='MultiScaleFlipAug3D',
img_scale=(1333, 800),
pts_scale_ratio=1,
flip=False,
transforms=[
dict(
type='DefaultFormatBundle3D',
class_names=[
'car', 'truck', 'construction_vehicle', 'bus',
'trailer', 'barrier', 'motorcycle', 'bicycle',
'pedestrian', 'traffic_cone'
],
with_label=False),
dict(type='Collect3D', keys=['points', 'img_inputs'])
])
],
classes=[
'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
],
modality=dict(
use_lidar=False,
use_camera=True,
use_radar=False,
use_map=False,
use_external=False),
test_mode=True,
box_type_3d='LiDAR',
stereo=False,
filter_empty_gt=False,
img_info_prototype='bevdet'))
evaluation = dict(
interval=1,
pipeline=[
dict(
type='PrepareImageInputs',
data_config=dict(
cams=[
'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT',
'CAM_BACK_LEFT', 'CAM_BACK', 'CAM_BACK_RIGHT'
],
Ncams=6,
input_size=(256, 704),
src_size=(900, 1600),
resize=(-0.06, 0.11),
rot=(-5.4, 5.4),
flip=True,
crop_h=(0.0, 0.0),
resize_test=0.0),
sequential=False),
dict(
type='LoadAnnotationsBEVDepth',
bda_aug_conf=dict(
rot_lim=(-0.0, 0.0),
scale_lim=(1.0, 1.0),
flip_dx_ratio=0.5,
flip_dy_ratio=0.5),
classes=[
'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
'barrier', 'motorcycle', 'bicycle', 'pedestrian',
'traffic_cone'
],
is_train=False),
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=5,
use_dim=5,
file_client_args=dict(backend='disk')),
dict(
type='MultiScaleFlipAug3D',
img_scale=(1333, 800),
pts_scale_ratio=1,
flip=False,
transforms=[
dict(
type='DefaultFormatBundle3D',
class_names=[
'car', 'truck', 'construction_vehicle', 'bus',
'trailer', 'barrier', 'motorcycle', 'bicycle',
'pedestrian', 'traffic_cone'
],
with_label=False),
dict(type='Collect3D', keys=['points', 'img_inputs'])
])
],
start=20)
checkpoint_config = dict(interval=1, max_keep_ckpts=5)
log_config = dict(
interval=1,
hooks=[dict(type='TextLoggerHook'),
dict(type='TensorboardLoggerHook')])
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/flashocc-r50'
load_from = 'ckpts/bevdet-r50-cbgs.pth'
resume_from = None
workflow = [('train', 1)]
opencv_num_threads = 0
mp_start_method = 'fork'
plugin = True
plugin_dir = 'projects/mmdet3d_plugin/'
data_config = dict(
cams=[
'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT', 'CAM_BACK_LEFT',
'CAM_BACK', 'CAM_BACK_RIGHT'
],
Ncams=6,
input_size=(256, 704),
src_size=(900, 1600),
resize=(-0.06, 0.11),
rot=(-5.4, 5.4),
flip=True,
crop_h=(0.0, 0.0),
resize_test=0.0)
grid_config = dict(
x=[-40, 40, 0.4],
y=[-40, 40, 0.4],
z=[-1, 5.4, 6.4],
depth=[1.0, 45.0, 0.5])
voxel_size = [0.1, 0.1, 0.2]
numC_Trans = 64
model = dict(
type='BEVDetOCC',
img_backbone=dict(
type='ResNet',
depth=50,
num_stages=4,
out_indices=(2, 3),
frozen_stages=-1,
norm_cfg=dict(type='BN', requires_grad=True),
norm_eval=False,
with_cp=True,
style='pytorch'),
img_neck=dict(
type='CustomFPN',
in_channels=[1024, 2048],
out_channels=256,
num_outs=1,
start_level=0,
out_ids=[0]),
img_view_transformer=dict(
type='LSSViewTransformer',
grid_config=dict(
x=[-40, 40, 0.4],
y=[-40, 40, 0.4],
z=[-1, 5.4, 6.4],
depth=[1.0, 45.0, 0.5]),
input_size=(256, 704),
in_channels=256,
out_channels=64,
sid=False,
collapse_z=True,
downsample=16),
img_bev_encoder_backbone=dict(
type='CustomResNet', numC_input=64, num_channels=[128, 256, 512]),
img_bev_encoder_neck=dict(
type='FPN_LSS', in_channels=640, out_channels=256),
occ_head=dict(
type='BEVOCCHead2D',
in_dim=256,
out_dim=256,
Dz=16,
use_mask=True,
num_classes=18,
use_predicter=True,
class_balance=False,
loss_occ=dict(
type='CrossEntropyLoss',
use_sigmoid=False,
ignore_index=255,
loss_weight=1.0)))
bda_aug_conf = dict(
rot_lim=(-0.0, 0.0),
scale_lim=(1.0, 1.0),
flip_dx_ratio=0.5,
flip_dy_ratio=0.5)
share_data_config = dict(
type='NuScenesDatasetOccpancy',
data_root='data/nuscenes/',
classes=[
'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',
'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
],
modality=dict(
use_lidar=False,
use_camera=True,
use_radar=False,
use_map=False,
use_external=False),
stereo=False,
filter_empty_gt=False,
img_info_prototype='bevdet')
test_data_config = dict(
pipeline=[
dict(
type='PrepareImageInputs',
data_config=dict(
cams=[
'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT',
'CAM_BACK_LEFT', 'CAM_BACK', 'CAM_BACK_RIGHT'
],
Ncams=6,
input_size=(256, 704),
src_size=(900, 1600),
resize=(-0.06, 0.11),
rot=(-5.4, 5.4),
flip=True,
crop_h=(0.0, 0.0),
resize_test=0.0),
sequential=False),
dict(
type='LoadAnnotationsBEVDepth',
bda_aug_conf=dict(
rot_lim=(-0.0, 0.0),
scale_lim=(1.0, 1.0),
flip_dx_ratio=0.5,
flip_dy_ratio=0.5),
classes=[
'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
'barrier', 'motorcycle', 'bicycle', 'pedestrian',
'traffic_cone'
],
is_train=False),
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=5,
use_dim=5,
file_client_args=dict(backend='disk')),
dict(
type='MultiScaleFlipAug3D',
img_scale=(1333, 800),
pts_scale_ratio=1,
flip=False,
transforms=[
dict(
type='DefaultFormatBundle3D',
class_names=[
'car', 'truck', 'construction_vehicle', 'bus',
'trailer', 'barrier', 'motorcycle', 'bicycle',
'pedestrian', 'traffic_cone'
],
with_label=False),
dict(type='Collect3D', keys=['points', 'img_inputs'])
])
],
ann_file='data/nuscenes/bevdetv2-nuscenes_infos_val.pkl',
type='NuScenesDatasetOccpancy',
data_root='data/nuscenes/',
classes=[
'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',
'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
],
modality=dict(
use_lidar=False,
use_camera=True,
use_radar=False,
use_map=False,
use_external=False),
stereo=False,
filter_empty_gt=False,
img_info_prototype='bevdet')
key = 'test'
optimizer = dict(type='AdamW', lr=0.0001, weight_decay=0.01)
optimizer_config = dict(grad_clip=dict(max_norm=5, norm_type=2))
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=200,
warmup_ratio=0.001,
step=[24])
runner = dict(type='EpochBasedRunner', max_epochs=24)
custom_hooks = [
dict(type='MEGVIIEMAHook', init_updates=10560, priority='NORMAL')
]
gpu_ids = range(0, 8)
2026-04-03 16:24:22,636 - mmdet - INFO - Set random seed to 0, deterministic: False
2026-04-03 16:24:22,888 - mmdet - INFO - initialize ResNet with init_cfg [{'type': 'Kaiming', 'layer': 'Conv2d'}, {'type': 'Constant', 'val': 1, 'layer': ['_BatchNorm', 'GroupNorm']}]
2026-04-03 16:24:22,995 - mmdet - INFO - initialize Bottleneck with init_cfg {'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
2026-04-03 16:24:22,995 - mmdet - INFO - initialize Bottleneck with init_cfg {'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
2026-04-03 16:24:22,996 - mmdet - INFO - initialize Bottleneck with init_cfg {'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
2026-04-03 16:24:22,996 - mmdet - INFO - initialize Bottleneck with init_cfg {'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
2026-04-03 16:24:22,997 - mmdet - INFO - initialize Bottleneck with init_cfg {'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
2026-04-03 16:24:22,997 - mmdet - INFO - initialize Bottleneck with init_cfg {'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
2026-04-03 16:24:22,998 - mmdet - INFO - initialize Bottleneck with init_cfg {'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
2026-04-03 16:24:22,999 - mmdet - INFO - initialize Bottleneck with init_cfg {'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
2026-04-03 16:24:23,000 - mmdet - INFO - initialize Bottleneck with init_cfg {'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
2026-04-03 16:24:23,000 - mmdet - INFO - initialize Bottleneck with init_cfg {'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
2026-04-03 16:24:23,001 - mmdet - INFO - initialize Bottleneck with init_cfg {'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
2026-04-03 16:24:23,002 - mmdet - INFO - initialize Bottleneck with init_cfg {'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
2026-04-03 16:24:23,003 - mmdet - INFO - initialize Bottleneck with init_cfg {'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
2026-04-03 16:24:23,005 - mmdet - INFO - initialize Bottleneck with init_cfg {'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
2026-04-03 16:24:23,008 - mmdet - INFO - initialize Bottleneck with init_cfg {'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
2026-04-03 16:24:23,010 - mmdet - INFO - initialize Bottleneck with init_cfg {'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
2026-04-03 16:24:23,021 - mmdet - INFO - initialize CustomFPN with init_cfg {'type': 'Xavier', 'layer': 'Conv2d', 'distribution': 'uniform'}
Name of parameter - Initialization information
img_backbone.conv1.weight - torch.Size([64, 3, 7, 7]):
KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0
img_backbone.bn1.weight - torch.Size([64]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.bn1.bias - torch.Size([64]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer1.0.conv1.weight - torch.Size([64, 64, 1, 1]):
KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0
img_backbone.layer1.0.bn1.weight - torch.Size([64]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer1.0.bn1.bias - torch.Size([64]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer1.0.conv2.weight - torch.Size([64, 64, 3, 3]):
KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0
img_backbone.layer1.0.bn2.weight - torch.Size([64]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer1.0.bn2.bias - torch.Size([64]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer1.0.conv3.weight - torch.Size([256, 64, 1, 1]):
KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0
img_backbone.layer1.0.bn3.weight - torch.Size([256]):
ConstantInit: val=0, bias=0
img_backbone.layer1.0.bn3.bias - torch.Size([256]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer1.0.downsample.0.weight - torch.Size([256, 64, 1, 1]):
KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0
img_backbone.layer1.0.downsample.1.weight - torch.Size([256]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer1.0.downsample.1.bias - torch.Size([256]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer1.1.conv1.weight - torch.Size([64, 256, 1, 1]):
KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0
img_backbone.layer1.1.bn1.weight - torch.Size([64]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer1.1.bn1.bias - torch.Size([64]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer1.1.conv2.weight - torch.Size([64, 64, 3, 3]):
KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0
img_backbone.layer1.1.bn2.weight - torch.Size([64]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer1.1.bn2.bias - torch.Size([64]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer1.1.conv3.weight - torch.Size([256, 64, 1, 1]):
KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0
img_backbone.layer1.1.bn3.weight - torch.Size([256]):
ConstantInit: val=0, bias=0
img_backbone.layer1.1.bn3.bias - torch.Size([256]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer1.2.conv1.weight - torch.Size([64, 256, 1, 1]):
KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0
img_backbone.layer1.2.bn1.weight - torch.Size([64]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer1.2.bn1.bias - torch.Size([64]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer1.2.conv2.weight - torch.Size([64, 64, 3, 3]):
KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0
img_backbone.layer1.2.bn2.weight - torch.Size([64]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer1.2.bn2.bias - torch.Size([64]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer1.2.conv3.weight - torch.Size([256, 64, 1, 1]):
KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0
img_backbone.layer1.2.bn3.weight - torch.Size([256]):
ConstantInit: val=0, bias=0
img_backbone.layer1.2.bn3.bias - torch.Size([256]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer2.0.conv1.weight - torch.Size([128, 256, 1, 1]):
KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0
img_backbone.layer2.0.bn1.weight - torch.Size([128]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer2.0.bn1.bias - torch.Size([128]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer2.0.conv2.weight - torch.Size([128, 128, 3, 3]):
KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0
img_backbone.layer2.0.bn2.weight - torch.Size([128]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer2.0.bn2.bias - torch.Size([128]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer2.0.conv3.weight - torch.Size([512, 128, 1, 1]):
KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0
img_backbone.layer2.0.bn3.weight - torch.Size([512]):
ConstantInit: val=0, bias=0
img_backbone.layer2.0.bn3.bias - torch.Size([512]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer2.0.downsample.0.weight - torch.Size([512, 256, 1, 1]):
KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0
img_backbone.layer2.0.downsample.1.weight - torch.Size([512]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer2.0.downsample.1.bias - torch.Size([512]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer2.1.conv1.weight - torch.Size([128, 512, 1, 1]):
KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0
img_backbone.layer2.1.bn1.weight - torch.Size([128]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer2.1.bn1.bias - torch.Size([128]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer2.1.conv2.weight - torch.Size([128, 128, 3, 3]):
KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0
img_backbone.layer2.1.bn2.weight - torch.Size([128]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer2.1.bn2.bias - torch.Size([128]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer2.1.conv3.weight - torch.Size([512, 128, 1, 1]):
KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0
img_backbone.layer2.1.bn3.weight - torch.Size([512]):
ConstantInit: val=0, bias=0
img_backbone.layer2.1.bn3.bias - torch.Size([512]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer2.2.conv1.weight - torch.Size([128, 512, 1, 1]):
KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0
img_backbone.layer2.2.bn1.weight - torch.Size([128]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer2.2.bn1.bias - torch.Size([128]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer2.2.conv2.weight - torch.Size([128, 128, 3, 3]):
KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0
img_backbone.layer2.2.bn2.weight - torch.Size([128]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer2.2.bn2.bias - torch.Size([128]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer2.2.conv3.weight - torch.Size([512, 128, 1, 1]):
KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0
img_backbone.layer2.2.bn3.weight - torch.Size([512]):
ConstantInit: val=0, bias=0
img_backbone.layer2.2.bn3.bias - torch.Size([512]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer2.3.conv1.weight - torch.Size([128, 512, 1, 1]):
KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0
img_backbone.layer2.3.bn1.weight - torch.Size([128]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer2.3.bn1.bias - torch.Size([128]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer2.3.conv2.weight - torch.Size([128, 128, 3, 3]):
KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0
img_backbone.layer2.3.bn2.weight - torch.Size([128]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer2.3.bn2.bias - torch.Size([128]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer2.3.conv3.weight - torch.Size([512, 128, 1, 1]):
KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0
img_backbone.layer2.3.bn3.weight - torch.Size([512]):
ConstantInit: val=0, bias=0
img_backbone.layer2.3.bn3.bias - torch.Size([512]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer3.0.conv1.weight - torch.Size([256, 512, 1, 1]):
KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0
img_backbone.layer3.0.bn1.weight - torch.Size([256]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer3.0.bn1.bias - torch.Size([256]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer3.0.conv2.weight - torch.Size([256, 256, 3, 3]):
KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0
img_backbone.layer3.0.bn2.weight - torch.Size([256]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer3.0.bn2.bias - torch.Size([256]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer3.0.conv3.weight - torch.Size([1024, 256, 1, 1]):
KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0
img_backbone.layer3.0.bn3.weight - torch.Size([1024]):
ConstantInit: val=0, bias=0
img_backbone.layer3.0.bn3.bias - torch.Size([1024]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer3.0.downsample.0.weight - torch.Size([1024, 512, 1, 1]):
KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0
img_backbone.layer3.0.downsample.1.weight - torch.Size([1024]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer3.0.downsample.1.bias - torch.Size([1024]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer3.1.conv1.weight - torch.Size([256, 1024, 1, 1]):
KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0
img_backbone.layer3.1.bn1.weight - torch.Size([256]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer3.1.bn1.bias - torch.Size([256]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer3.1.conv2.weight - torch.Size([256, 256, 3, 3]):
KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0
img_backbone.layer3.1.bn2.weight - torch.Size([256]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer3.1.bn2.bias - torch.Size([256]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer3.1.conv3.weight - torch.Size([1024, 256, 1, 1]):
KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0
img_backbone.layer3.1.bn3.weight - torch.Size([1024]):
ConstantInit: val=0, bias=0
img_backbone.layer3.1.bn3.bias - torch.Size([1024]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer3.2.conv1.weight - torch.Size([256, 1024, 1, 1]):
KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0
img_backbone.layer3.2.bn1.weight - torch.Size([256]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer3.2.bn1.bias - torch.Size([256]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer3.2.conv2.weight - torch.Size([256, 256, 3, 3]):
KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0
img_backbone.layer3.2.bn2.weight - torch.Size([256]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer3.2.bn2.bias - torch.Size([256]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer3.2.conv3.weight - torch.Size([1024, 256, 1, 1]):
KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0
img_backbone.layer3.2.bn3.weight - torch.Size([1024]):
ConstantInit: val=0, bias=0
img_backbone.layer3.2.bn3.bias - torch.Size([1024]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer3.3.conv1.weight - torch.Size([256, 1024, 1, 1]):
KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0
img_backbone.layer3.3.bn1.weight - torch.Size([256]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer3.3.bn1.bias - torch.Size([256]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer3.3.conv2.weight - torch.Size([256, 256, 3, 3]):
KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0
img_backbone.layer3.3.bn2.weight - torch.Size([256]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer3.3.bn2.bias - torch.Size([256]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer3.3.conv3.weight - torch.Size([1024, 256, 1, 1]):
KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0
img_backbone.layer3.3.bn3.weight - torch.Size([1024]):
ConstantInit: val=0, bias=0
img_backbone.layer3.3.bn3.bias - torch.Size([1024]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer3.4.conv1.weight - torch.Size([256, 1024, 1, 1]):
KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0
img_backbone.layer3.4.bn1.weight - torch.Size([256]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer3.4.bn1.bias - torch.Size([256]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer3.4.conv2.weight - torch.Size([256, 256, 3, 3]):
KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0
img_backbone.layer3.4.bn2.weight - torch.Size([256]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer3.4.bn2.bias - torch.Size([256]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer3.4.conv3.weight - torch.Size([1024, 256, 1, 1]):
KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0
img_backbone.layer3.4.bn3.weight - torch.Size([1024]):
ConstantInit: val=0, bias=0
img_backbone.layer3.4.bn3.bias - torch.Size([1024]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer3.5.conv1.weight - torch.Size([256, 1024, 1, 1]):
KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0
img_backbone.layer3.5.bn1.weight - torch.Size([256]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer3.5.bn1.bias - torch.Size([256]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer3.5.conv2.weight - torch.Size([256, 256, 3, 3]):
KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0
img_backbone.layer3.5.bn2.weight - torch.Size([256]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer3.5.bn2.bias - torch.Size([256]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer3.5.conv3.weight - torch.Size([1024, 256, 1, 1]):
KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0
img_backbone.layer3.5.bn3.weight - torch.Size([1024]):
ConstantInit: val=0, bias=0
img_backbone.layer3.5.bn3.bias - torch.Size([1024]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer4.0.conv1.weight - torch.Size([512, 1024, 1, 1]):
KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0
img_backbone.layer4.0.bn1.weight - torch.Size([512]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer4.0.bn1.bias - torch.Size([512]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer4.0.conv2.weight - torch.Size([512, 512, 3, 3]):
KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0
img_backbone.layer4.0.bn2.weight - torch.Size([512]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer4.0.bn2.bias - torch.Size([512]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer4.0.conv3.weight - torch.Size([2048, 512, 1, 1]):
KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0
img_backbone.layer4.0.bn3.weight - torch.Size([2048]):
ConstantInit: val=0, bias=0
img_backbone.layer4.0.bn3.bias - torch.Size([2048]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer4.0.downsample.0.weight - torch.Size([2048, 1024, 1, 1]):
KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0
img_backbone.layer4.0.downsample.1.weight - torch.Size([2048]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer4.0.downsample.1.bias - torch.Size([2048]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer4.1.conv1.weight - torch.Size([512, 2048, 1, 1]):
KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0
img_backbone.layer4.1.bn1.weight - torch.Size([512]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer4.1.bn1.bias - torch.Size([512]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer4.1.conv2.weight - torch.Size([512, 512, 3, 3]):
KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0
img_backbone.layer4.1.bn2.weight - torch.Size([512]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer4.1.bn2.bias - torch.Size([512]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer4.1.conv3.weight - torch.Size([2048, 512, 1, 1]):
KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0
img_backbone.layer4.1.bn3.weight - torch.Size([2048]):
ConstantInit: val=0, bias=0
img_backbone.layer4.1.bn3.bias - torch.Size([2048]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer4.2.conv1.weight - torch.Size([512, 2048, 1, 1]):
KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0
img_backbone.layer4.2.bn1.weight - torch.Size([512]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer4.2.bn1.bias - torch.Size([512]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer4.2.conv2.weight - torch.Size([512, 512, 3, 3]):
KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0
img_backbone.layer4.2.bn2.weight - torch.Size([512]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer4.2.bn2.bias - torch.Size([512]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer4.2.conv3.weight - torch.Size([2048, 512, 1, 1]):
KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0
img_backbone.layer4.2.bn3.weight - torch.Size([2048]):
ConstantInit: val=0, bias=0
img_backbone.layer4.2.bn3.bias - torch.Size([2048]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_neck.lateral_convs.0.conv.weight - torch.Size([256, 1024, 1, 1]):
XavierInit: gain=1, distribution=uniform, bias=0
img_neck.lateral_convs.0.conv.bias - torch.Size([256]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_neck.lateral_convs.1.conv.weight - torch.Size([256, 2048, 1, 1]):
XavierInit: gain=1, distribution=uniform, bias=0
img_neck.lateral_convs.1.conv.bias - torch.Size([256]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_neck.fpn_convs.0.conv.weight - torch.Size([256, 256, 3, 3]):
XavierInit: gain=1, distribution=uniform, bias=0
img_neck.fpn_convs.0.conv.bias - torch.Size([256]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_view_transformer.depth_net.weight - torch.Size([152, 256, 1, 1]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_view_transformer.depth_net.bias - torch.Size([152]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_bev_encoder_backbone.layers.0.0.conv1.weight - torch.Size([128, 64, 3, 3]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_bev_encoder_backbone.layers.0.0.bn1.weight - torch.Size([128]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_bev_encoder_backbone.layers.0.0.bn1.bias - torch.Size([128]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_bev_encoder_backbone.layers.0.0.conv2.weight - torch.Size([128, 128, 3, 3]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_bev_encoder_backbone.layers.0.0.bn2.weight - torch.Size([128]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_bev_encoder_backbone.layers.0.0.bn2.bias - torch.Size([128]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_bev_encoder_backbone.layers.0.0.downsample.weight - torch.Size([128, 64, 3, 3]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_bev_encoder_backbone.layers.0.0.downsample.bias - torch.Size([128]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_bev_encoder_backbone.layers.0.1.conv1.weight - torch.Size([128, 128, 3, 3]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_bev_encoder_backbone.layers.0.1.bn1.weight - torch.Size([128]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_bev_encoder_backbone.layers.0.1.bn1.bias - torch.Size([128]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_bev_encoder_backbone.layers.0.1.conv2.weight - torch.Size([128, 128, 3, 3]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_bev_encoder_backbone.layers.0.1.bn2.weight - torch.Size([128]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_bev_encoder_backbone.layers.0.1.bn2.bias - torch.Size([128]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_bev_encoder_backbone.layers.1.0.conv1.weight - torch.Size([256, 128, 3, 3]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_bev_encoder_backbone.layers.1.0.bn1.weight - torch.Size([256]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_bev_encoder_backbone.layers.1.0.bn1.bias - torch.Size([256]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_bev_encoder_backbone.layers.1.0.conv2.weight - torch.Size([256, 256, 3, 3]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_bev_encoder_backbone.layers.1.0.bn2.weight - torch.Size([256]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_bev_encoder_backbone.layers.1.0.bn2.bias - torch.Size([256]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_bev_encoder_backbone.layers.1.0.downsample.weight - torch.Size([256, 128, 3, 3]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_bev_encoder_backbone.layers.1.0.downsample.bias - torch.Size([256]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_bev_encoder_backbone.layers.1.1.conv1.weight - torch.Size([256, 256, 3, 3]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_bev_encoder_backbone.layers.1.1.bn1.weight - torch.Size([256]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_bev_encoder_backbone.layers.1.1.bn1.bias - torch.Size([256]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_bev_encoder_backbone.layers.1.1.conv2.weight - torch.Size([256, 256, 3, 3]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_bev_encoder_backbone.layers.1.1.bn2.weight - torch.Size([256]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_bev_encoder_backbone.layers.1.1.bn2.bias - torch.Size([256]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_bev_encoder_backbone.layers.2.0.conv1.weight - torch.Size([512, 256, 3, 3]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_bev_encoder_backbone.layers.2.0.bn1.weight - torch.Size([512]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_bev_encoder_backbone.layers.2.0.bn1.bias - torch.Size([512]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_bev_encoder_backbone.layers.2.0.conv2.weight - torch.Size([512, 512, 3, 3]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_bev_encoder_backbone.layers.2.0.bn2.weight - torch.Size([512]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_bev_encoder_backbone.layers.2.0.bn2.bias - torch.Size([512]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_bev_encoder_backbone.layers.2.0.downsample.weight - torch.Size([512, 256, 3, 3]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_bev_encoder_backbone.layers.2.0.downsample.bias - torch.Size([512]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_bev_encoder_backbone.layers.2.1.conv1.weight - torch.Size([512, 512, 3, 3]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_bev_encoder_backbone.layers.2.1.bn1.weight - torch.Size([512]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_bev_encoder_backbone.layers.2.1.bn1.bias - torch.Size([512]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_bev_encoder_backbone.layers.2.1.conv2.weight - torch.Size([512, 512, 3, 3]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_bev_encoder_backbone.layers.2.1.bn2.weight - torch.Size([512]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_bev_encoder_backbone.layers.2.1.bn2.bias - torch.Size([512]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_bev_encoder_neck.conv.0.weight - torch.Size([512, 640, 3, 3]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_bev_encoder_neck.conv.1.weight - torch.Size([512]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_bev_encoder_neck.conv.1.bias - torch.Size([512]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_bev_encoder_neck.conv.3.weight - torch.Size([512, 512, 3, 3]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_bev_encoder_neck.conv.4.weight - torch.Size([512]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_bev_encoder_neck.conv.4.bias - torch.Size([512]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_bev_encoder_neck.up2.1.weight - torch.Size([256, 512, 3, 3]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_bev_encoder_neck.up2.2.weight - torch.Size([256]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_bev_encoder_neck.up2.2.bias - torch.Size([256]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_bev_encoder_neck.up2.4.weight - torch.Size([256, 256, 1, 1]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_bev_encoder_neck.up2.4.bias - torch.Size([256]):
The value is the same before and after calling `init_weights` of BEVDetOCC
occ_head.final_conv.conv.weight - torch.Size([256, 256, 3, 3]):
Initialized by user-defined `init_weights` in ConvModule
occ_head.final_conv.conv.bias - torch.Size([256]):
The value is the same before and after calling `init_weights` of BEVDetOCC
occ_head.predicter.0.weight - torch.Size([512, 256]):
The value is the same before and after calling `init_weights` of BEVDetOCC
occ_head.predicter.0.bias - torch.Size([512]):
The value is the same before and after calling `init_weights` of BEVDetOCC
occ_head.predicter.2.weight - torch.Size([288, 512]):
The value is the same before and after calling `init_weights` of BEVDetOCC
occ_head.predicter.2.bias - torch.Size([288]):
The value is the same before and after calling `init_weights` of BEVDetOCC
2026-04-03 16:24:23,032 - mmdet - INFO - Model:
BEVDetOCC(
(img_backbone): ResNet(
(conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
(bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
(layer1): ResLayer(
(0): Bottleneck(
(conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(downsample): Sequential(
(0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
init_cfg={'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
(1): Bottleneck(
(conv1): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
)
init_cfg={'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
(2): Bottleneck(
(conv1): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
)
init_cfg={'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
)
(layer2): ResLayer(
(0): Bottleneck(
(conv1): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(downsample): Sequential(
(0): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False)
(1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
init_cfg={'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
(1): Bottleneck(
(conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
)
init_cfg={'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
(2): Bottleneck(
(conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
)
init_cfg={'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
(3): Bottleneck(
(conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
)
init_cfg={'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
)
(layer3): ResLayer(
(0): Bottleneck(
(conv1): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(downsample): Sequential(
(0): Conv2d(512, 1024, kernel_size=(1, 1), stride=(2, 2), bias=False)
(1): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
init_cfg={'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
(1): Bottleneck(
(conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
)
init_cfg={'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
(2): Bottleneck(
(conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
)
init_cfg={'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
(3): Bottleneck(
(conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
)
init_cfg={'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
(4): Bottleneck(
(conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
)
init_cfg={'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
(5): Bottleneck(
(conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
)
init_cfg={'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
)
(layer4): ResLayer(
(0): Bottleneck(
(conv1): Conv2d(1024, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(downsample): Sequential(
(0): Conv2d(1024, 2048, kernel_size=(1, 1), stride=(2, 2), bias=False)
(1): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
init_cfg={'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
(1): Bottleneck(
(conv1): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
)
init_cfg={'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
(2): Bottleneck(
(conv1): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
)
init_cfg={'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
)
)
init_cfg=[{'type': 'Kaiming', 'layer': 'Conv2d'}, {'type': 'Constant', 'val': 1, 'layer': ['_BatchNorm', 'GroupNorm']}]
(img_neck): CustomFPN(
(lateral_convs): ModuleList(
(0): ConvModule(
(conv): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
)
(1): ConvModule(
(conv): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
)
)
(fpn_convs): ModuleList(
(0): ConvModule(
(conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)
)
)
init_cfg={'type': 'Xavier', 'layer': 'Conv2d', 'distribution': 'uniform'}
(img_view_transformer): LSSViewTransformer(
(depth_net): Conv2d(256, 152, kernel_size=(1, 1), stride=(1, 1))
)
(img_bev_encoder_backbone): CustomResNet(
(layers): Sequential(
(0): Sequential(
(0): BasicBlock(
(conv1): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(downsample): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
)
(1): BasicBlock(
(conv1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
)
)
(1): Sequential(
(0): BasicBlock(
(conv1): Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(downsample): Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
)
(1): BasicBlock(
(conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
)
)
(2): Sequential(
(0): BasicBlock(
(conv1): Conv2d(256, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(downsample): Conv2d(256, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
)
(1): BasicBlock(
(conv1): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
)
)
)
)
(img_bev_encoder_neck): FPN_LSS(
(up): Upsample(scale_factor=4.0, mode='bilinear')
(conv): Sequential(
(0): Conv2d(640, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU(inplace=True)
(3): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(4): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(5): ReLU(inplace=True)
)
(up2): Sequential(
(0): Upsample(scale_factor=2.0, mode='bilinear')
(1): Conv2d(512, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(3): ReLU(inplace=True)
(4): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
)
)
(occ_head): BEVOCCHead2D(
(final_conv): ConvModule(
(conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(activate): ReLU(inplace=True)
)
(predicter): Sequential(
(0): Linear(in_features=256, out_features=512, bias=True)
(1): Softplus(beta=1.0, threshold=20.0)
(2): Linear(in_features=512, out_features=288, bias=True)
)
(loss_occ): CrossEntropyLoss(avg_non_ignore=False)
)
)
2026-04-03 16:24:32,890 - mmdet - INFO - load checkpoint from local path: ckpts/bevdet-r50-cbgs.pth
2026-04-03 16:24:32,998 - mmdet - WARNING - The model and loaded state dict do not match exactly
size mismatch for img_view_transformer.depth_net.weight: copying a param with shape torch.Size([123, 256, 1, 1]) from checkpoint, the shape in current model is torch.Size([152, 256, 1, 1]).
size mismatch for img_view_transformer.depth_net.bias: copying a param with shape torch.Size([123]) from checkpoint, the shape in current model is torch.Size([152]).
unexpected key in source state_dict: pts_bbox_head.shared_conv.conv.weight, pts_bbox_head.shared_conv.bn.weight, pts_bbox_head.shared_conv.bn.bias, pts_bbox_head.shared_conv.bn.running_mean, pts_bbox_head.shared_conv.bn.running_var, pts_bbox_head.shared_conv.bn.num_batches_tracked, pts_bbox_head.task_heads.0.reg.0.conv.weight, pts_bbox_head.task_heads.0.reg.0.bn.weight, pts_bbox_head.task_heads.0.reg.0.bn.bias, pts_bbox_head.task_heads.0.reg.0.bn.running_mean, pts_bbox_head.task_heads.0.reg.0.bn.running_var, pts_bbox_head.task_heads.0.reg.0.bn.num_batches_tracked, pts_bbox_head.task_heads.0.reg.1.weight, pts_bbox_head.task_heads.0.reg.1.bias, pts_bbox_head.task_heads.0.height.0.conv.weight, pts_bbox_head.task_heads.0.height.0.bn.weight, pts_bbox_head.task_heads.0.height.0.bn.bias, pts_bbox_head.task_heads.0.height.0.bn.running_mean, pts_bbox_head.task_heads.0.height.0.bn.running_var, pts_bbox_head.task_heads.0.height.0.bn.num_batches_tracked, pts_bbox_head.task_heads.0.height.1.weight, pts_bbox_head.task_heads.0.height.1.bias, pts_bbox_head.task_heads.0.dim.0.conv.weight, pts_bbox_head.task_heads.0.dim.0.bn.weight, pts_bbox_head.task_heads.0.dim.0.bn.bias, pts_bbox_head.task_heads.0.dim.0.bn.running_mean, pts_bbox_head.task_heads.0.dim.0.bn.running_var, pts_bbox_head.task_heads.0.dim.0.bn.num_batches_tracked, pts_bbox_head.task_heads.0.dim.1.weight, pts_bbox_head.task_heads.0.dim.1.bias, pts_bbox_head.task_heads.0.rot.0.conv.weight, pts_bbox_head.task_heads.0.rot.0.bn.weight, pts_bbox_head.task_heads.0.rot.0.bn.bias, pts_bbox_head.task_heads.0.rot.0.bn.running_mean, pts_bbox_head.task_heads.0.rot.0.bn.running_var, pts_bbox_head.task_heads.0.rot.0.bn.num_batches_tracked, pts_bbox_head.task_heads.0.rot.1.weight, pts_bbox_head.task_heads.0.rot.1.bias, pts_bbox_head.task_heads.0.vel.0.conv.weight, pts_bbox_head.task_heads.0.vel.0.bn.weight, pts_bbox_head.task_heads.0.vel.0.bn.bias, pts_bbox_head.task_heads.0.vel.0.bn.running_mean, 
pts_bbox_head.task_heads.0.vel.0.bn.running_var, pts_bbox_head.task_heads.0.vel.0.bn.num_batches_tracked, pts_bbox_head.task_heads.0.vel.1.weight, pts_bbox_head.task_heads.0.vel.1.bias, pts_bbox_head.task_heads.0.heatmap.0.conv.weight, pts_bbox_head.task_heads.0.heatmap.0.bn.weight, pts_bbox_head.task_heads.0.heatmap.0.bn.bias, pts_bbox_head.task_heads.0.heatmap.0.bn.running_mean, pts_bbox_head.task_heads.0.heatmap.0.bn.running_var, pts_bbox_head.task_heads.0.heatmap.0.bn.num_batches_tracked, pts_bbox_head.task_heads.0.heatmap.1.weight, pts_bbox_head.task_heads.0.heatmap.1.bias
missing keys in source state_dict: occ_head.final_conv.conv.weight, occ_head.final_conv.conv.bias, occ_head.predicter.0.weight, occ_head.predicter.0.bias, occ_head.predicter.2.weight, occ_head.predicter.2.bias
2026-04-03 16:24:33,000 - mmdet - INFO - Start running, host: root@bw61, work_dir: /workspace/Flashocc/work_dirs/flashocc-r50
2026-04-03 16:24:33,001 - mmdet - INFO - Hooks will be executed in the following order:
before_run:
(VERY_HIGH ) StepLrUpdaterHook
(NORMAL ) CheckpointHook
(NORMAL ) MEGVIIEMAHook
(VERY_LOW ) TextLoggerHook
(VERY_LOW ) TensorboardLoggerHook
--------------------
before_train_epoch:
(VERY_HIGH ) StepLrUpdaterHook
(NORMAL ) DistSamplerSeedHook
(LOW ) IterTimerHook
(VERY_LOW ) TextLoggerHook
(VERY_LOW ) TensorboardLoggerHook
--------------------
before_train_iter:
(VERY_HIGH ) StepLrUpdaterHook
(LOW ) IterTimerHook
--------------------
after_train_iter:
(ABOVE_NORMAL) OptimizerHook
(NORMAL ) CheckpointHook
(NORMAL ) MEGVIIEMAHook
(LOW ) IterTimerHook
(VERY_LOW ) TextLoggerHook
(VERY_LOW ) TensorboardLoggerHook
--------------------
after_train_epoch:
(NORMAL ) CheckpointHook
(NORMAL ) MEGVIIEMAHook
(VERY_LOW ) TextLoggerHook
(VERY_LOW ) TensorboardLoggerHook
--------------------
before_val_epoch:
(NORMAL ) DistSamplerSeedHook
(LOW ) IterTimerHook
(VERY_LOW ) TextLoggerHook
(VERY_LOW ) TensorboardLoggerHook
--------------------
before_val_iter:
(LOW ) IterTimerHook
--------------------
after_val_iter:
(LOW ) IterTimerHook
--------------------
after_val_epoch:
(VERY_LOW ) TextLoggerHook
(VERY_LOW ) TensorboardLoggerHook
--------------------
after_run:
(VERY_LOW ) TextLoggerHook
(VERY_LOW ) TensorboardLoggerHook
--------------------
2026-04-03 16:24:33,001 - mmdet - INFO - workflow: [('train', 1)], max: 24 epochs
2026-04-03 16:24:33,001 - mmdet - INFO - Checkpoints will be saved to /workspace/Flashocc/work_dirs/flashocc-r50 by HardDiskBackend.
{"env_info": "sys.platform: linux\nPython: 3.10.12 (main, Aug 15 2025, 14:32:43) [GCC 11.4.0]\nCUDA available: True\nGPU 0,1,2,3,4,5,6,7: BW1000_H\nCUDA_HOME: /opt/dtk\nNVCC: Not Available\nGCC: x86_64-linux-gnu-gcc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0\nPyTorch: 2.5.1\nPyTorch compiling details: PyTorch built with:\n - GCC 10.3\n - C++ Version: 201703\n - Intel(R) Math Kernel Library Version 2020.0.4 Product Build 20200917 for Intel(R) 64 architecture applications\n - OpenMP 201511 (a.k.a. OpenMP 4.5)\n - LAPACK is enabled (usually provided by MKL)\n - NNPACK is enabled\n - CPU capability usage: AVX512\n - HIP Runtime 6.3.25521\n - MIOpen 2.18.0\n - Magma 2.8.0\n - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, CXX_COMPILER=/opt/rh/gcc-toolset-10/root/usr/bin/c++, CXX_FLAGS= -D_GLIBCXX_USE_CXX11_ABI=1 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOCUPTI -DLIBKINETO_NOXPUPTI=ON -DUSE_FBGEMM -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wsuggest-override -Wno-psabi -Wno-error=old-style-cast -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Wno-stringop-overflow, FORCE_FALLBACK_CUDA_MPI=1, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, TORCH_VERSION=2.5.1, USE_CUDA=0, USE_CUDNN=OFF, USE_CUSPARSELT=OFF, USE_EXCEPTION_PTR=1, USE_GFLAGS=1, USE_GLOG=1, USE_GLOO=1, USE_MKL=ON, USE_MKLDNN=0, USE_MPI=1, USE_NCCL=1, USE_NNPACK=ON, USE_OPENMP=1, USE_ROCM=ON, USE_ROCM_KERNEL_ASSERT=OFF, \n\nTorchVision: 0.20.1\nOpenCV: 4.12.0\nMMCV: 1.6.1\nMMCV Compiler: GCC 10.3\nMMCV CUDA Compiler: rocm not available\nMMDetection: 
2.25.1\nMMSegmentation: 0.25.0\nMMDetection3D: 1.0.0rc4+\nspconv2.0: False", "config": "point_cloud_range = [-51.2, -51.2, -5.0, 51.2, 51.2, 3.0]\nclass_names = [\n 'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',\n 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'\n]\ndataset_type = 'NuScenesDatasetOccpancy'\ndata_root = 'data/nuscenes/'\ninput_modality = dict(\n use_lidar=False,\n use_camera=True,\n use_radar=False,\n use_map=False,\n use_external=False)\nfile_client_args = dict(backend='disk')\ntrain_pipeline = [\n dict(\n type='PrepareImageInputs',\n is_train=True,\n data_config=dict(\n cams=[\n 'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT',\n 'CAM_BACK_LEFT', 'CAM_BACK', 'CAM_BACK_RIGHT'\n ],\n Ncams=6,\n input_size=(256, 704),\n src_size=(900, 1600),\n resize=(-0.06, 0.11),\n rot=(-5.4, 5.4),\n flip=True,\n crop_h=(0.0, 0.0),\n resize_test=0.0),\n sequential=False),\n dict(\n type='LoadAnnotationsBEVDepth',\n bda_aug_conf=dict(\n rot_lim=(-0.0, 0.0),\n scale_lim=(1.0, 1.0),\n flip_dx_ratio=0.5,\n flip_dy_ratio=0.5),\n classes=[\n 'car', 'truck', 'construction_vehicle', 'bus', 'trailer',\n 'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'\n ],\n is_train=True),\n dict(type='LoadOccGTFromFile'),\n dict(\n type='LoadPointsFromFile',\n coord_type='LIDAR',\n load_dim=5,\n use_dim=5,\n file_client_args=dict(backend='disk')),\n dict(\n type='PointToMultiViewDepth',\n downsample=1,\n grid_config=dict(\n x=[-40, 40, 0.4],\n y=[-40, 40, 0.4],\n z=[-1, 5.4, 6.4],\n depth=[1.0, 45.0, 0.5])),\n dict(\n type='DefaultFormatBundle3D',\n class_names=[\n 'car', 'truck', 'construction_vehicle', 'bus', 'trailer',\n 'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'\n ]),\n dict(\n type='Collect3D',\n keys=[\n 'img_inputs', 'gt_depth', 'voxel_semantics', 'mask_lidar',\n 'mask_camera'\n ])\n]\ntest_pipeline = [\n dict(\n type='PrepareImageInputs',\n data_config=dict(\n cams=[\n 'CAM_FRONT_LEFT', 'CAM_FRONT', 
'CAM_FRONT_RIGHT',\n 'CAM_BACK_LEFT', 'CAM_BACK', 'CAM_BACK_RIGHT'\n ],\n Ncams=6,\n input_size=(256, 704),\n src_size=(900, 1600),\n resize=(-0.06, 0.11),\n rot=(-5.4, 5.4),\n flip=True,\n crop_h=(0.0, 0.0),\n resize_test=0.0),\n sequential=False),\n dict(\n type='LoadAnnotationsBEVDepth',\n bda_aug_conf=dict(\n rot_lim=(-0.0, 0.0),\n scale_lim=(1.0, 1.0),\n flip_dx_ratio=0.5,\n flip_dy_ratio=0.5),\n classes=[\n 'car', 'truck', 'construction_vehicle', 'bus', 'trailer',\n 'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'\n ],\n is_train=False),\n dict(\n type='LoadPointsFromFile',\n coord_type='LIDAR',\n load_dim=5,\n use_dim=5,\n file_client_args=dict(backend='disk')),\n dict(\n type='MultiScaleFlipAug3D',\n img_scale=(1333, 800),\n pts_scale_ratio=1,\n flip=False,\n transforms=[\n dict(\n type='DefaultFormatBundle3D',\n class_names=[\n 'car', 'truck', 'construction_vehicle', 'bus', 'trailer',\n 'barrier', 'motorcycle', 'bicycle', 'pedestrian',\n 'traffic_cone'\n ],\n with_label=False),\n dict(type='Collect3D', keys=['points', 'img_inputs'])\n ])\n]\neval_pipeline = [\n dict(\n type='LoadPointsFromFile',\n coord_type='LIDAR',\n load_dim=5,\n use_dim=5,\n file_client_args=dict(backend='disk')),\n dict(\n type='LoadPointsFromMultiSweeps',\n sweeps_num=10,\n file_client_args=dict(backend='disk')),\n dict(\n type='DefaultFormatBundle3D',\n class_names=[\n 'car', 'truck', 'trailer', 'bus', 'construction_vehicle',\n 'bicycle', 'motorcycle', 'pedestrian', 'traffic_cone', 'barrier'\n ],\n with_label=False),\n dict(type='Collect3D', keys=['points'])\n]\ndata = dict(\n samples_per_gpu=24,\n workers_per_gpu=24,\n train=dict(\n type='NuScenesDatasetOccpancy',\n data_root='data/nuscenes/',\n ann_file='data/nuscenes/bevdetv2-nuscenes_infos_train.pkl',\n pipeline=[\n dict(\n type='PrepareImageInputs',\n is_train=True,\n data_config=dict(\n cams=[\n 'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT',\n 'CAM_BACK_LEFT', 'CAM_BACK', 'CAM_BACK_RIGHT'\n ],\n Ncams=6,\n 
input_size=(256, 704),\n src_size=(900, 1600),\n resize=(-0.06, 0.11),\n rot=(-5.4, 5.4),\n flip=True,\n crop_h=(0.0, 0.0),\n resize_test=0.0),\n sequential=False),\n dict(\n type='LoadAnnotationsBEVDepth',\n bda_aug_conf=dict(\n rot_lim=(-0.0, 0.0),\n scale_lim=(1.0, 1.0),\n flip_dx_ratio=0.5,\n flip_dy_ratio=0.5),\n classes=[\n 'car', 'truck', 'construction_vehicle', 'bus', 'trailer',\n 'barrier', 'motorcycle', 'bicycle', 'pedestrian',\n 'traffic_cone'\n ],\n is_train=True),\n dict(type='LoadOccGTFromFile'),\n dict(\n type='LoadPointsFromFile',\n coord_type='LIDAR',\n load_dim=5,\n use_dim=5,\n file_client_args=dict(backend='disk')),\n dict(\n type='PointToMultiViewDepth',\n downsample=1,\n grid_config=dict(\n x=[-40, 40, 0.4],\n y=[-40, 40, 0.4],\n z=[-1, 5.4, 6.4],\n depth=[1.0, 45.0, 0.5])),\n dict(\n type='DefaultFormatBundle3D',\n class_names=[\n 'car', 'truck', 'construction_vehicle', 'bus', 'trailer',\n 'barrier', 'motorcycle', 'bicycle', 'pedestrian',\n 'traffic_cone'\n ]),\n dict(\n type='Collect3D',\n keys=[\n 'img_inputs', 'gt_depth', 'voxel_semantics', 'mask_lidar',\n 'mask_camera'\n ])\n ],\n classes=[\n 'car', 'truck', 'construction_vehicle', 'bus', 'trailer',\n 'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'\n ],\n modality=dict(\n use_lidar=False,\n use_camera=True,\n use_radar=False,\n use_map=False,\n use_external=False),\n test_mode=False,\n box_type_3d='LiDAR',\n use_valid_flag=True,\n stereo=False,\n filter_empty_gt=False,\n img_info_prototype='bevdet'),\n val=dict(\n type='NuScenesDatasetOccpancy',\n data_root='data/nuscenes/',\n ann_file='data/nuscenes/bevdetv2-nuscenes_infos_val.pkl',\n pipeline=[\n dict(\n type='PrepareImageInputs',\n data_config=dict(\n cams=[\n 'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT',\n 'CAM_BACK_LEFT', 'CAM_BACK', 'CAM_BACK_RIGHT'\n ],\n Ncams=6,\n input_size=(256, 704),\n src_size=(900, 1600),\n resize=(-0.06, 0.11),\n rot=(-5.4, 5.4),\n flip=True,\n crop_h=(0.0, 0.0),\n resize_test=0.0),\n 
sequential=False),\n dict(\n type='LoadAnnotationsBEVDepth',\n bda_aug_conf=dict(\n rot_lim=(-0.0, 0.0),\n scale_lim=(1.0, 1.0),\n flip_dx_ratio=0.5,\n flip_dy_ratio=0.5),\n classes=[\n 'car', 'truck', 'construction_vehicle', 'bus', 'trailer',\n 'barrier', 'motorcycle', 'bicycle', 'pedestrian',\n 'traffic_cone'\n ],\n is_train=False),\n dict(\n type='LoadPointsFromFile',\n coord_type='LIDAR',\n load_dim=5,\n use_dim=5,\n file_client_args=dict(backend='disk')),\n dict(\n type='MultiScaleFlipAug3D',\n img_scale=(1333, 800),\n pts_scale_ratio=1,\n flip=False,\n transforms=[\n dict(\n type='DefaultFormatBundle3D',\n class_names=[\n 'car', 'truck', 'construction_vehicle', 'bus',\n 'trailer', 'barrier', 'motorcycle', 'bicycle',\n 'pedestrian', 'traffic_cone'\n ],\n with_label=False),\n dict(type='Collect3D', keys=['points', 'img_inputs'])\n ])\n ],\n classes=[\n 'car', 'truck', 'construction_vehicle', 'bus', 'trailer',\n 'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'\n ],\n modality=dict(\n use_lidar=False,\n use_camera=True,\n use_radar=False,\n use_map=False,\n use_external=False),\n test_mode=True,\n box_type_3d='LiDAR',\n stereo=False,\n filter_empty_gt=False,\n img_info_prototype='bevdet'),\n test=dict(\n type='NuScenesDatasetOccpancy',\n data_root='data/nuscenes/',\n ann_file='data/nuscenes/bevdetv2-nuscenes_infos_val.pkl',\n pipeline=[\n dict(\n type='PrepareImageInputs',\n data_config=dict(\n cams=[\n 'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT',\n 'CAM_BACK_LEFT', 'CAM_BACK', 'CAM_BACK_RIGHT'\n ],\n Ncams=6,\n input_size=(256, 704),\n src_size=(900, 1600),\n resize=(-0.06, 0.11),\n rot=(-5.4, 5.4),\n flip=True,\n crop_h=(0.0, 0.0),\n resize_test=0.0),\n sequential=False),\n dict(\n type='LoadAnnotationsBEVDepth',\n bda_aug_conf=dict(\n rot_lim=(-0.0, 0.0),\n scale_lim=(1.0, 1.0),\n flip_dx_ratio=0.5,\n flip_dy_ratio=0.5),\n classes=[\n 'car', 'truck', 'construction_vehicle', 'bus', 'trailer',\n 'barrier', 'motorcycle', 'bicycle', 
'pedestrian',\n 'traffic_cone'\n ],\n is_train=False),\n dict(\n type='LoadPointsFromFile',\n coord_type='LIDAR',\n load_dim=5,\n use_dim=5,\n file_client_args=dict(backend='disk')),\n dict(\n type='MultiScaleFlipAug3D',\n img_scale=(1333, 800),\n pts_scale_ratio=1,\n flip=False,\n transforms=[\n dict(\n type='DefaultFormatBundle3D',\n class_names=[\n 'car', 'truck', 'construction_vehicle', 'bus',\n 'trailer', 'barrier', 'motorcycle', 'bicycle',\n 'pedestrian', 'traffic_cone'\n ],\n with_label=False),\n dict(type='Collect3D', keys=['points', 'img_inputs'])\n ])\n ],\n classes=[\n 'car', 'truck', 'construction_vehicle', 'bus', 'trailer',\n 'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'\n ],\n modality=dict(\n use_lidar=False,\n use_camera=True,\n use_radar=False,\n use_map=False,\n use_external=False),\n test_mode=True,\n box_type_3d='LiDAR',\n stereo=False,\n filter_empty_gt=False,\n img_info_prototype='bevdet'))\nevaluation = dict(\n interval=1,\n pipeline=[\n dict(\n type='PrepareImageInputs',\n data_config=dict(\n cams=[\n 'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT',\n 'CAM_BACK_LEFT', 'CAM_BACK', 'CAM_BACK_RIGHT'\n ],\n Ncams=6,\n input_size=(256, 704),\n src_size=(900, 1600),\n resize=(-0.06, 0.11),\n rot=(-5.4, 5.4),\n flip=True,\n crop_h=(0.0, 0.0),\n resize_test=0.0),\n sequential=False),\n dict(\n type='LoadAnnotationsBEVDepth',\n bda_aug_conf=dict(\n rot_lim=(-0.0, 0.0),\n scale_lim=(1.0, 1.0),\n flip_dx_ratio=0.5,\n flip_dy_ratio=0.5),\n classes=[\n 'car', 'truck', 'construction_vehicle', 'bus', 'trailer',\n 'barrier', 'motorcycle', 'bicycle', 'pedestrian',\n 'traffic_cone'\n ],\n is_train=False),\n dict(\n type='LoadPointsFromFile',\n coord_type='LIDAR',\n load_dim=5,\n use_dim=5,\n file_client_args=dict(backend='disk')),\n dict(\n type='MultiScaleFlipAug3D',\n img_scale=(1333, 800),\n pts_scale_ratio=1,\n flip=False,\n transforms=[\n dict(\n type='DefaultFormatBundle3D',\n class_names=[\n 'car', 'truck', 'construction_vehicle', 
'bus',\n 'trailer', 'barrier', 'motorcycle', 'bicycle',\n 'pedestrian', 'traffic_cone'\n ],\n with_label=False),\n dict(type='Collect3D', keys=['points', 'img_inputs'])\n ])\n ],\n start=20)\ncheckpoint_config = dict(interval=1, max_keep_ckpts=5)\nlog_config = dict(\n interval=1,\n hooks=[dict(type='TextLoggerHook'),\n dict(type='TensorboardLoggerHook')])\ndist_params = dict(backend='nccl')\nlog_level = 'INFO'\nwork_dir = './work_dirs/flashocc-r50'\nload_from = 'ckpts/bevdet-r50-cbgs.pth'\nresume_from = None\nworkflow = [('train', 1)]\nopencv_num_threads = 0\nmp_start_method = 'fork'\nplugin = True\nplugin_dir = 'projects/mmdet3d_plugin/'\ndata_config = dict(\n cams=[\n 'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT', 'CAM_BACK_LEFT',\n 'CAM_BACK', 'CAM_BACK_RIGHT'\n ],\n Ncams=6,\n input_size=(256, 704),\n src_size=(900, 1600),\n resize=(-0.06, 0.11),\n rot=(-5.4, 5.4),\n flip=True,\n crop_h=(0.0, 0.0),\n resize_test=0.0)\ngrid_config = dict(\n x=[-40, 40, 0.4],\n y=[-40, 40, 0.4],\n z=[-1, 5.4, 6.4],\n depth=[1.0, 45.0, 0.5])\nvoxel_size = [0.1, 0.1, 0.2]\nnumC_Trans = 64\nmodel = dict(\n type='BEVDetOCC',\n img_backbone=dict(\n type='ResNet',\n depth=50,\n num_stages=4,\n out_indices=(2, 3),\n frozen_stages=-1,\n norm_cfg=dict(type='BN', requires_grad=True),\n norm_eval=False,\n with_cp=True,\n style='pytorch'),\n img_neck=dict(\n type='CustomFPN',\n in_channels=[1024, 2048],\n out_channels=256,\n num_outs=1,\n start_level=0,\n out_ids=[0]),\n img_view_transformer=dict(\n type='LSSViewTransformer',\n grid_config=dict(\n x=[-40, 40, 0.4],\n y=[-40, 40, 0.4],\n z=[-1, 5.4, 6.4],\n depth=[1.0, 45.0, 0.5]),\n input_size=(256, 704),\n in_channels=256,\n out_channels=64,\n sid=False,\n collapse_z=True,\n downsample=16),\n img_bev_encoder_backbone=dict(\n type='CustomResNet', numC_input=64, num_channels=[128, 256, 512]),\n img_bev_encoder_neck=dict(\n type='FPN_LSS', in_channels=640, out_channels=256),\n occ_head=dict(\n type='BEVOCCHead2D',\n in_dim=256,\n 
out_dim=256,\n Dz=16,\n use_mask=True,\n num_classes=18,\n use_predicter=True,\n class_balance=False,\n loss_occ=dict(\n type='CrossEntropyLoss',\n use_sigmoid=False,\n ignore_index=255,\n loss_weight=1.0)))\nbda_aug_conf = dict(\n rot_lim=(-0.0, 0.0),\n scale_lim=(1.0, 1.0),\n flip_dx_ratio=0.5,\n flip_dy_ratio=0.5)\nshare_data_config = dict(\n type='NuScenesDatasetOccpancy',\n data_root='data/nuscenes/',\n classes=[\n 'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',\n 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'\n ],\n modality=dict(\n use_lidar=False,\n use_camera=True,\n use_radar=False,\n use_map=False,\n use_external=False),\n stereo=False,\n filter_empty_gt=False,\n img_info_prototype='bevdet')\ntest_data_config = dict(\n pipeline=[\n dict(\n type='PrepareImageInputs',\n data_config=dict(\n cams=[\n 'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT',\n 'CAM_BACK_LEFT', 'CAM_BACK', 'CAM_BACK_RIGHT'\n ],\n Ncams=6,\n input_size=(256, 704),\n src_size=(900, 1600),\n resize=(-0.06, 0.11),\n rot=(-5.4, 5.4),\n flip=True,\n crop_h=(0.0, 0.0),\n resize_test=0.0),\n sequential=False),\n dict(\n type='LoadAnnotationsBEVDepth',\n bda_aug_conf=dict(\n rot_lim=(-0.0, 0.0),\n scale_lim=(1.0, 1.0),\n flip_dx_ratio=0.5,\n flip_dy_ratio=0.5),\n classes=[\n 'car', 'truck', 'construction_vehicle', 'bus', 'trailer',\n 'barrier', 'motorcycle', 'bicycle', 'pedestrian',\n 'traffic_cone'\n ],\n is_train=False),\n dict(\n type='LoadPointsFromFile',\n coord_type='LIDAR',\n load_dim=5,\n use_dim=5,\n file_client_args=dict(backend='disk')),\n dict(\n type='MultiScaleFlipAug3D',\n img_scale=(1333, 800),\n pts_scale_ratio=1,\n flip=False,\n transforms=[\n dict(\n type='DefaultFormatBundle3D',\n class_names=[\n 'car', 'truck', 'construction_vehicle', 'bus',\n 'trailer', 'barrier', 'motorcycle', 'bicycle',\n 'pedestrian', 'traffic_cone'\n ],\n with_label=False),\n dict(type='Collect3D', keys=['points', 'img_inputs'])\n ])\n ],\n 
ann_file='data/nuscenes/bevdetv2-nuscenes_infos_val.pkl',\n type='NuScenesDatasetOccpancy',\n data_root='data/nuscenes/',\n classes=[\n 'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',\n 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'\n ],\n modality=dict(\n use_lidar=False,\n use_camera=True,\n use_radar=False,\n use_map=False,\n use_external=False),\n stereo=False,\n filter_empty_gt=False,\n img_info_prototype='bevdet')\nkey = 'test'\noptimizer = dict(type='AdamW', lr=0.0001, weight_decay=0.01)\noptimizer_config = dict(grad_clip=dict(max_norm=5, norm_type=2))\nlr_config = dict(\n policy='step',\n warmup='linear',\n warmup_iters=200,\n warmup_ratio=0.001,\n step=[24])\nrunner = dict(type='EpochBasedRunner', max_epochs=24)\ncustom_hooks = [\n dict(type='MEGVIIEMAHook', init_updates=10560, priority='NORMAL')\n]\ngpu_ids = range(0, 8)\n", "seed": 0, "exp_name": "flashocc-r50.py"}
2026-04-03 16:26:51,896 - mmdet - INFO - Environment info:
------------------------------------------------------------
sys.platform: linux
Python: 3.10.12 (main, Aug 15 2025, 14:32:43) [GCC 11.4.0]
CUDA available: True
GPU 0,1,2,3,4,5,6,7: BW1000_H
CUDA_HOME: /opt/dtk
NVCC: Not Available
GCC: x86_64-linux-gnu-gcc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0
PyTorch: 2.5.1
PyTorch compiling details: PyTorch built with:
- GCC 10.3
- C++ Version: 201703
- Intel(R) Math Kernel Library Version 2020.0.4 Product Build 20200917 for Intel(R) 64 architecture applications
- OpenMP 201511 (a.k.a. OpenMP 4.5)
- LAPACK is enabled (usually provided by MKL)
- NNPACK is enabled
- CPU capability usage: AVX512
- HIP Runtime 6.3.25521
- MIOpen 2.18.0
- Magma 2.8.0
- Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, CXX_COMPILER=/opt/rh/gcc-toolset-10/root/usr/bin/c++, CXX_FLAGS= -D_GLIBCXX_USE_CXX11_ABI=1 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOCUPTI -DLIBKINETO_NOXPUPTI=ON -DUSE_FBGEMM -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wsuggest-override -Wno-psabi -Wno-error=old-style-cast -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Wno-stringop-overflow, FORCE_FALLBACK_CUDA_MPI=1, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, TORCH_VERSION=2.5.1, USE_CUDA=0, USE_CUDNN=OFF, USE_CUSPARSELT=OFF, USE_EXCEPTION_PTR=1, USE_GFLAGS=1, USE_GLOG=1, USE_GLOO=1, USE_MKL=ON, USE_MKLDNN=0, USE_MPI=1, USE_NCCL=1, USE_NNPACK=ON, USE_OPENMP=1, USE_ROCM=ON, USE_ROCM_KERNEL_ASSERT=OFF,
TorchVision: 0.20.1
OpenCV: 4.12.0
MMCV: 1.6.1
MMCV Compiler: GCC 10.3
MMCV CUDA Compiler: rocm not available
MMDetection: 2.25.1
MMSegmentation: 0.25.0
MMDetection3D: 1.0.0rc4+
spconv2.0: False
------------------------------------------------------------
2026-04-03 16:26:52,455 - mmdet - INFO - Distributed training: True
2026-04-03 16:26:53,024 - mmdet - INFO - Config:
point_cloud_range = [-51.2, -51.2, -5.0, 51.2, 51.2, 3.0]
class_names = [
'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',
'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
]
dataset_type = 'NuScenesDatasetOccpancy'
data_root = 'data/nuscenes/'
input_modality = dict(
use_lidar=False,
use_camera=True,
use_radar=False,
use_map=False,
use_external=False)
file_client_args = dict(backend='disk')
train_pipeline = [
dict(
type='PrepareImageInputs',
is_train=True,
data_config=dict(
cams=[
'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT',
'CAM_BACK_LEFT', 'CAM_BACK', 'CAM_BACK_RIGHT'
],
Ncams=6,
input_size=(256, 704),
src_size=(900, 1600),
resize=(-0.06, 0.11),
rot=(-5.4, 5.4),
flip=True,
crop_h=(0.0, 0.0),
resize_test=0.0),
sequential=False),
dict(
type='LoadAnnotationsBEVDepth',
bda_aug_conf=dict(
rot_lim=(-0.0, 0.0),
scale_lim=(1.0, 1.0),
flip_dx_ratio=0.5,
flip_dy_ratio=0.5),
classes=[
'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
],
is_train=True),
dict(type='LoadOccGTFromFile'),
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=5,
use_dim=5,
file_client_args=dict(backend='disk')),
dict(
type='PointToMultiViewDepth',
downsample=1,
grid_config=dict(
x=[-40, 40, 0.4],
y=[-40, 40, 0.4],
z=[-1, 5.4, 6.4],
depth=[1.0, 45.0, 0.5])),
dict(
type='DefaultFormatBundle3D',
class_names=[
'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
]),
dict(
type='Collect3D',
keys=[
'img_inputs', 'gt_depth', 'voxel_semantics', 'mask_lidar',
'mask_camera'
])
]
test_pipeline = [
dict(
type='PrepareImageInputs',
data_config=dict(
cams=[
'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT',
'CAM_BACK_LEFT', 'CAM_BACK', 'CAM_BACK_RIGHT'
],
Ncams=6,
input_size=(256, 704),
src_size=(900, 1600),
resize=(-0.06, 0.11),
rot=(-5.4, 5.4),
flip=True,
crop_h=(0.0, 0.0),
resize_test=0.0),
sequential=False),
dict(
type='LoadAnnotationsBEVDepth',
bda_aug_conf=dict(
rot_lim=(-0.0, 0.0),
scale_lim=(1.0, 1.0),
flip_dx_ratio=0.5,
flip_dy_ratio=0.5),
classes=[
'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
],
is_train=False),
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=5,
use_dim=5,
file_client_args=dict(backend='disk')),
dict(
type='MultiScaleFlipAug3D',
img_scale=(1333, 800),
pts_scale_ratio=1,
flip=False,
transforms=[
dict(
type='DefaultFormatBundle3D',
class_names=[
'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
'barrier', 'motorcycle', 'bicycle', 'pedestrian',
'traffic_cone'
],
with_label=False),
dict(type='Collect3D', keys=['points', 'img_inputs'])
])
]
eval_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=5,
use_dim=5,
file_client_args=dict(backend='disk')),
dict(
type='LoadPointsFromMultiSweeps',
sweeps_num=10,
file_client_args=dict(backend='disk')),
dict(
type='DefaultFormatBundle3D',
class_names=[
'car', 'truck', 'trailer', 'bus', 'construction_vehicle',
'bicycle', 'motorcycle', 'pedestrian', 'traffic_cone', 'barrier'
],
with_label=False),
dict(type='Collect3D', keys=['points'])
]
data = dict(
samples_per_gpu=24,
workers_per_gpu=24,
train=dict(
type='NuScenesDatasetOccpancy',
data_root='data/nuscenes/',
ann_file='data/nuscenes/bevdetv2-nuscenes_infos_train.pkl',
pipeline=[
dict(
type='PrepareImageInputs',
is_train=True,
data_config=dict(
cams=[
'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT',
'CAM_BACK_LEFT', 'CAM_BACK', 'CAM_BACK_RIGHT'
],
Ncams=6,
input_size=(256, 704),
src_size=(900, 1600),
resize=(-0.06, 0.11),
rot=(-5.4, 5.4),
flip=True,
crop_h=(0.0, 0.0),
resize_test=0.0),
sequential=False),
dict(
type='LoadAnnotationsBEVDepth',
bda_aug_conf=dict(
rot_lim=(-0.0, 0.0),
scale_lim=(1.0, 1.0),
flip_dx_ratio=0.5,
flip_dy_ratio=0.5),
classes=[
'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
'barrier', 'motorcycle', 'bicycle', 'pedestrian',
'traffic_cone'
],
is_train=True),
dict(type='LoadOccGTFromFile'),
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=5,
use_dim=5,
file_client_args=dict(backend='disk')),
dict(
type='PointToMultiViewDepth',
downsample=1,
grid_config=dict(
x=[-40, 40, 0.4],
y=[-40, 40, 0.4],
z=[-1, 5.4, 6.4],
depth=[1.0, 45.0, 0.5])),
dict(
type='DefaultFormatBundle3D',
class_names=[
'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
'barrier', 'motorcycle', 'bicycle', 'pedestrian',
'traffic_cone'
]),
dict(
type='Collect3D',
keys=[
'img_inputs', 'gt_depth', 'voxel_semantics', 'mask_lidar',
'mask_camera'
])
],
classes=[
'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
],
modality=dict(
use_lidar=False,
use_camera=True,
use_radar=False,
use_map=False,
use_external=False),
test_mode=False,
box_type_3d='LiDAR',
use_valid_flag=True,
stereo=False,
filter_empty_gt=False,
img_info_prototype='bevdet'),
val=dict(
type='NuScenesDatasetOccpancy',
data_root='data/nuscenes/',
ann_file='data/nuscenes/bevdetv2-nuscenes_infos_val.pkl',
pipeline=[
dict(
type='PrepareImageInputs',
data_config=dict(
cams=[
'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT',
'CAM_BACK_LEFT', 'CAM_BACK', 'CAM_BACK_RIGHT'
],
Ncams=6,
input_size=(256, 704),
src_size=(900, 1600),
resize=(-0.06, 0.11),
rot=(-5.4, 5.4),
flip=True,
crop_h=(0.0, 0.0),
resize_test=0.0),
sequential=False),
dict(
type='LoadAnnotationsBEVDepth',
bda_aug_conf=dict(
rot_lim=(-0.0, 0.0),
scale_lim=(1.0, 1.0),
flip_dx_ratio=0.5,
flip_dy_ratio=0.5),
classes=[
'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
'barrier', 'motorcycle', 'bicycle', 'pedestrian',
'traffic_cone'
],
is_train=False),
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=5,
use_dim=5,
file_client_args=dict(backend='disk')),
dict(
type='MultiScaleFlipAug3D',
img_scale=(1333, 800),
pts_scale_ratio=1,
flip=False,
transforms=[
dict(
type='DefaultFormatBundle3D',
class_names=[
'car', 'truck', 'construction_vehicle', 'bus',
'trailer', 'barrier', 'motorcycle', 'bicycle',
'pedestrian', 'traffic_cone'
],
with_label=False),
dict(type='Collect3D', keys=['points', 'img_inputs'])
])
],
classes=[
'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
],
modality=dict(
use_lidar=False,
use_camera=True,
use_radar=False,
use_map=False,
use_external=False),
test_mode=True,
box_type_3d='LiDAR',
stereo=False,
filter_empty_gt=False,
img_info_prototype='bevdet'),
test=dict(
type='NuScenesDatasetOccpancy',
data_root='data/nuscenes/',
ann_file='data/nuscenes/bevdetv2-nuscenes_infos_val.pkl',
pipeline=[
dict(
type='PrepareImageInputs',
data_config=dict(
cams=[
'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT',
'CAM_BACK_LEFT', 'CAM_BACK', 'CAM_BACK_RIGHT'
],
Ncams=6,
input_size=(256, 704),
src_size=(900, 1600),
resize=(-0.06, 0.11),
rot=(-5.4, 5.4),
flip=True,
crop_h=(0.0, 0.0),
resize_test=0.0),
sequential=False),
dict(
type='LoadAnnotationsBEVDepth',
bda_aug_conf=dict(
rot_lim=(-0.0, 0.0),
scale_lim=(1.0, 1.0),
flip_dx_ratio=0.5,
flip_dy_ratio=0.5),
classes=[
'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
'barrier', 'motorcycle', 'bicycle', 'pedestrian',
'traffic_cone'
],
is_train=False),
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=5,
use_dim=5,
file_client_args=dict(backend='disk')),
dict(
type='MultiScaleFlipAug3D',
img_scale=(1333, 800),
pts_scale_ratio=1,
flip=False,
transforms=[
dict(
type='DefaultFormatBundle3D',
class_names=[
'car', 'truck', 'construction_vehicle', 'bus',
'trailer', 'barrier', 'motorcycle', 'bicycle',
'pedestrian', 'traffic_cone'
],
with_label=False),
dict(type='Collect3D', keys=['points', 'img_inputs'])
])
],
classes=[
'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
],
modality=dict(
use_lidar=False,
use_camera=True,
use_radar=False,
use_map=False,
use_external=False),
test_mode=True,
box_type_3d='LiDAR',
stereo=False,
filter_empty_gt=False,
img_info_prototype='bevdet'))
evaluation = dict(
interval=1,
pipeline=[
dict(
type='PrepareImageInputs',
data_config=dict(
cams=[
'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT',
'CAM_BACK_LEFT', 'CAM_BACK', 'CAM_BACK_RIGHT'
],
Ncams=6,
input_size=(256, 704),
src_size=(900, 1600),
resize=(-0.06, 0.11),
rot=(-5.4, 5.4),
flip=True,
crop_h=(0.0, 0.0),
resize_test=0.0),
sequential=False),
dict(
type='LoadAnnotationsBEVDepth',
bda_aug_conf=dict(
rot_lim=(-0.0, 0.0),
scale_lim=(1.0, 1.0),
flip_dx_ratio=0.5,
flip_dy_ratio=0.5),
classes=[
'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
'barrier', 'motorcycle', 'bicycle', 'pedestrian',
'traffic_cone'
],
is_train=False),
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=5,
use_dim=5,
file_client_args=dict(backend='disk')),
dict(
type='MultiScaleFlipAug3D',
img_scale=(1333, 800),
pts_scale_ratio=1,
flip=False,
transforms=[
dict(
type='DefaultFormatBundle3D',
class_names=[
'car', 'truck', 'construction_vehicle', 'bus',
'trailer', 'barrier', 'motorcycle', 'bicycle',
'pedestrian', 'traffic_cone'
],
with_label=False),
dict(type='Collect3D', keys=['points', 'img_inputs'])
])
],
start=20)
checkpoint_config = dict(interval=1, max_keep_ckpts=5)
log_config = dict(
interval=1,
hooks=[dict(type='TextLoggerHook'),
dict(type='TensorboardLoggerHook')])
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/flashocc-r50'
load_from = 'ckpts/bevdet-r50-cbgs.pth'
resume_from = None
workflow = [('train', 1)]
opencv_num_threads = 0
mp_start_method = 'fork'
plugin = True
plugin_dir = 'projects/mmdet3d_plugin/'
data_config = dict(
cams=[
'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT', 'CAM_BACK_LEFT',
'CAM_BACK', 'CAM_BACK_RIGHT'
],
Ncams=6,
input_size=(256, 704),
src_size=(900, 1600),
resize=(-0.06, 0.11),
rot=(-5.4, 5.4),
flip=True,
crop_h=(0.0, 0.0),
resize_test=0.0)
grid_config = dict(
x=[-40, 40, 0.4],
y=[-40, 40, 0.4],
z=[-1, 5.4, 6.4],
depth=[1.0, 45.0, 0.5])
voxel_size = [0.1, 0.1, 0.2]
numC_Trans = 64
model = dict(
type='BEVDetOCC',
img_backbone=dict(
type='ResNet',
depth=50,
num_stages=4,
out_indices=(2, 3),
frozen_stages=-1,
norm_cfg=dict(type='BN', requires_grad=True),
norm_eval=False,
with_cp=True,
style='pytorch'),
img_neck=dict(
type='CustomFPN',
in_channels=[1024, 2048],
out_channels=256,
num_outs=1,
start_level=0,
out_ids=[0]),
img_view_transformer=dict(
type='LSSViewTransformer',
grid_config=dict(
x=[-40, 40, 0.4],
y=[-40, 40, 0.4],
z=[-1, 5.4, 6.4],
depth=[1.0, 45.0, 0.5]),
input_size=(256, 704),
in_channels=256,
out_channels=64,
sid=False,
collapse_z=True,
downsample=16),
img_bev_encoder_backbone=dict(
type='CustomResNet', numC_input=64, num_channels=[128, 256, 512]),
img_bev_encoder_neck=dict(
type='FPN_LSS', in_channels=640, out_channels=256),
occ_head=dict(
type='BEVOCCHead2D',
in_dim=256,
out_dim=256,
Dz=16,
use_mask=True,
num_classes=18,
use_predicter=True,
class_balance=False,
loss_occ=dict(
type='CrossEntropyLoss',
use_sigmoid=False,
ignore_index=255,
loss_weight=1.0)))
bda_aug_conf = dict(
rot_lim=(-0.0, 0.0),
scale_lim=(1.0, 1.0),
flip_dx_ratio=0.5,
flip_dy_ratio=0.5)
share_data_config = dict(
type='NuScenesDatasetOccpancy',
data_root='data/nuscenes/',
classes=[
'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',
'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
],
modality=dict(
use_lidar=False,
use_camera=True,
use_radar=False,
use_map=False,
use_external=False),
stereo=False,
filter_empty_gt=False,
img_info_prototype='bevdet')
test_data_config = dict(
pipeline=[
dict(
type='PrepareImageInputs',
data_config=dict(
cams=[
'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT',
'CAM_BACK_LEFT', 'CAM_BACK', 'CAM_BACK_RIGHT'
],
Ncams=6,
input_size=(256, 704),
src_size=(900, 1600),
resize=(-0.06, 0.11),
rot=(-5.4, 5.4),
flip=True,
crop_h=(0.0, 0.0),
resize_test=0.0),
sequential=False),
dict(
type='LoadAnnotationsBEVDepth',
bda_aug_conf=dict(
rot_lim=(-0.0, 0.0),
scale_lim=(1.0, 1.0),
flip_dx_ratio=0.5,
flip_dy_ratio=0.5),
classes=[
'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
'barrier', 'motorcycle', 'bicycle', 'pedestrian',
'traffic_cone'
],
is_train=False),
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=5,
use_dim=5,
file_client_args=dict(backend='disk')),
dict(
type='MultiScaleFlipAug3D',
img_scale=(1333, 800),
pts_scale_ratio=1,
flip=False,
transforms=[
dict(
type='DefaultFormatBundle3D',
class_names=[
'car', 'truck', 'construction_vehicle', 'bus',
'trailer', 'barrier', 'motorcycle', 'bicycle',
'pedestrian', 'traffic_cone'
],
with_label=False),
dict(type='Collect3D', keys=['points', 'img_inputs'])
])
],
ann_file='data/nuscenes/bevdetv2-nuscenes_infos_val.pkl',
type='NuScenesDatasetOccpancy',
data_root='data/nuscenes/',
classes=[
'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',
'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
],
modality=dict(
use_lidar=False,
use_camera=True,
use_radar=False,
use_map=False,
use_external=False),
stereo=False,
filter_empty_gt=False,
img_info_prototype='bevdet')
key = 'test'
optimizer = dict(type='AdamW', lr=0.0001, weight_decay=0.01)
optimizer_config = dict(grad_clip=dict(max_norm=5, norm_type=2))
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=200,
warmup_ratio=0.001,
step=[24])
runner = dict(type='EpochBasedRunner', max_epochs=24)
custom_hooks = [
dict(type='MEGVIIEMAHook', init_updates=10560, priority='NORMAL')
]
gpu_ids = range(0, 8)
2026-04-03 16:26:53,024 - mmdet - INFO - Set random seed to 0, deterministic: False
2026-04-03 16:26:53,275 - mmdet - INFO - initialize ResNet with init_cfg [{'type': 'Kaiming', 'layer': 'Conv2d'}, {'type': 'Constant', 'val': 1, 'layer': ['_BatchNorm', 'GroupNorm']}]
2026-04-03 16:26:53,383 - mmdet - INFO - initialize Bottleneck with init_cfg {'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
2026-04-03 16:26:53,383 - mmdet - INFO - initialize Bottleneck with init_cfg {'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
2026-04-03 16:26:53,383 - mmdet - INFO - initialize Bottleneck with init_cfg {'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
2026-04-03 16:26:53,384 - mmdet - INFO - initialize Bottleneck with init_cfg {'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
2026-04-03 16:26:53,385 - mmdet - INFO - initialize Bottleneck with init_cfg {'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
2026-04-03 16:26:53,385 - mmdet - INFO - initialize Bottleneck with init_cfg {'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
2026-04-03 16:26:53,386 - mmdet - INFO - initialize Bottleneck with init_cfg {'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
2026-04-03 16:26:53,387 - mmdet - INFO - initialize Bottleneck with init_cfg {'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
2026-04-03 16:26:53,388 - mmdet - INFO - initialize Bottleneck with init_cfg {'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
2026-04-03 16:26:53,388 - mmdet - INFO - initialize Bottleneck with init_cfg {'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
2026-04-03 16:26:53,389 - mmdet - INFO - initialize Bottleneck with init_cfg {'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
2026-04-03 16:26:53,390 - mmdet - INFO - initialize Bottleneck with init_cfg {'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
2026-04-03 16:26:53,391 - mmdet - INFO - initialize Bottleneck with init_cfg {'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
2026-04-03 16:26:53,393 - mmdet - INFO - initialize Bottleneck with init_cfg {'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
2026-04-03 16:26:53,396 - mmdet - INFO - initialize Bottleneck with init_cfg {'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
2026-04-03 16:26:53,399 - mmdet - INFO - initialize Bottleneck with init_cfg {'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
2026-04-03 16:26:53,410 - mmdet - INFO - initialize CustomFPN with init_cfg {'type': 'Xavier', 'layer': 'Conv2d', 'distribution': 'uniform'}
Name of parameter - Initialization information
img_backbone.conv1.weight - torch.Size([64, 3, 7, 7]):
KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0
img_backbone.bn1.weight - torch.Size([64]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.bn1.bias - torch.Size([64]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer1.0.conv1.weight - torch.Size([64, 64, 1, 1]):
KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0
img_backbone.layer1.0.bn1.weight - torch.Size([64]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer1.0.bn1.bias - torch.Size([64]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer1.0.conv2.weight - torch.Size([64, 64, 3, 3]):
KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0
img_backbone.layer1.0.bn2.weight - torch.Size([64]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer1.0.bn2.bias - torch.Size([64]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer1.0.conv3.weight - torch.Size([256, 64, 1, 1]):
KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0
img_backbone.layer1.0.bn3.weight - torch.Size([256]):
ConstantInit: val=0, bias=0
img_backbone.layer1.0.bn3.bias - torch.Size([256]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer1.0.downsample.0.weight - torch.Size([256, 64, 1, 1]):
KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0
img_backbone.layer1.0.downsample.1.weight - torch.Size([256]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer1.0.downsample.1.bias - torch.Size([256]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer1.1.conv1.weight - torch.Size([64, 256, 1, 1]):
KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0
img_backbone.layer1.1.bn1.weight - torch.Size([64]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer1.1.bn1.bias - torch.Size([64]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer1.1.conv2.weight - torch.Size([64, 64, 3, 3]):
KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0
img_backbone.layer1.1.bn2.weight - torch.Size([64]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer1.1.bn2.bias - torch.Size([64]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer1.1.conv3.weight - torch.Size([256, 64, 1, 1]):
KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0
img_backbone.layer1.1.bn3.weight - torch.Size([256]):
ConstantInit: val=0, bias=0
img_backbone.layer1.1.bn3.bias - torch.Size([256]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer1.2.conv1.weight - torch.Size([64, 256, 1, 1]):
KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0
img_backbone.layer1.2.bn1.weight - torch.Size([64]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer1.2.bn1.bias - torch.Size([64]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer1.2.conv2.weight - torch.Size([64, 64, 3, 3]):
KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0
img_backbone.layer1.2.bn2.weight - torch.Size([64]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer1.2.bn2.bias - torch.Size([64]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer1.2.conv3.weight - torch.Size([256, 64, 1, 1]):
KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0
img_backbone.layer1.2.bn3.weight - torch.Size([256]):
ConstantInit: val=0, bias=0
img_backbone.layer1.2.bn3.bias - torch.Size([256]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer2.0.conv1.weight - torch.Size([128, 256, 1, 1]):
KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0
img_backbone.layer2.0.bn1.weight - torch.Size([128]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer2.0.bn1.bias - torch.Size([128]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer2.0.conv2.weight - torch.Size([128, 128, 3, 3]):
KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0
img_backbone.layer2.0.bn2.weight - torch.Size([128]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer2.0.bn2.bias - torch.Size([128]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer2.0.conv3.weight - torch.Size([512, 128, 1, 1]):
KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0
img_backbone.layer2.0.bn3.weight - torch.Size([512]):
ConstantInit: val=0, bias=0
img_backbone.layer2.0.bn3.bias - torch.Size([512]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer2.0.downsample.0.weight - torch.Size([512, 256, 1, 1]):
KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0
img_backbone.layer2.0.downsample.1.weight - torch.Size([512]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer2.0.downsample.1.bias - torch.Size([512]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer2.1.conv1.weight - torch.Size([128, 512, 1, 1]):
KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0
img_backbone.layer2.1.bn1.weight - torch.Size([128]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer2.1.bn1.bias - torch.Size([128]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer2.1.conv2.weight - torch.Size([128, 128, 3, 3]):
KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0
img_backbone.layer2.1.bn2.weight - torch.Size([128]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer2.1.bn2.bias - torch.Size([128]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer2.1.conv3.weight - torch.Size([512, 128, 1, 1]):
KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0
img_backbone.layer2.1.bn3.weight - torch.Size([512]):
ConstantInit: val=0, bias=0
img_backbone.layer2.1.bn3.bias - torch.Size([512]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer2.2.conv1.weight - torch.Size([128, 512, 1, 1]):
KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0
img_backbone.layer2.2.bn1.weight - torch.Size([128]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer2.2.bn1.bias - torch.Size([128]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer2.2.conv2.weight - torch.Size([128, 128, 3, 3]):
KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0
img_backbone.layer2.2.bn2.weight - torch.Size([128]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer2.2.bn2.bias - torch.Size([128]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer2.2.conv3.weight - torch.Size([512, 128, 1, 1]):
KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0
img_backbone.layer2.2.bn3.weight - torch.Size([512]):
ConstantInit: val=0, bias=0
img_backbone.layer2.2.bn3.bias - torch.Size([512]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer2.3.conv1.weight - torch.Size([128, 512, 1, 1]):
KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0
img_backbone.layer2.3.bn1.weight - torch.Size([128]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer2.3.bn1.bias - torch.Size([128]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer2.3.conv2.weight - torch.Size([128, 128, 3, 3]):
KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0
img_backbone.layer2.3.bn2.weight - torch.Size([128]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer2.3.bn2.bias - torch.Size([128]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer2.3.conv3.weight - torch.Size([512, 128, 1, 1]):
KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0
img_backbone.layer2.3.bn3.weight - torch.Size([512]):
ConstantInit: val=0, bias=0
img_backbone.layer2.3.bn3.bias - torch.Size([512]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer3.0.conv1.weight - torch.Size([256, 512, 1, 1]):
KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0
img_backbone.layer3.0.bn1.weight - torch.Size([256]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer3.0.bn1.bias - torch.Size([256]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer3.0.conv2.weight - torch.Size([256, 256, 3, 3]):
KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0
img_backbone.layer3.0.bn2.weight - torch.Size([256]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer3.0.bn2.bias - torch.Size([256]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer3.0.conv3.weight - torch.Size([1024, 256, 1, 1]):
KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0
img_backbone.layer3.0.bn3.weight - torch.Size([1024]):
ConstantInit: val=0, bias=0
img_backbone.layer3.0.bn3.bias - torch.Size([1024]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer3.0.downsample.0.weight - torch.Size([1024, 512, 1, 1]):
KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0
img_backbone.layer3.0.downsample.1.weight - torch.Size([1024]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer3.0.downsample.1.bias - torch.Size([1024]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer3.1.conv1.weight - torch.Size([256, 1024, 1, 1]):
KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0
img_backbone.layer3.1.bn1.weight - torch.Size([256]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer3.1.bn1.bias - torch.Size([256]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer3.1.conv2.weight - torch.Size([256, 256, 3, 3]):
KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0
img_backbone.layer3.1.bn2.weight - torch.Size([256]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer3.1.bn2.bias - torch.Size([256]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer3.1.conv3.weight - torch.Size([1024, 256, 1, 1]):
KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0
img_backbone.layer3.1.bn3.weight - torch.Size([1024]):
ConstantInit: val=0, bias=0
img_backbone.layer3.1.bn3.bias - torch.Size([1024]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer3.2.conv1.weight - torch.Size([256, 1024, 1, 1]):
KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0
img_backbone.layer3.2.bn1.weight - torch.Size([256]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer3.2.bn1.bias - torch.Size([256]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer3.2.conv2.weight - torch.Size([256, 256, 3, 3]):
KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0
img_backbone.layer3.2.bn2.weight - torch.Size([256]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer3.2.bn2.bias - torch.Size([256]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer3.2.conv3.weight - torch.Size([1024, 256, 1, 1]):
KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0
img_backbone.layer3.2.bn3.weight - torch.Size([1024]):
ConstantInit: val=0, bias=0
img_backbone.layer3.2.bn3.bias - torch.Size([1024]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer3.3.conv1.weight - torch.Size([256, 1024, 1, 1]):
KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0
img_backbone.layer3.3.bn1.weight - torch.Size([256]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer3.3.bn1.bias - torch.Size([256]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer3.3.conv2.weight - torch.Size([256, 256, 3, 3]):
KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0
img_backbone.layer3.3.bn2.weight - torch.Size([256]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer3.3.bn2.bias - torch.Size([256]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer3.3.conv3.weight - torch.Size([1024, 256, 1, 1]):
KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0
img_backbone.layer3.3.bn3.weight - torch.Size([1024]):
ConstantInit: val=0, bias=0
img_backbone.layer3.3.bn3.bias - torch.Size([1024]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer3.4.conv1.weight - torch.Size([256, 1024, 1, 1]):
KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0
img_backbone.layer3.4.bn1.weight - torch.Size([256]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer3.4.bn1.bias - torch.Size([256]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer3.4.conv2.weight - torch.Size([256, 256, 3, 3]):
KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0
img_backbone.layer3.4.bn2.weight - torch.Size([256]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer3.4.bn2.bias - torch.Size([256]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer3.4.conv3.weight - torch.Size([1024, 256, 1, 1]):
KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0
img_backbone.layer3.4.bn3.weight - torch.Size([1024]):
ConstantInit: val=0, bias=0
img_backbone.layer3.4.bn3.bias - torch.Size([1024]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer3.5.conv1.weight - torch.Size([256, 1024, 1, 1]):
KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0
img_backbone.layer3.5.bn1.weight - torch.Size([256]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer3.5.bn1.bias - torch.Size([256]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer3.5.conv2.weight - torch.Size([256, 256, 3, 3]):
KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0
img_backbone.layer3.5.bn2.weight - torch.Size([256]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer3.5.bn2.bias - torch.Size([256]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer3.5.conv3.weight - torch.Size([1024, 256, 1, 1]):
KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0
img_backbone.layer3.5.bn3.weight - torch.Size([1024]):
ConstantInit: val=0, bias=0
img_backbone.layer3.5.bn3.bias - torch.Size([1024]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer4.0.conv1.weight - torch.Size([512, 1024, 1, 1]):
KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0
img_backbone.layer4.0.bn1.weight - torch.Size([512]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer4.0.bn1.bias - torch.Size([512]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer4.0.conv2.weight - torch.Size([512, 512, 3, 3]):
KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0
img_backbone.layer4.0.bn2.weight - torch.Size([512]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer4.0.bn2.bias - torch.Size([512]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer4.0.conv3.weight - torch.Size([2048, 512, 1, 1]):
KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0
img_backbone.layer4.0.bn3.weight - torch.Size([2048]):
ConstantInit: val=0, bias=0
img_backbone.layer4.0.bn3.bias - torch.Size([2048]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer4.0.downsample.0.weight - torch.Size([2048, 1024, 1, 1]):
KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0
img_backbone.layer4.0.downsample.1.weight - torch.Size([2048]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer4.0.downsample.1.bias - torch.Size([2048]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer4.1.conv1.weight - torch.Size([512, 2048, 1, 1]):
KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0
img_backbone.layer4.1.bn1.weight - torch.Size([512]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer4.1.bn1.bias - torch.Size([512]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer4.1.conv2.weight - torch.Size([512, 512, 3, 3]):
KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0
img_backbone.layer4.1.bn2.weight - torch.Size([512]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer4.1.bn2.bias - torch.Size([512]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer4.1.conv3.weight - torch.Size([2048, 512, 1, 1]):
KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0
img_backbone.layer4.1.bn3.weight - torch.Size([2048]):
ConstantInit: val=0, bias=0
img_backbone.layer4.1.bn3.bias - torch.Size([2048]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer4.2.conv1.weight - torch.Size([512, 2048, 1, 1]):
KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0
img_backbone.layer4.2.bn1.weight - torch.Size([512]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer4.2.bn1.bias - torch.Size([512]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer4.2.conv2.weight - torch.Size([512, 512, 3, 3]):
KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0
img_backbone.layer4.2.bn2.weight - torch.Size([512]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer4.2.bn2.bias - torch.Size([512]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_backbone.layer4.2.conv3.weight - torch.Size([2048, 512, 1, 1]):
KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0
img_backbone.layer4.2.bn3.weight - torch.Size([2048]):
ConstantInit: val=0, bias=0
img_backbone.layer4.2.bn3.bias - torch.Size([2048]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_neck.lateral_convs.0.conv.weight - torch.Size([256, 1024, 1, 1]):
XavierInit: gain=1, distribution=uniform, bias=0
img_neck.lateral_convs.0.conv.bias - torch.Size([256]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_neck.lateral_convs.1.conv.weight - torch.Size([256, 2048, 1, 1]):
XavierInit: gain=1, distribution=uniform, bias=0
img_neck.lateral_convs.1.conv.bias - torch.Size([256]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_neck.fpn_convs.0.conv.weight - torch.Size([256, 256, 3, 3]):
XavierInit: gain=1, distribution=uniform, bias=0
img_neck.fpn_convs.0.conv.bias - torch.Size([256]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_view_transformer.depth_net.weight - torch.Size([152, 256, 1, 1]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_view_transformer.depth_net.bias - torch.Size([152]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_bev_encoder_backbone.layers.0.0.conv1.weight - torch.Size([128, 64, 3, 3]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_bev_encoder_backbone.layers.0.0.bn1.weight - torch.Size([128]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_bev_encoder_backbone.layers.0.0.bn1.bias - torch.Size([128]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_bev_encoder_backbone.layers.0.0.conv2.weight - torch.Size([128, 128, 3, 3]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_bev_encoder_backbone.layers.0.0.bn2.weight - torch.Size([128]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_bev_encoder_backbone.layers.0.0.bn2.bias - torch.Size([128]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_bev_encoder_backbone.layers.0.0.downsample.weight - torch.Size([128, 64, 3, 3]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_bev_encoder_backbone.layers.0.0.downsample.bias - torch.Size([128]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_bev_encoder_backbone.layers.0.1.conv1.weight - torch.Size([128, 128, 3, 3]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_bev_encoder_backbone.layers.0.1.bn1.weight - torch.Size([128]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_bev_encoder_backbone.layers.0.1.bn1.bias - torch.Size([128]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_bev_encoder_backbone.layers.0.1.conv2.weight - torch.Size([128, 128, 3, 3]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_bev_encoder_backbone.layers.0.1.bn2.weight - torch.Size([128]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_bev_encoder_backbone.layers.0.1.bn2.bias - torch.Size([128]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_bev_encoder_backbone.layers.1.0.conv1.weight - torch.Size([256, 128, 3, 3]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_bev_encoder_backbone.layers.1.0.bn1.weight - torch.Size([256]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_bev_encoder_backbone.layers.1.0.bn1.bias - torch.Size([256]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_bev_encoder_backbone.layers.1.0.conv2.weight - torch.Size([256, 256, 3, 3]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_bev_encoder_backbone.layers.1.0.bn2.weight - torch.Size([256]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_bev_encoder_backbone.layers.1.0.bn2.bias - torch.Size([256]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_bev_encoder_backbone.layers.1.0.downsample.weight - torch.Size([256, 128, 3, 3]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_bev_encoder_backbone.layers.1.0.downsample.bias - torch.Size([256]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_bev_encoder_backbone.layers.1.1.conv1.weight - torch.Size([256, 256, 3, 3]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_bev_encoder_backbone.layers.1.1.bn1.weight - torch.Size([256]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_bev_encoder_backbone.layers.1.1.bn1.bias - torch.Size([256]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_bev_encoder_backbone.layers.1.1.conv2.weight - torch.Size([256, 256, 3, 3]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_bev_encoder_backbone.layers.1.1.bn2.weight - torch.Size([256]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_bev_encoder_backbone.layers.1.1.bn2.bias - torch.Size([256]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_bev_encoder_backbone.layers.2.0.conv1.weight - torch.Size([512, 256, 3, 3]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_bev_encoder_backbone.layers.2.0.bn1.weight - torch.Size([512]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_bev_encoder_backbone.layers.2.0.bn1.bias - torch.Size([512]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_bev_encoder_backbone.layers.2.0.conv2.weight - torch.Size([512, 512, 3, 3]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_bev_encoder_backbone.layers.2.0.bn2.weight - torch.Size([512]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_bev_encoder_backbone.layers.2.0.bn2.bias - torch.Size([512]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_bev_encoder_backbone.layers.2.0.downsample.weight - torch.Size([512, 256, 3, 3]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_bev_encoder_backbone.layers.2.0.downsample.bias - torch.Size([512]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_bev_encoder_backbone.layers.2.1.conv1.weight - torch.Size([512, 512, 3, 3]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_bev_encoder_backbone.layers.2.1.bn1.weight - torch.Size([512]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_bev_encoder_backbone.layers.2.1.bn1.bias - torch.Size([512]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_bev_encoder_backbone.layers.2.1.conv2.weight - torch.Size([512, 512, 3, 3]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_bev_encoder_backbone.layers.2.1.bn2.weight - torch.Size([512]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_bev_encoder_backbone.layers.2.1.bn2.bias - torch.Size([512]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_bev_encoder_neck.conv.0.weight - torch.Size([512, 640, 3, 3]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_bev_encoder_neck.conv.1.weight - torch.Size([512]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_bev_encoder_neck.conv.1.bias - torch.Size([512]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_bev_encoder_neck.conv.3.weight - torch.Size([512, 512, 3, 3]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_bev_encoder_neck.conv.4.weight - torch.Size([512]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_bev_encoder_neck.conv.4.bias - torch.Size([512]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_bev_encoder_neck.up2.1.weight - torch.Size([256, 512, 3, 3]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_bev_encoder_neck.up2.2.weight - torch.Size([256]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_bev_encoder_neck.up2.2.bias - torch.Size([256]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_bev_encoder_neck.up2.4.weight - torch.Size([256, 256, 1, 1]):
The value is the same before and after calling `init_weights` of BEVDetOCC
img_bev_encoder_neck.up2.4.bias - torch.Size([256]):
The value is the same before and after calling `init_weights` of BEVDetOCC
occ_head.final_conv.conv.weight - torch.Size([256, 256, 3, 3]):
Initialized by user-defined `init_weights` in ConvModule
occ_head.final_conv.conv.bias - torch.Size([256]):
The value is the same before and after calling `init_weights` of BEVDetOCC
occ_head.predicter.0.weight - torch.Size([512, 256]):
The value is the same before and after calling `init_weights` of BEVDetOCC
occ_head.predicter.0.bias - torch.Size([512]):
The value is the same before and after calling `init_weights` of BEVDetOCC
occ_head.predicter.2.weight - torch.Size([288, 512]):
The value is the same before and after calling `init_weights` of BEVDetOCC
occ_head.predicter.2.bias - torch.Size([288]):
The value is the same before and after calling `init_weights` of BEVDetOCC
2026-04-03 16:26:53,421 - mmdet - INFO - Model:
BEVDetOCC(
(img_backbone): ResNet(
(conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
(bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
(layer1): ResLayer(
(0): Bottleneck(
(conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(downsample): Sequential(
(0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
init_cfg={'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
(1): Bottleneck(
(conv1): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
)
init_cfg={'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
(2): Bottleneck(
(conv1): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
)
init_cfg={'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
)
(layer2): ResLayer(
(0): Bottleneck(
(conv1): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(downsample): Sequential(
(0): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False)
(1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
init_cfg={'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
(1): Bottleneck(
(conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
)
init_cfg={'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
(2): Bottleneck(
(conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
)
init_cfg={'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
(3): Bottleneck(
(conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
)
init_cfg={'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
)
(layer3): ResLayer(
(0): Bottleneck(
(conv1): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(downsample): Sequential(
(0): Conv2d(512, 1024, kernel_size=(1, 1), stride=(2, 2), bias=False)
(1): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
init_cfg={'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
(1): Bottleneck(
(conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
)
init_cfg={'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
(2): Bottleneck(
(conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
)
init_cfg={'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
(3): Bottleneck(
(conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
)
init_cfg={'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
(4): Bottleneck(
(conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
)
init_cfg={'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
(5): Bottleneck(
(conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
)
init_cfg={'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
)
(layer4): ResLayer(
(0): Bottleneck(
(conv1): Conv2d(1024, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(downsample): Sequential(
(0): Conv2d(1024, 2048, kernel_size=(1, 1), stride=(2, 2), bias=False)
(1): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
init_cfg={'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
(1): Bottleneck(
(conv1): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
)
init_cfg={'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
(2): Bottleneck(
(conv1): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
)
init_cfg={'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
)
)
init_cfg=[{'type': 'Kaiming', 'layer': 'Conv2d'}, {'type': 'Constant', 'val': 1, 'layer': ['_BatchNorm', 'GroupNorm']}]
(img_neck): CustomFPN(
(lateral_convs): ModuleList(
(0): ConvModule(
(conv): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
)
(1): ConvModule(
(conv): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
)
)
(fpn_convs): ModuleList(
(0): ConvModule(
(conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)
)
)
init_cfg={'type': 'Xavier', 'layer': 'Conv2d', 'distribution': 'uniform'}
(img_view_transformer): LSSViewTransformer(
(depth_net): Conv2d(256, 152, kernel_size=(1, 1), stride=(1, 1))
)
(img_bev_encoder_backbone): CustomResNet(
(layers): Sequential(
(0): Sequential(
(0): BasicBlock(
(conv1): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(downsample): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
)
(1): BasicBlock(
(conv1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
)
)
(1): Sequential(
(0): BasicBlock(
(conv1): Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(downsample): Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
)
(1): BasicBlock(
(conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
)
)
(2): Sequential(
(0): BasicBlock(
(conv1): Conv2d(256, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(downsample): Conv2d(256, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
)
(1): BasicBlock(
(conv1): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
)
)
)
)
(img_bev_encoder_neck): FPN_LSS(
(up): Upsample(scale_factor=4.0, mode='bilinear')
(conv): Sequential(
(0): Conv2d(640, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU(inplace=True)
(3): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(4): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(5): ReLU(inplace=True)
)
(up2): Sequential(
(0): Upsample(scale_factor=2.0, mode='bilinear')
(1): Conv2d(512, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(3): ReLU(inplace=True)
(4): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
)
)
(occ_head): BEVOCCHead2D(
(final_conv): ConvModule(
(conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(activate): ReLU(inplace=True)
)
(predicter): Sequential(
(0): Linear(in_features=256, out_features=512, bias=True)
(1): Softplus(beta=1.0, threshold=20.0)
(2): Linear(in_features=512, out_features=288, bias=True)
)
(loss_occ): CrossEntropyLoss(avg_non_ignore=False)
)
)
2026-04-03 16:26:58,319 - mmdet - INFO - load checkpoint from local path: ckpts/bevdet-r50-cbgs.pth
2026-04-03 16:26:58,420 - mmdet - WARNING - The model and loaded state dict do not match exactly
size mismatch for img_view_transformer.depth_net.weight: copying a param with shape torch.Size([123, 256, 1, 1]) from checkpoint, the shape in current model is torch.Size([152, 256, 1, 1]).
size mismatch for img_view_transformer.depth_net.bias: copying a param with shape torch.Size([123]) from checkpoint, the shape in current model is torch.Size([152]).
unexpected key in source state_dict: pts_bbox_head.shared_conv.conv.weight, pts_bbox_head.shared_conv.bn.weight, pts_bbox_head.shared_conv.bn.bias, pts_bbox_head.shared_conv.bn.running_mean, pts_bbox_head.shared_conv.bn.running_var, pts_bbox_head.shared_conv.bn.num_batches_tracked, pts_bbox_head.task_heads.0.reg.0.conv.weight, pts_bbox_head.task_heads.0.reg.0.bn.weight, pts_bbox_head.task_heads.0.reg.0.bn.bias, pts_bbox_head.task_heads.0.reg.0.bn.running_mean, pts_bbox_head.task_heads.0.reg.0.bn.running_var, pts_bbox_head.task_heads.0.reg.0.bn.num_batches_tracked, pts_bbox_head.task_heads.0.reg.1.weight, pts_bbox_head.task_heads.0.reg.1.bias, pts_bbox_head.task_heads.0.height.0.conv.weight, pts_bbox_head.task_heads.0.height.0.bn.weight, pts_bbox_head.task_heads.0.height.0.bn.bias, pts_bbox_head.task_heads.0.height.0.bn.running_mean, pts_bbox_head.task_heads.0.height.0.bn.running_var, pts_bbox_head.task_heads.0.height.0.bn.num_batches_tracked, pts_bbox_head.task_heads.0.height.1.weight, pts_bbox_head.task_heads.0.height.1.bias, pts_bbox_head.task_heads.0.dim.0.conv.weight, pts_bbox_head.task_heads.0.dim.0.bn.weight, pts_bbox_head.task_heads.0.dim.0.bn.bias, pts_bbox_head.task_heads.0.dim.0.bn.running_mean, pts_bbox_head.task_heads.0.dim.0.bn.running_var, pts_bbox_head.task_heads.0.dim.0.bn.num_batches_tracked, pts_bbox_head.task_heads.0.dim.1.weight, pts_bbox_head.task_heads.0.dim.1.bias, pts_bbox_head.task_heads.0.rot.0.conv.weight, pts_bbox_head.task_heads.0.rot.0.bn.weight, pts_bbox_head.task_heads.0.rot.0.bn.bias, pts_bbox_head.task_heads.0.rot.0.bn.running_mean, pts_bbox_head.task_heads.0.rot.0.bn.running_var, pts_bbox_head.task_heads.0.rot.0.bn.num_batches_tracked, pts_bbox_head.task_heads.0.rot.1.weight, pts_bbox_head.task_heads.0.rot.1.bias, pts_bbox_head.task_heads.0.vel.0.conv.weight, pts_bbox_head.task_heads.0.vel.0.bn.weight, pts_bbox_head.task_heads.0.vel.0.bn.bias, pts_bbox_head.task_heads.0.vel.0.bn.running_mean, 
pts_bbox_head.task_heads.0.vel.0.bn.running_var, pts_bbox_head.task_heads.0.vel.0.bn.num_batches_tracked, pts_bbox_head.task_heads.0.vel.1.weight, pts_bbox_head.task_heads.0.vel.1.bias, pts_bbox_head.task_heads.0.heatmap.0.conv.weight, pts_bbox_head.task_heads.0.heatmap.0.bn.weight, pts_bbox_head.task_heads.0.heatmap.0.bn.bias, pts_bbox_head.task_heads.0.heatmap.0.bn.running_mean, pts_bbox_head.task_heads.0.heatmap.0.bn.running_var, pts_bbox_head.task_heads.0.heatmap.0.bn.num_batches_tracked, pts_bbox_head.task_heads.0.heatmap.1.weight, pts_bbox_head.task_heads.0.heatmap.1.bias
missing keys in source state_dict: occ_head.final_conv.conv.weight, occ_head.final_conv.conv.bias, occ_head.predicter.0.weight, occ_head.predicter.0.bias, occ_head.predicter.2.weight, occ_head.predicter.2.bias
2026-04-03 16:26:58,422 - mmdet - INFO - Start running, host: root@bw61, work_dir: /workspace/Flashocc/work_dirs/flashocc-r50
2026-04-03 16:26:58,422 - mmdet - INFO - Hooks will be executed in the following order:
before_run:
(VERY_HIGH ) StepLrUpdaterHook
(NORMAL ) CheckpointHook
(NORMAL ) MEGVIIEMAHook
(VERY_LOW ) TextLoggerHook
(VERY_LOW ) TensorboardLoggerHook
--------------------
before_train_epoch:
(VERY_HIGH ) StepLrUpdaterHook
(NORMAL ) DistSamplerSeedHook
(LOW ) IterTimerHook
(VERY_LOW ) TextLoggerHook
(VERY_LOW ) TensorboardLoggerHook
--------------------
before_train_iter:
(VERY_HIGH ) StepLrUpdaterHook
(LOW ) IterTimerHook
--------------------
after_train_iter:
(ABOVE_NORMAL) OptimizerHook
(NORMAL ) CheckpointHook
(NORMAL ) MEGVIIEMAHook
(LOW ) IterTimerHook
(VERY_LOW ) TextLoggerHook
(VERY_LOW ) TensorboardLoggerHook
--------------------
after_train_epoch:
(NORMAL ) CheckpointHook
(NORMAL ) MEGVIIEMAHook
(VERY_LOW ) TextLoggerHook
(VERY_LOW ) TensorboardLoggerHook
--------------------
before_val_epoch:
(NORMAL ) DistSamplerSeedHook
(LOW ) IterTimerHook
(VERY_LOW ) TextLoggerHook
(VERY_LOW ) TensorboardLoggerHook
--------------------
before_val_iter:
(LOW ) IterTimerHook
--------------------
after_val_iter:
(LOW ) IterTimerHook
--------------------
after_val_epoch:
(VERY_LOW ) TextLoggerHook
(VERY_LOW ) TensorboardLoggerHook
--------------------
after_run:
(VERY_LOW ) TextLoggerHook
(VERY_LOW ) TensorboardLoggerHook
--------------------
2026-04-03 16:26:58,423 - mmdet - INFO - workflow: [('train', 1)], max: 24 epochs
2026-04-03 16:26:58,423 - mmdet - INFO - Checkpoints will be saved to /workspace/Flashocc/work_dirs/flashocc-r50 by HardDiskBackend.
2026-04-03 16:37:58,297 - mmdet - INFO - Epoch [1][1/147] lr: 1.000e-07, eta: 26 days, 22:22:17, time: 659.750, data_time: 15.432, memory: 32423, loss_occ: 3.0086, loss: 3.0086, grad_norm: 3.9001
2026-04-03 16:38:03,001 - mmdet - INFO - Epoch [1][2/147] lr: 5.995e-07, eta: 13 days, 13:23:57, time: 4.707, data_time: 0.005, memory: 32769, loss_occ: 3.0129, loss: 3.0129, grad_norm: 3.9387
2026-04-03 16:38:04,134 - mmdet - INFO - Epoch [1][3/147] lr: 1.099e-06, eta: 9 days, 1:14:27, time: 1.133, data_time: 0.003, memory: 32769, loss_occ: 3.0150, loss: 3.0150, grad_norm: 3.8985
2026-04-03 16:38:05,261 - mmdet - INFO - Epoch [1][4/147] lr: 1.599e-06, eta: 6 days, 19:09:36, time: 1.126, data_time: 0.002, memory: 32769, loss_occ: 3.0085, loss: 3.0085, grad_norm: 3.8741
2026-04-03 16:38:06,388 - mmdet - INFO - Epoch [1][5/147] lr: 2.098e-06, eta: 5 days, 10:42:41, time: 1.127, data_time: 0.003, memory: 32769, loss_occ: 3.0044, loss: 3.0044, grad_norm: 3.8639
2026-04-03 16:38:07,513 - mmdet - INFO - Epoch [1][6/147] lr: 2.597e-06, eta: 4 days, 13:04:44, time: 1.126, data_time: 0.003, memory: 32769, loss_occ: 3.0085, loss: 3.0085, grad_norm: 3.8423
2026-04-03 16:38:08,643 - mmdet - INFO - Epoch [1][7/147] lr: 3.097e-06, eta: 3 days, 21:37:37, time: 1.126, data_time: 0.003, memory: 32769, loss_occ: 3.0085, loss: 3.0085, grad_norm: 3.9473
2026-04-03 16:38:09,771 - mmdet - INFO - Epoch [1][8/147] lr: 3.597e-06, eta: 3 days, 10:02:19, time: 1.130, data_time: 0.006, memory: 32769, loss_occ: 3.0038, loss: 3.0038, grad_norm: 3.9215
2026-04-03 16:38:10,898 - mmdet - INFO - Epoch [1][9/147] lr: 4.096e-06, eta: 3 days, 1:01:30, time: 1.128, data_time: 0.004, memory: 32769, loss_occ: 2.9969, loss: 2.9969, grad_norm: 3.8659
2026-04-03 16:38:12,026 - mmdet - INFO - Epoch [1][10/147] lr: 4.596e-06, eta: 2 days, 17:48:50, time: 1.127, data_time: 0.003, memory: 32769, loss_occ: 2.9957, loss: 2.9957, grad_norm: 3.8820
{"env_info": "sys.platform: linux\nPython: 3.10.12 (main, Aug 15 2025, 14:32:43) [GCC 11.4.0]\nCUDA available: True\nGPU 0,1,2,3,4,5,6,7: BW1000_H\nCUDA_HOME: /opt/dtk\nNVCC: Not Available\nGCC: x86_64-linux-gnu-gcc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0\nPyTorch: 2.5.1\nPyTorch compiling details: PyTorch built with:\n - GCC 10.3\n - C++ Version: 201703\n - Intel(R) Math Kernel Library Version 2020.0.4 Product Build 20200917 for Intel(R) 64 architecture applications\n - OpenMP 201511 (a.k.a. OpenMP 4.5)\n - LAPACK is enabled (usually provided by MKL)\n - NNPACK is enabled\n - CPU capability usage: AVX512\n - HIP Runtime 6.3.25521\n - MIOpen 2.18.0\n - Magma 2.8.0\n - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, CXX_COMPILER=/opt/rh/gcc-toolset-10/root/usr/bin/c++, CXX_FLAGS= -D_GLIBCXX_USE_CXX11_ABI=1 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOCUPTI -DLIBKINETO_NOXPUPTI=ON -DUSE_FBGEMM -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wsuggest-override -Wno-psabi -Wno-error=old-style-cast -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Wno-stringop-overflow, FORCE_FALLBACK_CUDA_MPI=1, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, TORCH_VERSION=2.5.1, USE_CUDA=0, USE_CUDNN=OFF, USE_CUSPARSELT=OFF, USE_EXCEPTION_PTR=1, USE_GFLAGS=1, USE_GLOG=1, USE_GLOO=1, USE_MKL=ON, USE_MKLDNN=0, USE_MPI=1, USE_NCCL=1, USE_NNPACK=ON, USE_OPENMP=1, USE_ROCM=ON, USE_ROCM_KERNEL_ASSERT=OFF, \n\nTorchVision: 0.20.1\nOpenCV: 4.12.0\nMMCV: 1.6.1\nMMCV Compiler: GCC 10.3\nMMCV CUDA Compiler: rocm not available\nMMDetection: 
2.25.1\nMMSegmentation: 0.25.0\nMMDetection3D: 1.0.0rc4+\nspconv2.0: False", "config": "point_cloud_range = [-51.2, -51.2, -5.0, 51.2, 51.2, 3.0]\nclass_names = [\n 'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',\n 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'\n]\ndataset_type = 'NuScenesDatasetOccpancy'\ndata_root = 'data/nuscenes/'\ninput_modality = dict(\n use_lidar=False,\n use_camera=True,\n use_radar=False,\n use_map=False,\n use_external=False)\nfile_client_args = dict(backend='disk')\ntrain_pipeline = [\n dict(\n type='PrepareImageInputs',\n is_train=True,\n data_config=dict(\n cams=[\n 'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT',\n 'CAM_BACK_LEFT', 'CAM_BACK', 'CAM_BACK_RIGHT'\n ],\n Ncams=6,\n input_size=(256, 704),\n src_size=(900, 1600),\n resize=(-0.06, 0.11),\n rot=(-5.4, 5.4),\n flip=True,\n crop_h=(0.0, 0.0),\n resize_test=0.0),\n sequential=False),\n dict(\n type='LoadAnnotationsBEVDepth',\n bda_aug_conf=dict(\n rot_lim=(-0.0, 0.0),\n scale_lim=(1.0, 1.0),\n flip_dx_ratio=0.5,\n flip_dy_ratio=0.5),\n classes=[\n 'car', 'truck', 'construction_vehicle', 'bus', 'trailer',\n 'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'\n ],\n is_train=True),\n dict(type='LoadOccGTFromFile'),\n dict(\n type='LoadPointsFromFile',\n coord_type='LIDAR',\n load_dim=5,\n use_dim=5,\n file_client_args=dict(backend='disk')),\n dict(\n type='PointToMultiViewDepth',\n downsample=1,\n grid_config=dict(\n x=[-40, 40, 0.4],\n y=[-40, 40, 0.4],\n z=[-1, 5.4, 6.4],\n depth=[1.0, 45.0, 0.5])),\n dict(\n type='DefaultFormatBundle3D',\n class_names=[\n 'car', 'truck', 'construction_vehicle', 'bus', 'trailer',\n 'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'\n ]),\n dict(\n type='Collect3D',\n keys=[\n 'img_inputs', 'gt_depth', 'voxel_semantics', 'mask_lidar',\n 'mask_camera'\n ])\n]\ntest_pipeline = [\n dict(\n type='PrepareImageInputs',\n data_config=dict(\n cams=[\n 'CAM_FRONT_LEFT', 'CAM_FRONT', 
'CAM_FRONT_RIGHT',\n 'CAM_BACK_LEFT', 'CAM_BACK', 'CAM_BACK_RIGHT'\n ],\n Ncams=6,\n input_size=(256, 704),\n src_size=(900, 1600),\n resize=(-0.06, 0.11),\n rot=(-5.4, 5.4),\n flip=True,\n crop_h=(0.0, 0.0),\n resize_test=0.0),\n sequential=False),\n dict(\n type='LoadAnnotationsBEVDepth',\n bda_aug_conf=dict(\n rot_lim=(-0.0, 0.0),\n scale_lim=(1.0, 1.0),\n flip_dx_ratio=0.5,\n flip_dy_ratio=0.5),\n classes=[\n 'car', 'truck', 'construction_vehicle', 'bus', 'trailer',\n 'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'\n ],\n is_train=False),\n dict(\n type='LoadPointsFromFile',\n coord_type='LIDAR',\n load_dim=5,\n use_dim=5,\n file_client_args=dict(backend='disk')),\n dict(\n type='MultiScaleFlipAug3D',\n img_scale=(1333, 800),\n pts_scale_ratio=1,\n flip=False,\n transforms=[\n dict(\n type='DefaultFormatBundle3D',\n class_names=[\n 'car', 'truck', 'construction_vehicle', 'bus', 'trailer',\n 'barrier', 'motorcycle', 'bicycle', 'pedestrian',\n 'traffic_cone'\n ],\n with_label=False),\n dict(type='Collect3D', keys=['points', 'img_inputs'])\n ])\n]\neval_pipeline = [\n dict(\n type='LoadPointsFromFile',\n coord_type='LIDAR',\n load_dim=5,\n use_dim=5,\n file_client_args=dict(backend='disk')),\n dict(\n type='LoadPointsFromMultiSweeps',\n sweeps_num=10,\n file_client_args=dict(backend='disk')),\n dict(\n type='DefaultFormatBundle3D',\n class_names=[\n 'car', 'truck', 'trailer', 'bus', 'construction_vehicle',\n 'bicycle', 'motorcycle', 'pedestrian', 'traffic_cone', 'barrier'\n ],\n with_label=False),\n dict(type='Collect3D', keys=['points'])\n]\ndata = dict(\n samples_per_gpu=24,\n workers_per_gpu=24,\n train=dict(\n type='NuScenesDatasetOccpancy',\n data_root='data/nuscenes/',\n ann_file='data/nuscenes/bevdetv2-nuscenes_infos_train.pkl',\n pipeline=[\n dict(\n type='PrepareImageInputs',\n is_train=True,\n data_config=dict(\n cams=[\n 'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT',\n 'CAM_BACK_LEFT', 'CAM_BACK', 'CAM_BACK_RIGHT'\n ],\n Ncams=6,\n 
input_size=(256, 704),\n src_size=(900, 1600),\n resize=(-0.06, 0.11),\n rot=(-5.4, 5.4),\n flip=True,\n crop_h=(0.0, 0.0),\n resize_test=0.0),\n sequential=False),\n dict(\n type='LoadAnnotationsBEVDepth',\n bda_aug_conf=dict(\n rot_lim=(-0.0, 0.0),\n scale_lim=(1.0, 1.0),\n flip_dx_ratio=0.5,\n flip_dy_ratio=0.5),\n classes=[\n 'car', 'truck', 'construction_vehicle', 'bus', 'trailer',\n 'barrier', 'motorcycle', 'bicycle', 'pedestrian',\n 'traffic_cone'\n ],\n is_train=True),\n dict(type='LoadOccGTFromFile'),\n dict(\n type='LoadPointsFromFile',\n coord_type='LIDAR',\n load_dim=5,\n use_dim=5,\n file_client_args=dict(backend='disk')),\n dict(\n type='PointToMultiViewDepth',\n downsample=1,\n grid_config=dict(\n x=[-40, 40, 0.4],\n y=[-40, 40, 0.4],\n z=[-1, 5.4, 6.4],\n depth=[1.0, 45.0, 0.5])),\n dict(\n type='DefaultFormatBundle3D',\n class_names=[\n 'car', 'truck', 'construction_vehicle', 'bus', 'trailer',\n 'barrier', 'motorcycle', 'bicycle', 'pedestrian',\n 'traffic_cone'\n ]),\n dict(\n type='Collect3D',\n keys=[\n 'img_inputs', 'gt_depth', 'voxel_semantics', 'mask_lidar',\n 'mask_camera'\n ])\n ],\n classes=[\n 'car', 'truck', 'construction_vehicle', 'bus', 'trailer',\n 'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'\n ],\n modality=dict(\n use_lidar=False,\n use_camera=True,\n use_radar=False,\n use_map=False,\n use_external=False),\n test_mode=False,\n box_type_3d='LiDAR',\n use_valid_flag=True,\n stereo=False,\n filter_empty_gt=False,\n img_info_prototype='bevdet'),\n val=dict(\n type='NuScenesDatasetOccpancy',\n data_root='data/nuscenes/',\n ann_file='data/nuscenes/bevdetv2-nuscenes_infos_val.pkl',\n pipeline=[\n dict(\n type='PrepareImageInputs',\n data_config=dict(\n cams=[\n 'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT',\n 'CAM_BACK_LEFT', 'CAM_BACK', 'CAM_BACK_RIGHT'\n ],\n Ncams=6,\n input_size=(256, 704),\n src_size=(900, 1600),\n resize=(-0.06, 0.11),\n rot=(-5.4, 5.4),\n flip=True,\n crop_h=(0.0, 0.0),\n resize_test=0.0),\n 
sequential=False),\n dict(\n type='LoadAnnotationsBEVDepth',\n bda_aug_conf=dict(\n rot_lim=(-0.0, 0.0),\n scale_lim=(1.0, 1.0),\n flip_dx_ratio=0.5,\n flip_dy_ratio=0.5),\n classes=[\n 'car', 'truck', 'construction_vehicle', 'bus', 'trailer',\n 'barrier', 'motorcycle', 'bicycle', 'pedestrian',\n 'traffic_cone'\n ],\n is_train=False),\n dict(\n type='LoadPointsFromFile',\n coord_type='LIDAR',\n load_dim=5,\n use_dim=5,\n file_client_args=dict(backend='disk')),\n dict(\n type='MultiScaleFlipAug3D',\n img_scale=(1333, 800),\n pts_scale_ratio=1,\n flip=False,\n transforms=[\n dict(\n type='DefaultFormatBundle3D',\n class_names=[\n 'car', 'truck', 'construction_vehicle', 'bus',\n 'trailer', 'barrier', 'motorcycle', 'bicycle',\n 'pedestrian', 'traffic_cone'\n ],\n with_label=False),\n dict(type='Collect3D', keys=['points', 'img_inputs'])\n ])\n ],\n classes=[\n 'car', 'truck', 'construction_vehicle', 'bus', 'trailer',\n 'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'\n ],\n modality=dict(\n use_lidar=False,\n use_camera=True,\n use_radar=False,\n use_map=False,\n use_external=False),\n test_mode=True,\n box_type_3d='LiDAR',\n stereo=False,\n filter_empty_gt=False,\n img_info_prototype='bevdet'),\n test=dict(\n type='NuScenesDatasetOccpancy',\n data_root='data/nuscenes/',\n ann_file='data/nuscenes/bevdetv2-nuscenes_infos_val.pkl',\n pipeline=[\n dict(\n type='PrepareImageInputs',\n data_config=dict(\n cams=[\n 'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT',\n 'CAM_BACK_LEFT', 'CAM_BACK', 'CAM_BACK_RIGHT'\n ],\n Ncams=6,\n input_size=(256, 704),\n src_size=(900, 1600),\n resize=(-0.06, 0.11),\n rot=(-5.4, 5.4),\n flip=True,\n crop_h=(0.0, 0.0),\n resize_test=0.0),\n sequential=False),\n dict(\n type='LoadAnnotationsBEVDepth',\n bda_aug_conf=dict(\n rot_lim=(-0.0, 0.0),\n scale_lim=(1.0, 1.0),\n flip_dx_ratio=0.5,\n flip_dy_ratio=0.5),\n classes=[\n 'car', 'truck', 'construction_vehicle', 'bus', 'trailer',\n 'barrier', 'motorcycle', 'bicycle', 
'pedestrian',\n 'traffic_cone'\n ],\n is_train=False),\n dict(\n type='LoadPointsFromFile',\n coord_type='LIDAR',\n load_dim=5,\n use_dim=5,\n file_client_args=dict(backend='disk')),\n dict(\n type='MultiScaleFlipAug3D',\n img_scale=(1333, 800),\n pts_scale_ratio=1,\n flip=False,\n transforms=[\n dict(\n type='DefaultFormatBundle3D',\n class_names=[\n 'car', 'truck', 'construction_vehicle', 'bus',\n 'trailer', 'barrier', 'motorcycle', 'bicycle',\n 'pedestrian', 'traffic_cone'\n ],\n with_label=False),\n dict(type='Collect3D', keys=['points', 'img_inputs'])\n ])\n ],\n classes=[\n 'car', 'truck', 'construction_vehicle', 'bus', 'trailer',\n 'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'\n ],\n modality=dict(\n use_lidar=False,\n use_camera=True,\n use_radar=False,\n use_map=False,\n use_external=False),\n test_mode=True,\n box_type_3d='LiDAR',\n stereo=False,\n filter_empty_gt=False,\n img_info_prototype='bevdet'))\nevaluation = dict(\n interval=1,\n pipeline=[\n dict(\n type='PrepareImageInputs',\n data_config=dict(\n cams=[\n 'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT',\n 'CAM_BACK_LEFT', 'CAM_BACK', 'CAM_BACK_RIGHT'\n ],\n Ncams=6,\n input_size=(256, 704),\n src_size=(900, 1600),\n resize=(-0.06, 0.11),\n rot=(-5.4, 5.4),\n flip=True,\n crop_h=(0.0, 0.0),\n resize_test=0.0),\n sequential=False),\n dict(\n type='LoadAnnotationsBEVDepth',\n bda_aug_conf=dict(\n rot_lim=(-0.0, 0.0),\n scale_lim=(1.0, 1.0),\n flip_dx_ratio=0.5,\n flip_dy_ratio=0.5),\n classes=[\n 'car', 'truck', 'construction_vehicle', 'bus', 'trailer',\n 'barrier', 'motorcycle', 'bicycle', 'pedestrian',\n 'traffic_cone'\n ],\n is_train=False),\n dict(\n type='LoadPointsFromFile',\n coord_type='LIDAR',\n load_dim=5,\n use_dim=5,\n file_client_args=dict(backend='disk')),\n dict(\n type='MultiScaleFlipAug3D',\n img_scale=(1333, 800),\n pts_scale_ratio=1,\n flip=False,\n transforms=[\n dict(\n type='DefaultFormatBundle3D',\n class_names=[\n 'car', 'truck', 'construction_vehicle', 
'bus',\n 'trailer', 'barrier', 'motorcycle', 'bicycle',\n 'pedestrian', 'traffic_cone'\n ],\n with_label=False),\n dict(type='Collect3D', keys=['points', 'img_inputs'])\n ])\n ],\n start=20)\ncheckpoint_config = dict(interval=1, max_keep_ckpts=5)\nlog_config = dict(\n interval=1,\n hooks=[dict(type='TextLoggerHook'),\n dict(type='TensorboardLoggerHook')])\ndist_params = dict(backend='nccl')\nlog_level = 'INFO'\nwork_dir = './work_dirs/flashocc-r50'\nload_from = 'ckpts/bevdet-r50-cbgs.pth'\nresume_from = None\nworkflow = [('train', 1)]\nopencv_num_threads = 0\nmp_start_method = 'fork'\nplugin = True\nplugin_dir = 'projects/mmdet3d_plugin/'\ndata_config = dict(\n cams=[\n 'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT', 'CAM_BACK_LEFT',\n 'CAM_BACK', 'CAM_BACK_RIGHT'\n ],\n Ncams=6,\n input_size=(256, 704),\n src_size=(900, 1600),\n resize=(-0.06, 0.11),\n rot=(-5.4, 5.4),\n flip=True,\n crop_h=(0.0, 0.0),\n resize_test=0.0)\ngrid_config = dict(\n x=[-40, 40, 0.4],\n y=[-40, 40, 0.4],\n z=[-1, 5.4, 6.4],\n depth=[1.0, 45.0, 0.5])\nvoxel_size = [0.1, 0.1, 0.2]\nnumC_Trans = 64\nmodel = dict(\n type='BEVDetOCC',\n img_backbone=dict(\n type='ResNet',\n depth=50,\n num_stages=4,\n out_indices=(2, 3),\n frozen_stages=-1,\n norm_cfg=dict(type='BN', requires_grad=True),\n norm_eval=False,\n with_cp=True,\n style='pytorch'),\n img_neck=dict(\n type='CustomFPN',\n in_channels=[1024, 2048],\n out_channels=256,\n num_outs=1,\n start_level=0,\n out_ids=[0]),\n img_view_transformer=dict(\n type='LSSViewTransformer',\n grid_config=dict(\n x=[-40, 40, 0.4],\n y=[-40, 40, 0.4],\n z=[-1, 5.4, 6.4],\n depth=[1.0, 45.0, 0.5]),\n input_size=(256, 704),\n in_channels=256,\n out_channels=64,\n sid=False,\n collapse_z=True,\n downsample=16),\n img_bev_encoder_backbone=dict(\n type='CustomResNet', numC_input=64, num_channels=[128, 256, 512]),\n img_bev_encoder_neck=dict(\n type='FPN_LSS', in_channels=640, out_channels=256),\n occ_head=dict(\n type='BEVOCCHead2D',\n in_dim=256,\n 
out_dim=256,\n Dz=16,\n use_mask=True,\n num_classes=18,\n use_predicter=True,\n class_balance=False,\n loss_occ=dict(\n type='CrossEntropyLoss',\n use_sigmoid=False,\n ignore_index=255,\n loss_weight=1.0)))\nbda_aug_conf = dict(\n rot_lim=(-0.0, 0.0),\n scale_lim=(1.0, 1.0),\n flip_dx_ratio=0.5,\n flip_dy_ratio=0.5)\nshare_data_config = dict(\n type='NuScenesDatasetOccpancy',\n data_root='data/nuscenes/',\n classes=[\n 'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',\n 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'\n ],\n modality=dict(\n use_lidar=False,\n use_camera=True,\n use_radar=False,\n use_map=False,\n use_external=False),\n stereo=False,\n filter_empty_gt=False,\n img_info_prototype='bevdet')\ntest_data_config = dict(\n pipeline=[\n dict(\n type='PrepareImageInputs',\n data_config=dict(\n cams=[\n 'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT',\n 'CAM_BACK_LEFT', 'CAM_BACK', 'CAM_BACK_RIGHT'\n ],\n Ncams=6,\n input_size=(256, 704),\n src_size=(900, 1600),\n resize=(-0.06, 0.11),\n rot=(-5.4, 5.4),\n flip=True,\n crop_h=(0.0, 0.0),\n resize_test=0.0),\n sequential=False),\n dict(\n type='LoadAnnotationsBEVDepth',\n bda_aug_conf=dict(\n rot_lim=(-0.0, 0.0),\n scale_lim=(1.0, 1.0),\n flip_dx_ratio=0.5,\n flip_dy_ratio=0.5),\n classes=[\n 'car', 'truck', 'construction_vehicle', 'bus', 'trailer',\n 'barrier', 'motorcycle', 'bicycle', 'pedestrian',\n 'traffic_cone'\n ],\n is_train=False),\n dict(\n type='LoadPointsFromFile',\n coord_type='LIDAR',\n load_dim=5,\n use_dim=5,\n file_client_args=dict(backend='disk')),\n dict(\n type='MultiScaleFlipAug3D',\n img_scale=(1333, 800),\n pts_scale_ratio=1,\n flip=False,\n transforms=[\n dict(\n type='DefaultFormatBundle3D',\n class_names=[\n 'car', 'truck', 'construction_vehicle', 'bus',\n 'trailer', 'barrier', 'motorcycle', 'bicycle',\n 'pedestrian', 'traffic_cone'\n ],\n with_label=False),\n dict(type='Collect3D', keys=['points', 'img_inputs'])\n ])\n ],\n 
ann_file='data/nuscenes/bevdetv2-nuscenes_infos_val.pkl',\n type='NuScenesDatasetOccpancy',\n data_root='data/nuscenes/',\n classes=[\n 'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',\n 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'\n ],\n modality=dict(\n use_lidar=False,\n use_camera=True,\n use_radar=False,\n use_map=False,\n use_external=False),\n stereo=False,\n filter_empty_gt=False,\n img_info_prototype='bevdet')\nkey = 'test'\noptimizer = dict(type='AdamW', lr=0.0001, weight_decay=0.01)\noptimizer_config = dict(grad_clip=dict(max_norm=5, norm_type=2))\nlr_config = dict(\n policy='step',\n warmup='linear',\n warmup_iters=200,\n warmup_ratio=0.001,\n step=[24])\nrunner = dict(type='EpochBasedRunner', max_epochs=24)\ncustom_hooks = [\n dict(type='MEGVIIEMAHook', init_updates=10560, priority='NORMAL')\n]\ngpu_ids = range(0, 8)\n", "seed": 0, "exp_name": "flashocc-r50.py"}
{"mode": "train", "epoch": 1, "iter": 1, "lr": 0.0, "memory": 32423, "data_time": 15.43241, "loss_occ": 3.00864, "loss": 3.00864, "grad_norm": 3.90007, "time": 659.74966}
{"mode": "train", "epoch": 1, "iter": 2, "lr": 0.0, "memory": 32769, "data_time": 0.00516, "loss_occ": 3.01287, "loss": 3.01287, "grad_norm": 3.9387, "time": 4.70719}
{"mode": "train", "epoch": 1, "iter": 3, "lr": 0.0, "memory": 32769, "data_time": 0.00255, "loss_occ": 3.015, "loss": 3.015, "grad_norm": 3.89853, "time": 1.13283}
{"mode": "train", "epoch": 1, "iter": 4, "lr": 0.0, "memory": 32769, "data_time": 0.00234, "loss_occ": 3.00854, "loss": 3.00854, "grad_norm": 3.87413, "time": 1.12622}
{"mode": "train", "epoch": 1, "iter": 5, "lr": 0.0, "memory": 32769, "data_time": 0.00279, "loss_occ": 3.0044, "loss": 3.0044, "grad_norm": 3.86394, "time": 1.12671}
{"mode": "train", "epoch": 1, "iter": 6, "lr": 0.0, "memory": 32769, "data_time": 0.00295, "loss_occ": 3.00848, "loss": 3.00848, "grad_norm": 3.84233, "time": 1.12617}
{"mode": "train", "epoch": 1, "iter": 7, "lr": 0.0, "memory": 32769, "data_time": 0.00263, "loss_occ": 3.0085, "loss": 3.0085, "grad_norm": 3.94733, "time": 1.12608}
{"mode": "train", "epoch": 1, "iter": 8, "lr": 0.0, "memory": 32769, "data_time": 0.00604, "loss_occ": 3.00383, "loss": 3.00383, "grad_norm": 3.9215, "time": 1.13024}
{"mode": "train", "epoch": 1, "iter": 9, "lr": 0.0, "memory": 32769, "data_time": 0.0036, "loss_occ": 2.99689, "loss": 2.99689, "grad_norm": 3.86593, "time": 1.1281}
{"mode": "train", "epoch": 1, "iter": 10, "lr": 0.0, "memory": 32769, "data_time": 0.00266, "loss_occ": 2.9957, "loss": 2.9957, "grad_norm": 3.88205, "time": 1.12727}
# ---------------------------------------------------------------------------
# Basic dataset settings.
# ---------------------------------------------------------------------------
# Six floats; presumably [x_min, y_min, z_min, x_max, y_max, z_max] -- TODO
# confirm against the consumer of this value.
point_cloud_range = [-51.2, -51.2, -5.0, 51.2, 51.2, 3.0]

# Ten class-name strings, in this exact order.
class_names = [
    'car',
    'truck',
    'construction_vehicle',
    'bus',
    'trailer',
    'barrier',
    'motorcycle',
    'bicycle',
    'pedestrian',
    'traffic_cone',
]

dataset_type = 'NuScenesDatasetOccpancy'
data_root = 'data/nuscenes/'

# Only the camera flag is True; every other modality flag is False.
input_modality = dict(
    use_lidar=False,
    use_camera=True,
    use_radar=False,
    use_map=False,
    use_external=False,
)

file_client_args = dict(backend='disk')
# Training pipeline: a list of seven stage configs. Each dict selects its
# implementation through the `type` key; the remaining keys are that stage's
# arguments. The nested data_config / bda_aug_conf / grid_config values repeat
# the top-level settings of the same names found later in this file.
train_pipeline = [
    dict(
        type='PrepareImageInputs',
        is_train=True,
        data_config=dict(
            cams=[
                'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT',
                'CAM_BACK_LEFT', 'CAM_BACK', 'CAM_BACK_RIGHT',
            ],
            Ncams=6,
            input_size=(256, 704),
            src_size=(900, 1600),
            resize=(-0.06, 0.11),
            rot=(-5.4, 5.4),
            flip=True,
            crop_h=(0.0, 0.0),
            resize_test=0.0,
        ),
        sequential=False,
    ),
    dict(
        type='LoadAnnotationsBEVDepth',
        bda_aug_conf=dict(
            rot_lim=(-0.0, 0.0),
            scale_lim=(1.0, 1.0),
            flip_dx_ratio=0.5,
            flip_dy_ratio=0.5,
        ),
        classes=[
            'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
            'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone',
        ],
        is_train=True,
    ),
    # This stage takes no arguments besides its type.
    dict(type='LoadOccGTFromFile'),
    dict(
        type='LoadPointsFromFile',
        coord_type='LIDAR',
        load_dim=5,
        use_dim=5,
        file_client_args=dict(backend='disk'),
    ),
    dict(
        type='PointToMultiViewDepth',
        downsample=1,
        grid_config=dict(
            x=[-40, 40, 0.4],
            y=[-40, 40, 0.4],
            z=[-1, 5.4, 6.4],
            depth=[1.0, 45.0, 0.5],
        ),
    ),
    dict(
        type='DefaultFormatBundle3D',
        class_names=[
            'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
            'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone',
        ],
    ),
    # Final stage: the five keys listed here are the ones collected.
    dict(
        type='Collect3D',
        keys=[
            'img_inputs',
            'gt_depth',
            'voxel_semantics',
            'mask_lidar',
            'mask_camera',
        ],
    ),
]
# Test pipeline: four stage configs. Unlike train_pipeline, the first stage
# carries no is_train key and the annotation-loading stage sets
# is_train=False. Formatting and collection are nested inside the
# MultiScaleFlipAug3D stage's `transforms` list.
test_pipeline = [
    dict(
        type='PrepareImageInputs',
        data_config=dict(
            cams=[
                'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT',
                'CAM_BACK_LEFT', 'CAM_BACK', 'CAM_BACK_RIGHT',
            ],
            Ncams=6,
            input_size=(256, 704),
            src_size=(900, 1600),
            resize=(-0.06, 0.11),
            rot=(-5.4, 5.4),
            flip=True,
            crop_h=(0.0, 0.0),
            resize_test=0.0,
        ),
        sequential=False,
    ),
    dict(
        type='LoadAnnotationsBEVDepth',
        bda_aug_conf=dict(
            rot_lim=(-0.0, 0.0),
            scale_lim=(1.0, 1.0),
            flip_dx_ratio=0.5,
            flip_dy_ratio=0.5,
        ),
        classes=[
            'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
            'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone',
        ],
        is_train=False,
    ),
    dict(
        type='LoadPointsFromFile',
        coord_type='LIDAR',
        load_dim=5,
        use_dim=5,
        file_client_args=dict(backend='disk'),
    ),
    dict(
        type='MultiScaleFlipAug3D',
        img_scale=(1333, 800),
        pts_scale_ratio=1,
        flip=False,
        transforms=[
            dict(
                type='DefaultFormatBundle3D',
                class_names=[
                    'car', 'truck', 'construction_vehicle', 'bus',
                    'trailer', 'barrier', 'motorcycle', 'bicycle',
                    'pedestrian', 'traffic_cone',
                ],
                with_label=False,
            ),
            dict(type='Collect3D', keys=['points', 'img_inputs']),
        ],
    ),
]
# Evaluation pipeline: four point-cloud-only stage configs.
# NOTE(review): the class_names list here is ordered differently from the
# top-level `class_names` in this file -- confirm whether the order matters
# to whatever consumes eval_pipeline.
eval_pipeline = [
    dict(
        type='LoadPointsFromFile',
        coord_type='LIDAR',
        load_dim=5,
        use_dim=5,
        file_client_args=dict(backend='disk'),
    ),
    dict(
        type='LoadPointsFromMultiSweeps',
        sweeps_num=10,
        file_client_args=dict(backend='disk'),
    ),
    dict(
        type='DefaultFormatBundle3D',
        class_names=[
            'car',
            'truck',
            'trailer',
            'bus',
            'construction_vehicle',
            'bicycle',
            'motorcycle',
            'pedestrian',
            'traffic_cone',
            'barrier',
        ],
        with_label=False,
    ),
    dict(type='Collect3D', keys=['points']),
]
# Dataloader sizes plus one dataset config per split (train / val / test).
# Every nested value is written out inline; the pipelines repeat the stages
# of the top-level train_pipeline / test_pipeline lists in this file.
data = dict(
    samples_per_gpu=24,
    workers_per_gpu=24,
    # Training split: reads *_infos_train.pkl and uses the seven-stage
    # training pipeline (is_train=True, test_mode=False).
    train=dict(
        type='NuScenesDatasetOccpancy',
        data_root='data/nuscenes/',
        ann_file='data/nuscenes/bevdetv2-nuscenes_infos_train.pkl',
        pipeline=[
            dict(
                type='PrepareImageInputs',
                is_train=True,
                data_config=dict(
                    cams=[
                        'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT',
                        'CAM_BACK_LEFT', 'CAM_BACK', 'CAM_BACK_RIGHT',
                    ],
                    Ncams=6,
                    input_size=(256, 704),
                    src_size=(900, 1600),
                    resize=(-0.06, 0.11),
                    rot=(-5.4, 5.4),
                    flip=True,
                    crop_h=(0.0, 0.0),
                    resize_test=0.0,
                ),
                sequential=False,
            ),
            dict(
                type='LoadAnnotationsBEVDepth',
                bda_aug_conf=dict(
                    rot_lim=(-0.0, 0.0),
                    scale_lim=(1.0, 1.0),
                    flip_dx_ratio=0.5,
                    flip_dy_ratio=0.5,
                ),
                classes=[
                    'car', 'truck', 'construction_vehicle', 'bus',
                    'trailer', 'barrier', 'motorcycle', 'bicycle',
                    'pedestrian', 'traffic_cone',
                ],
                is_train=True,
            ),
            dict(type='LoadOccGTFromFile'),
            dict(
                type='LoadPointsFromFile',
                coord_type='LIDAR',
                load_dim=5,
                use_dim=5,
                file_client_args=dict(backend='disk'),
            ),
            dict(
                type='PointToMultiViewDepth',
                downsample=1,
                grid_config=dict(
                    x=[-40, 40, 0.4],
                    y=[-40, 40, 0.4],
                    z=[-1, 5.4, 6.4],
                    depth=[1.0, 45.0, 0.5],
                ),
            ),
            dict(
                type='DefaultFormatBundle3D',
                class_names=[
                    'car', 'truck', 'construction_vehicle', 'bus',
                    'trailer', 'barrier', 'motorcycle', 'bicycle',
                    'pedestrian', 'traffic_cone',
                ],
            ),
            dict(
                type='Collect3D',
                keys=[
                    'img_inputs',
                    'gt_depth',
                    'voxel_semantics',
                    'mask_lidar',
                    'mask_camera',
                ],
            ),
        ],
        classes=[
            'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
            'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone',
        ],
        modality=dict(
            use_lidar=False,
            use_camera=True,
            use_radar=False,
            use_map=False,
            use_external=False,
        ),
        test_mode=False,
        box_type_3d='LiDAR',
        use_valid_flag=True,
        stereo=False,
        filter_empty_gt=False,
        img_info_prototype='bevdet',
    ),
    # Validation split: reads *_infos_val.pkl with the four-stage test
    # pipeline (test_mode=True).
    val=dict(
        type='NuScenesDatasetOccpancy',
        data_root='data/nuscenes/',
        ann_file='data/nuscenes/bevdetv2-nuscenes_infos_val.pkl',
        pipeline=[
            dict(
                type='PrepareImageInputs',
                data_config=dict(
                    cams=[
                        'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT',
                        'CAM_BACK_LEFT', 'CAM_BACK', 'CAM_BACK_RIGHT',
                    ],
                    Ncams=6,
                    input_size=(256, 704),
                    src_size=(900, 1600),
                    resize=(-0.06, 0.11),
                    rot=(-5.4, 5.4),
                    flip=True,
                    crop_h=(0.0, 0.0),
                    resize_test=0.0,
                ),
                sequential=False,
            ),
            dict(
                type='LoadAnnotationsBEVDepth',
                bda_aug_conf=dict(
                    rot_lim=(-0.0, 0.0),
                    scale_lim=(1.0, 1.0),
                    flip_dx_ratio=0.5,
                    flip_dy_ratio=0.5,
                ),
                classes=[
                    'car', 'truck', 'construction_vehicle', 'bus',
                    'trailer', 'barrier', 'motorcycle', 'bicycle',
                    'pedestrian', 'traffic_cone',
                ],
                is_train=False,
            ),
            dict(
                type='LoadPointsFromFile',
                coord_type='LIDAR',
                load_dim=5,
                use_dim=5,
                file_client_args=dict(backend='disk'),
            ),
            dict(
                type='MultiScaleFlipAug3D',
                img_scale=(1333, 800),
                pts_scale_ratio=1,
                flip=False,
                transforms=[
                    dict(
                        type='DefaultFormatBundle3D',
                        class_names=[
                            'car', 'truck', 'construction_vehicle', 'bus',
                            'trailer', 'barrier', 'motorcycle', 'bicycle',
                            'pedestrian', 'traffic_cone',
                        ],
                        with_label=False,
                    ),
                    dict(type='Collect3D', keys=['points', 'img_inputs']),
                ],
            ),
        ],
        classes=[
            'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
            'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone',
        ],
        modality=dict(
            use_lidar=False,
            use_camera=True,
            use_radar=False,
            use_map=False,
            use_external=False,
        ),
        test_mode=True,
        box_type_3d='LiDAR',
        stereo=False,
        filter_empty_gt=False,
        img_info_prototype='bevdet',
    ),
    # Test split: identical, key for key, to the `val` entry above
    # (including the *_infos_val.pkl annotation file).
    test=dict(
        type='NuScenesDatasetOccpancy',
        data_root='data/nuscenes/',
        ann_file='data/nuscenes/bevdetv2-nuscenes_infos_val.pkl',
        pipeline=[
            dict(
                type='PrepareImageInputs',
                data_config=dict(
                    cams=[
                        'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT',
                        'CAM_BACK_LEFT', 'CAM_BACK', 'CAM_BACK_RIGHT',
                    ],
                    Ncams=6,
                    input_size=(256, 704),
                    src_size=(900, 1600),
                    resize=(-0.06, 0.11),
                    rot=(-5.4, 5.4),
                    flip=True,
                    crop_h=(0.0, 0.0),
                    resize_test=0.0,
                ),
                sequential=False,
            ),
            dict(
                type='LoadAnnotationsBEVDepth',
                bda_aug_conf=dict(
                    rot_lim=(-0.0, 0.0),
                    scale_lim=(1.0, 1.0),
                    flip_dx_ratio=0.5,
                    flip_dy_ratio=0.5,
                ),
                classes=[
                    'car', 'truck', 'construction_vehicle', 'bus',
                    'trailer', 'barrier', 'motorcycle', 'bicycle',
                    'pedestrian', 'traffic_cone',
                ],
                is_train=False,
            ),
            dict(
                type='LoadPointsFromFile',
                coord_type='LIDAR',
                load_dim=5,
                use_dim=5,
                file_client_args=dict(backend='disk'),
            ),
            dict(
                type='MultiScaleFlipAug3D',
                img_scale=(1333, 800),
                pts_scale_ratio=1,
                flip=False,
                transforms=[
                    dict(
                        type='DefaultFormatBundle3D',
                        class_names=[
                            'car', 'truck', 'construction_vehicle', 'bus',
                            'trailer', 'barrier', 'motorcycle', 'bicycle',
                            'pedestrian', 'traffic_cone',
                        ],
                        with_label=False,
                    ),
                    dict(type='Collect3D', keys=['points', 'img_inputs']),
                ],
            ),
        ],
        classes=[
            'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
            'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone',
        ],
        modality=dict(
            use_lidar=False,
            use_camera=True,
            use_radar=False,
            use_map=False,
            use_external=False,
        ),
        test_mode=True,
        box_type_3d='LiDAR',
        stereo=False,
        filter_empty_gt=False,
        img_info_prototype='bevdet',
    ),
)
# Evaluation settings: interval=1 and start=20, plus an inline pipeline
# whose four stages repeat those of the top-level `test_pipeline` in this
# file.
evaluation = dict(
    interval=1,
    pipeline=[
        dict(
            type='PrepareImageInputs',
            data_config=dict(
                cams=[
                    'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT',
                    'CAM_BACK_LEFT', 'CAM_BACK', 'CAM_BACK_RIGHT',
                ],
                Ncams=6,
                input_size=(256, 704),
                src_size=(900, 1600),
                resize=(-0.06, 0.11),
                rot=(-5.4, 5.4),
                flip=True,
                crop_h=(0.0, 0.0),
                resize_test=0.0,
            ),
            sequential=False,
        ),
        dict(
            type='LoadAnnotationsBEVDepth',
            bda_aug_conf=dict(
                rot_lim=(-0.0, 0.0),
                scale_lim=(1.0, 1.0),
                flip_dx_ratio=0.5,
                flip_dy_ratio=0.5,
            ),
            classes=[
                'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
                'barrier', 'motorcycle', 'bicycle', 'pedestrian',
                'traffic_cone',
            ],
            is_train=False,
        ),
        dict(
            type='LoadPointsFromFile',
            coord_type='LIDAR',
            load_dim=5,
            use_dim=5,
            file_client_args=dict(backend='disk'),
        ),
        dict(
            type='MultiScaleFlipAug3D',
            img_scale=(1333, 800),
            pts_scale_ratio=1,
            flip=False,
            transforms=[
                dict(
                    type='DefaultFormatBundle3D',
                    class_names=[
                        'car', 'truck', 'construction_vehicle', 'bus',
                        'trailer', 'barrier', 'motorcycle', 'bicycle',
                        'pedestrian', 'traffic_cone',
                    ],
                    with_label=False,
                ),
                dict(type='Collect3D', keys=['points', 'img_inputs']),
            ],
        ),
    ],
    start=20,
)
# ---------------------------------------------------------------------------
# Runtime settings: checkpointing, logging, distributed backend, paths.
# ---------------------------------------------------------------------------
checkpoint_config = dict(
    interval=1,
    max_keep_ckpts=5,
)

# Two logger hooks are configured: text and TensorBoard.
log_config = dict(
    interval=1,
    hooks=[
        dict(type='TextLoggerHook'),
        dict(type='TensorboardLoggerHook'),
    ],
)

dist_params = dict(backend='nccl')
log_level = 'INFO'

# Paths are relative (no leading slash); load_from names a checkpoint file
# while resume_from is left unset.
work_dir = './work_dirs/flashocc-r50'
load_from = 'ckpts/bevdet-r50-cbgs.pth'
resume_from = None

workflow = [
    ('train', 1),
]
opencv_num_threads = 0
mp_start_method = 'fork'

# Plugin switch and the directory it refers to.
plugin = True
plugin_dir = 'projects/mmdet3d_plugin/'
# Image-input settings: six camera names (Ncams matches the list length),
# input/source sizes, and the resize / rot / flip / crop values.
data_config = dict(
    cams=[
        'CAM_FRONT_LEFT',
        'CAM_FRONT',
        'CAM_FRONT_RIGHT',
        'CAM_BACK_LEFT',
        'CAM_BACK',
        'CAM_BACK_RIGHT',
    ],
    Ncams=6,
    input_size=(256, 704),
    src_size=(900, 1600),
    resize=(-0.06, 0.11),
    rot=(-5.4, 5.4),
    flip=True,
    crop_h=(0.0, 0.0),
    resize_test=0.0,
)

# One three-element list per axis; presumably [lower, upper, step] -- TODO
# confirm against the code that reads grid_config.
grid_config = dict(
    x=[-40, 40, 0.4],
    y=[-40, 40, 0.4],
    z=[-1, 5.4, 6.4],
    depth=[1.0, 45.0, 0.5],
)

voxel_size = [0.1, 0.1, 0.2]
numC_Trans = 64
# Model definition: a top-level `type` plus six component configs, each
# selecting its own implementation through a nested `type` key.
model = dict(
    type='BEVDetOCC',
    img_backbone=dict(
        type='ResNet',
        depth=50,
        num_stages=4,
        out_indices=(2, 3),
        frozen_stages=-1,
        norm_cfg=dict(type='BN', requires_grad=True),
        norm_eval=False,
        with_cp=True,
        style='pytorch',
    ),
    img_neck=dict(
        type='CustomFPN',
        in_channels=[1024, 2048],
        out_channels=256,
        num_outs=1,
        start_level=0,
        out_ids=[0],
    ),
    # grid_config and input_size repeat the values of the top-level
    # grid_config / data_config['input_size'] entries in this file.
    img_view_transformer=dict(
        type='LSSViewTransformer',
        grid_config=dict(
            x=[-40, 40, 0.4],
            y=[-40, 40, 0.4],
            z=[-1, 5.4, 6.4],
            depth=[1.0, 45.0, 0.5],
        ),
        input_size=(256, 704),
        in_channels=256,
        out_channels=64,
        sid=False,
        collapse_z=True,
        downsample=16,
    ),
    # numC_input (64) equals the view transformer's out_channels above.
    img_bev_encoder_backbone=dict(
        type='CustomResNet',
        numC_input=64,
        num_channels=[128, 256, 512],
    ),
    img_bev_encoder_neck=dict(
        type='FPN_LSS',
        in_channels=640,
        out_channels=256,
    ),
    occ_head=dict(
        type='BEVOCCHead2D',
        in_dim=256,
        out_dim=256,
        Dz=16,
        use_mask=True,
        num_classes=18,
        use_predicter=True,
        class_balance=False,
        loss_occ=dict(
            type='CrossEntropyLoss',
            use_sigmoid=False,
            ignore_index=255,
            loss_weight=1.0,
        ),
    ),
)
# Augmentation settings: both rot_lim bounds are zero and both scale_lim
# bounds are 1.0; the two flip ratios are 0.5.
bda_aug_conf = dict(
    rot_lim=(-0.0, 0.0),
    scale_lim=(1.0, 1.0),
    flip_dx_ratio=0.5,
    flip_dy_ratio=0.5,
)

# Dataset settings without ann_file or pipeline keys; the same values also
# appear inside every split of `data` in this file.
share_data_config = dict(
    type='NuScenesDatasetOccpancy',
    data_root='data/nuscenes/',
    classes=[
        'car',
        'truck',
        'construction_vehicle',
        'bus',
        'trailer',
        'barrier',
        'motorcycle',
        'bicycle',
        'pedestrian',
        'traffic_cone',
    ],
    modality=dict(
        use_lidar=False,
        use_camera=True,
        use_radar=False,
        use_map=False,
        use_external=False,
    ),
    stereo=False,
    filter_empty_gt=False,
    img_info_prototype='bevdet',
)
# Test-split dataset definition: a four-step pipeline, the annotation file,
# and dataset settings with the same values as share_data_config.
test_data_config = dict(
    pipeline=[
        # Step 1: image inputs; data_config repeats the values of the
        # top-level data_config in this file.
        dict(
            type='PrepareImageInputs',
            data_config=dict(
                cams=[
                    'CAM_FRONT_LEFT',
                    'CAM_FRONT',
                    'CAM_FRONT_RIGHT',
                    'CAM_BACK_LEFT',
                    'CAM_BACK',
                    'CAM_BACK_RIGHT',
                ],
                Ncams=6,
                input_size=(256, 704),
                src_size=(900, 1600),
                resize=(-0.06, 0.11),
                rot=(-5.4, 5.4),
                flip=True,
                crop_h=(0.0, 0.0),
                resize_test=0.0,
            ),
            sequential=False,
        ),
        # Step 2: annotations, explicitly flagged as non-training.
        dict(
            type='LoadAnnotationsBEVDepth',
            bda_aug_conf=dict(
                rot_lim=(-0.0, 0.0),
                scale_lim=(1.0, 1.0),
                flip_dx_ratio=0.5,
                flip_dy_ratio=0.5,
            ),
            classes=[
                'car',
                'truck',
                'construction_vehicle',
                'bus',
                'trailer',
                'barrier',
                'motorcycle',
                'bicycle',
                'pedestrian',
                'traffic_cone',
            ],
            is_train=False,
        ),
        # Step 3: point loading from local disk, using all 5 loaded dims.
        dict(
            type='LoadPointsFromFile',
            coord_type='LIDAR',
            load_dim=5,
            use_dim=5,
            file_client_args=dict(backend='disk'),
        ),
        # Step 4: wrapper with flipping off and a single point scale ratio;
        # its inner transforms format and collect 'points' and 'img_inputs'.
        dict(
            type='MultiScaleFlipAug3D',
            img_scale=(1333, 800),
            pts_scale_ratio=1,
            flip=False,
            transforms=[
                dict(
                    type='DefaultFormatBundle3D',
                    class_names=[
                        'car',
                        'truck',
                        'construction_vehicle',
                        'bus',
                        'trailer',
                        'barrier',
                        'motorcycle',
                        'bicycle',
                        'pedestrian',
                        'traffic_cone',
                    ],
                    with_label=False,
                ),
                dict(type='Collect3D', keys=['points', 'img_inputs']),
            ],
        ),
    ],
    ann_file='data/nuscenes/bevdetv2-nuscenes_infos_val.pkl',
    # NOTE(review): the spelling 'Occpancy' presumably has to match the name
    # the dataset class is registered under — do not "correct" it here.
    type='NuScenesDatasetOccpancy',
    data_root='data/nuscenes/',
    classes=[
        'car',
        'truck',
        'construction_vehicle',
        'bus',
        'trailer',
        'barrier',
        'motorcycle',
        'bicycle',
        'pedestrian',
        'traffic_cone',
    ],
    modality=dict(
        use_lidar=False,
        use_camera=True,
        use_radar=False,
        use_map=False,
        use_external=False,
    ),
    stereo=False,
    filter_empty_gt=False,
    img_info_prototype='bevdet',
)
# NOTE(review): `key` looks like a leftover loop variable captured when the
# config was resolved — confirm nothing reads it before removing.
key = 'test'

# Optimizer and gradient clipping (L2 norm, max 5).
optimizer = dict(
    type='AdamW',
    lr=0.0001,
    weight_decay=0.01,
)
optimizer_config = dict(
    grad_clip=dict(max_norm=5, norm_type=2),
)

# Step LR policy with a linear warmup over the first 200 iterations.
# NOTE(review): the only step (24) equals runner.max_epochs (24); depending on
# how the scheduler indexes epochs the decay may never take effect — confirm
# this is intended.
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=200,
    warmup_ratio=0.001,
    step=[24],
)
runner = dict(
    type='EpochBasedRunner',
    max_epochs=24,
)

custom_hooks = [
    dict(
        type='MEGVIIEMAHook',
        init_updates=10560,
        priority='NORMAL',
    ),
]

# NOTE(review): this is a `range` object, not a list — presumably produced by
# the launcher at run time; confirm consumers accept a range.
gpu_ids = range(0, 8)
MapTR @ e03f097a
Subproject commit e03f097abef19e1ba3fed5f471a8d80fbfa0a064
mmdetection3d @ 962f0937
Subproject commit 962f093736ffe55c089bc618842a8b8567318c8c
Sparse4D @ c41df4bb
Subproject commit c41df4bbf7bc82490f11ff55173abfcb3fb91425
......@@ -10,7 +10,7 @@ git clone -b core_v0.12.0 --recurse-submodules http://10.16.6.30/dcutoolkit/deep
cd dcu_megatron-core_v0.12.0/
python setup.py install
cd ../llama-factory
cd ./llama-factory
pip install -r requirements.txt
```
......
......@@ -4,7 +4,7 @@
```
docker run -dit --network=host --name=wan21 --privileged --device=/dev/kfd --device=/dev/dri --ipc=host --shm-size=128G --group-add video --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -u root --ulimit stack=-1:-1 --ulimit memlock=-1:-1 -v /opt/hyhal:/opt/hyhal:ro -v /public/opendas/DL_DATA/llm-models/:/models:ro harbor.sourcefind.cn:5443/dcu/admin/base/pytorch:2.5.1-ubuntu22.04-dtk25.04.4-1230-py3.10-20260115
cd musubi-tuner
cd ./musubi-tuner
pip install -e .
```
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment