add code

d2b71343 · 雍大凯 · 69e57885 · d2b71343 · d2b71343 · d2b71343
Commit d2b71343 authored Apr 08, 2026 by 雍大凯
19 changed files
--- a/docker-hub/FlashOCC/Flashocc/tools/slurm_test.sh
+++ b/docker-hub/FlashOCC/Flashocc/tools/slurm_test.sh
+#!/usr/bin/env bash
+
+set -x
+
+PARTITION=$1
+JOB_NAME=$2
+CONFIG=$3
+CHECKPOINT=$4
+GPUS=${GPUS:-8}
+GPUS_PER_NODE=${GPUS_PER_NODE:-8}
+CPUS_PER_TASK=${CPUS_PER_TASK:-5}
+PY_ARGS=${@:5}
+SRUN_ARGS=${SRUN_ARGS:-""}
+
+PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
+srun -p ${PARTITION} \
+    --job-name=${JOB_NAME} \
+    --gres=gpu:${GPUS_PER_NODE} \
+    --ntasks=${GPUS} \
+    --ntasks-per-node=${GPUS_PER_NODE} \
+    --cpus-per-task=${CPUS_PER_TASK} \
+    --kill-on-bad-exit=1 \
+    ${SRUN_ARGS} \
+    python -u tools/test.py ${CONFIG} ${CHECKPOINT} --launcher="slurm" ${PY_ARGS}
--- a/docker-hub/FlashOCC/Flashocc/tools/slurm_train.sh
+++ b/docker-hub/FlashOCC/Flashocc/tools/slurm_train.sh
+#!/usr/bin/env bash
+
+set -x
+
+PARTITION=$1
+JOB_NAME=$2
+CONFIG=$3
+WORK_DIR=$4
+GPUS=${GPUS:-8}
+GPUS_PER_NODE=${GPUS_PER_NODE:-8}
+CPUS_PER_TASK=${CPUS_PER_TASK:-5}
+SRUN_ARGS=${SRUN_ARGS:-""}
+PY_ARGS=${@:5}
+
+PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
+srun -p ${PARTITION} \
+    --job-name=${JOB_NAME} \
+    --gres=gpu:${GPUS_PER_NODE} \
+    --ntasks=${GPUS} \
+    --ntasks-per-node=${GPUS_PER_NODE} \
+    --cpus-per-task=${CPUS_PER_TASK} \
+    --kill-on-bad-exit=1 \
+    ${SRUN_ARGS} \
+    python -u tools/train.py ${CONFIG} --work-dir=${WORK_DIR} --launcher="slurm" ${PY_ARGS}
--- a/docker-hub/FlashOCC/Flashocc/tools/test.py
+++ b/docker-hub/FlashOCC/Flashocc/tools/test.py
+# Copyright (c) OpenMMLab. All rights reserved.
+import argparse
+import os
+import warnings
+
+import mmcv
+import torch
+from mmcv import Config, DictAction
+from mmcv.cnn import fuse_conv_bn
+from mmcv.parallel import MMDataParallel, MMDistributedDataParallel
+from mmcv.runner import (get_dist_info, init_dist, load_checkpoint,
+                         wrap_fp16_model)
+
+import mmdet
+from mmdet3d.apis import single_gpu_test
+from mmdet3d.datasets import build_dataloader, build_dataset
+from mmdet3d.models import build_model
+from mmdet.apis import multi_gpu_test, set_random_seed
+from mmdet.datasets import replace_ImageToTensor
+
+if mmdet.__version__ > '2.23.0':
+    # If mmdet version > 2.23.0, setup_multi_processes would be imported and
+    # used from mmdet instead of mmdet3d.
+    from mmdet.utils import setup_multi_processes
+else:
+    from mmdet3d.utils import setup_multi_processes
+
+try:
+    # If mmdet version > 2.23.0, compat_cfg would be imported and
+    # used from mmdet instead of mmdet3d.
+    from mmdet.utils import compat_cfg
+except ImportError:
+    from mmdet3d.utils import compat_cfg
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(
+        description='MMDet test (and eval) a model')
+    parser.add_argument('config', help='test config file path')
+    parser.add_argument('checkpoint', help='checkpoint file')
+    parser.add_argument('--out', help='output result file in pickle format')
+    parser.add_argument(
+        '--fuse-conv-bn',
+        action='store_true',
+        help='Whether to fuse conv and bn, this will slightly increase'
+        'the inference speed')
+    parser.add_argument(
+        '--gpu-ids',
+        type=int,
+        nargs='+',
+        help='(Deprecated, please use --gpu-id) ids of gpus to use '
+        '(only applicable to non-distributed training)')
+    parser.add_argument(
+        '--gpu-id',
+        type=int,
+        default=0,
+        help='id of gpu to use '
+        '(only applicable to non-distributed testing)')
+    parser.add_argument(
+        '--format-only',
+        action='store_true',
+        help='Format the output results without perform evaluation. It is'
+        'useful when you want to format the result to a specific format and '
+        'submit it to the test server')
+    parser.add_argument(
+        '--eval',
+        type=str,
+        nargs='+',
+        help='evaluation metrics, which depends on the dataset, e.g., "bbox",'
+        ' "segm", "proposal" for COCO, and "mAP", "recall" for PASCAL VOC')
+    parser.add_argument('--show', action='store_true', help='show results')
+    parser.add_argument(
+        '--show-dir', help='directory where results will be saved')
+    parser.add_argument(
+        '--gpu-collect',
+        action='store_true',
+        help='whether to use gpu to collect results.')
+    parser.add_argument(
+        '--no-aavt',
+        action='store_true',
+        help='Do not align after view transformer.')
+    parser.add_argument(
+        '--tmpdir',
+        help='tmp directory used for collecting results from multiple '
+        'workers, available when gpu-collect is not specified')
+    parser.add_argument('--seed', type=int, default=0, help='random seed')
+    parser.add_argument(
+        '--deterministic',
+        action='store_true',
+        help='whether to set deterministic options for CUDNN backend.')
+    parser.add_argument(
+        '--cfg-options',
+        nargs='+',
+        action=DictAction,
+        help='override some settings in the used config, the key-value pair '
+        'in xxx=yyy format will be merged into config file. If the value to '
+        'be overwritten is a list, it should be like key="[a,b]" or key=a,b '
+        'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" '
+        'Note that the quotation marks are necessary and that no white space '
+        'is allowed.')
+    parser.add_argument(
+        '--options',
+        nargs='+',
+        action=DictAction,
+        help='custom options for evaluation, the key-value pair in xxx=yyy '
+        'format will be kwargs for dataset.evaluate() function (deprecate), '
+        'change to --eval-options instead.')
+    parser.add_argument(
+        '--eval-options',
+        nargs='+',
+        action=DictAction,
+        help='custom options for evaluation, the key-value pair in xxx=yyy '
+        'format will be kwargs for dataset.evaluate() function')
+    parser.add_argument(
+        '--launcher',
+        choices=['none', 'pytorch', 'slurm', 'mpi'],
+        default='none',
+        help='job launcher')
+    parser.add_argument('--local_rank', type=int, default=0)
+    args = parser.parse_args()
+    if 'LOCAL_RANK' not in os.environ:
+        os.environ['LOCAL_RANK'] = str(args.local_rank)
+
+    if args.options and args.eval_options:
+        raise ValueError(
+            '--options and --eval-options cannot be both specified, '
+            '--options is deprecated in favor of --eval-options')
+    if args.options:
+        warnings.warn('--options is deprecated in favor of --eval-options')
+        args.eval_options = args.options
+    return args
+
+
+def main():
+    """Test a model and optionally dump, format or evaluate its outputs.
+
+    Steps, in order: parse the CLI arguments, load and patch the config,
+    import plugin modules, optionally initialise the distributed environment,
+    build the test dataloader and the model, load the checkpoint, run
+    single- or multi-GPU testing, and on rank 0 dump, format and/or evaluate
+    the collected outputs.
+
+    Raises:
+        AssertionError: If none of ``--out``, ``--eval``, ``--format-only``,
+            ``--show`` or ``--show-dir`` is given.
+        ValueError: If ``--eval`` and ``--format-only`` are both given, or if
+            ``--out`` does not end with ``.pkl`` / ``.pickle``.
+    """
+    args = parse_args()
+
+    assert args.out or args.eval or args.format_only or args.show \
+        or args.show_dir, \
+        ('Please specify at least one operation (save/eval/format/show the '
+         'results / save the results) with the argument "--out", "--eval"'
+         ', "--format-only", "--show" or "--show-dir"')
+
+    if args.eval and args.format_only:
+        raise ValueError('--eval and --format_only cannot be both specified')
+
+    if args.out is not None and not args.out.endswith(('.pkl', '.pickle')):
+        raise ValueError('The output file must be a pkl file.')
+
+    cfg = Config.fromfile(args.config)
+    if args.cfg_options is not None:
+        cfg.merge_from_dict(args.cfg_options)
+
+    cfg = compat_cfg(cfg)
+
+    # set multi-process settings
+    setup_multi_processes(cfg)
+
+    # import modules from plugin/xx, registry will be updated
+    if hasattr(cfg, 'plugin'):
+        if cfg.plugin:
+            import importlib
+            if hasattr(cfg, 'plugin_dir'):
+                # convert the directory part of plugin_dir ('a/b/c') into a
+                # dotted module path ('a.b.c') and import it
+                plugin_dir = cfg.plugin_dir
+                _module_dir = os.path.dirname(plugin_dir)
+                _module_dir = _module_dir.split('/')
+                _module_path = _module_dir[0]
+
+                for m in _module_dir[1:]:
+                    _module_path = _module_path + '.' + m
+                print(_module_path)
+                plg_lib = importlib.import_module(_module_path)
+            else:
+                # import dir is the dirpath for the config file
+                _module_dir = os.path.dirname(args.config)
+                _module_dir = _module_dir.split('/')
+                _module_path = _module_dir[0]
+                for m in _module_dir[1:]:
+                    _module_path = _module_path + '.' + m
+                plg_lib = importlib.import_module(_module_path)
+
+    # set cudnn_benchmark
+    if cfg.get('cudnn_benchmark', False):
+        torch.backends.cudnn.benchmark = True
+
+    # weights come from the checkpoint given on the command line
+    cfg.model.pretrained = None
+
+    if args.gpu_ids is not None:
+        cfg.gpu_ids = args.gpu_ids[0:1]
+        warnings.warn('`--gpu-ids` is deprecated, please use `--gpu-id`. '
+                      'Because we only support single GPU mode in '
+                      'non-distributed testing. Use the first GPU '
+                      'in `gpu_ids` now.')
+    else:
+        cfg.gpu_ids = [args.gpu_id]
+
+    # init distributed env first, since logger depends on the dist info.
+    if args.launcher == 'none':
+        distributed = False
+    else:
+        distributed = True
+        init_dist(args.launcher, **cfg.dist_params)
+
+    # defaults, overridden below by cfg.data.test_dataloader when present
+    test_dataloader_default_args = dict(
+        samples_per_gpu=1, workers_per_gpu=2, dist=distributed, shuffle=False)
+
+    # in case the test dataset is concatenated
+    if isinstance(cfg.data.test, dict):
+        cfg.data.test.test_mode = True
+        if cfg.data.test_dataloader.get('samples_per_gpu', 1) > 1:
+            # Replace 'ImageToTensor' to 'DefaultFormatBundle'
+            cfg.data.test.pipeline = replace_ImageToTensor(
+                cfg.data.test.pipeline)
+    elif isinstance(cfg.data.test, list):
+        for ds_cfg in cfg.data.test:
+            ds_cfg.test_mode = True
+        if cfg.data.test_dataloader.get('samples_per_gpu', 1) > 1:
+            for ds_cfg in cfg.data.test:
+                ds_cfg.pipeline = replace_ImageToTensor(ds_cfg.pipeline)
+
+    test_loader_cfg = {
+        **test_dataloader_default_args,
+        **cfg.data.get('test_dataloader', {})
+    }
+
+    # set random seeds
+    # NOTE: --seed defaults to 0, so this branch is always taken.
+    if args.seed is not None:
+        set_random_seed(args.seed, deterministic=args.deterministic)
+
+    # build the dataloader
+    dataset = build_dataset(cfg.data.test)
+    data_loader = build_dataloader(dataset, **test_loader_cfg)
+
+    # build the model and load checkpoint
+    if not args.no_aavt:
+        # unless --no-aavt is given, enable the flag for model types whose
+        # name contains '4D' (the attribute name is spelled this way on
+        # purpose: it is a config key and must not be "corrected" here)
+        if '4D' in cfg.model.type:
+            cfg.model.align_after_view_transfromation=True
+    cfg.model.train_cfg = None
+    model = build_model(cfg.model, test_cfg=cfg.get('test_cfg'))
+    fp16_cfg = cfg.get('fp16', None)
+    if fp16_cfg is not None:
+        wrap_fp16_model(model)
+    checkpoint = load_checkpoint(model, args.checkpoint, map_location='cpu')
+    if args.fuse_conv_bn:
+        model = fuse_conv_bn(model)
+    # old versions did not save class info in checkpoints, this workaround is
+    # for backward compatibility
+    if 'CLASSES' in checkpoint.get('meta', {}):
+        model.CLASSES = checkpoint['meta']['CLASSES']
+    else:
+        model.CLASSES = dataset.CLASSES
+    # palette for visualization in segmentation tasks
+    if 'PALETTE' in checkpoint.get('meta', {}):
+        model.PALETTE = checkpoint['meta']['PALETTE']
+    elif hasattr(dataset, 'PALETTE'):
+        # segmentation dataset has `PALETTE` attribute
+        model.PALETTE = dataset.PALETTE
+
+    if not distributed:
+        model = MMDataParallel(model, device_ids=cfg.gpu_ids)
+        outputs = single_gpu_test(model, data_loader, args.show, args.show_dir)
+    else:
+        model = MMDistributedDataParallel(
+            model.cuda(),
+            device_ids=[torch.cuda.current_device()],
+            broadcast_buffers=False)
+        outputs = multi_gpu_test(model, data_loader, args.tmpdir,
+                                 args.gpu_collect)
+
+    # only rank 0 writes, formats and evaluates the results
+    rank, _ = get_dist_info()
+    if rank == 0:
+        if args.out:
+            print(f'\nwriting results to {args.out}')
+            mmcv.dump(outputs, args.out)
+        kwargs = {} if args.eval_options is None else args.eval_options
+        if args.format_only:
+            dataset.format_results(outputs, **kwargs)
+        if args.eval:
+            eval_kwargs = cfg.get('evaluation', {}).copy()
+            # hard-code way to remove EvalHook args
+            for key in [
+                    'interval', 'tmpdir', 'start', 'gpu_collect', 'save_best',
+                    'rule'
+            ]:
+                eval_kwargs.pop(key, None)
+            # CLI metrics and eval options override the config values
+            eval_kwargs.update(dict(metric=args.eval, **kwargs))
+            print(dataset.evaluate(outputs, **eval_kwargs))
+
+
+if __name__ == '__main__':
+    main()
--- a/docker-hub/FlashOCC/Flashocc/tools/train.py
+++ b/docker-hub/FlashOCC/Flashocc/tools/train.py
+# Copyright (c) OpenMMLab. All rights reserved.
+from __future__ import division
+import argparse
+import copy
+import os
+import time
+import warnings
+from os import path as osp
+
+import mmcv
+import torch
+import torch.distributed as dist
+from mmcv import Config, DictAction
+from mmcv.runner import get_dist_info, init_dist
+
+from mmdet import __version__ as mmdet_version
+from mmdet3d import __version__ as mmdet3d_version
+from mmdet3d.apis import init_random_seed, train_model
+from mmdet3d.datasets import build_dataset
+from mmdet3d.models import build_model
+from mmdet3d.utils import collect_env, get_root_logger
+from mmdet.apis import set_random_seed
+from mmseg import __version__ as mmseg_version
+
+# Module-level side effect: turn on the `capture_scalar_outputs` switch of the
+# torch._dynamo configuration as soon as this script is loaded.
+# NOTE(review): this line needs a PyTorch build that exposes `torch._dynamo`;
+# confirm the minimum supported PyTorch version for this repository.
+torch._dynamo.config.capture_scalar_outputs=True
+
+
+try:
+    # If mmdet version > 2.20.0, setup_multi_processes would be imported and
+    # used from mmdet instead of mmdet3d.
+    from mmdet.utils import setup_multi_processes
+except ImportError:
+    from mmdet3d.utils import setup_multi_processes
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(description='Train a detector')
+    parser.add_argument('config', help='train config file path')
+    parser.add_argument('--work-dir', help='the dir to save logs and models')
+    parser.add_argument(
+        '--resume-from', help='the checkpoint file to resume from')
+    parser.add_argument(
+        '--auto-resume',
+        action='store_true',
+        help='resume from the latest checkpoint automatically')
+    parser.add_argument(
+        '--validate',
+        action='store_true',
+        help='whether not to evaluate the checkpoint during training')
+    group_gpus = parser.add_mutually_exclusive_group()
+    group_gpus.add_argument(
+        '--gpus',
+        type=int,
+        help='(Deprecated, please use --gpu-id) number of gpus to use '
+        '(only applicable to non-distributed training)')
+    group_gpus.add_argument(
+        '--gpu-ids',
+        type=int,
+        nargs='+',
+        help='(Deprecated, please use --gpu-id) ids of gpus to use '
+        '(only applicable to non-distributed training)')
+    group_gpus.add_argument(
+        '--gpu-id',
+        type=int,
+        default=0,
+        help='number of gpus to use '
+        '(only applicable to non-distributed training)')
+    parser.add_argument('--seed', type=int, default=0, help='random seed')
+    parser.add_argument(
+        '--diff-seed',
+        action='store_true',
+        help='Whether or not set different seeds for different ranks')
+    parser.add_argument(
+        '--deterministic',
+        action='store_true',
+        help='whether to set deterministic options for CUDNN backend.')
+    parser.add_argument(
+        '--options',
+        nargs='+',
+        action=DictAction,
+        help='override some settings in the used config, the key-value pair '
+        'in xxx=yyy format will be merged into config file (deprecate), '
+        'change to --cfg-options instead.')
+    parser.add_argument(
+        '--cfg-options',
+        nargs='+',
+        action=DictAction,
+        help='override some settings in the used config, the key-value pair '
+        'in xxx=yyy format will be merged into config file. If the value to '
+        'be overwritten is a list, it should be like key="[a,b]" or key=a,b '
+        'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" '
+        'Note that the quotation marks are necessary and that no white space '
+        'is allowed.')
+    parser.add_argument(
+        '--launcher',
+        choices=['none', 'pytorch', 'slurm', 'mpi'],
+        default='none',
+        help='job launcher')
+    parser.add_argument('--local_rank', type=int, default=0)
+    parser.add_argument(
+        '--autoscale-lr',
+        action='store_true',
+        help='automatically scale lr with the number of gpus')
+    args = parser.parse_args()
+    if 'LOCAL_RANK' not in os.environ:
+        os.environ['LOCAL_RANK'] = str(args.local_rank)
+
+    if args.options and args.cfg_options:
+        raise ValueError(
+            '--options and --cfg-options cannot be both specified, '
+            '--options is deprecated in favor of --cfg-options')
+    if args.options:
+        warnings.warn('--options is deprecated in favor of --cfg-options')
+        args.cfg_options = args.options
+
+    return args
+
+
+def main():
+    args = parse_args()
+
+    cfg = Config.fromfile(args.config)
+    if args.cfg_options is not None:
+        cfg.merge_from_dict(args.cfg_options)
+
+    # set multi-process settings
+    setup_multi_processes(cfg)
+
+    # import modules from plguin/xx, registry will be updated
+    if hasattr(cfg, 'plugin'):
+        if cfg.plugin:
+            import importlib
+            if hasattr(cfg, 'plugin_dir'):
+                plugin_dir = cfg.plugin_dir
+                _module_dir = os.path.dirname(plugin_dir)
+                _module_dir = _module_dir.split('/')
+                _module_path = _module_dir[0]
+
+                for m in _module_dir[1:]:
+                    _module_path = _module_path + '.' + m
+                print(_module_path)
+                plg_lib = importlib.import_module(_module_path)
+            else:
+                # import dir is the dirpath for the config file
+                _module_dir = os.path.dirname(args.config)
+                _module_dir = _module_dir.split('/')
+                _module_path = _module_dir[0]
+                for m in _module_dir[1:]:
+                    _module_path = _module_path + '.' + m
+                plg_lib = importlib.import_module(_module_path)
+
+    # set cudnn_benchmark
+    if cfg.get('cudnn_benchmark', False):
+        torch.backends.cudnn.benchmark = True
+
+    # work_dir is determined in this priority: CLI > segment in file > filename
+    if args.work_dir is not None:
+        # update configs according to CLI args if args.work_dir is not None
+        cfg.work_dir = args.work_dir
+    elif cfg.get('work_dir', None) is None:
+        # use config filename as default work_dir if cfg.work_dir is None
+        cfg.work_dir = osp.join('./work_dirs',
+                                osp.splitext(osp.basename(args.config))[0])
+    if args.resume_from is not None:
+        cfg.resume_from = args.resume_from
+
+    if args.auto_resume:
+        cfg.auto_resume = args.auto_resume
+        warnings.warn('`--auto-resume` is only supported when mmdet'
+                      'version >= 2.20.0 for 3D detection model or'
+                      'mmsegmentation verision >= 0.21.0 for 3D'
+                      'segmentation model')
+
+    if args.gpus is not None:
+        cfg.gpu_ids = range(1)
+        warnings.warn('`--gpus` is deprecated because we only support '
+                      'single GPU mode in non-distributed training. '
+                      'Use `gpus=1` now.')
+    if args.gpu_ids is not None:
+        cfg.gpu_ids = args.gpu_ids[0:1]
+        warnings.warn('`--gpu-ids` is deprecated, please use `--gpu-id`. '
+                      'Because we only support single GPU mode in '
+                      'non-distributed training. Use the first GPU '
+                      'in `gpu_ids` now.')
+    if args.gpus is None and args.gpu_ids is None:
+        cfg.gpu_ids = [args.gpu_id]
+
+    if args.autoscale_lr:
+        # apply the linear scaling rule (https://arxiv.org/abs/1706.02677)
+        cfg.optimizer['lr'] = cfg.optimizer['lr'] * len(cfg.gpu_ids) / 8
+
+    # init distributed env first, since logger depends on the dist info.
+    if args.launcher == 'none':
+        distributed = False
+    else:
+        distributed = True
+        init_dist(args.launcher, **cfg.dist_params)
+        # re-set gpu_ids with distributed training mode
+        _, world_size = get_dist_info()
+        cfg.gpu_ids = range(world_size)
+
+    # create work_dir
+    mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))
+    # dump config
+    cfg.dump(osp.join(cfg.work_dir, osp.basename(args.config)))
+    # init the logger before other steps
+    timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
+    log_file = osp.join(cfg.work_dir, f'{timestamp}.log')
+    # specify logger name, if we still use 'mmdet', the output info will be
+    # filtered and won't be saved in the log_file
+    # TODO: ugly workaround to judge whether we are training det or seg model
+    if cfg.model.type in ['EncoderDecoder3D']:
+        logger_name = 'mmseg'
+    else:
+        logger_name = 'mmdet'
+    logger = get_root_logger(
+        log_file=log_file, log_level=cfg.log_level, name=logger_name)
+
+    # init the meta dict to record some important information such as
+    # environment info and seed, which will be logged
+    meta = dict()
+    # log env info
+    env_info_dict = collect_env()
+    env_info = '\n'.join([(f'{k}: {v}') for k, v in env_info_dict.items()])
+    dash_line = '-' * 60 + '\n'
+    logger.info('Environment info:\n' + dash_line + env_info + '\n' +
+                dash_line)
+    meta['env_info'] = env_info
+    meta['config'] = cfg.pretty_text
+
+    # log some basic info
+    logger.info(f'Distributed training: {distributed}')
+    logger.info(f'Config:\n{cfg.pretty_text}')
+
+    # set random seeds
+    seed = init_random_seed(args.seed)
+    seed = seed + dist.get_rank() if args.diff_seed else seed
+    logger.info(f'Set random seed to {seed}, '
+                f'deterministic: {args.deterministic}')
+    set_random_seed(seed, deterministic=args.deterministic)
+    cfg.seed = seed
+    meta['seed'] = seed
+    meta['exp_name'] = osp.basename(args.config)
+
+    model = build_model(
+        cfg.model,
+        train_cfg=cfg.get('train_cfg'),
+        test_cfg=cfg.get('test_cfg'))
+    model.init_weights()
+
+    logger.info(f'Model:\n{model}')
+    datasets = [build_dataset(cfg.data.train)]
+    if len(cfg.workflow) == 2:
+        val_dataset = copy.deepcopy(cfg.data.val)
+        # in case we use a dataset wrapper
+        if 'dataset' in cfg.data.train:
+            val_dataset.pipeline = cfg.data.train.dataset.pipeline
+        else:
+            val_dataset.pipeline = cfg.data.train.pipeline
+        # set test_mode=False here in deep copied config
+        # which do not affect AP/AR calculation later
+        # refer to https://mmdetection3d.readthedocs.io/en/latest/tutorials/customize_runtime.html#customize-workflow  # noqa
+        val_dataset.test_mode = False
+        datasets.append(build_dataset(val_dataset))
+    if cfg.checkpoint_config is not None:
+        # save mmdet version, config file content and class names in
+        # checkpoints as meta data
+        cfg.checkpoint_config.meta = dict(
+            mmdet_version=mmdet_version,
+            mmseg_version=mmseg_version,
+            mmdet3d_version=mmdet3d_version,
+            config=cfg.pretty_text,
+            CLASSES=datasets[0].CLASSES,
+            PALETTE=datasets[0].PALETTE  # for segmentors
+            if hasattr(datasets[0], 'PALETTE') else None)
+    # add an attribute for visualization convenience
+    model.CLASSES = datasets[0].CLASSES
+    train_model(
+        model,
+        datasets,
+        cfg,
+        distributed=distributed,
+        validate=args.validate,
+        timestamp=timestamp,
+        meta=meta)
+
+
+if __name__ == '__main__':
+    torch.multiprocessing.set_start_method('fork')
+    main()
--- a/docker-hub/FlashOCC/Flashocc/tools/update_data_coords.py
+++ b/docker-hub/FlashOCC/Flashocc/tools/update_data_coords.py
+import argparse
+import time
+from os import path as osp
+
+import mmcv
+import numpy as np
+
+from mmdet3d.core.bbox import limit_period
+
+
+def update_sunrgbd_infos(root_dir, out_dir, pkl_files):
+    print(f'{pkl_files} will be modified because '
+          f'of the refactor of the Depth coordinate system.')
+    if root_dir == out_dir:
+        print(f'Warning, you are overwriting '
+              f'the original data under {root_dir}.')
+        time.sleep(3)
+    for pkl_file in pkl_files:
+        in_path = osp.join(root_dir, pkl_file)
+        print(f'Reading from input file: {in_path}.')
+        a = mmcv.load(in_path)
+        print('Start updating:')
+        for item in mmcv.track_iter_progress(a):
+            if 'rotation_y' in item['annos']:
+                item['annos']['rotation_y'] = -item['annos']['rotation_y']
+                item['annos']['gt_boxes_upright_depth'][:, -1:] = \
+                    -item['annos']['gt_boxes_upright_depth'][:, -1:]
+
+        out_path = osp.join(out_dir, pkl_file)
+        print(f'Writing to output file: {out_path}.')
+        mmcv.dump(a, out_path, 'pkl')
+
+
+def update_outdoor_dbinfos(root_dir, out_dir, pkl_files):
+    print(f'{pkl_files} will be modified because '
+          f'of the refactor of the LIDAR coordinate system.')
+    if root_dir == out_dir:
+        print(f'Warning, you are overwriting '
+              f'the original data under {root_dir}.')
+        time.sleep(3)
+    for pkl_file in pkl_files:
+        in_path = osp.join(root_dir, pkl_file)
+        print(f'Reading from input file: {in_path}.')
+        a = mmcv.load(in_path)
+        print('Start updating:')
+        for k in a.keys():
+            print(f'Updating samples of class {k}:')
+            for item in mmcv.track_iter_progress(a[k]):
+                boxes = item['box3d_lidar'].copy()
+                # swap l, w (or dx, dy)
+                item['box3d_lidar'][3] = boxes[4]
+                item['box3d_lidar'][4] = boxes[3]
+                # change yaw
+                item['box3d_lidar'][6] = -boxes[6] - np.pi / 2
+                item['box3d_lidar'][6] = limit_period(
+                    item['box3d_lidar'][6], period=np.pi * 2)
+
+        out_path = osp.join(out_dir, pkl_file)
+        print(f'Writing to output file: {out_path}.')
+        mmcv.dump(a, out_path, 'pkl')
+
+
+def update_nuscenes_or_lyft_infos(root_dir, out_dir, pkl_files):
+
+    print(f'{pkl_files} will be modified because '
+          f'of the refactor of the LIDAR coordinate system.')
+    if root_dir == out_dir:
+        print(f'Warning, you are overwriting '
+              f'the original data under {root_dir}.')
+        time.sleep(3)
+    for pkl_file in pkl_files:
+        in_path = osp.join(root_dir, pkl_file)
+        print(f'Reading from input file: {in_path}.')
+        a = mmcv.load(in_path)
+        print('Start updating:')
+        for item in mmcv.track_iter_progress(a['infos']):
+            boxes = item['gt_boxes'].copy()
+            # swap l, w (or dx, dy)
+            item['gt_boxes'][:, 3] = boxes[:, 4]
+            item['gt_boxes'][:, 4] = boxes[:, 3]
+            # change yaw
+            item['gt_boxes'][:, 6] = -boxes[:, 6] - np.pi / 2
+            item['gt_boxes'][:, 6] = limit_period(
+                item['gt_boxes'][:, 6], period=np.pi * 2)
+
+        out_path = osp.join(out_dir, pkl_file)
+        print(f'Writing to output file: {out_path}.')
+        mmcv.dump(a, out_path, 'pkl')
+
+
+parser = argparse.ArgumentParser(description='Arg parser for data coords '
+                                 'update due to coords sys refactor.')
+parser.add_argument('dataset', metavar='kitti', help='name of the dataset')
+parser.add_argument(
+    '--root-dir',
+    type=str,
+    default='./data/kitti',
+    help='specify the root dir of dataset')
+parser.add_argument(
+    '--version',
+    type=str,
+    default='v1.0',
+    required=False,
+    help='specify the dataset version, no need for kitti')
+parser.add_argument(
+    '--out-dir',
+    type=str,
+    default=None,
+    required=False,
+    help='name of info pkl')
+args = parser.parse_args()
+
+if __name__ == '__main__':
+    if args.out_dir is None:
+        args.out_dir = args.root_dir
+    if args.dataset == 'kitti':
+        # KITTI infos is in CAM coord sys (unchanged)
+        # KITTI dbinfos is in LIDAR coord sys (changed)
+        # so we only update dbinfos
+        pkl_files = ['kitti_dbinfos_train.pkl']
+        update_outdoor_dbinfos(
+            root_dir=args.root_dir, out_dir=args.out_dir, pkl_files=pkl_files)
+    elif args.dataset == 'nuscenes':
+        # nuScenes infos is in LIDAR coord sys (changed)
+        # nuScenes dbinfos is in LIDAR coord sys (changed)
+        # so we update both infos and dbinfos
+        pkl_files = ['nuscenes_infos_val.pkl']
+        if args.version != 'v1.0-mini':
+            pkl_files.append('nuscenes_infos_train.pkl')
+        else:
+            pkl_files.append('nuscenes_infos_train_tiny.pkl')
+        update_nuscenes_or_lyft_infos(
+            root_dir=args.root_dir, out_dir=args.out_dir, pkl_files=pkl_files)
+        if args.version != 'v1.0-mini':
+            pkl_files = ['nuscenes_dbinfos_train.pkl']
+            update_outdoor_dbinfos(
+                root_dir=args.root_dir,
+                out_dir=args.out_dir,
+                pkl_files=pkl_files)
+    elif args.dataset == 'lyft':
+        # Lyft infos is in LIDAR coord sys (changed)
+        # Lyft has no dbinfos
+        # so we update infos
+        pkl_files = ['lyft_infos_train.pkl', 'lyft_infos_val.pkl']
+        update_nuscenes_or_lyft_infos(
+            root_dir=args.root_dir, out_dir=args.out_dir, pkl_files=pkl_files)
+    elif args.dataset == 'waymo':
+        # Waymo infos is in CAM coord sys (unchanged)
+        # Waymo dbinfos is in LIDAR coord sys (changed)
+        # so we only update dbinfos
+        pkl_files = ['waymo_dbinfos_train.pkl']
+        update_outdoor_dbinfos(
+            root_dir=args.root_dir, out_dir=args.out_dir, pkl_files=pkl_files)
+    elif args.dataset == 'scannet':
+        # ScanNet infos is in DEPTH coord sys (changed)
+        # but bbox is without yaw
+        # so ScanNet is unaffected
+        pass
+    elif args.dataset == 's3dis':
+        # Segmentation datasets are not affected
+        pass
+    elif args.dataset == 'sunrgbd':
+        # SUNRGBD infos is in DEPTH coord sys (changed)
+        # and bbox is with yaw
+        # so we update infos
+        pkl_files = ['sunrgbd_infos_train.pkl', 'sunrgbd_infos_val.pkl']
+        update_sunrgbd_infos(
+            root_dir=args.root_dir, out_dir=args.out_dir, pkl_files=pkl_files)
--- a/docker-hub/FlashOCC/Flashocc/tools/update_data_coords.sh
+++ b/docker-hub/FlashOCC/Flashocc/tools/update_data_coords.sh
+#!/usr/bin/env bash
+
+set -x
+export PYTHONPATH=`pwd`:$PYTHONPATH
+
+PARTITION=$1
+DATASET=$2
+GPUS=${GPUS:-1}
+GPUS_PER_NODE=${GPUS_PER_NODE:-1}
+SRUN_ARGS=${SRUN_ARGS:-""}
+JOB_NAME=update_data_coords
+
+srun -p ${PARTITION} \
+    --job-name=${JOB_NAME} \
+    --gres=gpu:${GPUS_PER_NODE} \
+    --ntasks=${GPUS} \
+    --ntasks-per-node=${GPUS_PER_NODE} \
+    --kill-on-bad-exit=1 \
+    ${SRUN_ARGS} \
+    python -u tools/update_data_coords.py ${DATASET} \
+            --root-dir ./data/${DATASET} \
+            --out-dir ./data/${DATASET}
--- a/docker-hub/FlashOCC/Flashocc/tools/vis_occ.py
+++ b/docker-hub/FlashOCC/Flashocc/tools/vis_occ.py
+import os
+import cv2
+import logging
+import argparse
+import importlib
+import torch
+import numpy as np
+from tqdm import tqdm
+from mmcv import Config, DictAction
+from mmdet.apis import set_random_seed
+from mmdet3d.datasets import build_dataset, build_dataloader
+from mmcv.parallel import MMDataParallel
+from mmcv.runner import load_checkpoint
+from mmdet3d.models import build_model
+import mmdet
+from mmcv.runner import (get_dist_info, init_dist, load_checkpoint,
+                         wrap_fp16_model)
+
+import sys
+sys.path.insert(0, os.getcwd())
+
+if mmdet.__version__ > '2.23.0':
+    # If mmdet version > 2.23.0, setup_multi_processes would be imported and
+    # used from mmdet instead of mmdet3d.
+    from mmdet.utils import setup_multi_processes
+else:
+    from mmdet3d.utils import setup_multi_processes
+
+try:
+    # If mmdet version > 2.23.0, compat_cfg would be imported and
+    # used from mmdet instead of mmdet3d.
+    from mmdet.utils import compat_cfg
+except ImportError:
+    from mmdet3d.utils import compat_cfg
+
+# Occupancy class names; a class id is an index into this list. The last
+# entry ('free') is what occ2img treats as empty space.
+occ_class_names = [
+    'others', 'barrier', 'bicycle', 'bus', 'car', 'construction_vehicle',
+    'motorcycle', 'pedestrian', 'traffic_cone', 'trailer', 'truck',
+    'driveable_surface', 'other_flat', 'sidewalk',
+    'terrain', 'manmade', 'vegetation', 'free'
+]
+
+# One colour per entry of occ_class_names, in the same order. Each row has
+# four channels; occ2img keeps only the first three.
+color_map = np.array([
+    [0, 0, 0, 255],    # others
+    [255, 120, 50, 255],  # barrier              orangey
+    [255, 192, 203, 255],  # bicycle              pink
+    [255, 255, 0, 255],  # bus                  yellow
+    [0, 150, 245, 255],  # car                  blue
+    [0, 255, 255, 255],  # construction_vehicle cyan
+    [200, 180, 0, 255],  # motorcycle           dark orange
+    [255, 0, 0, 255],  # pedestrian           red
+    [255, 240, 150, 255],  # traffic_cone         light yellow
+    [135, 60, 0, 255],  # trailer              brown
+    [160, 32, 240, 255],  # truck                purple
+    [255, 0, 255, 255],  # driveable_surface    dark pink
+    [175,   0,  75, 255],       # other_flat           dark red
+    [75, 0, 75, 255],  # sidewalk             dark purple
+    [150, 240, 80, 255],  # terrain              light green
+    [230, 230, 250, 255],  # manmade              white
+    [0, 175, 0, 255],  # vegetation           green
+    [255, 255, 255, 255],  # free                 white
+], dtype=np.uint8)
+
+# # from matplotlib import colors
+# # hex_code_list = [
+# #     '#000000', '#D3D3D3', '#BC8F8F', '#F08080', '#A52A2A', '#FF0000', '#FFA07A', '#A0522D', '#FFE4C4', '#FFE4B5',  \
+# #     '#DAA520', '#FFD700', '#F0E68C', '#BDB76B', '#808000', '#FFFF00', '#9ACD32', '#7FFF00', '#8FBC8F', '#90EE90',  \
+# #     '#32CD32', '#008000', '#00FF00', '#00FA9A', '#7FFFD4', '#48D1CC', '#2F4F4F', '#ADD8E6', '#87CEFA', '#DC143C',  \
+# #     '#696969', '#9370DB', '#8A2BE2', '#9400D3', '#DDA0DD', '#FF00FF', '#C71585', '#DB7093', '#FFB6C1', '#bf9b0c',  \
+# #     '#01889f', '#bb3f3f', '#1805db', '#48c072', '#fffd37', '#c44240', '#6140ef', '#ceaefa', '#04f489', '#c6f808',  \
+# #     '#507b9c', '#cffdbc', '#ac7e04', '#01386a', '#ffb7ce', '#ffd1df', '#D2691E', '#FFDAB9', '#a55af4', '#95d0fc',  \
+# #     ]
+# # hex_code_list = np.array(hex_code_list).reshape(6,10).transpose(1,0).reshape(-1)
+# # pano_color_map = np.array([[int(value * 255) for value in colors.hex2color(hex_code)] for hex_code in hex_code_list], dtype=np.uint8)
+
+import matplotlib.pyplot as plt
+from scipy.ndimage import rotate
+def draw_fig(tensor, name='tensor_image_colored_no_white.png'):
+    tensor = tensor.squeeze(0)
+    tensor = rotate(tensor, -90, reshape=False)
+    tensor = np.flip(tensor, axis=1)
+    fig, ax = plt.subplots(figsize=(4, 4))
+    ax.imshow(tensor, cmap='viridis')
+    plt.axis('off')
+    plt.savefig(name, bbox_inches='tight', pad_inches=0)
+    plt.clf()
+    
+def generate_rgb_color(number):
+    red = (number % 256)
+    green = ((number // 256) % 256)
+    blue = ((number // 65536) % 256)
+    return [red, green, blue]
+# 256 random RGB triples, indexed by instance id in occ2img.
+# The numbers are drawn from NumPy's global RNG when this module is imported,
+# i.e. before main() calls set_random_seed.
+# NOTE(review): the palette therefore presumably differs between runs --
+# confirm whether reproducible instance colours are needed.
+pano_color_map = np.array([generate_rgb_color(number) for number in np.random.randint(0, 65536*256, 256)])
+
+# Indices into occ_class_names whose pixels occ2img recolours per instance:
+# bicycle, bus, car, construction_vehicle, motorcycle, pedestrian, trailer,
+# truck.
+inst_class_ids=[2, 3, 4, 5, 6, 7, 9, 10]
+
+def occ2img(semantics=None, is_pano=False, panoptics=None):
+    H, W, D = semantics.shape
+
+    free_id = len(occ_class_names) - 1
+    semantics_2d = np.ones([H, W], dtype=np.int32) * free_id
+    for i in range(D):
+        semantics_i = semantics[..., i]
+        non_free_mask = (semantics_i != free_id)
+        semantics_2d[non_free_mask] = semantics_i[non_free_mask]
+
+    viz = color_map[semantics_2d]
+    viz = viz[..., :3]
+
+    inst_mask = np.zeros_like(semantics_2d).astype(np.bool)
+    for ind in inst_class_ids:
+        inst_mask[semantics_2d==ind] = True
+    
+    if is_pano:
+        panoptics_2d = np.ones([H, W], dtype=np.int32) * 0
+        for i in range(D):
+            panoptics_i = panoptics[..., i]
+            semantics_i = semantics[..., i]
+            non_free_mask = (semantics_i != free_id)
+            panoptics_2d[non_free_mask] = panoptics_i[non_free_mask]
+        
+        
+        # # panoptics_2d = panoptics_2d%60
+        
+        
+        viz_pano = pano_color_map[panoptics_2d]
+        viz[inst_mask,:] = viz_pano[inst_mask,:]
+
+    viz = cv2.resize(viz, dsize=(800, 800))
+    return viz
+
+def main():
+    parser = argparse.ArgumentParser(description='Validate a detector')
+    parser.add_argument('--config', required=True)
+    parser.add_argument('--weights', required=True)
+    parser.add_argument('--viz-dir', required=True)
+    parser.add_argument('--override', nargs='+', action=DictAction)
+    parser.add_argument(
+        '--launcher',
+        choices=['none', 'pytorch', 'slurm', 'mpi'],
+        default='none',
+        help='job launcher')
+    parser.add_argument('--draw-sem-gt', action='store_true')
+    parser.add_argument('--draw-pano-gt', action='store_true')
+    parser.add_argument('--surround-view-img', action='store_true')
+    parser.add_argument('--surround-pano-gt', action='store_true')
+    args = parser.parse_args()
+
+    # parse configs
+    cfgs = Config.fromfile(args.config)
+    if args.override is not None:
+        cfgs.merge_from_dict(args.override)
+
+    cfgs = compat_cfg(cfgs)
+
+    # set multi-process settings
+    setup_multi_processes(cfgs)
+    
+    # import modules from plguin/xx, registry will be updated
+    if hasattr(cfgs, 'plugin'):
+        if cfgs.plugin:
+            import importlib
+            if hasattr(cfgs, 'plugin_dir'):
+                plugin_dir = cfgs.plugin_dir
+                _module_dir = os.path.dirname(plugin_dir)
+                _module_dir = _module_dir.split('/')
+                _module_path = _module_dir[0]
+
+                for m in _module_dir[1:]:
+                    _module_path = _module_path + '.' + m
+                print(_module_path)
+                plg_lib = importlib.import_module(_module_path)
+            else:
+                # import dir is the dirpath for the config file
+                _module_dir = os.path.dirname(args.config)
+                _module_dir = _module_dir.split('/')
+                _module_path = _module_dir[0]
+                for m in _module_dir[1:]:
+                    _module_path = _module_path + '.' + m
+                plg_lib = importlib.import_module(_module_path)
+
+    # use val-mini for visualization
+    #cfgs.data.val.ann_file = cfgs.data.val.ann_file.replace('val', 'val_mini')
+
+    # MMCV, please shut up
+    from mmcv.utils.logging import logger_initialized
+    logger_initialized['root'] = logging.Logger(__name__, logging.WARNING)
+    logger_initialized['mmcv'] = logging.Logger(__name__, logging.WARNING)
+
+    # you need one GPU
+    assert torch.cuda.is_available()
+    # assert torch.cuda.device_count() == 1
+
+    # logging
+    logging.info('Using GPU: %s' % torch.cuda.get_device_name(0))
+
+    # random seed
+    logging.info('Setting random seed: 0')
+    set_random_seed(0, deterministic=True)
+
+    logging.info('Loading validation set from %s' % cfgs.data.val.data_root)
+
+    # init distributed env first, since logger depends on the dist info.
+    if args.launcher == 'none':
+        distributed = False
+    else:
+        distributed = True
+        init_dist(args.launcher, **cfgs.dist_params)
+        
+    test_dataloader_default_args = dict(
+        samples_per_gpu=1, workers_per_gpu=2, dist=distributed, shuffle=False)
+    test_loader_cfg = {
+        **test_dataloader_default_args,
+        **cfgs.data.get('test_dataloader', {})
+    }
+    dataset = build_dataset(cfgs.data.test)
+    test_loader_cfg['workers_per_gpu'] = 2
+    val_loader = build_dataloader(dataset, **test_loader_cfg)
+    # val_dataset = build_dataset(cfgs.data.test)
+    # val_loader = build_dataloader(
+    #     val_dataset,
+    #     samples_per_gpu=1,
+    #     workers_per_gpu=1,
+    #     num_gpus=1,
+    #     dist=False,
+    #     shuffle=False,
+    #     seed=0,
+    # )
+
+    logging.info('Creating model: %s' % cfgs.model.type)
+    model = build_model(cfgs.model)
+    model.cuda()
+    model = MMDataParallel(model, [0])
+    model.eval()
+
+    logging.info('Loading checkpoint from %s' % args.weights)
+    load_checkpoint(
+        model, args.weights, map_location='cuda', strict=True,
+        logger=logging.Logger(__name__, logging.ERROR)
+    )
+
+    if not os.path.exists(args.viz_dir):
+        os.makedirs(args.viz_dir)
+        
+    for i, data in tqdm(enumerate(val_loader)):
+
+        with torch.no_grad():
+            occ_pred = model(return_loss=False, rescale=True, **data)[0]
+
+            if False:
+                occ_bev_feature = occ_pred['occ_bev_feature']
+                outs = occ_pred['outs']
+
+                tensor = occ_bev_feature.max(dim=1)[0].cpu()
+                draw_fig(tensor, name=os.path.join(args.viz_dir, '%04d-occ_bev_feature.jpg' % i))
+                print(os.path.join(args.viz_dir, '%04d-occ_bev_feature.jpg' % i))
+
+                tensor = outs[0][0]['heatmap'].sigmoid().sum(dim=1)[0].cpu()
+                draw_fig(tensor, name=os.path.join(args.viz_dir, '%04d-heatmap.jpg' % i))
+                print(os.path.join(args.viz_dir, '%04d-heatmap.jpg' % i))
+
+                tensor = outs[0][0]['reg'][0,0].cpu()
+                tensor = outs[0][0]['reg'][0,1].cpu()
+                tensor = ((outs[0][0]['reg'][0,0]**2+outs[0][0]['reg'][0,1]**2)**0.5).unsqueeze(dim=0).cpu()
+                draw_fig(tensor, name=os.path.join(args.viz_dir, '%04d-reg.jpg' % i))
+                print(os.path.join(args.viz_dir, '%04d-reg.jpg' % i))
+
+                tensor = outs[0][0]['height'][0,0].cpu()
+                draw_fig(tensor, name=os.path.join(args.viz_dir, '%04d-height.jpg' % i))
+                print(os.path.join(args.viz_dir, '%04d-height.jpg' % i))
+
+
+            sem_pred = occ_pred['pred_occ']
+            cv2.imwrite(os.path.join(args.viz_dir, '%04d-sem.jpg' % i), occ2img(semantics=sem_pred.cpu())[..., ::-1])
+            print(os.path.join(args.viz_dir, '%04d-sem.jpg' % i))
+            
+            inst_pred = occ_pred['pano_inst']
+            cv2.imwrite(os.path.join(args.viz_dir, '%04d-inst.jpg' % i), occ2img(semantics=sem_pred.cpu(), is_pano=True, panoptics=inst_pred.cpu())[..., ::-1])
+            print(os.path.join(args.viz_dir, '%04d-inst.jpg' % i))
+            
+            if args.surround_view_img:
+                img = data['img_inputs'][0][0][0][::9].cpu().numpy()
+                mean = np.array([123.675, 116.28, 103.53], dtype=np.float32).reshape(1,3,1,1)
+                std = np.array([58.395, 57.12, 57.375], dtype=np.float32).reshape(1,3,1,1)
+                img = img*std + mean
+                img = img.astype(np.uint8).transpose(0,2,3,1)
+                up = np.concatenate([img[0,...], img[1,...], img[2,...]], 1)
+                down = np.concatenate([img[3,...], img[4,...], img[5,...]], 1)
+                out = np.concatenate([up, down], 0)
+                cv2.imwrite(os.path.join(args.viz_dir, '%04d-rgb.jpg' % i), out)
+                print(os.path.join(args.viz_dir, '%04d-rgb.jpg' % i))
+
+            if args.draw_sem_gt or args.draw_pano_gt:
+                occ_gt = np.load(os.path.join(val_loader.dataset.data_infos[i]['occ_path'].\
+                    replace('data/nuscenes/gts/', 'data/nuscenes/occ3d_panoptic/'), 'labels.npz'))
+                pano_gt = occ_gt['instances']
+                sem_gt = occ_gt['semantics']
+
+            if args.draw_sem_gt:
+                # sem_gt = np.array(data['voxel_semantics'][0])[0]
+                cv2.imwrite(os.path.join(args.viz_dir, '%04d-sem-gt.jpg' % i), occ2img(semantics=sem_gt.cpu())[..., ::-1])
+
+            if args.draw_pano_gt:
+                cv2.imwrite(os.path.join(args.viz_dir, '%04d-pano-gt.jpg' % i), occ2img(semantics=sem_gt, is_pano=True, panoptics=pano_gt)[..., ::-1])
+                print(os.path.join(args.viz_dir, '%04d-pano-gt.jpg' % i))
+
+if __name__ == '__main__':
+    main()
+
+'''
+exp_name=flashoccv2-r50-depth-tiny-pano
+python tools/vis_occ.py --config projects/configs/flashoccv2/${exp_name}.py --weights work_dirs/${exp_name}/epoch_24_ema.pth --viz-dir vis/${exp_name} --draw-sem-gt
+
+exp_name=flashoccv2-r50-depth4d-longterm8f-pano
+python tools/vis_occ.py --config projects/configs/flashoccv2/${exp_name}.py --weights work_dirs/${exp_name}/epoch_24_ema.pth --viz-dir vis/${exp_name} --draw-pano-gt #--draw-gt
+'''
\ No newline at end of file
--- a/docker-hub/FlashOCC/Flashocc/work_dirs/flashocc-r50/20260403_162421.log
+++ b/docker-hub/FlashOCC/Flashocc/work_dirs/flashocc-r50/20260403_162421.log
+2026-04-03 16:24:21,502 - mmdet - INFO - Environment info:
+------------------------------------------------------------
+sys.platform: linux
+Python: 3.10.12 (main, Aug 15 2025, 14:32:43) [GCC 11.4.0]
+CUDA available: True
+GPU 0,1,2,3,4,5,6,7: BW1000_H
+CUDA_HOME: /opt/dtk
+NVCC: Not Available
+GCC: x86_64-linux-gnu-gcc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0
+PyTorch: 2.5.1
+PyTorch compiling details: PyTorch built with:
+  - GCC 10.3
+  - C++ Version: 201703
+  - Intel(R) Math Kernel Library Version 2020.0.4 Product Build 20200917 for Intel(R) 64 architecture applications
+  - OpenMP 201511 (a.k.a. OpenMP 4.5)
+  - LAPACK is enabled (usually provided by MKL)
+  - NNPACK is enabled
+  - CPU capability usage: AVX512
+  - HIP Runtime 6.3.25521
+  - MIOpen 2.18.0
+  - Magma 2.8.0
+  - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, CXX_COMPILER=/opt/rh/gcc-toolset-10/root/usr/bin/c++, CXX_FLAGS= -D_GLIBCXX_USE_CXX11_ABI=1 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOCUPTI -DLIBKINETO_NOXPUPTI=ON -DUSE_FBGEMM -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wsuggest-override -Wno-psabi -Wno-error=old-style-cast -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Wno-stringop-overflow, FORCE_FALLBACK_CUDA_MPI=1, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, TORCH_VERSION=2.5.1, USE_CUDA=0, USE_CUDNN=OFF, USE_CUSPARSELT=OFF, USE_EXCEPTION_PTR=1, USE_GFLAGS=1, USE_GLOG=1, USE_GLOO=1, USE_MKL=ON, USE_MKLDNN=0, USE_MPI=1, USE_NCCL=1, USE_NNPACK=ON, USE_OPENMP=1, USE_ROCM=ON, USE_ROCM_KERNEL_ASSERT=OFF, 
+
+TorchVision: 0.20.1
+OpenCV: 4.12.0
+MMCV: 1.6.1
+MMCV Compiler: GCC 10.3
+MMCV CUDA Compiler: rocm not available
+MMDetection: 2.25.1
+MMSegmentation: 0.25.0
+MMDetection3D: 1.0.0rc4+
+spconv2.0: False
+------------------------------------------------------------
+
+2026-04-03 16:24:22,070 - mmdet - INFO - Distributed training: True
+2026-04-03 16:24:22,636 - mmdet - INFO - Config:
+point_cloud_range = [-51.2, -51.2, -5.0, 51.2, 51.2, 3.0]
+class_names = [
+    'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',
+    'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
+]
+dataset_type = 'NuScenesDatasetOccpancy'
+data_root = 'data/nuscenes/'
+input_modality = dict(
+    use_lidar=False,
+    use_camera=True,
+    use_radar=False,
+    use_map=False,
+    use_external=False)
+file_client_args = dict(backend='disk')
+train_pipeline = [
+    dict(
+        type='PrepareImageInputs',
+        is_train=True,
+        data_config=dict(
+            cams=[
+                'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT',
+                'CAM_BACK_LEFT', 'CAM_BACK', 'CAM_BACK_RIGHT'
+            ],
+            Ncams=6,
+            input_size=(256, 704),
+            src_size=(900, 1600),
+            resize=(-0.06, 0.11),
+            rot=(-5.4, 5.4),
+            flip=True,
+            crop_h=(0.0, 0.0),
+            resize_test=0.0),
+        sequential=False),
+    dict(
+        type='LoadAnnotationsBEVDepth',
+        bda_aug_conf=dict(
+            rot_lim=(-0.0, 0.0),
+            scale_lim=(1.0, 1.0),
+            flip_dx_ratio=0.5,
+            flip_dy_ratio=0.5),
+        classes=[
+            'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
+            'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
+        ],
+        is_train=True),
+    dict(type='LoadOccGTFromFile'),
+    dict(
+        type='LoadPointsFromFile',
+        coord_type='LIDAR',
+        load_dim=5,
+        use_dim=5,
+        file_client_args=dict(backend='disk')),
+    dict(
+        type='PointToMultiViewDepth',
+        downsample=1,
+        grid_config=dict(
+            x=[-40, 40, 0.4],
+            y=[-40, 40, 0.4],
+            z=[-1, 5.4, 6.4],
+            depth=[1.0, 45.0, 0.5])),
+    dict(
+        type='DefaultFormatBundle3D',
+        class_names=[
+            'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
+            'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
+        ]),
+    dict(
+        type='Collect3D',
+        keys=[
+            'img_inputs', 'gt_depth', 'voxel_semantics', 'mask_lidar',
+            'mask_camera'
+        ])
+]
+test_pipeline = [
+    dict(
+        type='PrepareImageInputs',
+        data_config=dict(
+            cams=[
+                'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT',
+                'CAM_BACK_LEFT', 'CAM_BACK', 'CAM_BACK_RIGHT'
+            ],
+            Ncams=6,
+            input_size=(256, 704),
+            src_size=(900, 1600),
+            resize=(-0.06, 0.11),
+            rot=(-5.4, 5.4),
+            flip=True,
+            crop_h=(0.0, 0.0),
+            resize_test=0.0),
+        sequential=False),
+    dict(
+        type='LoadAnnotationsBEVDepth',
+        bda_aug_conf=dict(
+            rot_lim=(-0.0, 0.0),
+            scale_lim=(1.0, 1.0),
+            flip_dx_ratio=0.5,
+            flip_dy_ratio=0.5),
+        classes=[
+            'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
+            'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
+        ],
+        is_train=False),
+    dict(
+        type='LoadPointsFromFile',
+        coord_type='LIDAR',
+        load_dim=5,
+        use_dim=5,
+        file_client_args=dict(backend='disk')),
+    dict(
+        type='MultiScaleFlipAug3D',
+        img_scale=(1333, 800),
+        pts_scale_ratio=1,
+        flip=False,
+        transforms=[
+            dict(
+                type='DefaultFormatBundle3D',
+                class_names=[
+                    'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
+                    'barrier', 'motorcycle', 'bicycle', 'pedestrian',
+                    'traffic_cone'
+                ],
+                with_label=False),
+            dict(type='Collect3D', keys=['points', 'img_inputs'])
+        ])
+]
+eval_pipeline = [
+    dict(
+        type='LoadPointsFromFile',
+        coord_type='LIDAR',
+        load_dim=5,
+        use_dim=5,
+        file_client_args=dict(backend='disk')),
+    dict(
+        type='LoadPointsFromMultiSweeps',
+        sweeps_num=10,
+        file_client_args=dict(backend='disk')),
+    dict(
+        type='DefaultFormatBundle3D',
+        class_names=[
+            'car', 'truck', 'trailer', 'bus', 'construction_vehicle',
+            'bicycle', 'motorcycle', 'pedestrian', 'traffic_cone', 'barrier'
+        ],
+        with_label=False),
+    dict(type='Collect3D', keys=['points'])
+]
+data = dict(
+    samples_per_gpu=24,
+    workers_per_gpu=24,
+    train=dict(
+        type='NuScenesDatasetOccpancy',
+        data_root='data/nuscenes/',
+        ann_file='data/nuscenes/bevdetv2-nuscenes_infos_train.pkl',
+        pipeline=[
+            dict(
+                type='PrepareImageInputs',
+                is_train=True,
+                data_config=dict(
+                    cams=[
+                        'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT',
+                        'CAM_BACK_LEFT', 'CAM_BACK', 'CAM_BACK_RIGHT'
+                    ],
+                    Ncams=6,
+                    input_size=(256, 704),
+                    src_size=(900, 1600),
+                    resize=(-0.06, 0.11),
+                    rot=(-5.4, 5.4),
+                    flip=True,
+                    crop_h=(0.0, 0.0),
+                    resize_test=0.0),
+                sequential=False),
+            dict(
+                type='LoadAnnotationsBEVDepth',
+                bda_aug_conf=dict(
+                    rot_lim=(-0.0, 0.0),
+                    scale_lim=(1.0, 1.0),
+                    flip_dx_ratio=0.5,
+                    flip_dy_ratio=0.5),
+                classes=[
+                    'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
+                    'barrier', 'motorcycle', 'bicycle', 'pedestrian',
+                    'traffic_cone'
+                ],
+                is_train=True),
+            dict(type='LoadOccGTFromFile'),
+            dict(
+                type='LoadPointsFromFile',
+                coord_type='LIDAR',
+                load_dim=5,
+                use_dim=5,
+                file_client_args=dict(backend='disk')),
+            dict(
+                type='PointToMultiViewDepth',
+                downsample=1,
+                grid_config=dict(
+                    x=[-40, 40, 0.4],
+                    y=[-40, 40, 0.4],
+                    z=[-1, 5.4, 6.4],
+                    depth=[1.0, 45.0, 0.5])),
+            dict(
+                type='DefaultFormatBundle3D',
+                class_names=[
+                    'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
+                    'barrier', 'motorcycle', 'bicycle', 'pedestrian',
+                    'traffic_cone'
+                ]),
+            dict(
+                type='Collect3D',
+                keys=[
+                    'img_inputs', 'gt_depth', 'voxel_semantics', 'mask_lidar',
+                    'mask_camera'
+                ])
+        ],
+        classes=[
+            'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
+            'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
+        ],
+        modality=dict(
+            use_lidar=False,
+            use_camera=True,
+            use_radar=False,
+            use_map=False,
+            use_external=False),
+        test_mode=False,
+        box_type_3d='LiDAR',
+        use_valid_flag=True,
+        stereo=False,
+        filter_empty_gt=False,
+        img_info_prototype='bevdet'),
+    val=dict(
+        type='NuScenesDatasetOccpancy',
+        data_root='data/nuscenes/',
+        ann_file='data/nuscenes/bevdetv2-nuscenes_infos_val.pkl',
+        pipeline=[
+            dict(
+                type='PrepareImageInputs',
+                data_config=dict(
+                    cams=[
+                        'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT',
+                        'CAM_BACK_LEFT', 'CAM_BACK', 'CAM_BACK_RIGHT'
+                    ],
+                    Ncams=6,
+                    input_size=(256, 704),
+                    src_size=(900, 1600),
+                    resize=(-0.06, 0.11),
+                    rot=(-5.4, 5.4),
+                    flip=True,
+                    crop_h=(0.0, 0.0),
+                    resize_test=0.0),
+                sequential=False),
+            dict(
+                type='LoadAnnotationsBEVDepth',
+                bda_aug_conf=dict(
+                    rot_lim=(-0.0, 0.0),
+                    scale_lim=(1.0, 1.0),
+                    flip_dx_ratio=0.5,
+                    flip_dy_ratio=0.5),
+                classes=[
+                    'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
+                    'barrier', 'motorcycle', 'bicycle', 'pedestrian',
+                    'traffic_cone'
+                ],
+                is_train=False),
+            dict(
+                type='LoadPointsFromFile',
+                coord_type='LIDAR',
+                load_dim=5,
+                use_dim=5,
+                file_client_args=dict(backend='disk')),
+            dict(
+                type='MultiScaleFlipAug3D',
+                img_scale=(1333, 800),
+                pts_scale_ratio=1,
+                flip=False,
+                transforms=[
+                    dict(
+                        type='DefaultFormatBundle3D',
+                        class_names=[
+                            'car', 'truck', 'construction_vehicle', 'bus',
+                            'trailer', 'barrier', 'motorcycle', 'bicycle',
+                            'pedestrian', 'traffic_cone'
+                        ],
+                        with_label=False),
+                    dict(type='Collect3D', keys=['points', 'img_inputs'])
+                ])
+        ],
+        classes=[
+            'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
+            'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
+        ],
+        modality=dict(
+            use_lidar=False,
+            use_camera=True,
+            use_radar=False,
+            use_map=False,
+            use_external=False),
+        test_mode=True,
+        box_type_3d='LiDAR',
+        stereo=False,
+        filter_empty_gt=False,
+        img_info_prototype='bevdet'),
+    test=dict(
+        type='NuScenesDatasetOccpancy',
+        data_root='data/nuscenes/',
+        ann_file='data/nuscenes/bevdetv2-nuscenes_infos_val.pkl',
+        pipeline=[
+            dict(
+                type='PrepareImageInputs',
+                data_config=dict(
+                    cams=[
+                        'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT',
+                        'CAM_BACK_LEFT', 'CAM_BACK', 'CAM_BACK_RIGHT'
+                    ],
+                    Ncams=6,
+                    input_size=(256, 704),
+                    src_size=(900, 1600),
+                    resize=(-0.06, 0.11),
+                    rot=(-5.4, 5.4),
+                    flip=True,
+                    crop_h=(0.0, 0.0),
+                    resize_test=0.0),
+                sequential=False),
+            dict(
+                type='LoadAnnotationsBEVDepth',
+                bda_aug_conf=dict(
+                    rot_lim=(-0.0, 0.0),
+                    scale_lim=(1.0, 1.0),
+                    flip_dx_ratio=0.5,
+                    flip_dy_ratio=0.5),
+                classes=[
+                    'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
+                    'barrier', 'motorcycle', 'bicycle', 'pedestrian',
+                    'traffic_cone'
+                ],
+                is_train=False),
+            dict(
+                type='LoadPointsFromFile',
+                coord_type='LIDAR',
+                load_dim=5,
+                use_dim=5,
+                file_client_args=dict(backend='disk')),
+            dict(
+                type='MultiScaleFlipAug3D',
+                img_scale=(1333, 800),
+                pts_scale_ratio=1,
+                flip=False,
+                transforms=[
+                    dict(
+                        type='DefaultFormatBundle3D',
+                        class_names=[
+                            'car', 'truck', 'construction_vehicle', 'bus',
+                            'trailer', 'barrier', 'motorcycle', 'bicycle',
+                            'pedestrian', 'traffic_cone'
+                        ],
+                        with_label=False),
+                    dict(type='Collect3D', keys=['points', 'img_inputs'])
+                ])
+        ],
+        classes=[
+            'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
+            'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
+        ],
+        modality=dict(
+            use_lidar=False,
+            use_camera=True,
+            use_radar=False,
+            use_map=False,
+            use_external=False),
+        test_mode=True,
+        box_type_3d='LiDAR',
+        stereo=False,
+        filter_empty_gt=False,
+        img_info_prototype='bevdet'))
+evaluation = dict(
+    interval=1,
+    pipeline=[
+        dict(
+            type='PrepareImageInputs',
+            data_config=dict(
+                cams=[
+                    'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT',
+                    'CAM_BACK_LEFT', 'CAM_BACK', 'CAM_BACK_RIGHT'
+                ],
+                Ncams=6,
+                input_size=(256, 704),
+                src_size=(900, 1600),
+                resize=(-0.06, 0.11),
+                rot=(-5.4, 5.4),
+                flip=True,
+                crop_h=(0.0, 0.0),
+                resize_test=0.0),
+            sequential=False),
+        dict(
+            type='LoadAnnotationsBEVDepth',
+            bda_aug_conf=dict(
+                rot_lim=(-0.0, 0.0),
+                scale_lim=(1.0, 1.0),
+                flip_dx_ratio=0.5,
+                flip_dy_ratio=0.5),
+            classes=[
+                'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
+                'barrier', 'motorcycle', 'bicycle', 'pedestrian',
+                'traffic_cone'
+            ],
+            is_train=False),
+        dict(
+            type='LoadPointsFromFile',
+            coord_type='LIDAR',
+            load_dim=5,
+            use_dim=5,
+            file_client_args=dict(backend='disk')),
+        dict(
+            type='MultiScaleFlipAug3D',
+            img_scale=(1333, 800),
+            pts_scale_ratio=1,
+            flip=False,
+            transforms=[
+                dict(
+                    type='DefaultFormatBundle3D',
+                    class_names=[
+                        'car', 'truck', 'construction_vehicle', 'bus',
+                        'trailer', 'barrier', 'motorcycle', 'bicycle',
+                        'pedestrian', 'traffic_cone'
+                    ],
+                    with_label=False),
+                dict(type='Collect3D', keys=['points', 'img_inputs'])
+            ])
+    ],
+    start=20)
+checkpoint_config = dict(interval=1, max_keep_ckpts=5)
+log_config = dict(
+    interval=1,
+    hooks=[dict(type='TextLoggerHook'),
+           dict(type='TensorboardLoggerHook')])
+dist_params = dict(backend='nccl')
+log_level = 'INFO'
+work_dir = './work_dirs/flashocc-r50'
+load_from = 'ckpts/bevdet-r50-cbgs.pth'
+resume_from = None
+workflow = [('train', 1)]
+opencv_num_threads = 0
+mp_start_method = 'fork'
+plugin = True
+plugin_dir = 'projects/mmdet3d_plugin/'
+data_config = dict(
+    cams=[
+        'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT', 'CAM_BACK_LEFT',
+        'CAM_BACK', 'CAM_BACK_RIGHT'
+    ],
+    Ncams=6,
+    input_size=(256, 704),
+    src_size=(900, 1600),
+    resize=(-0.06, 0.11),
+    rot=(-5.4, 5.4),
+    flip=True,
+    crop_h=(0.0, 0.0),
+    resize_test=0.0)
+grid_config = dict(
+    x=[-40, 40, 0.4],
+    y=[-40, 40, 0.4],
+    z=[-1, 5.4, 6.4],
+    depth=[1.0, 45.0, 0.5])
+voxel_size = [0.1, 0.1, 0.2]
+numC_Trans = 64
+model = dict(
+    type='BEVDetOCC',
+    img_backbone=dict(
+        type='ResNet',
+        depth=50,
+        num_stages=4,
+        out_indices=(2, 3),
+        frozen_stages=-1,
+        norm_cfg=dict(type='BN', requires_grad=True),
+        norm_eval=False,
+        with_cp=True,
+        style='pytorch'),
+    img_neck=dict(
+        type='CustomFPN',
+        in_channels=[1024, 2048],
+        out_channels=256,
+        num_outs=1,
+        start_level=0,
+        out_ids=[0]),
+    img_view_transformer=dict(
+        type='LSSViewTransformer',
+        grid_config=dict(
+            x=[-40, 40, 0.4],
+            y=[-40, 40, 0.4],
+            z=[-1, 5.4, 6.4],
+            depth=[1.0, 45.0, 0.5]),
+        input_size=(256, 704),
+        in_channels=256,
+        out_channels=64,
+        sid=False,
+        collapse_z=True,
+        downsample=16),
+    img_bev_encoder_backbone=dict(
+        type='CustomResNet', numC_input=64, num_channels=[128, 256, 512]),
+    img_bev_encoder_neck=dict(
+        type='FPN_LSS', in_channels=640, out_channels=256),
+    occ_head=dict(
+        type='BEVOCCHead2D',
+        in_dim=256,
+        out_dim=256,
+        Dz=16,
+        use_mask=True,
+        num_classes=18,
+        use_predicter=True,
+        class_balance=False,
+        loss_occ=dict(
+            type='CrossEntropyLoss',
+            use_sigmoid=False,
+            ignore_index=255,
+            loss_weight=1.0)))
+bda_aug_conf = dict(
+    rot_lim=(-0.0, 0.0),
+    scale_lim=(1.0, 1.0),
+    flip_dx_ratio=0.5,
+    flip_dy_ratio=0.5)
+share_data_config = dict(
+    type='NuScenesDatasetOccpancy',
+    data_root='data/nuscenes/',
+    classes=[
+        'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',
+        'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
+    ],
+    modality=dict(
+        use_lidar=False,
+        use_camera=True,
+        use_radar=False,
+        use_map=False,
+        use_external=False),
+    stereo=False,
+    filter_empty_gt=False,
+    img_info_prototype='bevdet')
+test_data_config = dict(
+    pipeline=[
+        dict(
+            type='PrepareImageInputs',
+            data_config=dict(
+                cams=[
+                    'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT',
+                    'CAM_BACK_LEFT', 'CAM_BACK', 'CAM_BACK_RIGHT'
+                ],
+                Ncams=6,
+                input_size=(256, 704),
+                src_size=(900, 1600),
+                resize=(-0.06, 0.11),
+                rot=(-5.4, 5.4),
+                flip=True,
+                crop_h=(0.0, 0.0),
+                resize_test=0.0),
+            sequential=False),
+        dict(
+            type='LoadAnnotationsBEVDepth',
+            bda_aug_conf=dict(
+                rot_lim=(-0.0, 0.0),
+                scale_lim=(1.0, 1.0),
+                flip_dx_ratio=0.5,
+                flip_dy_ratio=0.5),
+            classes=[
+                'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
+                'barrier', 'motorcycle', 'bicycle', 'pedestrian',
+                'traffic_cone'
+            ],
+            is_train=False),
+        dict(
+            type='LoadPointsFromFile',
+            coord_type='LIDAR',
+            load_dim=5,
+            use_dim=5,
+            file_client_args=dict(backend='disk')),
+        dict(
+            type='MultiScaleFlipAug3D',
+            img_scale=(1333, 800),
+            pts_scale_ratio=1,
+            flip=False,
+            transforms=[
+                dict(
+                    type='DefaultFormatBundle3D',
+                    class_names=[
+                        'car', 'truck', 'construction_vehicle', 'bus',
+                        'trailer', 'barrier', 'motorcycle', 'bicycle',
+                        'pedestrian', 'traffic_cone'
+                    ],
+                    with_label=False),
+                dict(type='Collect3D', keys=['points', 'img_inputs'])
+            ])
+    ],
+    ann_file='data/nuscenes/bevdetv2-nuscenes_infos_val.pkl',
+    type='NuScenesDatasetOccpancy',
+    data_root='data/nuscenes/',
+    classes=[
+        'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',
+        'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
+    ],
+    modality=dict(
+        use_lidar=False,
+        use_camera=True,
+        use_radar=False,
+        use_map=False,
+        use_external=False),
+    stereo=False,
+    filter_empty_gt=False,
+    img_info_prototype='bevdet')
+key = 'test'
+optimizer = dict(type='AdamW', lr=0.0001, weight_decay=0.01)
+optimizer_config = dict(grad_clip=dict(max_norm=5, norm_type=2))
+lr_config = dict(
+    policy='step',
+    warmup='linear',
+    warmup_iters=200,
+    warmup_ratio=0.001,
+    step=[24])
+runner = dict(type='EpochBasedRunner', max_epochs=24)
+custom_hooks = [
+    dict(type='MEGVIIEMAHook', init_updates=10560, priority='NORMAL')
+]
+gpu_ids = range(0, 8)
+
+2026-04-03 16:24:22,636 - mmdet - INFO - Set random seed to 0, deterministic: False
+2026-04-03 16:24:22,888 - mmdet - INFO - initialize ResNet with init_cfg [{'type': 'Kaiming', 'layer': 'Conv2d'}, {'type': 'Constant', 'val': 1, 'layer': ['_BatchNorm', 'GroupNorm']}]
+2026-04-03 16:24:22,995 - mmdet - INFO - initialize Bottleneck with init_cfg {'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
+2026-04-03 16:24:22,995 - mmdet - INFO - initialize Bottleneck with init_cfg {'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
+2026-04-03 16:24:22,996 - mmdet - INFO - initialize Bottleneck with init_cfg {'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
+2026-04-03 16:24:22,996 - mmdet - INFO - initialize Bottleneck with init_cfg {'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
+2026-04-03 16:24:22,997 - mmdet - INFO - initialize Bottleneck with init_cfg {'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
+2026-04-03 16:24:22,997 - mmdet - INFO - initialize Bottleneck with init_cfg {'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
+2026-04-03 16:24:22,998 - mmdet - INFO - initialize Bottleneck with init_cfg {'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
+2026-04-03 16:24:22,999 - mmdet - INFO - initialize Bottleneck with init_cfg {'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
+2026-04-03 16:24:23,000 - mmdet - INFO - initialize Bottleneck with init_cfg {'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
+2026-04-03 16:24:23,000 - mmdet - INFO - initialize Bottleneck with init_cfg {'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
+2026-04-03 16:24:23,001 - mmdet - INFO - initialize Bottleneck with init_cfg {'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
+2026-04-03 16:24:23,002 - mmdet - INFO - initialize Bottleneck with init_cfg {'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
+2026-04-03 16:24:23,003 - mmdet - INFO - initialize Bottleneck with init_cfg {'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
+2026-04-03 16:24:23,005 - mmdet - INFO - initialize Bottleneck with init_cfg {'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
+2026-04-03 16:24:23,008 - mmdet - INFO - initialize Bottleneck with init_cfg {'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
+2026-04-03 16:24:23,010 - mmdet - INFO - initialize Bottleneck with init_cfg {'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
+2026-04-03 16:24:23,021 - mmdet - INFO - initialize CustomFPN with init_cfg {'type': 'Xavier', 'layer': 'Conv2d', 'distribution': 'uniform'}
+Name of parameter - Initialization information
+
+img_backbone.conv1.weight - torch.Size([64, 3, 7, 7]): 
+KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 
+
+img_backbone.bn1.weight - torch.Size([64]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.bn1.bias - torch.Size([64]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer1.0.conv1.weight - torch.Size([64, 64, 1, 1]): 
+KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 
+
+img_backbone.layer1.0.bn1.weight - torch.Size([64]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer1.0.bn1.bias - torch.Size([64]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer1.0.conv2.weight - torch.Size([64, 64, 3, 3]): 
+KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 
+
+img_backbone.layer1.0.bn2.weight - torch.Size([64]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer1.0.bn2.bias - torch.Size([64]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer1.0.conv3.weight - torch.Size([256, 64, 1, 1]): 
+KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 
+
+img_backbone.layer1.0.bn3.weight - torch.Size([256]): 
+ConstantInit: val=0, bias=0 
+
+img_backbone.layer1.0.bn3.bias - torch.Size([256]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer1.0.downsample.0.weight - torch.Size([256, 64, 1, 1]): 
+KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 
+
+img_backbone.layer1.0.downsample.1.weight - torch.Size([256]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer1.0.downsample.1.bias - torch.Size([256]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer1.1.conv1.weight - torch.Size([64, 256, 1, 1]): 
+KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 
+
+img_backbone.layer1.1.bn1.weight - torch.Size([64]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer1.1.bn1.bias - torch.Size([64]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer1.1.conv2.weight - torch.Size([64, 64, 3, 3]): 
+KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 
+
+img_backbone.layer1.1.bn2.weight - torch.Size([64]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer1.1.bn2.bias - torch.Size([64]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer1.1.conv3.weight - torch.Size([256, 64, 1, 1]): 
+KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 
+
+img_backbone.layer1.1.bn3.weight - torch.Size([256]): 
+ConstantInit: val=0, bias=0 
+
+img_backbone.layer1.1.bn3.bias - torch.Size([256]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer1.2.conv1.weight - torch.Size([64, 256, 1, 1]): 
+KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 
+
+img_backbone.layer1.2.bn1.weight - torch.Size([64]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer1.2.bn1.bias - torch.Size([64]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer1.2.conv2.weight - torch.Size([64, 64, 3, 3]): 
+KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 
+
+img_backbone.layer1.2.bn2.weight - torch.Size([64]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer1.2.bn2.bias - torch.Size([64]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer1.2.conv3.weight - torch.Size([256, 64, 1, 1]): 
+KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 
+
+img_backbone.layer1.2.bn3.weight - torch.Size([256]): 
+ConstantInit: val=0, bias=0 
+
+img_backbone.layer1.2.bn3.bias - torch.Size([256]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer2.0.conv1.weight - torch.Size([128, 256, 1, 1]): 
+KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 
+
+img_backbone.layer2.0.bn1.weight - torch.Size([128]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer2.0.bn1.bias - torch.Size([128]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer2.0.conv2.weight - torch.Size([128, 128, 3, 3]): 
+KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 
+
+img_backbone.layer2.0.bn2.weight - torch.Size([128]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer2.0.bn2.bias - torch.Size([128]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer2.0.conv3.weight - torch.Size([512, 128, 1, 1]): 
+KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 
+
+img_backbone.layer2.0.bn3.weight - torch.Size([512]): 
+ConstantInit: val=0, bias=0 
+
+img_backbone.layer2.0.bn3.bias - torch.Size([512]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer2.0.downsample.0.weight - torch.Size([512, 256, 1, 1]): 
+KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 
+
+img_backbone.layer2.0.downsample.1.weight - torch.Size([512]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer2.0.downsample.1.bias - torch.Size([512]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer2.1.conv1.weight - torch.Size([128, 512, 1, 1]): 
+KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 
+
+img_backbone.layer2.1.bn1.weight - torch.Size([128]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer2.1.bn1.bias - torch.Size([128]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer2.1.conv2.weight - torch.Size([128, 128, 3, 3]): 
+KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 
+
+img_backbone.layer2.1.bn2.weight - torch.Size([128]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer2.1.bn2.bias - torch.Size([128]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer2.1.conv3.weight - torch.Size([512, 128, 1, 1]): 
+KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 
+
+img_backbone.layer2.1.bn3.weight - torch.Size([512]): 
+ConstantInit: val=0, bias=0 
+
+img_backbone.layer2.1.bn3.bias - torch.Size([512]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer2.2.conv1.weight - torch.Size([128, 512, 1, 1]): 
+KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 
+
+img_backbone.layer2.2.bn1.weight - torch.Size([128]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer2.2.bn1.bias - torch.Size([128]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer2.2.conv2.weight - torch.Size([128, 128, 3, 3]): 
+KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 
+
+img_backbone.layer2.2.bn2.weight - torch.Size([128]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer2.2.bn2.bias - torch.Size([128]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer2.2.conv3.weight - torch.Size([512, 128, 1, 1]): 
+KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 
+
+img_backbone.layer2.2.bn3.weight - torch.Size([512]): 
+ConstantInit: val=0, bias=0 
+
+img_backbone.layer2.2.bn3.bias - torch.Size([512]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer2.3.conv1.weight - torch.Size([128, 512, 1, 1]): 
+KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 
+
+img_backbone.layer2.3.bn1.weight - torch.Size([128]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer2.3.bn1.bias - torch.Size([128]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer2.3.conv2.weight - torch.Size([128, 128, 3, 3]): 
+KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 
+
+img_backbone.layer2.3.bn2.weight - torch.Size([128]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer2.3.bn2.bias - torch.Size([128]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer2.3.conv3.weight - torch.Size([512, 128, 1, 1]): 
+KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 
+
+img_backbone.layer2.3.bn3.weight - torch.Size([512]): 
+ConstantInit: val=0, bias=0 
+
+img_backbone.layer2.3.bn3.bias - torch.Size([512]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer3.0.conv1.weight - torch.Size([256, 512, 1, 1]): 
+KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 
+
+img_backbone.layer3.0.bn1.weight - torch.Size([256]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer3.0.bn1.bias - torch.Size([256]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer3.0.conv2.weight - torch.Size([256, 256, 3, 3]): 
+KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 
+
+img_backbone.layer3.0.bn2.weight - torch.Size([256]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer3.0.bn2.bias - torch.Size([256]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer3.0.conv3.weight - torch.Size([1024, 256, 1, 1]): 
+KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 
+
+img_backbone.layer3.0.bn3.weight - torch.Size([1024]): 
+ConstantInit: val=0, bias=0 
+
+img_backbone.layer3.0.bn3.bias - torch.Size([1024]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer3.0.downsample.0.weight - torch.Size([1024, 512, 1, 1]): 
+KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 
+
+img_backbone.layer3.0.downsample.1.weight - torch.Size([1024]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer3.0.downsample.1.bias - torch.Size([1024]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer3.1.conv1.weight - torch.Size([256, 1024, 1, 1]): 
+KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 
+
+img_backbone.layer3.1.bn1.weight - torch.Size([256]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer3.1.bn1.bias - torch.Size([256]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer3.1.conv2.weight - torch.Size([256, 256, 3, 3]): 
+KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 
+
+img_backbone.layer3.1.bn2.weight - torch.Size([256]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer3.1.bn2.bias - torch.Size([256]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer3.1.conv3.weight - torch.Size([1024, 256, 1, 1]): 
+KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 
+
+img_backbone.layer3.1.bn3.weight - torch.Size([1024]): 
+ConstantInit: val=0, bias=0 
+
+img_backbone.layer3.1.bn3.bias - torch.Size([1024]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer3.2.conv1.weight - torch.Size([256, 1024, 1, 1]): 
+KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 
+
+img_backbone.layer3.2.bn1.weight - torch.Size([256]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer3.2.bn1.bias - torch.Size([256]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer3.2.conv2.weight - torch.Size([256, 256, 3, 3]): 
+KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 
+
+img_backbone.layer3.2.bn2.weight - torch.Size([256]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer3.2.bn2.bias - torch.Size([256]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer3.2.conv3.weight - torch.Size([1024, 256, 1, 1]): 
+KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 
+
+img_backbone.layer3.2.bn3.weight - torch.Size([1024]): 
+ConstantInit: val=0, bias=0 
+
+img_backbone.layer3.2.bn3.bias - torch.Size([1024]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer3.3.conv1.weight - torch.Size([256, 1024, 1, 1]): 
+KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 
+
+img_backbone.layer3.3.bn1.weight - torch.Size([256]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer3.3.bn1.bias - torch.Size([256]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer3.3.conv2.weight - torch.Size([256, 256, 3, 3]): 
+KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 
+
+img_backbone.layer3.3.bn2.weight - torch.Size([256]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer3.3.bn2.bias - torch.Size([256]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer3.3.conv3.weight - torch.Size([1024, 256, 1, 1]): 
+KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 
+
+img_backbone.layer3.3.bn3.weight - torch.Size([1024]): 
+ConstantInit: val=0, bias=0 
+
+img_backbone.layer3.3.bn3.bias - torch.Size([1024]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer3.4.conv1.weight - torch.Size([256, 1024, 1, 1]): 
+KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 
+
+img_backbone.layer3.4.bn1.weight - torch.Size([256]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer3.4.bn1.bias - torch.Size([256]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer3.4.conv2.weight - torch.Size([256, 256, 3, 3]): 
+KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 
+
+img_backbone.layer3.4.bn2.weight - torch.Size([256]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer3.4.bn2.bias - torch.Size([256]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer3.4.conv3.weight - torch.Size([1024, 256, 1, 1]): 
+KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 
+
+img_backbone.layer3.4.bn3.weight - torch.Size([1024]): 
+ConstantInit: val=0, bias=0 
+
+img_backbone.layer3.4.bn3.bias - torch.Size([1024]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer3.5.conv1.weight - torch.Size([256, 1024, 1, 1]): 
+KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 
+
+img_backbone.layer3.5.bn1.weight - torch.Size([256]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer3.5.bn1.bias - torch.Size([256]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer3.5.conv2.weight - torch.Size([256, 256, 3, 3]): 
+KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 
+
+img_backbone.layer3.5.bn2.weight - torch.Size([256]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer3.5.bn2.bias - torch.Size([256]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer3.5.conv3.weight - torch.Size([1024, 256, 1, 1]): 
+KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 
+
+img_backbone.layer3.5.bn3.weight - torch.Size([1024]): 
+ConstantInit: val=0, bias=0 
+
+img_backbone.layer3.5.bn3.bias - torch.Size([1024]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer4.0.conv1.weight - torch.Size([512, 1024, 1, 1]): 
+KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 
+
+img_backbone.layer4.0.bn1.weight - torch.Size([512]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer4.0.bn1.bias - torch.Size([512]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer4.0.conv2.weight - torch.Size([512, 512, 3, 3]): 
+KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 
+
+img_backbone.layer4.0.bn2.weight - torch.Size([512]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer4.0.bn2.bias - torch.Size([512]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer4.0.conv3.weight - torch.Size([2048, 512, 1, 1]): 
+KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 
+
+img_backbone.layer4.0.bn3.weight - torch.Size([2048]): 
+ConstantInit: val=0, bias=0 
+
+img_backbone.layer4.0.bn3.bias - torch.Size([2048]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer4.0.downsample.0.weight - torch.Size([2048, 1024, 1, 1]): 
+KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 
+
+img_backbone.layer4.0.downsample.1.weight - torch.Size([2048]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer4.0.downsample.1.bias - torch.Size([2048]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer4.1.conv1.weight - torch.Size([512, 2048, 1, 1]): 
+KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 
+
+img_backbone.layer4.1.bn1.weight - torch.Size([512]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer4.1.bn1.bias - torch.Size([512]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer4.1.conv2.weight - torch.Size([512, 512, 3, 3]): 
+KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 
+
+img_backbone.layer4.1.bn2.weight - torch.Size([512]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer4.1.bn2.bias - torch.Size([512]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer4.1.conv3.weight - torch.Size([2048, 512, 1, 1]): 
+KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 
+
+img_backbone.layer4.1.bn3.weight - torch.Size([2048]): 
+ConstantInit: val=0, bias=0 
+
+img_backbone.layer4.1.bn3.bias - torch.Size([2048]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer4.2.conv1.weight - torch.Size([512, 2048, 1, 1]): 
+KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 
+
+img_backbone.layer4.2.bn1.weight - torch.Size([512]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer4.2.bn1.bias - torch.Size([512]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer4.2.conv2.weight - torch.Size([512, 512, 3, 3]): 
+KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 
+
+img_backbone.layer4.2.bn2.weight - torch.Size([512]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer4.2.bn2.bias - torch.Size([512]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer4.2.conv3.weight - torch.Size([2048, 512, 1, 1]): 
+KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 
+
+img_backbone.layer4.2.bn3.weight - torch.Size([2048]): 
+ConstantInit: val=0, bias=0 
+
+img_backbone.layer4.2.bn3.bias - torch.Size([2048]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_neck.lateral_convs.0.conv.weight - torch.Size([256, 1024, 1, 1]): 
+XavierInit: gain=1, distribution=uniform, bias=0 
+
+img_neck.lateral_convs.0.conv.bias - torch.Size([256]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_neck.lateral_convs.1.conv.weight - torch.Size([256, 2048, 1, 1]): 
+XavierInit: gain=1, distribution=uniform, bias=0 
+
+img_neck.lateral_convs.1.conv.bias - torch.Size([256]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_neck.fpn_convs.0.conv.weight - torch.Size([256, 256, 3, 3]): 
+XavierInit: gain=1, distribution=uniform, bias=0 
+
+img_neck.fpn_convs.0.conv.bias - torch.Size([256]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_view_transformer.depth_net.weight - torch.Size([152, 256, 1, 1]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_view_transformer.depth_net.bias - torch.Size([152]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_bev_encoder_backbone.layers.0.0.conv1.weight - torch.Size([128, 64, 3, 3]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_bev_encoder_backbone.layers.0.0.bn1.weight - torch.Size([128]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_bev_encoder_backbone.layers.0.0.bn1.bias - torch.Size([128]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_bev_encoder_backbone.layers.0.0.conv2.weight - torch.Size([128, 128, 3, 3]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_bev_encoder_backbone.layers.0.0.bn2.weight - torch.Size([128]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_bev_encoder_backbone.layers.0.0.bn2.bias - torch.Size([128]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_bev_encoder_backbone.layers.0.0.downsample.weight - torch.Size([128, 64, 3, 3]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_bev_encoder_backbone.layers.0.0.downsample.bias - torch.Size([128]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_bev_encoder_backbone.layers.0.1.conv1.weight - torch.Size([128, 128, 3, 3]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_bev_encoder_backbone.layers.0.1.bn1.weight - torch.Size([128]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_bev_encoder_backbone.layers.0.1.bn1.bias - torch.Size([128]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_bev_encoder_backbone.layers.0.1.conv2.weight - torch.Size([128, 128, 3, 3]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_bev_encoder_backbone.layers.0.1.bn2.weight - torch.Size([128]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_bev_encoder_backbone.layers.0.1.bn2.bias - torch.Size([128]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_bev_encoder_backbone.layers.1.0.conv1.weight - torch.Size([256, 128, 3, 3]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_bev_encoder_backbone.layers.1.0.bn1.weight - torch.Size([256]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_bev_encoder_backbone.layers.1.0.bn1.bias - torch.Size([256]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_bev_encoder_backbone.layers.1.0.conv2.weight - torch.Size([256, 256, 3, 3]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_bev_encoder_backbone.layers.1.0.bn2.weight - torch.Size([256]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_bev_encoder_backbone.layers.1.0.bn2.bias - torch.Size([256]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_bev_encoder_backbone.layers.1.0.downsample.weight - torch.Size([256, 128, 3, 3]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_bev_encoder_backbone.layers.1.0.downsample.bias - torch.Size([256]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_bev_encoder_backbone.layers.1.1.conv1.weight - torch.Size([256, 256, 3, 3]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_bev_encoder_backbone.layers.1.1.bn1.weight - torch.Size([256]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_bev_encoder_backbone.layers.1.1.bn1.bias - torch.Size([256]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_bev_encoder_backbone.layers.1.1.conv2.weight - torch.Size([256, 256, 3, 3]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_bev_encoder_backbone.layers.1.1.bn2.weight - torch.Size([256]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_bev_encoder_backbone.layers.1.1.bn2.bias - torch.Size([256]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_bev_encoder_backbone.layers.2.0.conv1.weight - torch.Size([512, 256, 3, 3]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_bev_encoder_backbone.layers.2.0.bn1.weight - torch.Size([512]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_bev_encoder_backbone.layers.2.0.bn1.bias - torch.Size([512]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_bev_encoder_backbone.layers.2.0.conv2.weight - torch.Size([512, 512, 3, 3]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_bev_encoder_backbone.layers.2.0.bn2.weight - torch.Size([512]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_bev_encoder_backbone.layers.2.0.bn2.bias - torch.Size([512]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_bev_encoder_backbone.layers.2.0.downsample.weight - torch.Size([512, 256, 3, 3]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_bev_encoder_backbone.layers.2.0.downsample.bias - torch.Size([512]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_bev_encoder_backbone.layers.2.1.conv1.weight - torch.Size([512, 512, 3, 3]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_bev_encoder_backbone.layers.2.1.bn1.weight - torch.Size([512]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_bev_encoder_backbone.layers.2.1.bn1.bias - torch.Size([512]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_bev_encoder_backbone.layers.2.1.conv2.weight - torch.Size([512, 512, 3, 3]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_bev_encoder_backbone.layers.2.1.bn2.weight - torch.Size([512]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_bev_encoder_backbone.layers.2.1.bn2.bias - torch.Size([512]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_bev_encoder_neck.conv.0.weight - torch.Size([512, 640, 3, 3]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_bev_encoder_neck.conv.1.weight - torch.Size([512]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_bev_encoder_neck.conv.1.bias - torch.Size([512]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_bev_encoder_neck.conv.3.weight - torch.Size([512, 512, 3, 3]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_bev_encoder_neck.conv.4.weight - torch.Size([512]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_bev_encoder_neck.conv.4.bias - torch.Size([512]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_bev_encoder_neck.up2.1.weight - torch.Size([256, 512, 3, 3]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_bev_encoder_neck.up2.2.weight - torch.Size([256]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_bev_encoder_neck.up2.2.bias - torch.Size([256]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_bev_encoder_neck.up2.4.weight - torch.Size([256, 256, 1, 1]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_bev_encoder_neck.up2.4.bias - torch.Size([256]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+occ_head.final_conv.conv.weight - torch.Size([256, 256, 3, 3]): 
+Initialized by user-defined `init_weights` in ConvModule  
+
+occ_head.final_conv.conv.bias - torch.Size([256]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+occ_head.predicter.0.weight - torch.Size([512, 256]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+occ_head.predicter.0.bias - torch.Size([512]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+occ_head.predicter.2.weight - torch.Size([288, 512]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+occ_head.predicter.2.bias - torch.Size([288]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+2026-04-03 16:24:23,032 - mmdet - INFO - Model:
+BEVDetOCC(
+  (img_backbone): ResNet(
+    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
+    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+    (relu): ReLU(inplace=True)
+    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
+    (layer1): ResLayer(
+      (0): Bottleneck(
+        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
+        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
+        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        (relu): ReLU(inplace=True)
+        (downsample): Sequential(
+          (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
+          (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        )
+      )
+      init_cfg={'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
+      (1): Bottleneck(
+        (conv1): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
+        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
+        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        (relu): ReLU(inplace=True)
+      )
+      init_cfg={'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
+      (2): Bottleneck(
+        (conv1): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
+        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
+        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        (relu): ReLU(inplace=True)
+      )
+      init_cfg={'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
+    )
+    (layer2): ResLayer(
+      (0): Bottleneck(
+        (conv1): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
+        (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
+        (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
+        (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        (relu): ReLU(inplace=True)
+        (downsample): Sequential(
+          (0): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False)
+          (1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        )
+      )
+      init_cfg={'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
+      (1): Bottleneck(
+        (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
+        (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+        (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
+        (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        (relu): ReLU(inplace=True)
+      )
+      init_cfg={'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
+      (2): Bottleneck(
+        (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
+        (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+        (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
+        (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        (relu): ReLU(inplace=True)
+      )
+      init_cfg={'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
+      (3): Bottleneck(
+        (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
+        (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+        (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
+        (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        (relu): ReLU(inplace=True)
+      )
+      init_cfg={'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
+    )
+    (layer3): ResLayer(
+      (0): Bottleneck(
+        (conv1): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
+        (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
+        (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
+        (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        (relu): ReLU(inplace=True)
+        (downsample): Sequential(
+          (0): Conv2d(512, 1024, kernel_size=(1, 1), stride=(2, 2), bias=False)
+          (1): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        )
+      )
+      init_cfg={'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
+      (1): Bottleneck(
+        (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
+        (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+        (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
+        (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        (relu): ReLU(inplace=True)
+      )
+      init_cfg={'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
+      (2): Bottleneck(
+        (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
+        (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+        (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
+        (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        (relu): ReLU(inplace=True)
+      )
+      init_cfg={'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
+      (3): Bottleneck(
+        (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
+        (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+        (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
+        (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        (relu): ReLU(inplace=True)
+      )
+      init_cfg={'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
+      (4): Bottleneck(
+        (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
+        (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+        (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
+        (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        (relu): ReLU(inplace=True)
+      )
+      init_cfg={'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
+      (5): Bottleneck(
+        (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
+        (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+        (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
+        (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        (relu): ReLU(inplace=True)
+      )
+      init_cfg={'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
+    )
+    (layer4): ResLayer(
+      (0): Bottleneck(
+        (conv1): Conv2d(1024, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
+        (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
+        (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)
+        (bn3): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        (relu): ReLU(inplace=True)
+        (downsample): Sequential(
+          (0): Conv2d(1024, 2048, kernel_size=(1, 1), stride=(2, 2), bias=False)
+          (1): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        )
+      )
+      init_cfg={'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
+      (1): Bottleneck(
+        (conv1): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
+        (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+        (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)
+        (bn3): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        (relu): ReLU(inplace=True)
+      )
+      init_cfg={'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
+      (2): Bottleneck(
+        (conv1): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
+        (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+        (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)
+        (bn3): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        (relu): ReLU(inplace=True)
+      )
+      init_cfg={'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
+    )
+  )
+  init_cfg=[{'type': 'Kaiming', 'layer': 'Conv2d'}, {'type': 'Constant', 'val': 1, 'layer': ['_BatchNorm', 'GroupNorm']}]
+  (img_neck): CustomFPN(
+    (lateral_convs): ModuleList(
+      (0): ConvModule(
+        (conv): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
+      )
+      (1): ConvModule(
+        (conv): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
+      )
+    )
+    (fpn_convs): ModuleList(
+      (0): ConvModule(
+        (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
+      )
+    )
+  )
+  init_cfg={'type': 'Xavier', 'layer': 'Conv2d', 'distribution': 'uniform'}
+  (img_view_transformer): LSSViewTransformer(
+    (depth_net): Conv2d(256, 152, kernel_size=(1, 1), stride=(1, 1))
+  )
+  (img_bev_encoder_backbone): CustomResNet(
+    (layers): Sequential(
+      (0): Sequential(
+        (0): BasicBlock(
+          (conv1): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
+          (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+          (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+          (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+          (relu): ReLU(inplace=True)
+          (downsample): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
+        )
+        (1): BasicBlock(
+          (conv1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+          (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+          (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+          (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+          (relu): ReLU(inplace=True)
+        )
+      )
+      (1): Sequential(
+        (0): BasicBlock(
+          (conv1): Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
+          (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+          (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+          (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+          (relu): ReLU(inplace=True)
+          (downsample): Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
+        )
+        (1): BasicBlock(
+          (conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+          (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+          (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+          (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+          (relu): ReLU(inplace=True)
+        )
+      )
+      (2): Sequential(
+        (0): BasicBlock(
+          (conv1): Conv2d(256, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
+          (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+          (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+          (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+          (relu): ReLU(inplace=True)
+          (downsample): Conv2d(256, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
+        )
+        (1): BasicBlock(
+          (conv1): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+          (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+          (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+          (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+          (relu): ReLU(inplace=True)
+        )
+      )
+    )
+  )
+  (img_bev_encoder_neck): FPN_LSS(
+    (up): Upsample(scale_factor=4.0, mode='bilinear')
+    (conv): Sequential(
+      (0): Conv2d(640, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+      (1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+      (2): ReLU(inplace=True)
+      (3): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+      (4): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+      (5): ReLU(inplace=True)
+    )
+    (up2): Sequential(
+      (0): Upsample(scale_factor=2.0, mode='bilinear')
+      (1): Conv2d(512, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+      (2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+      (3): ReLU(inplace=True)
+      (4): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
+    )
+  )
+  (occ_head): BEVOCCHead2D(
+    (final_conv): ConvModule(
+      (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
+      (activate): ReLU(inplace=True)
+    )
+    (predicter): Sequential(
+      (0): Linear(in_features=256, out_features=512, bias=True)
+      (1): Softplus(beta=1.0, threshold=20.0)
+      (2): Linear(in_features=512, out_features=288, bias=True)
+    )
+    (loss_occ): CrossEntropyLoss(avg_non_ignore=False)
+  )
+)
+2026-04-03 16:24:32,890 - mmdet - INFO - load checkpoint from local path: ckpts/bevdet-r50-cbgs.pth
+2026-04-03 16:24:32,998 - mmdet - WARNING - The model and loaded state dict do not match exactly
+
+size mismatch for img_view_transformer.depth_net.weight: copying a param with shape torch.Size([123, 256, 1, 1]) from checkpoint, the shape in current model is torch.Size([152, 256, 1, 1]).
+size mismatch for img_view_transformer.depth_net.bias: copying a param with shape torch.Size([123]) from checkpoint, the shape in current model is torch.Size([152]).
+unexpected key in source state_dict: pts_bbox_head.shared_conv.conv.weight, pts_bbox_head.shared_conv.bn.weight, pts_bbox_head.shared_conv.bn.bias, pts_bbox_head.shared_conv.bn.running_mean, pts_bbox_head.shared_conv.bn.running_var, pts_bbox_head.shared_conv.bn.num_batches_tracked, pts_bbox_head.task_heads.0.reg.0.conv.weight, pts_bbox_head.task_heads.0.reg.0.bn.weight, pts_bbox_head.task_heads.0.reg.0.bn.bias, pts_bbox_head.task_heads.0.reg.0.bn.running_mean, pts_bbox_head.task_heads.0.reg.0.bn.running_var, pts_bbox_head.task_heads.0.reg.0.bn.num_batches_tracked, pts_bbox_head.task_heads.0.reg.1.weight, pts_bbox_head.task_heads.0.reg.1.bias, pts_bbox_head.task_heads.0.height.0.conv.weight, pts_bbox_head.task_heads.0.height.0.bn.weight, pts_bbox_head.task_heads.0.height.0.bn.bias, pts_bbox_head.task_heads.0.height.0.bn.running_mean, pts_bbox_head.task_heads.0.height.0.bn.running_var, pts_bbox_head.task_heads.0.height.0.bn.num_batches_tracked, pts_bbox_head.task_heads.0.height.1.weight, pts_bbox_head.task_heads.0.height.1.bias, pts_bbox_head.task_heads.0.dim.0.conv.weight, pts_bbox_head.task_heads.0.dim.0.bn.weight, pts_bbox_head.task_heads.0.dim.0.bn.bias, pts_bbox_head.task_heads.0.dim.0.bn.running_mean, pts_bbox_head.task_heads.0.dim.0.bn.running_var, pts_bbox_head.task_heads.0.dim.0.bn.num_batches_tracked, pts_bbox_head.task_heads.0.dim.1.weight, pts_bbox_head.task_heads.0.dim.1.bias, pts_bbox_head.task_heads.0.rot.0.conv.weight, pts_bbox_head.task_heads.0.rot.0.bn.weight, pts_bbox_head.task_heads.0.rot.0.bn.bias, pts_bbox_head.task_heads.0.rot.0.bn.running_mean, pts_bbox_head.task_heads.0.rot.0.bn.running_var, pts_bbox_head.task_heads.0.rot.0.bn.num_batches_tracked, pts_bbox_head.task_heads.0.rot.1.weight, pts_bbox_head.task_heads.0.rot.1.bias, pts_bbox_head.task_heads.0.vel.0.conv.weight, pts_bbox_head.task_heads.0.vel.0.bn.weight, pts_bbox_head.task_heads.0.vel.0.bn.bias, pts_bbox_head.task_heads.0.vel.0.bn.running_mean, 
pts_bbox_head.task_heads.0.vel.0.bn.running_var, pts_bbox_head.task_heads.0.vel.0.bn.num_batches_tracked, pts_bbox_head.task_heads.0.vel.1.weight, pts_bbox_head.task_heads.0.vel.1.bias, pts_bbox_head.task_heads.0.heatmap.0.conv.weight, pts_bbox_head.task_heads.0.heatmap.0.bn.weight, pts_bbox_head.task_heads.0.heatmap.0.bn.bias, pts_bbox_head.task_heads.0.heatmap.0.bn.running_mean, pts_bbox_head.task_heads.0.heatmap.0.bn.running_var, pts_bbox_head.task_heads.0.heatmap.0.bn.num_batches_tracked, pts_bbox_head.task_heads.0.heatmap.1.weight, pts_bbox_head.task_heads.0.heatmap.1.bias
+
+missing keys in source state_dict: occ_head.final_conv.conv.weight, occ_head.final_conv.conv.bias, occ_head.predicter.0.weight, occ_head.predicter.0.bias, occ_head.predicter.2.weight, occ_head.predicter.2.bias
+
+2026-04-03 16:24:33,000 - mmdet - INFO - Start running, host: root@bw61, work_dir: /workspace/Flashocc/work_dirs/flashocc-r50
+2026-04-03 16:24:33,001 - mmdet - INFO - Hooks will be executed in the following order:
+before_run:
+(VERY_HIGH   ) StepLrUpdaterHook                  
+(NORMAL      ) CheckpointHook                     
+(NORMAL      ) MEGVIIEMAHook                      
+(VERY_LOW    ) TextLoggerHook                     
+(VERY_LOW    ) TensorboardLoggerHook              
+ -------------------- 
+before_train_epoch:
+(VERY_HIGH   ) StepLrUpdaterHook                  
+(NORMAL      ) DistSamplerSeedHook                
+(LOW         ) IterTimerHook                      
+(VERY_LOW    ) TextLoggerHook                     
+(VERY_LOW    ) TensorboardLoggerHook              
+ -------------------- 
+before_train_iter:
+(VERY_HIGH   ) StepLrUpdaterHook                  
+(LOW         ) IterTimerHook                      
+ -------------------- 
+after_train_iter:
+(ABOVE_NORMAL) OptimizerHook                      
+(NORMAL      ) CheckpointHook                     
+(NORMAL      ) MEGVIIEMAHook                      
+(LOW         ) IterTimerHook                      
+(VERY_LOW    ) TextLoggerHook                     
+(VERY_LOW    ) TensorboardLoggerHook              
+ -------------------- 
+after_train_epoch:
+(NORMAL      ) CheckpointHook                     
+(NORMAL      ) MEGVIIEMAHook                      
+(VERY_LOW    ) TextLoggerHook                     
+(VERY_LOW    ) TensorboardLoggerHook              
+ -------------------- 
+before_val_epoch:
+(NORMAL      ) DistSamplerSeedHook                
+(LOW         ) IterTimerHook                      
+(VERY_LOW    ) TextLoggerHook                     
+(VERY_LOW    ) TensorboardLoggerHook              
+ -------------------- 
+before_val_iter:
+(LOW         ) IterTimerHook                      
+ -------------------- 
+after_val_iter:
+(LOW         ) IterTimerHook                      
+ -------------------- 
+after_val_epoch:
+(VERY_LOW    ) TextLoggerHook                     
+(VERY_LOW    ) TensorboardLoggerHook              
+ -------------------- 
+after_run:
+(VERY_LOW    ) TextLoggerHook                     
+(VERY_LOW    ) TensorboardLoggerHook              
+ -------------------- 
+2026-04-03 16:24:33,001 - mmdet - INFO - workflow: [('train', 1)], max: 24 epochs
+2026-04-03 16:24:33,001 - mmdet - INFO - Checkpoints will be saved to /workspace/Flashocc/work_dirs/flashocc-r50 by HardDiskBackend.
--- a/docker-hub/FlashOCC/Flashocc/work_dirs/flashocc-r50/20260403_162421.log.json
+++ b/docker-hub/FlashOCC/Flashocc/work_dirs/flashocc-r50/20260403_162421.log.json
+{"env_info": "sys.platform: linux\nPython: 3.10.12 (main, Aug 15 2025, 14:32:43) [GCC 11.4.0]\nCUDA available: True\nGPU 0,1,2,3,4,5,6,7: BW1000_H\nCUDA_HOME: /opt/dtk\nNVCC: Not Available\nGCC: x86_64-linux-gnu-gcc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0\nPyTorch: 2.5.1\nPyTorch compiling details: PyTorch built with:\n  - GCC 10.3\n  - C++ Version: 201703\n  - Intel(R) Math Kernel Library Version 2020.0.4 Product Build 20200917 for Intel(R) 64 architecture applications\n  - OpenMP 201511 (a.k.a. OpenMP 4.5)\n  - LAPACK is enabled (usually provided by MKL)\n  - NNPACK is enabled\n  - CPU capability usage: AVX512\n  - HIP Runtime 6.3.25521\n  - MIOpen 2.18.0\n  - Magma 2.8.0\n  - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, CXX_COMPILER=/opt/rh/gcc-toolset-10/root/usr/bin/c++, CXX_FLAGS= -D_GLIBCXX_USE_CXX11_ABI=1 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOCUPTI -DLIBKINETO_NOXPUPTI=ON -DUSE_FBGEMM -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wsuggest-override -Wno-psabi -Wno-error=old-style-cast -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Wno-stringop-overflow, FORCE_FALLBACK_CUDA_MPI=1, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, TORCH_VERSION=2.5.1, USE_CUDA=0, USE_CUDNN=OFF, USE_CUSPARSELT=OFF, USE_EXCEPTION_PTR=1, USE_GFLAGS=1, USE_GLOG=1, USE_GLOO=1, USE_MKL=ON, USE_MKLDNN=0, USE_MPI=1, USE_NCCL=1, USE_NNPACK=ON, USE_OPENMP=1, USE_ROCM=ON, USE_ROCM_KERNEL_ASSERT=OFF, \n\nTorchVision: 0.20.1\nOpenCV: 4.12.0\nMMCV: 1.6.1\nMMCV Compiler: GCC 10.3\nMMCV CUDA Compiler: rocm not 
available\nMMDetection: 2.25.1\nMMSegmentation: 0.25.0\nMMDetection3D: 1.0.0rc4+\nspconv2.0: False", "config": "point_cloud_range = [-51.2, -51.2, -5.0, 51.2, 51.2, 3.0]\nclass_names = [\n    'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',\n    'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'\n]\ndataset_type = 'NuScenesDatasetOccpancy'\ndata_root = 'data/nuscenes/'\ninput_modality = dict(\n    use_lidar=False,\n    use_camera=True,\n    use_radar=False,\n    use_map=False,\n    use_external=False)\nfile_client_args = dict(backend='disk')\ntrain_pipeline = [\n    dict(\n        type='PrepareImageInputs',\n        is_train=True,\n        data_config=dict(\n            cams=[\n                'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT',\n                'CAM_BACK_LEFT', 'CAM_BACK', 'CAM_BACK_RIGHT'\n            ],\n            Ncams=6,\n            input_size=(256, 704),\n            src_size=(900, 1600),\n            resize=(-0.06, 0.11),\n            rot=(-5.4, 5.4),\n            flip=True,\n            crop_h=(0.0, 0.0),\n            resize_test=0.0),\n        sequential=False),\n    dict(\n        type='LoadAnnotationsBEVDepth',\n        bda_aug_conf=dict(\n            rot_lim=(-0.0, 0.0),\n            scale_lim=(1.0, 1.0),\n            flip_dx_ratio=0.5,\n            flip_dy_ratio=0.5),\n        classes=[\n            'car', 'truck', 'construction_vehicle', 'bus', 'trailer',\n            'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'\n        ],\n        is_train=True),\n    dict(type='LoadOccGTFromFile'),\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='LIDAR',\n        load_dim=5,\n        use_dim=5,\n        file_client_args=dict(backend='disk')),\n    dict(\n        type='PointToMultiViewDepth',\n        downsample=1,\n        grid_config=dict(\n            x=[-40, 40, 0.4],\n            y=[-40, 40, 0.4],\n            z=[-1, 5.4, 6.4],\n            depth=[1.0, 45.0, 0.5])),\n    dict(\n      
  type='DefaultFormatBundle3D',\n        class_names=[\n            'car', 'truck', 'construction_vehicle', 'bus', 'trailer',\n            'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'\n        ]),\n    dict(\n        type='Collect3D',\n        keys=[\n            'img_inputs', 'gt_depth', 'voxel_semantics', 'mask_lidar',\n            'mask_camera'\n        ])\n]\ntest_pipeline = [\n    dict(\n        type='PrepareImageInputs',\n        data_config=dict(\n            cams=[\n                'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT',\n                'CAM_BACK_LEFT', 'CAM_BACK', 'CAM_BACK_RIGHT'\n            ],\n            Ncams=6,\n            input_size=(256, 704),\n            src_size=(900, 1600),\n            resize=(-0.06, 0.11),\n            rot=(-5.4, 5.4),\n            flip=True,\n            crop_h=(0.0, 0.0),\n            resize_test=0.0),\n        sequential=False),\n    dict(\n        type='LoadAnnotationsBEVDepth',\n        bda_aug_conf=dict(\n            rot_lim=(-0.0, 0.0),\n            scale_lim=(1.0, 1.0),\n            flip_dx_ratio=0.5,\n            flip_dy_ratio=0.5),\n        classes=[\n            'car', 'truck', 'construction_vehicle', 'bus', 'trailer',\n            'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'\n        ],\n        is_train=False),\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='LIDAR',\n        load_dim=5,\n        use_dim=5,\n        file_client_args=dict(backend='disk')),\n    dict(\n        type='MultiScaleFlipAug3D',\n        img_scale=(1333, 800),\n        pts_scale_ratio=1,\n        flip=False,\n        transforms=[\n            dict(\n                type='DefaultFormatBundle3D',\n                class_names=[\n                    'car', 'truck', 'construction_vehicle', 'bus', 'trailer',\n                    'barrier', 'motorcycle', 'bicycle', 'pedestrian',\n                    'traffic_cone'\n                ],\n                with_label=False),\n      
      dict(type='Collect3D', keys=['points', 'img_inputs'])\n        ])\n]\neval_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='LIDAR',\n        load_dim=5,\n        use_dim=5,\n        file_client_args=dict(backend='disk')),\n    dict(\n        type='LoadPointsFromMultiSweeps',\n        sweeps_num=10,\n        file_client_args=dict(backend='disk')),\n    dict(\n        type='DefaultFormatBundle3D',\n        class_names=[\n            'car', 'truck', 'trailer', 'bus', 'construction_vehicle',\n            'bicycle', 'motorcycle', 'pedestrian', 'traffic_cone', 'barrier'\n        ],\n        with_label=False),\n    dict(type='Collect3D', keys=['points'])\n]\ndata = dict(\n    samples_per_gpu=24,\n    workers_per_gpu=24,\n    train=dict(\n        type='NuScenesDatasetOccpancy',\n        data_root='data/nuscenes/',\n        ann_file='data/nuscenes/bevdetv2-nuscenes_infos_train.pkl',\n        pipeline=[\n            dict(\n                type='PrepareImageInputs',\n                is_train=True,\n                data_config=dict(\n                    cams=[\n                        'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT',\n                        'CAM_BACK_LEFT', 'CAM_BACK', 'CAM_BACK_RIGHT'\n                    ],\n                    Ncams=6,\n                    input_size=(256, 704),\n                    src_size=(900, 1600),\n                    resize=(-0.06, 0.11),\n                    rot=(-5.4, 5.4),\n                    flip=True,\n                    crop_h=(0.0, 0.0),\n                    resize_test=0.0),\n                sequential=False),\n            dict(\n                type='LoadAnnotationsBEVDepth',\n                bda_aug_conf=dict(\n                    rot_lim=(-0.0, 0.0),\n                    scale_lim=(1.0, 1.0),\n                    flip_dx_ratio=0.5,\n                    flip_dy_ratio=0.5),\n                classes=[\n                    'car', 'truck', 'construction_vehicle', 'bus', 'trailer',\n     
               'barrier', 'motorcycle', 'bicycle', 'pedestrian',\n                    'traffic_cone'\n                ],\n                is_train=True),\n            dict(type='LoadOccGTFromFile'),\n            dict(\n                type='LoadPointsFromFile',\n                coord_type='LIDAR',\n                load_dim=5,\n                use_dim=5,\n                file_client_args=dict(backend='disk')),\n            dict(\n                type='PointToMultiViewDepth',\n                downsample=1,\n                grid_config=dict(\n                    x=[-40, 40, 0.4],\n                    y=[-40, 40, 0.4],\n                    z=[-1, 5.4, 6.4],\n                    depth=[1.0, 45.0, 0.5])),\n            dict(\n                type='DefaultFormatBundle3D',\n                class_names=[\n                    'car', 'truck', 'construction_vehicle', 'bus', 'trailer',\n                    'barrier', 'motorcycle', 'bicycle', 'pedestrian',\n                    'traffic_cone'\n                ]),\n            dict(\n                type='Collect3D',\n                keys=[\n                    'img_inputs', 'gt_depth', 'voxel_semantics', 'mask_lidar',\n                    'mask_camera'\n                ])\n        ],\n        classes=[\n            'car', 'truck', 'construction_vehicle', 'bus', 'trailer',\n            'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'\n        ],\n        modality=dict(\n            use_lidar=False,\n            use_camera=True,\n            use_radar=False,\n            use_map=False,\n            use_external=False),\n        test_mode=False,\n        box_type_3d='LiDAR',\n        use_valid_flag=True,\n        stereo=False,\n        filter_empty_gt=False,\n        img_info_prototype='bevdet'),\n    val=dict(\n        type='NuScenesDatasetOccpancy',\n        data_root='data/nuscenes/',\n        ann_file='data/nuscenes/bevdetv2-nuscenes_infos_val.pkl',\n        pipeline=[\n            dict(\n                
type='PrepareImageInputs',\n                data_config=dict(\n                    cams=[\n                        'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT',\n                        'CAM_BACK_LEFT', 'CAM_BACK', 'CAM_BACK_RIGHT'\n                    ],\n                    Ncams=6,\n                    input_size=(256, 704),\n                    src_size=(900, 1600),\n                    resize=(-0.06, 0.11),\n                    rot=(-5.4, 5.4),\n                    flip=True,\n                    crop_h=(0.0, 0.0),\n                    resize_test=0.0),\n                sequential=False),\n            dict(\n                type='LoadAnnotationsBEVDepth',\n                bda_aug_conf=dict(\n                    rot_lim=(-0.0, 0.0),\n                    scale_lim=(1.0, 1.0),\n                    flip_dx_ratio=0.5,\n                    flip_dy_ratio=0.5),\n                classes=[\n                    'car', 'truck', 'construction_vehicle', 'bus', 'trailer',\n                    'barrier', 'motorcycle', 'bicycle', 'pedestrian',\n                    'traffic_cone'\n                ],\n                is_train=False),\n            dict(\n                type='LoadPointsFromFile',\n                coord_type='LIDAR',\n                load_dim=5,\n                use_dim=5,\n                file_client_args=dict(backend='disk')),\n            dict(\n                type='MultiScaleFlipAug3D',\n                img_scale=(1333, 800),\n                pts_scale_ratio=1,\n                flip=False,\n                transforms=[\n                    dict(\n                        type='DefaultFormatBundle3D',\n                        class_names=[\n                            'car', 'truck', 'construction_vehicle', 'bus',\n                            'trailer', 'barrier', 'motorcycle', 'bicycle',\n                            'pedestrian', 'traffic_cone'\n                        ],\n                        with_label=False),\n                    
dict(type='Collect3D', keys=['points', 'img_inputs'])\n                ])\n        ],\n        classes=[\n            'car', 'truck', 'construction_vehicle', 'bus', 'trailer',\n            'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'\n        ],\n        modality=dict(\n            use_lidar=False,\n            use_camera=True,\n            use_radar=False,\n            use_map=False,\n            use_external=False),\n        test_mode=True,\n        box_type_3d='LiDAR',\n        stereo=False,\n        filter_empty_gt=False,\n        img_info_prototype='bevdet'),\n    test=dict(\n        type='NuScenesDatasetOccpancy',\n        data_root='data/nuscenes/',\n        ann_file='data/nuscenes/bevdetv2-nuscenes_infos_val.pkl',\n        pipeline=[\n            dict(\n                type='PrepareImageInputs',\n                data_config=dict(\n                    cams=[\n                        'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT',\n                        'CAM_BACK_LEFT', 'CAM_BACK', 'CAM_BACK_RIGHT'\n                    ],\n                    Ncams=6,\n                    input_size=(256, 704),\n                    src_size=(900, 1600),\n                    resize=(-0.06, 0.11),\n                    rot=(-5.4, 5.4),\n                    flip=True,\n                    crop_h=(0.0, 0.0),\n                    resize_test=0.0),\n                sequential=False),\n            dict(\n                type='LoadAnnotationsBEVDepth',\n                bda_aug_conf=dict(\n                    rot_lim=(-0.0, 0.0),\n                    scale_lim=(1.0, 1.0),\n                    flip_dx_ratio=0.5,\n                    flip_dy_ratio=0.5),\n                classes=[\n                    'car', 'truck', 'construction_vehicle', 'bus', 'trailer',\n                    'barrier', 'motorcycle', 'bicycle', 'pedestrian',\n                    'traffic_cone'\n                ],\n                is_train=False),\n            dict(\n                
type='LoadPointsFromFile',\n                coord_type='LIDAR',\n                load_dim=5,\n                use_dim=5,\n                file_client_args=dict(backend='disk')),\n            dict(\n                type='MultiScaleFlipAug3D',\n                img_scale=(1333, 800),\n                pts_scale_ratio=1,\n                flip=False,\n                transforms=[\n                    dict(\n                        type='DefaultFormatBundle3D',\n                        class_names=[\n                            'car', 'truck', 'construction_vehicle', 'bus',\n                            'trailer', 'barrier', 'motorcycle', 'bicycle',\n                            'pedestrian', 'traffic_cone'\n                        ],\n                        with_label=False),\n                    dict(type='Collect3D', keys=['points', 'img_inputs'])\n                ])\n        ],\n        classes=[\n            'car', 'truck', 'construction_vehicle', 'bus', 'trailer',\n            'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'\n        ],\n        modality=dict(\n            use_lidar=False,\n            use_camera=True,\n            use_radar=False,\n            use_map=False,\n            use_external=False),\n        test_mode=True,\n        box_type_3d='LiDAR',\n        stereo=False,\n        filter_empty_gt=False,\n        img_info_prototype='bevdet'))\nevaluation = dict(\n    interval=1,\n    pipeline=[\n        dict(\n            type='PrepareImageInputs',\n            data_config=dict(\n                cams=[\n                    'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT',\n                    'CAM_BACK_LEFT', 'CAM_BACK', 'CAM_BACK_RIGHT'\n                ],\n                Ncams=6,\n                input_size=(256, 704),\n                src_size=(900, 1600),\n                resize=(-0.06, 0.11),\n                rot=(-5.4, 5.4),\n                flip=True,\n                crop_h=(0.0, 0.0),\n                resize_test=0.0),\n         
   sequential=False),\n        dict(\n            type='LoadAnnotationsBEVDepth',\n            bda_aug_conf=dict(\n                rot_lim=(-0.0, 0.0),\n                scale_lim=(1.0, 1.0),\n                flip_dx_ratio=0.5,\n                flip_dy_ratio=0.5),\n            classes=[\n                'car', 'truck', 'construction_vehicle', 'bus', 'trailer',\n                'barrier', 'motorcycle', 'bicycle', 'pedestrian',\n                'traffic_cone'\n            ],\n            is_train=False),\n        dict(\n            type='LoadPointsFromFile',\n            coord_type='LIDAR',\n            load_dim=5,\n            use_dim=5,\n            file_client_args=dict(backend='disk')),\n        dict(\n            type='MultiScaleFlipAug3D',\n            img_scale=(1333, 800),\n            pts_scale_ratio=1,\n            flip=False,\n            transforms=[\n                dict(\n                    type='DefaultFormatBundle3D',\n                    class_names=[\n                        'car', 'truck', 'construction_vehicle', 'bus',\n                        'trailer', 'barrier', 'motorcycle', 'bicycle',\n                        'pedestrian', 'traffic_cone'\n                    ],\n                    with_label=False),\n                dict(type='Collect3D', keys=['points', 'img_inputs'])\n            ])\n    ],\n    start=20)\ncheckpoint_config = dict(interval=1, max_keep_ckpts=5)\nlog_config = dict(\n    interval=1,\n    hooks=[dict(type='TextLoggerHook'),\n           dict(type='TensorboardLoggerHook')])\ndist_params = dict(backend='nccl')\nlog_level = 'INFO'\nwork_dir = './work_dirs/flashocc-r50'\nload_from = 'ckpts/bevdet-r50-cbgs.pth'\nresume_from = None\nworkflow = [('train', 1)]\nopencv_num_threads = 0\nmp_start_method = 'fork'\nplugin = True\nplugin_dir = 'projects/mmdet3d_plugin/'\ndata_config = dict(\n    cams=[\n        'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT', 'CAM_BACK_LEFT',\n        'CAM_BACK', 'CAM_BACK_RIGHT'\n    ],\n    Ncams=6,\n    
input_size=(256, 704),\n    src_size=(900, 1600),\n    resize=(-0.06, 0.11),\n    rot=(-5.4, 5.4),\n    flip=True,\n    crop_h=(0.0, 0.0),\n    resize_test=0.0)\ngrid_config = dict(\n    x=[-40, 40, 0.4],\n    y=[-40, 40, 0.4],\n    z=[-1, 5.4, 6.4],\n    depth=[1.0, 45.0, 0.5])\nvoxel_size = [0.1, 0.1, 0.2]\nnumC_Trans = 64\nmodel = dict(\n    type='BEVDetOCC',\n    img_backbone=dict(\n        type='ResNet',\n        depth=50,\n        num_stages=4,\n        out_indices=(2, 3),\n        frozen_stages=-1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        norm_eval=False,\n        with_cp=True,\n        style='pytorch'),\n    img_neck=dict(\n        type='CustomFPN',\n        in_channels=[1024, 2048],\n        out_channels=256,\n        num_outs=1,\n        start_level=0,\n        out_ids=[0]),\n    img_view_transformer=dict(\n        type='LSSViewTransformer',\n        grid_config=dict(\n            x=[-40, 40, 0.4],\n            y=[-40, 40, 0.4],\n            z=[-1, 5.4, 6.4],\n            depth=[1.0, 45.0, 0.5]),\n        input_size=(256, 704),\n        in_channels=256,\n        out_channels=64,\n        sid=False,\n        collapse_z=True,\n        downsample=16),\n    img_bev_encoder_backbone=dict(\n        type='CustomResNet', numC_input=64, num_channels=[128, 256, 512]),\n    img_bev_encoder_neck=dict(\n        type='FPN_LSS', in_channels=640, out_channels=256),\n    occ_head=dict(\n        type='BEVOCCHead2D',\n        in_dim=256,\n        out_dim=256,\n        Dz=16,\n        use_mask=True,\n        num_classes=18,\n        use_predicter=True,\n        class_balance=False,\n        loss_occ=dict(\n            type='CrossEntropyLoss',\n            use_sigmoid=False,\n            ignore_index=255,\n            loss_weight=1.0)))\nbda_aug_conf = dict(\n    rot_lim=(-0.0, 0.0),\n    scale_lim=(1.0, 1.0),\n    flip_dx_ratio=0.5,\n    flip_dy_ratio=0.5)\nshare_data_config = dict(\n    type='NuScenesDatasetOccpancy',\n    
data_root='data/nuscenes/',\n    classes=[\n        'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',\n        'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'\n    ],\n    modality=dict(\n        use_lidar=False,\n        use_camera=True,\n        use_radar=False,\n        use_map=False,\n        use_external=False),\n    stereo=False,\n    filter_empty_gt=False,\n    img_info_prototype='bevdet')\ntest_data_config = dict(\n    pipeline=[\n        dict(\n            type='PrepareImageInputs',\n            data_config=dict(\n                cams=[\n                    'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT',\n                    'CAM_BACK_LEFT', 'CAM_BACK', 'CAM_BACK_RIGHT'\n                ],\n                Ncams=6,\n                input_size=(256, 704),\n                src_size=(900, 1600),\n                resize=(-0.06, 0.11),\n                rot=(-5.4, 5.4),\n                flip=True,\n                crop_h=(0.0, 0.0),\n                resize_test=0.0),\n            sequential=False),\n        dict(\n            type='LoadAnnotationsBEVDepth',\n            bda_aug_conf=dict(\n                rot_lim=(-0.0, 0.0),\n                scale_lim=(1.0, 1.0),\n                flip_dx_ratio=0.5,\n                flip_dy_ratio=0.5),\n            classes=[\n                'car', 'truck', 'construction_vehicle', 'bus', 'trailer',\n                'barrier', 'motorcycle', 'bicycle', 'pedestrian',\n                'traffic_cone'\n            ],\n            is_train=False),\n        dict(\n            type='LoadPointsFromFile',\n            coord_type='LIDAR',\n            load_dim=5,\n            use_dim=5,\n            file_client_args=dict(backend='disk')),\n        dict(\n            type='MultiScaleFlipAug3D',\n            img_scale=(1333, 800),\n            pts_scale_ratio=1,\n            flip=False,\n            transforms=[\n                dict(\n                    type='DefaultFormatBundle3D',\n                    
class_names=[\n                        'car', 'truck', 'construction_vehicle', 'bus',\n                        'trailer', 'barrier', 'motorcycle', 'bicycle',\n                        'pedestrian', 'traffic_cone'\n                    ],\n                    with_label=False),\n                dict(type='Collect3D', keys=['points', 'img_inputs'])\n            ])\n    ],\n    ann_file='data/nuscenes/bevdetv2-nuscenes_infos_val.pkl',\n    type='NuScenesDatasetOccpancy',\n    data_root='data/nuscenes/',\n    classes=[\n        'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',\n        'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'\n    ],\n    modality=dict(\n        use_lidar=False,\n        use_camera=True,\n        use_radar=False,\n        use_map=False,\n        use_external=False),\n    stereo=False,\n    filter_empty_gt=False,\n    img_info_prototype='bevdet')\nkey = 'test'\noptimizer = dict(type='AdamW', lr=0.0001, weight_decay=0.01)\noptimizer_config = dict(grad_clip=dict(max_norm=5, norm_type=2))\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=200,\n    warmup_ratio=0.001,\n    step=[24])\nrunner = dict(type='EpochBasedRunner', max_epochs=24)\ncustom_hooks = [\n    dict(type='MEGVIIEMAHook', init_updates=10560, priority='NORMAL')\n]\ngpu_ids = range(0, 8)\n", "seed": 0, "exp_name": "flashocc-r50.py"}
--- a/docker-hub/FlashOCC/Flashocc/work_dirs/flashocc-r50/20260403_162651.log
+++ b/docker-hub/FlashOCC/Flashocc/work_dirs/flashocc-r50/20260403_162651.log
+2026-04-03 16:26:51,896 - mmdet - INFO - Environment info:
+------------------------------------------------------------
+sys.platform: linux
+Python: 3.10.12 (main, Aug 15 2025, 14:32:43) [GCC 11.4.0]
+CUDA available: True
+GPU 0,1,2,3,4,5,6,7: BW1000_H
+CUDA_HOME: /opt/dtk
+NVCC: Not Available
+GCC: x86_64-linux-gnu-gcc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0
+PyTorch: 2.5.1
+PyTorch compiling details: PyTorch built with:
+  - GCC 10.3
+  - C++ Version: 201703
+  - Intel(R) Math Kernel Library Version 2020.0.4 Product Build 20200917 for Intel(R) 64 architecture applications
+  - OpenMP 201511 (a.k.a. OpenMP 4.5)
+  - LAPACK is enabled (usually provided by MKL)
+  - NNPACK is enabled
+  - CPU capability usage: AVX512
+  - HIP Runtime 6.3.25521
+  - MIOpen 2.18.0
+  - Magma 2.8.0
+  - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, CXX_COMPILER=/opt/rh/gcc-toolset-10/root/usr/bin/c++, CXX_FLAGS= -D_GLIBCXX_USE_CXX11_ABI=1 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOCUPTI -DLIBKINETO_NOXPUPTI=ON -DUSE_FBGEMM -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wsuggest-override -Wno-psabi -Wno-error=old-style-cast -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Wno-stringop-overflow, FORCE_FALLBACK_CUDA_MPI=1, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, TORCH_VERSION=2.5.1, USE_CUDA=0, USE_CUDNN=OFF, USE_CUSPARSELT=OFF, USE_EXCEPTION_PTR=1, USE_GFLAGS=1, USE_GLOG=1, USE_GLOO=1, USE_MKL=ON, USE_MKLDNN=0, USE_MPI=1, USE_NCCL=1, USE_NNPACK=ON, USE_OPENMP=1, USE_ROCM=ON, USE_ROCM_KERNEL_ASSERT=OFF, 
+
+TorchVision: 0.20.1
+OpenCV: 4.12.0
+MMCV: 1.6.1
+MMCV Compiler: GCC 10.3
+MMCV CUDA Compiler: rocm not available
+MMDetection: 2.25.1
+MMSegmentation: 0.25.0
+MMDetection3D: 1.0.0rc4+
+spconv2.0: False
+------------------------------------------------------------
+
+2026-04-03 16:26:52,455 - mmdet - INFO - Distributed training: True
+2026-04-03 16:26:53,024 - mmdet - INFO - Config:
+point_cloud_range = [-51.2, -51.2, -5.0, 51.2, 51.2, 3.0]
+class_names = [
+    'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',
+    'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
+]
+dataset_type = 'NuScenesDatasetOccpancy'
+data_root = 'data/nuscenes/'
+input_modality = dict(
+    use_lidar=False,
+    use_camera=True,
+    use_radar=False,
+    use_map=False,
+    use_external=False)
+file_client_args = dict(backend='disk')
+train_pipeline = [
+    dict(
+        type='PrepareImageInputs',
+        is_train=True,
+        data_config=dict(
+            cams=[
+                'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT',
+                'CAM_BACK_LEFT', 'CAM_BACK', 'CAM_BACK_RIGHT'
+            ],
+            Ncams=6,
+            input_size=(256, 704),
+            src_size=(900, 1600),
+            resize=(-0.06, 0.11),
+            rot=(-5.4, 5.4),
+            flip=True,
+            crop_h=(0.0, 0.0),
+            resize_test=0.0),
+        sequential=False),
+    dict(
+        type='LoadAnnotationsBEVDepth',
+        bda_aug_conf=dict(
+            rot_lim=(-0.0, 0.0),
+            scale_lim=(1.0, 1.0),
+            flip_dx_ratio=0.5,
+            flip_dy_ratio=0.5),
+        classes=[
+            'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
+            'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
+        ],
+        is_train=True),
+    dict(type='LoadOccGTFromFile'),
+    dict(
+        type='LoadPointsFromFile',
+        coord_type='LIDAR',
+        load_dim=5,
+        use_dim=5,
+        file_client_args=dict(backend='disk')),
+    dict(
+        type='PointToMultiViewDepth',
+        downsample=1,
+        grid_config=dict(
+            x=[-40, 40, 0.4],
+            y=[-40, 40, 0.4],
+            z=[-1, 5.4, 6.4],
+            depth=[1.0, 45.0, 0.5])),
+    dict(
+        type='DefaultFormatBundle3D',
+        class_names=[
+            'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
+            'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
+        ]),
+    dict(
+        type='Collect3D',
+        keys=[
+            'img_inputs', 'gt_depth', 'voxel_semantics', 'mask_lidar',
+            'mask_camera'
+        ])
+]
+test_pipeline = [
+    dict(
+        type='PrepareImageInputs',
+        data_config=dict(
+            cams=[
+                'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT',
+                'CAM_BACK_LEFT', 'CAM_BACK', 'CAM_BACK_RIGHT'
+            ],
+            Ncams=6,
+            input_size=(256, 704),
+            src_size=(900, 1600),
+            resize=(-0.06, 0.11),
+            rot=(-5.4, 5.4),
+            flip=True,
+            crop_h=(0.0, 0.0),
+            resize_test=0.0),
+        sequential=False),
+    dict(
+        type='LoadAnnotationsBEVDepth',
+        bda_aug_conf=dict(
+            rot_lim=(-0.0, 0.0),
+            scale_lim=(1.0, 1.0),
+            flip_dx_ratio=0.5,
+            flip_dy_ratio=0.5),
+        classes=[
+            'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
+            'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
+        ],
+        is_train=False),
+    dict(
+        type='LoadPointsFromFile',
+        coord_type='LIDAR',
+        load_dim=5,
+        use_dim=5,
+        file_client_args=dict(backend='disk')),
+    dict(
+        type='MultiScaleFlipAug3D',
+        img_scale=(1333, 800),
+        pts_scale_ratio=1,
+        flip=False,
+        transforms=[
+            dict(
+                type='DefaultFormatBundle3D',
+                class_names=[
+                    'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
+                    'barrier', 'motorcycle', 'bicycle', 'pedestrian',
+                    'traffic_cone'
+                ],
+                with_label=False),
+            dict(type='Collect3D', keys=['points', 'img_inputs'])
+        ])
+]
+eval_pipeline = [
+    dict(
+        type='LoadPointsFromFile',
+        coord_type='LIDAR',
+        load_dim=5,
+        use_dim=5,
+        file_client_args=dict(backend='disk')),
+    dict(
+        type='LoadPointsFromMultiSweeps',
+        sweeps_num=10,
+        file_client_args=dict(backend='disk')),
+    dict(
+        type='DefaultFormatBundle3D',
+        class_names=[
+            'car', 'truck', 'trailer', 'bus', 'construction_vehicle',
+            'bicycle', 'motorcycle', 'pedestrian', 'traffic_cone', 'barrier'
+        ],
+        with_label=False),
+    dict(type='Collect3D', keys=['points'])
+]
+data = dict(
+    samples_per_gpu=24,
+    workers_per_gpu=24,
+    train=dict(
+        type='NuScenesDatasetOccpancy',
+        data_root='data/nuscenes/',
+        ann_file='data/nuscenes/bevdetv2-nuscenes_infos_train.pkl',
+        pipeline=[
+            dict(
+                type='PrepareImageInputs',
+                is_train=True,
+                data_config=dict(
+                    cams=[
+                        'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT',
+                        'CAM_BACK_LEFT', 'CAM_BACK', 'CAM_BACK_RIGHT'
+                    ],
+                    Ncams=6,
+                    input_size=(256, 704),
+                    src_size=(900, 1600),
+                    resize=(-0.06, 0.11),
+                    rot=(-5.4, 5.4),
+                    flip=True,
+                    crop_h=(0.0, 0.0),
+                    resize_test=0.0),
+                sequential=False),
+            dict(
+                type='LoadAnnotationsBEVDepth',
+                bda_aug_conf=dict(
+                    rot_lim=(-0.0, 0.0),
+                    scale_lim=(1.0, 1.0),
+                    flip_dx_ratio=0.5,
+                    flip_dy_ratio=0.5),
+                classes=[
+                    'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
+                    'barrier', 'motorcycle', 'bicycle', 'pedestrian',
+                    'traffic_cone'
+                ],
+                is_train=True),
+            dict(type='LoadOccGTFromFile'),
+            dict(
+                type='LoadPointsFromFile',
+                coord_type='LIDAR',
+                load_dim=5,
+                use_dim=5,
+                file_client_args=dict(backend='disk')),
+            dict(
+                type='PointToMultiViewDepth',
+                downsample=1,
+                grid_config=dict(
+                    x=[-40, 40, 0.4],
+                    y=[-40, 40, 0.4],
+                    z=[-1, 5.4, 6.4],
+                    depth=[1.0, 45.0, 0.5])),
+            dict(
+                type='DefaultFormatBundle3D',
+                class_names=[
+                    'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
+                    'barrier', 'motorcycle', 'bicycle', 'pedestrian',
+                    'traffic_cone'
+                ]),
+            dict(
+                type='Collect3D',
+                keys=[
+                    'img_inputs', 'gt_depth', 'voxel_semantics', 'mask_lidar',
+                    'mask_camera'
+                ])
+        ],
+        classes=[
+            'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
+            'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
+        ],
+        modality=dict(
+            use_lidar=False,
+            use_camera=True,
+            use_radar=False,
+            use_map=False,
+            use_external=False),
+        test_mode=False,
+        box_type_3d='LiDAR',
+        use_valid_flag=True,
+        stereo=False,
+        filter_empty_gt=False,
+        img_info_prototype='bevdet'),
+    val=dict(
+        type='NuScenesDatasetOccpancy',
+        data_root='data/nuscenes/',
+        ann_file='data/nuscenes/bevdetv2-nuscenes_infos_val.pkl',
+        pipeline=[
+            dict(
+                type='PrepareImageInputs',
+                data_config=dict(
+                    cams=[
+                        'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT',
+                        'CAM_BACK_LEFT', 'CAM_BACK', 'CAM_BACK_RIGHT'
+                    ],
+                    Ncams=6,
+                    input_size=(256, 704),
+                    src_size=(900, 1600),
+                    resize=(-0.06, 0.11),
+                    rot=(-5.4, 5.4),
+                    flip=True,
+                    crop_h=(0.0, 0.0),
+                    resize_test=0.0),
+                sequential=False),
+            dict(
+                type='LoadAnnotationsBEVDepth',
+                bda_aug_conf=dict(
+                    rot_lim=(-0.0, 0.0),
+                    scale_lim=(1.0, 1.0),
+                    flip_dx_ratio=0.5,
+                    flip_dy_ratio=0.5),
+                classes=[
+                    'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
+                    'barrier', 'motorcycle', 'bicycle', 'pedestrian',
+                    'traffic_cone'
+                ],
+                is_train=False),
+            dict(
+                type='LoadPointsFromFile',
+                coord_type='LIDAR',
+                load_dim=5,
+                use_dim=5,
+                file_client_args=dict(backend='disk')),
+            dict(
+                type='MultiScaleFlipAug3D',
+                img_scale=(1333, 800),
+                pts_scale_ratio=1,
+                flip=False,
+                transforms=[
+                    dict(
+                        type='DefaultFormatBundle3D',
+                        class_names=[
+                            'car', 'truck', 'construction_vehicle', 'bus',
+                            'trailer', 'barrier', 'motorcycle', 'bicycle',
+                            'pedestrian', 'traffic_cone'
+                        ],
+                        with_label=False),
+                    dict(type='Collect3D', keys=['points', 'img_inputs'])
+                ])
+        ],
+        classes=[
+            'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
+            'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
+        ],
+        modality=dict(
+            use_lidar=False,
+            use_camera=True,
+            use_radar=False,
+            use_map=False,
+            use_external=False),
+        test_mode=True,
+        box_type_3d='LiDAR',
+        stereo=False,
+        filter_empty_gt=False,
+        img_info_prototype='bevdet'),
+    test=dict(
+        type='NuScenesDatasetOccpancy',
+        data_root='data/nuscenes/',
+        ann_file='data/nuscenes/bevdetv2-nuscenes_infos_val.pkl',
+        pipeline=[
+            dict(
+                type='PrepareImageInputs',
+                data_config=dict(
+                    cams=[
+                        'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT',
+                        'CAM_BACK_LEFT', 'CAM_BACK', 'CAM_BACK_RIGHT'
+                    ],
+                    Ncams=6,
+                    input_size=(256, 704),
+                    src_size=(900, 1600),
+                    resize=(-0.06, 0.11),
+                    rot=(-5.4, 5.4),
+                    flip=True,
+                    crop_h=(0.0, 0.0),
+                    resize_test=0.0),
+                sequential=False),
+            dict(
+                type='LoadAnnotationsBEVDepth',
+                bda_aug_conf=dict(
+                    rot_lim=(-0.0, 0.0),
+                    scale_lim=(1.0, 1.0),
+                    flip_dx_ratio=0.5,
+                    flip_dy_ratio=0.5),
+                classes=[
+                    'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
+                    'barrier', 'motorcycle', 'bicycle', 'pedestrian',
+                    'traffic_cone'
+                ],
+                is_train=False),
+            dict(
+                type='LoadPointsFromFile',
+                coord_type='LIDAR',
+                load_dim=5,
+                use_dim=5,
+                file_client_args=dict(backend='disk')),
+            dict(
+                type='MultiScaleFlipAug3D',
+                img_scale=(1333, 800),
+                pts_scale_ratio=1,
+                flip=False,
+                transforms=[
+                    dict(
+                        type='DefaultFormatBundle3D',
+                        class_names=[
+                            'car', 'truck', 'construction_vehicle', 'bus',
+                            'trailer', 'barrier', 'motorcycle', 'bicycle',
+                            'pedestrian', 'traffic_cone'
+                        ],
+                        with_label=False),
+                    dict(type='Collect3D', keys=['points', 'img_inputs'])
+                ])
+        ],
+        classes=[
+            'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
+            'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
+        ],
+        modality=dict(
+            use_lidar=False,
+            use_camera=True,
+            use_radar=False,
+            use_map=False,
+            use_external=False),
+        test_mode=True,
+        box_type_3d='LiDAR',
+        stereo=False,
+        filter_empty_gt=False,
+        img_info_prototype='bevdet'))
+evaluation = dict(
+    interval=1,
+    pipeline=[
+        dict(
+            type='PrepareImageInputs',
+            data_config=dict(
+                cams=[
+                    'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT',
+                    'CAM_BACK_LEFT', 'CAM_BACK', 'CAM_BACK_RIGHT'
+                ],
+                Ncams=6,
+                input_size=(256, 704),
+                src_size=(900, 1600),
+                resize=(-0.06, 0.11),
+                rot=(-5.4, 5.4),
+                flip=True,
+                crop_h=(0.0, 0.0),
+                resize_test=0.0),
+            sequential=False),
+        dict(
+            type='LoadAnnotationsBEVDepth',
+            bda_aug_conf=dict(
+                rot_lim=(-0.0, 0.0),
+                scale_lim=(1.0, 1.0),
+                flip_dx_ratio=0.5,
+                flip_dy_ratio=0.5),
+            classes=[
+                'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
+                'barrier', 'motorcycle', 'bicycle', 'pedestrian',
+                'traffic_cone'
+            ],
+            is_train=False),
+        dict(
+            type='LoadPointsFromFile',
+            coord_type='LIDAR',
+            load_dim=5,
+            use_dim=5,
+            file_client_args=dict(backend='disk')),
+        dict(
+            type='MultiScaleFlipAug3D',
+            img_scale=(1333, 800),
+            pts_scale_ratio=1,
+            flip=False,
+            transforms=[
+                dict(
+                    type='DefaultFormatBundle3D',
+                    class_names=[
+                        'car', 'truck', 'construction_vehicle', 'bus',
+                        'trailer', 'barrier', 'motorcycle', 'bicycle',
+                        'pedestrian', 'traffic_cone'
+                    ],
+                    with_label=False),
+                dict(type='Collect3D', keys=['points', 'img_inputs'])
+            ])
+    ],
+    start=20)
+checkpoint_config = dict(interval=1, max_keep_ckpts=5)
+log_config = dict(
+    interval=1,
+    hooks=[dict(type='TextLoggerHook'),
+           dict(type='TensorboardLoggerHook')])
+dist_params = dict(backend='nccl')
+log_level = 'INFO'
+work_dir = './work_dirs/flashocc-r50'
+load_from = 'ckpts/bevdet-r50-cbgs.pth'
+resume_from = None
+workflow = [('train', 1)]
+opencv_num_threads = 0
+mp_start_method = 'fork'
+plugin = True
+plugin_dir = 'projects/mmdet3d_plugin/'
+data_config = dict(
+    cams=[
+        'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT', 'CAM_BACK_LEFT',
+        'CAM_BACK', 'CAM_BACK_RIGHT'
+    ],
+    Ncams=6,
+    input_size=(256, 704),
+    src_size=(900, 1600),
+    resize=(-0.06, 0.11),
+    rot=(-5.4, 5.4),
+    flip=True,
+    crop_h=(0.0, 0.0),
+    resize_test=0.0)
+grid_config = dict(
+    x=[-40, 40, 0.4],
+    y=[-40, 40, 0.4],
+    z=[-1, 5.4, 6.4],
+    depth=[1.0, 45.0, 0.5])
+voxel_size = [0.1, 0.1, 0.2]
+numC_Trans = 64
+model = dict(
+    type='BEVDetOCC',
+    img_backbone=dict(
+        type='ResNet',
+        depth=50,
+        num_stages=4,
+        out_indices=(2, 3),
+        frozen_stages=-1,
+        norm_cfg=dict(type='BN', requires_grad=True),
+        norm_eval=False,
+        with_cp=True,
+        style='pytorch'),
+    img_neck=dict(
+        type='CustomFPN',
+        in_channels=[1024, 2048],
+        out_channels=256,
+        num_outs=1,
+        start_level=0,
+        out_ids=[0]),
+    img_view_transformer=dict(
+        type='LSSViewTransformer',
+        grid_config=dict(
+            x=[-40, 40, 0.4],
+            y=[-40, 40, 0.4],
+            z=[-1, 5.4, 6.4],
+            depth=[1.0, 45.0, 0.5]),
+        input_size=(256, 704),
+        in_channels=256,
+        out_channels=64,
+        sid=False,
+        collapse_z=True,
+        downsample=16),
+    img_bev_encoder_backbone=dict(
+        type='CustomResNet', numC_input=64, num_channels=[128, 256, 512]),
+    img_bev_encoder_neck=dict(
+        type='FPN_LSS', in_channels=640, out_channels=256),
+    occ_head=dict(
+        type='BEVOCCHead2D',
+        in_dim=256,
+        out_dim=256,
+        Dz=16,
+        use_mask=True,
+        num_classes=18,
+        use_predicter=True,
+        class_balance=False,
+        loss_occ=dict(
+            type='CrossEntropyLoss',
+            use_sigmoid=False,
+            ignore_index=255,
+            loss_weight=1.0)))
+bda_aug_conf = dict(
+    rot_lim=(-0.0, 0.0),
+    scale_lim=(1.0, 1.0),
+    flip_dx_ratio=0.5,
+    flip_dy_ratio=0.5)
+share_data_config = dict(
+    type='NuScenesDatasetOccpancy',
+    data_root='data/nuscenes/',
+    classes=[
+        'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',
+        'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
+    ],
+    modality=dict(
+        use_lidar=False,
+        use_camera=True,
+        use_radar=False,
+        use_map=False,
+        use_external=False),
+    stereo=False,
+    filter_empty_gt=False,
+    img_info_prototype='bevdet')
+test_data_config = dict(
+    pipeline=[
+        dict(
+            type='PrepareImageInputs',
+            data_config=dict(
+                cams=[
+                    'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT',
+                    'CAM_BACK_LEFT', 'CAM_BACK', 'CAM_BACK_RIGHT'
+                ],
+                Ncams=6,
+                input_size=(256, 704),
+                src_size=(900, 1600),
+                resize=(-0.06, 0.11),
+                rot=(-5.4, 5.4),
+                flip=True,
+                crop_h=(0.0, 0.0),
+                resize_test=0.0),
+            sequential=False),
+        dict(
+            type='LoadAnnotationsBEVDepth',
+            bda_aug_conf=dict(
+                rot_lim=(-0.0, 0.0),
+                scale_lim=(1.0, 1.0),
+                flip_dx_ratio=0.5,
+                flip_dy_ratio=0.5),
+            classes=[
+                'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
+                'barrier', 'motorcycle', 'bicycle', 'pedestrian',
+                'traffic_cone'
+            ],
+            is_train=False),
+        dict(
+            type='LoadPointsFromFile',
+            coord_type='LIDAR',
+            load_dim=5,
+            use_dim=5,
+            file_client_args=dict(backend='disk')),
+        dict(
+            type='MultiScaleFlipAug3D',
+            img_scale=(1333, 800),
+            pts_scale_ratio=1,
+            flip=False,
+            transforms=[
+                dict(
+                    type='DefaultFormatBundle3D',
+                    class_names=[
+                        'car', 'truck', 'construction_vehicle', 'bus',
+                        'trailer', 'barrier', 'motorcycle', 'bicycle',
+                        'pedestrian', 'traffic_cone'
+                    ],
+                    with_label=False),
+                dict(type='Collect3D', keys=['points', 'img_inputs'])
+            ])
+    ],
+    ann_file='data/nuscenes/bevdetv2-nuscenes_infos_val.pkl',
+    type='NuScenesDatasetOccpancy',
+    data_root='data/nuscenes/',
+    classes=[
+        'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',
+        'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
+    ],
+    modality=dict(
+        use_lidar=False,
+        use_camera=True,
+        use_radar=False,
+        use_map=False,
+        use_external=False),
+    stereo=False,
+    filter_empty_gt=False,
+    img_info_prototype='bevdet')
+key = 'test'
+optimizer = dict(type='AdamW', lr=0.0001, weight_decay=0.01)
+optimizer_config = dict(grad_clip=dict(max_norm=5, norm_type=2))
+lr_config = dict(
+    policy='step',
+    warmup='linear',
+    warmup_iters=200,
+    warmup_ratio=0.001,
+    step=[24])
+runner = dict(type='EpochBasedRunner', max_epochs=24)
+custom_hooks = [
+    dict(type='MEGVIIEMAHook', init_updates=10560, priority='NORMAL')
+]
+gpu_ids = range(0, 8)
+
+2026-04-03 16:26:53,024 - mmdet - INFO - Set random seed to 0, deterministic: False
+2026-04-03 16:26:53,275 - mmdet - INFO - initialize ResNet with init_cfg [{'type': 'Kaiming', 'layer': 'Conv2d'}, {'type': 'Constant', 'val': 1, 'layer': ['_BatchNorm', 'GroupNorm']}]
+2026-04-03 16:26:53,383 - mmdet - INFO - initialize Bottleneck with init_cfg {'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
+2026-04-03 16:26:53,383 - mmdet - INFO - initialize Bottleneck with init_cfg {'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
+2026-04-03 16:26:53,383 - mmdet - INFO - initialize Bottleneck with init_cfg {'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
+2026-04-03 16:26:53,384 - mmdet - INFO - initialize Bottleneck with init_cfg {'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
+2026-04-03 16:26:53,385 - mmdet - INFO - initialize Bottleneck with init_cfg {'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
+2026-04-03 16:26:53,385 - mmdet - INFO - initialize Bottleneck with init_cfg {'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
+2026-04-03 16:26:53,386 - mmdet - INFO - initialize Bottleneck with init_cfg {'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
+2026-04-03 16:26:53,387 - mmdet - INFO - initialize Bottleneck with init_cfg {'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
+2026-04-03 16:26:53,388 - mmdet - INFO - initialize Bottleneck with init_cfg {'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
+2026-04-03 16:26:53,388 - mmdet - INFO - initialize Bottleneck with init_cfg {'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
+2026-04-03 16:26:53,389 - mmdet - INFO - initialize Bottleneck with init_cfg {'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
+2026-04-03 16:26:53,390 - mmdet - INFO - initialize Bottleneck with init_cfg {'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
+2026-04-03 16:26:53,391 - mmdet - INFO - initialize Bottleneck with init_cfg {'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
+2026-04-03 16:26:53,393 - mmdet - INFO - initialize Bottleneck with init_cfg {'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
+2026-04-03 16:26:53,396 - mmdet - INFO - initialize Bottleneck with init_cfg {'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
+2026-04-03 16:26:53,399 - mmdet - INFO - initialize Bottleneck with init_cfg {'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
+2026-04-03 16:26:53,410 - mmdet - INFO - initialize CustomFPN with init_cfg {'type': 'Xavier', 'layer': 'Conv2d', 'distribution': 'uniform'}
+Name of parameter - Initialization information
+
+img_backbone.conv1.weight - torch.Size([64, 3, 7, 7]): 
+KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 
+
+img_backbone.bn1.weight - torch.Size([64]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.bn1.bias - torch.Size([64]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer1.0.conv1.weight - torch.Size([64, 64, 1, 1]): 
+KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 
+
+img_backbone.layer1.0.bn1.weight - torch.Size([64]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer1.0.bn1.bias - torch.Size([64]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer1.0.conv2.weight - torch.Size([64, 64, 3, 3]): 
+KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 
+
+img_backbone.layer1.0.bn2.weight - torch.Size([64]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer1.0.bn2.bias - torch.Size([64]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer1.0.conv3.weight - torch.Size([256, 64, 1, 1]): 
+KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 
+
+img_backbone.layer1.0.bn3.weight - torch.Size([256]): 
+ConstantInit: val=0, bias=0 
+
+img_backbone.layer1.0.bn3.bias - torch.Size([256]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer1.0.downsample.0.weight - torch.Size([256, 64, 1, 1]): 
+KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 
+
+img_backbone.layer1.0.downsample.1.weight - torch.Size([256]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer1.0.downsample.1.bias - torch.Size([256]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer1.1.conv1.weight - torch.Size([64, 256, 1, 1]): 
+KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 
+
+img_backbone.layer1.1.bn1.weight - torch.Size([64]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer1.1.bn1.bias - torch.Size([64]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer1.1.conv2.weight - torch.Size([64, 64, 3, 3]): 
+KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 
+
+img_backbone.layer1.1.bn2.weight - torch.Size([64]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer1.1.bn2.bias - torch.Size([64]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer1.1.conv3.weight - torch.Size([256, 64, 1, 1]): 
+KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 
+
+img_backbone.layer1.1.bn3.weight - torch.Size([256]): 
+ConstantInit: val=0, bias=0 
+
+img_backbone.layer1.1.bn3.bias - torch.Size([256]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer1.2.conv1.weight - torch.Size([64, 256, 1, 1]): 
+KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 
+
+img_backbone.layer1.2.bn1.weight - torch.Size([64]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer1.2.bn1.bias - torch.Size([64]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer1.2.conv2.weight - torch.Size([64, 64, 3, 3]): 
+KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 
+
+img_backbone.layer1.2.bn2.weight - torch.Size([64]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer1.2.bn2.bias - torch.Size([64]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer1.2.conv3.weight - torch.Size([256, 64, 1, 1]): 
+KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 
+
+img_backbone.layer1.2.bn3.weight - torch.Size([256]): 
+ConstantInit: val=0, bias=0 
+
+img_backbone.layer1.2.bn3.bias - torch.Size([256]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer2.0.conv1.weight - torch.Size([128, 256, 1, 1]): 
+KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 
+
+img_backbone.layer2.0.bn1.weight - torch.Size([128]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer2.0.bn1.bias - torch.Size([128]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer2.0.conv2.weight - torch.Size([128, 128, 3, 3]): 
+KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 
+
+img_backbone.layer2.0.bn2.weight - torch.Size([128]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer2.0.bn2.bias - torch.Size([128]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer2.0.conv3.weight - torch.Size([512, 128, 1, 1]): 
+KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 
+
+img_backbone.layer2.0.bn3.weight - torch.Size([512]): 
+ConstantInit: val=0, bias=0 
+
+img_backbone.layer2.0.bn3.bias - torch.Size([512]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer2.0.downsample.0.weight - torch.Size([512, 256, 1, 1]): 
+KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 
+
+img_backbone.layer2.0.downsample.1.weight - torch.Size([512]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer2.0.downsample.1.bias - torch.Size([512]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer2.1.conv1.weight - torch.Size([128, 512, 1, 1]): 
+KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 
+
+img_backbone.layer2.1.bn1.weight - torch.Size([128]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer2.1.bn1.bias - torch.Size([128]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer2.1.conv2.weight - torch.Size([128, 128, 3, 3]): 
+KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 
+
+img_backbone.layer2.1.bn2.weight - torch.Size([128]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer2.1.bn2.bias - torch.Size([128]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer2.1.conv3.weight - torch.Size([512, 128, 1, 1]): 
+KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 
+
+img_backbone.layer2.1.bn3.weight - torch.Size([512]): 
+ConstantInit: val=0, bias=0 
+
+img_backbone.layer2.1.bn3.bias - torch.Size([512]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer2.2.conv1.weight - torch.Size([128, 512, 1, 1]): 
+KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 
+
+img_backbone.layer2.2.bn1.weight - torch.Size([128]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer2.2.bn1.bias - torch.Size([128]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer2.2.conv2.weight - torch.Size([128, 128, 3, 3]): 
+KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 
+
+img_backbone.layer2.2.bn2.weight - torch.Size([128]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer2.2.bn2.bias - torch.Size([128]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer2.2.conv3.weight - torch.Size([512, 128, 1, 1]): 
+KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 
+
+img_backbone.layer2.2.bn3.weight - torch.Size([512]): 
+ConstantInit: val=0, bias=0 
+
+img_backbone.layer2.2.bn3.bias - torch.Size([512]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer2.3.conv1.weight - torch.Size([128, 512, 1, 1]): 
+KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 
+
+img_backbone.layer2.3.bn1.weight - torch.Size([128]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer2.3.bn1.bias - torch.Size([128]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer2.3.conv2.weight - torch.Size([128, 128, 3, 3]): 
+KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 
+
+img_backbone.layer2.3.bn2.weight - torch.Size([128]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer2.3.bn2.bias - torch.Size([128]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer2.3.conv3.weight - torch.Size([512, 128, 1, 1]): 
+KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 
+
+img_backbone.layer2.3.bn3.weight - torch.Size([512]): 
+ConstantInit: val=0, bias=0 
+
+img_backbone.layer2.3.bn3.bias - torch.Size([512]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer3.0.conv1.weight - torch.Size([256, 512, 1, 1]): 
+KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 
+
+img_backbone.layer3.0.bn1.weight - torch.Size([256]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer3.0.bn1.bias - torch.Size([256]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer3.0.conv2.weight - torch.Size([256, 256, 3, 3]): 
+KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 
+
+img_backbone.layer3.0.bn2.weight - torch.Size([256]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer3.0.bn2.bias - torch.Size([256]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer3.0.conv3.weight - torch.Size([1024, 256, 1, 1]): 
+KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 
+
+img_backbone.layer3.0.bn3.weight - torch.Size([1024]): 
+ConstantInit: val=0, bias=0 
+
+img_backbone.layer3.0.bn3.bias - torch.Size([1024]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer3.0.downsample.0.weight - torch.Size([1024, 512, 1, 1]): 
+KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 
+
+img_backbone.layer3.0.downsample.1.weight - torch.Size([1024]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer3.0.downsample.1.bias - torch.Size([1024]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer3.1.conv1.weight - torch.Size([256, 1024, 1, 1]): 
+KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 
+
+img_backbone.layer3.1.bn1.weight - torch.Size([256]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer3.1.bn1.bias - torch.Size([256]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer3.1.conv2.weight - torch.Size([256, 256, 3, 3]): 
+KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 
+
+img_backbone.layer3.1.bn2.weight - torch.Size([256]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer3.1.bn2.bias - torch.Size([256]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer3.1.conv3.weight - torch.Size([1024, 256, 1, 1]): 
+KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 
+
+img_backbone.layer3.1.bn3.weight - torch.Size([1024]): 
+ConstantInit: val=0, bias=0 
+
+img_backbone.layer3.1.bn3.bias - torch.Size([1024]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer3.2.conv1.weight - torch.Size([256, 1024, 1, 1]): 
+KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 
+
+img_backbone.layer3.2.bn1.weight - torch.Size([256]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer3.2.bn1.bias - torch.Size([256]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer3.2.conv2.weight - torch.Size([256, 256, 3, 3]): 
+KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 
+
+img_backbone.layer3.2.bn2.weight - torch.Size([256]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer3.2.bn2.bias - torch.Size([256]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer3.2.conv3.weight - torch.Size([1024, 256, 1, 1]): 
+KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 
+
+img_backbone.layer3.2.bn3.weight - torch.Size([1024]): 
+ConstantInit: val=0, bias=0 
+
+img_backbone.layer3.2.bn3.bias - torch.Size([1024]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer3.3.conv1.weight - torch.Size([256, 1024, 1, 1]): 
+KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 
+
+img_backbone.layer3.3.bn1.weight - torch.Size([256]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer3.3.bn1.bias - torch.Size([256]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer3.3.conv2.weight - torch.Size([256, 256, 3, 3]): 
+KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 
+
+img_backbone.layer3.3.bn2.weight - torch.Size([256]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer3.3.bn2.bias - torch.Size([256]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer3.3.conv3.weight - torch.Size([1024, 256, 1, 1]): 
+KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 
+
+img_backbone.layer3.3.bn3.weight - torch.Size([1024]): 
+ConstantInit: val=0, bias=0 
+
+img_backbone.layer3.3.bn3.bias - torch.Size([1024]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer3.4.conv1.weight - torch.Size([256, 1024, 1, 1]): 
+KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 
+
+img_backbone.layer3.4.bn1.weight - torch.Size([256]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer3.4.bn1.bias - torch.Size([256]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer3.4.conv2.weight - torch.Size([256, 256, 3, 3]): 
+KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 
+
+img_backbone.layer3.4.bn2.weight - torch.Size([256]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer3.4.bn2.bias - torch.Size([256]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer3.4.conv3.weight - torch.Size([1024, 256, 1, 1]): 
+KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 
+
+img_backbone.layer3.4.bn3.weight - torch.Size([1024]): 
+ConstantInit: val=0, bias=0 
+
+img_backbone.layer3.4.bn3.bias - torch.Size([1024]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer3.5.conv1.weight - torch.Size([256, 1024, 1, 1]): 
+KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 
+
+img_backbone.layer3.5.bn1.weight - torch.Size([256]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer3.5.bn1.bias - torch.Size([256]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer3.5.conv2.weight - torch.Size([256, 256, 3, 3]): 
+KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 
+
+img_backbone.layer3.5.bn2.weight - torch.Size([256]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer3.5.bn2.bias - torch.Size([256]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer3.5.conv3.weight - torch.Size([1024, 256, 1, 1]): 
+KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 
+
+img_backbone.layer3.5.bn3.weight - torch.Size([1024]): 
+ConstantInit: val=0, bias=0 
+
+img_backbone.layer3.5.bn3.bias - torch.Size([1024]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer4.0.conv1.weight - torch.Size([512, 1024, 1, 1]): 
+KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 
+
+img_backbone.layer4.0.bn1.weight - torch.Size([512]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer4.0.bn1.bias - torch.Size([512]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer4.0.conv2.weight - torch.Size([512, 512, 3, 3]): 
+KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 
+
+img_backbone.layer4.0.bn2.weight - torch.Size([512]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer4.0.bn2.bias - torch.Size([512]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer4.0.conv3.weight - torch.Size([2048, 512, 1, 1]): 
+KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 
+
+img_backbone.layer4.0.bn3.weight - torch.Size([2048]): 
+ConstantInit: val=0, bias=0 
+
+img_backbone.layer4.0.bn3.bias - torch.Size([2048]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer4.0.downsample.0.weight - torch.Size([2048, 1024, 1, 1]): 
+KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 
+
+img_backbone.layer4.0.downsample.1.weight - torch.Size([2048]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer4.0.downsample.1.bias - torch.Size([2048]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer4.1.conv1.weight - torch.Size([512, 2048, 1, 1]): 
+KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 
+
+img_backbone.layer4.1.bn1.weight - torch.Size([512]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer4.1.bn1.bias - torch.Size([512]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer4.1.conv2.weight - torch.Size([512, 512, 3, 3]): 
+KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 
+
+img_backbone.layer4.1.bn2.weight - torch.Size([512]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer4.1.bn2.bias - torch.Size([512]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer4.1.conv3.weight - torch.Size([2048, 512, 1, 1]): 
+KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 
+
+img_backbone.layer4.1.bn3.weight - torch.Size([2048]): 
+ConstantInit: val=0, bias=0 
+
+img_backbone.layer4.1.bn3.bias - torch.Size([2048]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer4.2.conv1.weight - torch.Size([512, 2048, 1, 1]): 
+KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 
+
+img_backbone.layer4.2.bn1.weight - torch.Size([512]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer4.2.bn1.bias - torch.Size([512]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer4.2.conv2.weight - torch.Size([512, 512, 3, 3]): 
+KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 
+
+img_backbone.layer4.2.bn2.weight - torch.Size([512]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer4.2.bn2.bias - torch.Size([512]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_backbone.layer4.2.conv3.weight - torch.Size([2048, 512, 1, 1]): 
+KaimingInit: a=0, mode=fan_out, nonlinearity=relu, distribution =normal, bias=0 
+
+img_backbone.layer4.2.bn3.weight - torch.Size([2048]): 
+ConstantInit: val=0, bias=0 
+
+img_backbone.layer4.2.bn3.bias - torch.Size([2048]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_neck.lateral_convs.0.conv.weight - torch.Size([256, 1024, 1, 1]): 
+XavierInit: gain=1, distribution=uniform, bias=0 
+
+img_neck.lateral_convs.0.conv.bias - torch.Size([256]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_neck.lateral_convs.1.conv.weight - torch.Size([256, 2048, 1, 1]): 
+XavierInit: gain=1, distribution=uniform, bias=0 
+
+img_neck.lateral_convs.1.conv.bias - torch.Size([256]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_neck.fpn_convs.0.conv.weight - torch.Size([256, 256, 3, 3]): 
+XavierInit: gain=1, distribution=uniform, bias=0 
+
+img_neck.fpn_convs.0.conv.bias - torch.Size([256]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_view_transformer.depth_net.weight - torch.Size([152, 256, 1, 1]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_view_transformer.depth_net.bias - torch.Size([152]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_bev_encoder_backbone.layers.0.0.conv1.weight - torch.Size([128, 64, 3, 3]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_bev_encoder_backbone.layers.0.0.bn1.weight - torch.Size([128]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_bev_encoder_backbone.layers.0.0.bn1.bias - torch.Size([128]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_bev_encoder_backbone.layers.0.0.conv2.weight - torch.Size([128, 128, 3, 3]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_bev_encoder_backbone.layers.0.0.bn2.weight - torch.Size([128]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_bev_encoder_backbone.layers.0.0.bn2.bias - torch.Size([128]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_bev_encoder_backbone.layers.0.0.downsample.weight - torch.Size([128, 64, 3, 3]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_bev_encoder_backbone.layers.0.0.downsample.bias - torch.Size([128]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_bev_encoder_backbone.layers.0.1.conv1.weight - torch.Size([128, 128, 3, 3]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_bev_encoder_backbone.layers.0.1.bn1.weight - torch.Size([128]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_bev_encoder_backbone.layers.0.1.bn1.bias - torch.Size([128]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_bev_encoder_backbone.layers.0.1.conv2.weight - torch.Size([128, 128, 3, 3]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_bev_encoder_backbone.layers.0.1.bn2.weight - torch.Size([128]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_bev_encoder_backbone.layers.0.1.bn2.bias - torch.Size([128]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_bev_encoder_backbone.layers.1.0.conv1.weight - torch.Size([256, 128, 3, 3]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_bev_encoder_backbone.layers.1.0.bn1.weight - torch.Size([256]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_bev_encoder_backbone.layers.1.0.bn1.bias - torch.Size([256]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_bev_encoder_backbone.layers.1.0.conv2.weight - torch.Size([256, 256, 3, 3]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_bev_encoder_backbone.layers.1.0.bn2.weight - torch.Size([256]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_bev_encoder_backbone.layers.1.0.bn2.bias - torch.Size([256]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_bev_encoder_backbone.layers.1.0.downsample.weight - torch.Size([256, 128, 3, 3]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_bev_encoder_backbone.layers.1.0.downsample.bias - torch.Size([256]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_bev_encoder_backbone.layers.1.1.conv1.weight - torch.Size([256, 256, 3, 3]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_bev_encoder_backbone.layers.1.1.bn1.weight - torch.Size([256]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_bev_encoder_backbone.layers.1.1.bn1.bias - torch.Size([256]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_bev_encoder_backbone.layers.1.1.conv2.weight - torch.Size([256, 256, 3, 3]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_bev_encoder_backbone.layers.1.1.bn2.weight - torch.Size([256]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_bev_encoder_backbone.layers.1.1.bn2.bias - torch.Size([256]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_bev_encoder_backbone.layers.2.0.conv1.weight - torch.Size([512, 256, 3, 3]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_bev_encoder_backbone.layers.2.0.bn1.weight - torch.Size([512]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_bev_encoder_backbone.layers.2.0.bn1.bias - torch.Size([512]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_bev_encoder_backbone.layers.2.0.conv2.weight - torch.Size([512, 512, 3, 3]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_bev_encoder_backbone.layers.2.0.bn2.weight - torch.Size([512]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_bev_encoder_backbone.layers.2.0.bn2.bias - torch.Size([512]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_bev_encoder_backbone.layers.2.0.downsample.weight - torch.Size([512, 256, 3, 3]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_bev_encoder_backbone.layers.2.0.downsample.bias - torch.Size([512]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_bev_encoder_backbone.layers.2.1.conv1.weight - torch.Size([512, 512, 3, 3]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_bev_encoder_backbone.layers.2.1.bn1.weight - torch.Size([512]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_bev_encoder_backbone.layers.2.1.bn1.bias - torch.Size([512]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_bev_encoder_backbone.layers.2.1.conv2.weight - torch.Size([512, 512, 3, 3]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_bev_encoder_backbone.layers.2.1.bn2.weight - torch.Size([512]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_bev_encoder_backbone.layers.2.1.bn2.bias - torch.Size([512]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_bev_encoder_neck.conv.0.weight - torch.Size([512, 640, 3, 3]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_bev_encoder_neck.conv.1.weight - torch.Size([512]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_bev_encoder_neck.conv.1.bias - torch.Size([512]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_bev_encoder_neck.conv.3.weight - torch.Size([512, 512, 3, 3]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_bev_encoder_neck.conv.4.weight - torch.Size([512]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_bev_encoder_neck.conv.4.bias - torch.Size([512]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_bev_encoder_neck.up2.1.weight - torch.Size([256, 512, 3, 3]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_bev_encoder_neck.up2.2.weight - torch.Size([256]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_bev_encoder_neck.up2.2.bias - torch.Size([256]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_bev_encoder_neck.up2.4.weight - torch.Size([256, 256, 1, 1]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+img_bev_encoder_neck.up2.4.bias - torch.Size([256]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+occ_head.final_conv.conv.weight - torch.Size([256, 256, 3, 3]): 
+Initialized by user-defined `init_weights` in ConvModule  
+
+occ_head.final_conv.conv.bias - torch.Size([256]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+occ_head.predicter.0.weight - torch.Size([512, 256]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+occ_head.predicter.0.bias - torch.Size([512]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+occ_head.predicter.2.weight - torch.Size([288, 512]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+
+occ_head.predicter.2.bias - torch.Size([288]): 
+The value is the same before and after calling `init_weights` of BEVDetOCC  
+2026-04-03 16:26:53,421 - mmdet - INFO - Model:
+BEVDetOCC(
+  (img_backbone): ResNet(
+    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
+    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+    (relu): ReLU(inplace=True)
+    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
+    (layer1): ResLayer(
+      (0): Bottleneck(
+        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
+        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
+        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        (relu): ReLU(inplace=True)
+        (downsample): Sequential(
+          (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
+          (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        )
+      )
+      init_cfg={'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
+      (1): Bottleneck(
+        (conv1): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
+        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
+        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        (relu): ReLU(inplace=True)
+      )
+      init_cfg={'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
+      (2): Bottleneck(
+        (conv1): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
+        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
+        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        (relu): ReLU(inplace=True)
+      )
+      init_cfg={'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
+    )
+    (layer2): ResLayer(
+      (0): Bottleneck(
+        (conv1): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
+        (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
+        (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
+        (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        (relu): ReLU(inplace=True)
+        (downsample): Sequential(
+          (0): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False)
+          (1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        )
+      )
+      init_cfg={'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
+      (1): Bottleneck(
+        (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
+        (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+        (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
+        (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        (relu): ReLU(inplace=True)
+      )
+      init_cfg={'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
+      (2): Bottleneck(
+        (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
+        (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+        (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
+        (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        (relu): ReLU(inplace=True)
+      )
+      init_cfg={'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
+      (3): Bottleneck(
+        (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
+        (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+        (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
+        (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        (relu): ReLU(inplace=True)
+      )
+      init_cfg={'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
+    )
+    (layer3): ResLayer(
+      (0): Bottleneck(
+        (conv1): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
+        (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
+        (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
+        (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        (relu): ReLU(inplace=True)
+        (downsample): Sequential(
+          (0): Conv2d(512, 1024, kernel_size=(1, 1), stride=(2, 2), bias=False)
+          (1): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        )
+      )
+      init_cfg={'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
+      (1): Bottleneck(
+        (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
+        (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+        (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
+        (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        (relu): ReLU(inplace=True)
+      )
+      init_cfg={'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
+      (2): Bottleneck(
+        (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
+        (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+        (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
+        (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        (relu): ReLU(inplace=True)
+      )
+      init_cfg={'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
+      (3): Bottleneck(
+        (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
+        (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+        (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
+        (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        (relu): ReLU(inplace=True)
+      )
+      init_cfg={'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
+      (4): Bottleneck(
+        (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
+        (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+        (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
+        (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        (relu): ReLU(inplace=True)
+      )
+      init_cfg={'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
+      (5): Bottleneck(
+        (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
+        (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+        (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
+        (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        (relu): ReLU(inplace=True)
+      )
+      init_cfg={'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
+    )
+    (layer4): ResLayer(
+      (0): Bottleneck(
+        (conv1): Conv2d(1024, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
+        (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
+        (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)
+        (bn3): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        (relu): ReLU(inplace=True)
+        (downsample): Sequential(
+          (0): Conv2d(1024, 2048, kernel_size=(1, 1), stride=(2, 2), bias=False)
+          (1): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        )
+      )
+      init_cfg={'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
+      (1): Bottleneck(
+        (conv1): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
+        (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+        (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)
+        (bn3): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        (relu): ReLU(inplace=True)
+      )
+      init_cfg={'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
+      (2): Bottleneck(
+        (conv1): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
+        (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+        (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)
+        (bn3): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        (relu): ReLU(inplace=True)
+      )
+      init_cfg={'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
+    )
+  )
+  init_cfg=[{'type': 'Kaiming', 'layer': 'Conv2d'}, {'type': 'Constant', 'val': 1, 'layer': ['_BatchNorm', 'GroupNorm']}]
+  (img_neck): CustomFPN(
+    (lateral_convs): ModuleList(
+      (0): ConvModule(
+        (conv): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
+      )
+      (1): ConvModule(
+        (conv): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
+      )
+    )
+    (fpn_convs): ModuleList(
+      (0): ConvModule(
+        (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
+      )
+    )
+  )
+  init_cfg={'type': 'Xavier', 'layer': 'Conv2d', 'distribution': 'uniform'}
+  (img_view_transformer): LSSViewTransformer(
+    (depth_net): Conv2d(256, 152, kernel_size=(1, 1), stride=(1, 1))
+  )
+  (img_bev_encoder_backbone): CustomResNet(
+    (layers): Sequential(
+      (0): Sequential(
+        (0): BasicBlock(
+          (conv1): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
+          (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+          (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+          (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+          (relu): ReLU(inplace=True)
+          (downsample): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
+        )
+        (1): BasicBlock(
+          (conv1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+          (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+          (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+          (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+          (relu): ReLU(inplace=True)
+        )
+      )
+      (1): Sequential(
+        (0): BasicBlock(
+          (conv1): Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
+          (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+          (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+          (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+          (relu): ReLU(inplace=True)
+          (downsample): Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
+        )
+        (1): BasicBlock(
+          (conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+          (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+          (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+          (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+          (relu): ReLU(inplace=True)
+        )
+      )
+      (2): Sequential(
+        (0): BasicBlock(
+          (conv1): Conv2d(256, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
+          (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+          (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+          (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+          (relu): ReLU(inplace=True)
+          (downsample): Conv2d(256, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
+        )
+        (1): BasicBlock(
+          (conv1): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+          (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+          (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+          (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+          (relu): ReLU(inplace=True)
+        )
+      )
+    )
+  )
+  (img_bev_encoder_neck): FPN_LSS(
+    (up): Upsample(scale_factor=4.0, mode='bilinear')
+    (conv): Sequential(
+      (0): Conv2d(640, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+      (1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+      (2): ReLU(inplace=True)
+      (3): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+      (4): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+      (5): ReLU(inplace=True)
+    )
+    (up2): Sequential(
+      (0): Upsample(scale_factor=2.0, mode='bilinear')
+      (1): Conv2d(512, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+      (2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+      (3): ReLU(inplace=True)
+      (4): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
+    )
+  )
+  (occ_head): BEVOCCHead2D(
+    (final_conv): ConvModule(
+      (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
+      (activate): ReLU(inplace=True)
+    )
+    (predicter): Sequential(
+      (0): Linear(in_features=256, out_features=512, bias=True)
+      (1): Softplus(beta=1.0, threshold=20.0)
+      (2): Linear(in_features=512, out_features=288, bias=True)
+    )
+    (loss_occ): CrossEntropyLoss(avg_non_ignore=False)
+  )
+)
+2026-04-03 16:26:58,319 - mmdet - INFO - load checkpoint from local path: ckpts/bevdet-r50-cbgs.pth
+2026-04-03 16:26:58,420 - mmdet - WARNING - The model and loaded state dict do not match exactly
+
+size mismatch for img_view_transformer.depth_net.weight: copying a param with shape torch.Size([123, 256, 1, 1]) from checkpoint, the shape in current model is torch.Size([152, 256, 1, 1]).
+size mismatch for img_view_transformer.depth_net.bias: copying a param with shape torch.Size([123]) from checkpoint, the shape in current model is torch.Size([152]).
+unexpected key in source state_dict: pts_bbox_head.shared_conv.conv.weight, pts_bbox_head.shared_conv.bn.weight, pts_bbox_head.shared_conv.bn.bias, pts_bbox_head.shared_conv.bn.running_mean, pts_bbox_head.shared_conv.bn.running_var, pts_bbox_head.shared_conv.bn.num_batches_tracked, pts_bbox_head.task_heads.0.reg.0.conv.weight, pts_bbox_head.task_heads.0.reg.0.bn.weight, pts_bbox_head.task_heads.0.reg.0.bn.bias, pts_bbox_head.task_heads.0.reg.0.bn.running_mean, pts_bbox_head.task_heads.0.reg.0.bn.running_var, pts_bbox_head.task_heads.0.reg.0.bn.num_batches_tracked, pts_bbox_head.task_heads.0.reg.1.weight, pts_bbox_head.task_heads.0.reg.1.bias, pts_bbox_head.task_heads.0.height.0.conv.weight, pts_bbox_head.task_heads.0.height.0.bn.weight, pts_bbox_head.task_heads.0.height.0.bn.bias, pts_bbox_head.task_heads.0.height.0.bn.running_mean, pts_bbox_head.task_heads.0.height.0.bn.running_var, pts_bbox_head.task_heads.0.height.0.bn.num_batches_tracked, pts_bbox_head.task_heads.0.height.1.weight, pts_bbox_head.task_heads.0.height.1.bias, pts_bbox_head.task_heads.0.dim.0.conv.weight, pts_bbox_head.task_heads.0.dim.0.bn.weight, pts_bbox_head.task_heads.0.dim.0.bn.bias, pts_bbox_head.task_heads.0.dim.0.bn.running_mean, pts_bbox_head.task_heads.0.dim.0.bn.running_var, pts_bbox_head.task_heads.0.dim.0.bn.num_batches_tracked, pts_bbox_head.task_heads.0.dim.1.weight, pts_bbox_head.task_heads.0.dim.1.bias, pts_bbox_head.task_heads.0.rot.0.conv.weight, pts_bbox_head.task_heads.0.rot.0.bn.weight, pts_bbox_head.task_heads.0.rot.0.bn.bias, pts_bbox_head.task_heads.0.rot.0.bn.running_mean, pts_bbox_head.task_heads.0.rot.0.bn.running_var, pts_bbox_head.task_heads.0.rot.0.bn.num_batches_tracked, pts_bbox_head.task_heads.0.rot.1.weight, pts_bbox_head.task_heads.0.rot.1.bias, pts_bbox_head.task_heads.0.vel.0.conv.weight, pts_bbox_head.task_heads.0.vel.0.bn.weight, pts_bbox_head.task_heads.0.vel.0.bn.bias, pts_bbox_head.task_heads.0.vel.0.bn.running_mean, 
pts_bbox_head.task_heads.0.vel.0.bn.running_var, pts_bbox_head.task_heads.0.vel.0.bn.num_batches_tracked, pts_bbox_head.task_heads.0.vel.1.weight, pts_bbox_head.task_heads.0.vel.1.bias, pts_bbox_head.task_heads.0.heatmap.0.conv.weight, pts_bbox_head.task_heads.0.heatmap.0.bn.weight, pts_bbox_head.task_heads.0.heatmap.0.bn.bias, pts_bbox_head.task_heads.0.heatmap.0.bn.running_mean, pts_bbox_head.task_heads.0.heatmap.0.bn.running_var, pts_bbox_head.task_heads.0.heatmap.0.bn.num_batches_tracked, pts_bbox_head.task_heads.0.heatmap.1.weight, pts_bbox_head.task_heads.0.heatmap.1.bias
+
+missing keys in source state_dict: occ_head.final_conv.conv.weight, occ_head.final_conv.conv.bias, occ_head.predicter.0.weight, occ_head.predicter.0.bias, occ_head.predicter.2.weight, occ_head.predicter.2.bias
+
+2026-04-03 16:26:58,422 - mmdet - INFO - Start running, host: root@bw61, work_dir: /workspace/Flashocc/work_dirs/flashocc-r50
+2026-04-03 16:26:58,422 - mmdet - INFO - Hooks will be executed in the following order:
+before_run:
+(VERY_HIGH   ) StepLrUpdaterHook                  
+(NORMAL      ) CheckpointHook                     
+(NORMAL      ) MEGVIIEMAHook                      
+(VERY_LOW    ) TextLoggerHook                     
+(VERY_LOW    ) TensorboardLoggerHook              
+ -------------------- 
+before_train_epoch:
+(VERY_HIGH   ) StepLrUpdaterHook                  
+(NORMAL      ) DistSamplerSeedHook                
+(LOW         ) IterTimerHook                      
+(VERY_LOW    ) TextLoggerHook                     
+(VERY_LOW    ) TensorboardLoggerHook              
+ -------------------- 
+before_train_iter:
+(VERY_HIGH   ) StepLrUpdaterHook                  
+(LOW         ) IterTimerHook                      
+ -------------------- 
+after_train_iter:
+(ABOVE_NORMAL) OptimizerHook                      
+(NORMAL      ) CheckpointHook                     
+(NORMAL      ) MEGVIIEMAHook                      
+(LOW         ) IterTimerHook                      
+(VERY_LOW    ) TextLoggerHook                     
+(VERY_LOW    ) TensorboardLoggerHook              
+ -------------------- 
+after_train_epoch:
+(NORMAL      ) CheckpointHook                     
+(NORMAL      ) MEGVIIEMAHook                      
+(VERY_LOW    ) TextLoggerHook                     
+(VERY_LOW    ) TensorboardLoggerHook              
+ -------------------- 
+before_val_epoch:
+(NORMAL      ) DistSamplerSeedHook                
+(LOW         ) IterTimerHook                      
+(VERY_LOW    ) TextLoggerHook                     
+(VERY_LOW    ) TensorboardLoggerHook              
+ -------------------- 
+before_val_iter:
+(LOW         ) IterTimerHook                      
+ -------------------- 
+after_val_iter:
+(LOW         ) IterTimerHook                      
+ -------------------- 
+after_val_epoch:
+(VERY_LOW    ) TextLoggerHook                     
+(VERY_LOW    ) TensorboardLoggerHook              
+ -------------------- 
+after_run:
+(VERY_LOW    ) TextLoggerHook                     
+(VERY_LOW    ) TensorboardLoggerHook              
+ -------------------- 
+2026-04-03 16:26:58,423 - mmdet - INFO - workflow: [('train', 1)], max: 24 epochs
+2026-04-03 16:26:58,423 - mmdet - INFO - Checkpoints will be saved to /workspace/Flashocc/work_dirs/flashocc-r50 by HardDiskBackend.
+2026-04-03 16:37:58,297 - mmdet - INFO - Epoch [1][1/147]	lr: 1.000e-07, eta: 26 days, 22:22:17, time: 659.750, data_time: 15.432, memory: 32423, loss_occ: 3.0086, loss: 3.0086, grad_norm: 3.9001
+2026-04-03 16:38:03,001 - mmdet - INFO - Epoch [1][2/147]	lr: 5.995e-07, eta: 13 days, 13:23:57, time: 4.707, data_time: 0.005, memory: 32769, loss_occ: 3.0129, loss: 3.0129, grad_norm: 3.9387
+2026-04-03 16:38:04,134 - mmdet - INFO - Epoch [1][3/147]	lr: 1.099e-06, eta: 9 days, 1:14:27, time: 1.133, data_time: 0.003, memory: 32769, loss_occ: 3.0150, loss: 3.0150, grad_norm: 3.8985
+2026-04-03 16:38:05,261 - mmdet - INFO - Epoch [1][4/147]	lr: 1.599e-06, eta: 6 days, 19:09:36, time: 1.126, data_time: 0.002, memory: 32769, loss_occ: 3.0085, loss: 3.0085, grad_norm: 3.8741
+2026-04-03 16:38:06,388 - mmdet - INFO - Epoch [1][5/147]	lr: 2.098e-06, eta: 5 days, 10:42:41, time: 1.127, data_time: 0.003, memory: 32769, loss_occ: 3.0044, loss: 3.0044, grad_norm: 3.8639
+2026-04-03 16:38:07,513 - mmdet - INFO - Epoch [1][6/147]	lr: 2.597e-06, eta: 4 days, 13:04:44, time: 1.126, data_time: 0.003, memory: 32769, loss_occ: 3.0085, loss: 3.0085, grad_norm: 3.8423
+2026-04-03 16:38:08,643 - mmdet - INFO - Epoch [1][7/147]	lr: 3.097e-06, eta: 3 days, 21:37:37, time: 1.126, data_time: 0.003, memory: 32769, loss_occ: 3.0085, loss: 3.0085, grad_norm: 3.9473
+2026-04-03 16:38:09,771 - mmdet - INFO - Epoch [1][8/147]	lr: 3.597e-06, eta: 3 days, 10:02:19, time: 1.130, data_time: 0.006, memory: 32769, loss_occ: 3.0038, loss: 3.0038, grad_norm: 3.9215
+2026-04-03 16:38:10,898 - mmdet - INFO - Epoch [1][9/147]	lr: 4.096e-06, eta: 3 days, 1:01:30, time: 1.128, data_time: 0.004, memory: 32769, loss_occ: 2.9969, loss: 2.9969, grad_norm: 3.8659
+2026-04-03 16:38:12,026 - mmdet - INFO - Epoch [1][10/147]	lr: 4.596e-06, eta: 2 days, 17:48:50, time: 1.127, data_time: 0.003, memory: 32769, loss_occ: 2.9957, loss: 2.9957, grad_norm: 3.8820
--- a/docker-hub/FlashOCC/Flashocc/work_dirs/flashocc-r50/20260403_162651.log.json
+++ b/docker-hub/FlashOCC/Flashocc/work_dirs/flashocc-r50/20260403_162651.log.json
+{"env_info": "sys.platform: linux\nPython: 3.10.12 (main, Aug 15 2025, 14:32:43) [GCC 11.4.0]\nCUDA available: True\nGPU 0,1,2,3,4,5,6,7: BW1000_H\nCUDA_HOME: /opt/dtk\nNVCC: Not Available\nGCC: x86_64-linux-gnu-gcc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0\nPyTorch: 2.5.1\nPyTorch compiling details: PyTorch built with:\n  - GCC 10.3\n  - C++ Version: 201703\n  - Intel(R) Math Kernel Library Version 2020.0.4 Product Build 20200917 for Intel(R) 64 architecture applications\n  - OpenMP 201511 (a.k.a. OpenMP 4.5)\n  - LAPACK is enabled (usually provided by MKL)\n  - NNPACK is enabled\n  - CPU capability usage: AVX512\n  - HIP Runtime 6.3.25521\n  - MIOpen 2.18.0\n  - Magma 2.8.0\n  - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, CXX_COMPILER=/opt/rh/gcc-toolset-10/root/usr/bin/c++, CXX_FLAGS= -D_GLIBCXX_USE_CXX11_ABI=1 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOCUPTI -DLIBKINETO_NOXPUPTI=ON -DUSE_FBGEMM -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wsuggest-override -Wno-psabi -Wno-error=old-style-cast -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Wno-stringop-overflow, FORCE_FALLBACK_CUDA_MPI=1, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, TORCH_VERSION=2.5.1, USE_CUDA=0, USE_CUDNN=OFF, USE_CUSPARSELT=OFF, USE_EXCEPTION_PTR=1, USE_GFLAGS=1, USE_GLOG=1, USE_GLOO=1, USE_MKL=ON, USE_MKLDNN=0, USE_MPI=1, USE_NCCL=1, USE_NNPACK=ON, USE_OPENMP=1, USE_ROCM=ON, USE_ROCM_KERNEL_ASSERT=OFF, \n\nTorchVision: 0.20.1\nOpenCV: 4.12.0\nMMCV: 1.6.1\nMMCV Compiler: GCC 10.3\nMMCV CUDA Compiler: rocm not 
available\nMMDetection: 2.25.1\nMMSegmentation: 0.25.0\nMMDetection3D: 1.0.0rc4+\nspconv2.0: False", "config": "point_cloud_range = [-51.2, -51.2, -5.0, 51.2, 51.2, 3.0]\nclass_names = [\n    'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',\n    'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'\n]\ndataset_type = 'NuScenesDatasetOccpancy'\ndata_root = 'data/nuscenes/'\ninput_modality = dict(\n    use_lidar=False,\n    use_camera=True,\n    use_radar=False,\n    use_map=False,\n    use_external=False)\nfile_client_args = dict(backend='disk')\ntrain_pipeline = [\n    dict(\n        type='PrepareImageInputs',\n        is_train=True,\n        data_config=dict(\n            cams=[\n                'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT',\n                'CAM_BACK_LEFT', 'CAM_BACK', 'CAM_BACK_RIGHT'\n            ],\n            Ncams=6,\n            input_size=(256, 704),\n            src_size=(900, 1600),\n            resize=(-0.06, 0.11),\n            rot=(-5.4, 5.4),\n            flip=True,\n            crop_h=(0.0, 0.0),\n            resize_test=0.0),\n        sequential=False),\n    dict(\n        type='LoadAnnotationsBEVDepth',\n        bda_aug_conf=dict(\n            rot_lim=(-0.0, 0.0),\n            scale_lim=(1.0, 1.0),\n            flip_dx_ratio=0.5,\n            flip_dy_ratio=0.5),\n        classes=[\n            'car', 'truck', 'construction_vehicle', 'bus', 'trailer',\n            'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'\n        ],\n        is_train=True),\n    dict(type='LoadOccGTFromFile'),\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='LIDAR',\n        load_dim=5,\n        use_dim=5,\n        file_client_args=dict(backend='disk')),\n    dict(\n        type='PointToMultiViewDepth',\n        downsample=1,\n        grid_config=dict(\n            x=[-40, 40, 0.4],\n            y=[-40, 40, 0.4],\n            z=[-1, 5.4, 6.4],\n            depth=[1.0, 45.0, 0.5])),\n    dict(\n      
  type='DefaultFormatBundle3D',\n        class_names=[\n            'car', 'truck', 'construction_vehicle', 'bus', 'trailer',\n            'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'\n        ]),\n    dict(\n        type='Collect3D',\n        keys=[\n            'img_inputs', 'gt_depth', 'voxel_semantics', 'mask_lidar',\n            'mask_camera'\n        ])\n]\ntest_pipeline = [\n    dict(\n        type='PrepareImageInputs',\n        data_config=dict(\n            cams=[\n                'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT',\n                'CAM_BACK_LEFT', 'CAM_BACK', 'CAM_BACK_RIGHT'\n            ],\n            Ncams=6,\n            input_size=(256, 704),\n            src_size=(900, 1600),\n            resize=(-0.06, 0.11),\n            rot=(-5.4, 5.4),\n            flip=True,\n            crop_h=(0.0, 0.0),\n            resize_test=0.0),\n        sequential=False),\n    dict(\n        type='LoadAnnotationsBEVDepth',\n        bda_aug_conf=dict(\n            rot_lim=(-0.0, 0.0),\n            scale_lim=(1.0, 1.0),\n            flip_dx_ratio=0.5,\n            flip_dy_ratio=0.5),\n        classes=[\n            'car', 'truck', 'construction_vehicle', 'bus', 'trailer',\n            'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'\n        ],\n        is_train=False),\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='LIDAR',\n        load_dim=5,\n        use_dim=5,\n        file_client_args=dict(backend='disk')),\n    dict(\n        type='MultiScaleFlipAug3D',\n        img_scale=(1333, 800),\n        pts_scale_ratio=1,\n        flip=False,\n        transforms=[\n            dict(\n                type='DefaultFormatBundle3D',\n                class_names=[\n                    'car', 'truck', 'construction_vehicle', 'bus', 'trailer',\n                    'barrier', 'motorcycle', 'bicycle', 'pedestrian',\n                    'traffic_cone'\n                ],\n                with_label=False),\n      
      dict(type='Collect3D', keys=['points', 'img_inputs'])\n        ])\n]\neval_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='LIDAR',\n        load_dim=5,\n        use_dim=5,\n        file_client_args=dict(backend='disk')),\n    dict(\n        type='LoadPointsFromMultiSweeps',\n        sweeps_num=10,\n        file_client_args=dict(backend='disk')),\n    dict(\n        type='DefaultFormatBundle3D',\n        class_names=[\n            'car', 'truck', 'trailer', 'bus', 'construction_vehicle',\n            'bicycle', 'motorcycle', 'pedestrian', 'traffic_cone', 'barrier'\n        ],\n        with_label=False),\n    dict(type='Collect3D', keys=['points'])\n]\ndata = dict(\n    samples_per_gpu=24,\n    workers_per_gpu=24,\n    train=dict(\n        type='NuScenesDatasetOccpancy',\n        data_root='data/nuscenes/',\n        ann_file='data/nuscenes/bevdetv2-nuscenes_infos_train.pkl',\n        pipeline=[\n            dict(\n                type='PrepareImageInputs',\n                is_train=True,\n                data_config=dict(\n                    cams=[\n                        'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT',\n                        'CAM_BACK_LEFT', 'CAM_BACK', 'CAM_BACK_RIGHT'\n                    ],\n                    Ncams=6,\n                    input_size=(256, 704),\n                    src_size=(900, 1600),\n                    resize=(-0.06, 0.11),\n                    rot=(-5.4, 5.4),\n                    flip=True,\n                    crop_h=(0.0, 0.0),\n                    resize_test=0.0),\n                sequential=False),\n            dict(\n                type='LoadAnnotationsBEVDepth',\n                bda_aug_conf=dict(\n                    rot_lim=(-0.0, 0.0),\n                    scale_lim=(1.0, 1.0),\n                    flip_dx_ratio=0.5,\n                    flip_dy_ratio=0.5),\n                classes=[\n                    'car', 'truck', 'construction_vehicle', 'bus', 'trailer',\n     
               'barrier', 'motorcycle', 'bicycle', 'pedestrian',\n                    'traffic_cone'\n                ],\n                is_train=True),\n            dict(type='LoadOccGTFromFile'),\n            dict(\n                type='LoadPointsFromFile',\n                coord_type='LIDAR',\n                load_dim=5,\n                use_dim=5,\n                file_client_args=dict(backend='disk')),\n            dict(\n                type='PointToMultiViewDepth',\n                downsample=1,\n                grid_config=dict(\n                    x=[-40, 40, 0.4],\n                    y=[-40, 40, 0.4],\n                    z=[-1, 5.4, 6.4],\n                    depth=[1.0, 45.0, 0.5])),\n            dict(\n                type='DefaultFormatBundle3D',\n                class_names=[\n                    'car', 'truck', 'construction_vehicle', 'bus', 'trailer',\n                    'barrier', 'motorcycle', 'bicycle', 'pedestrian',\n                    'traffic_cone'\n                ]),\n            dict(\n                type='Collect3D',\n                keys=[\n                    'img_inputs', 'gt_depth', 'voxel_semantics', 'mask_lidar',\n                    'mask_camera'\n                ])\n        ],\n        classes=[\n            'car', 'truck', 'construction_vehicle', 'bus', 'trailer',\n            'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'\n        ],\n        modality=dict(\n            use_lidar=False,\n            use_camera=True,\n            use_radar=False,\n            use_map=False,\n            use_external=False),\n        test_mode=False,\n        box_type_3d='LiDAR',\n        use_valid_flag=True,\n        stereo=False,\n        filter_empty_gt=False,\n        img_info_prototype='bevdet'),\n    val=dict(\n        type='NuScenesDatasetOccpancy',\n        data_root='data/nuscenes/',\n        ann_file='data/nuscenes/bevdetv2-nuscenes_infos_val.pkl',\n        pipeline=[\n            dict(\n                
type='PrepareImageInputs',\n                data_config=dict(\n                    cams=[\n                        'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT',\n                        'CAM_BACK_LEFT', 'CAM_BACK', 'CAM_BACK_RIGHT'\n                    ],\n                    Ncams=6,\n                    input_size=(256, 704),\n                    src_size=(900, 1600),\n                    resize=(-0.06, 0.11),\n                    rot=(-5.4, 5.4),\n                    flip=True,\n                    crop_h=(0.0, 0.0),\n                    resize_test=0.0),\n                sequential=False),\n            dict(\n                type='LoadAnnotationsBEVDepth',\n                bda_aug_conf=dict(\n                    rot_lim=(-0.0, 0.0),\n                    scale_lim=(1.0, 1.0),\n                    flip_dx_ratio=0.5,\n                    flip_dy_ratio=0.5),\n                classes=[\n                    'car', 'truck', 'construction_vehicle', 'bus', 'trailer',\n                    'barrier', 'motorcycle', 'bicycle', 'pedestrian',\n                    'traffic_cone'\n                ],\n                is_train=False),\n            dict(\n                type='LoadPointsFromFile',\n                coord_type='LIDAR',\n                load_dim=5,\n                use_dim=5,\n                file_client_args=dict(backend='disk')),\n            dict(\n                type='MultiScaleFlipAug3D',\n                img_scale=(1333, 800),\n                pts_scale_ratio=1,\n                flip=False,\n                transforms=[\n                    dict(\n                        type='DefaultFormatBundle3D',\n                        class_names=[\n                            'car', 'truck', 'construction_vehicle', 'bus',\n                            'trailer', 'barrier', 'motorcycle', 'bicycle',\n                            'pedestrian', 'traffic_cone'\n                        ],\n                        with_label=False),\n                    
dict(type='Collect3D', keys=['points', 'img_inputs'])\n                ])\n        ],\n        classes=[\n            'car', 'truck', 'construction_vehicle', 'bus', 'trailer',\n            'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'\n        ],\n        modality=dict(\n            use_lidar=False,\n            use_camera=True,\n            use_radar=False,\n            use_map=False,\n            use_external=False),\n        test_mode=True,\n        box_type_3d='LiDAR',\n        stereo=False,\n        filter_empty_gt=False,\n        img_info_prototype='bevdet'),\n    test=dict(\n        type='NuScenesDatasetOccpancy',\n        data_root='data/nuscenes/',\n        ann_file='data/nuscenes/bevdetv2-nuscenes_infos_val.pkl',\n        pipeline=[\n            dict(\n                type='PrepareImageInputs',\n                data_config=dict(\n                    cams=[\n                        'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT',\n                        'CAM_BACK_LEFT', 'CAM_BACK', 'CAM_BACK_RIGHT'\n                    ],\n                    Ncams=6,\n                    input_size=(256, 704),\n                    src_size=(900, 1600),\n                    resize=(-0.06, 0.11),\n                    rot=(-5.4, 5.4),\n                    flip=True,\n                    crop_h=(0.0, 0.0),\n                    resize_test=0.0),\n                sequential=False),\n            dict(\n                type='LoadAnnotationsBEVDepth',\n                bda_aug_conf=dict(\n                    rot_lim=(-0.0, 0.0),\n                    scale_lim=(1.0, 1.0),\n                    flip_dx_ratio=0.5,\n                    flip_dy_ratio=0.5),\n                classes=[\n                    'car', 'truck', 'construction_vehicle', 'bus', 'trailer',\n                    'barrier', 'motorcycle', 'bicycle', 'pedestrian',\n                    'traffic_cone'\n                ],\n                is_train=False),\n            dict(\n                
type='LoadPointsFromFile',\n                coord_type='LIDAR',\n                load_dim=5,\n                use_dim=5,\n                file_client_args=dict(backend='disk')),\n            dict(\n                type='MultiScaleFlipAug3D',\n                img_scale=(1333, 800),\n                pts_scale_ratio=1,\n                flip=False,\n                transforms=[\n                    dict(\n                        type='DefaultFormatBundle3D',\n                        class_names=[\n                            'car', 'truck', 'construction_vehicle', 'bus',\n                            'trailer', 'barrier', 'motorcycle', 'bicycle',\n                            'pedestrian', 'traffic_cone'\n                        ],\n                        with_label=False),\n                    dict(type='Collect3D', keys=['points', 'img_inputs'])\n                ])\n        ],\n        classes=[\n            'car', 'truck', 'construction_vehicle', 'bus', 'trailer',\n            'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'\n        ],\n        modality=dict(\n            use_lidar=False,\n            use_camera=True,\n            use_radar=False,\n            use_map=False,\n            use_external=False),\n        test_mode=True,\n        box_type_3d='LiDAR',\n        stereo=False,\n        filter_empty_gt=False,\n        img_info_prototype='bevdet'))\nevaluation = dict(\n    interval=1,\n    pipeline=[\n        dict(\n            type='PrepareImageInputs',\n            data_config=dict(\n                cams=[\n                    'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT',\n                    'CAM_BACK_LEFT', 'CAM_BACK', 'CAM_BACK_RIGHT'\n                ],\n                Ncams=6,\n                input_size=(256, 704),\n                src_size=(900, 1600),\n                resize=(-0.06, 0.11),\n                rot=(-5.4, 5.4),\n                flip=True,\n                crop_h=(0.0, 0.0),\n                resize_test=0.0),\n         
   sequential=False),\n        dict(\n            type='LoadAnnotationsBEVDepth',\n            bda_aug_conf=dict(\n                rot_lim=(-0.0, 0.0),\n                scale_lim=(1.0, 1.0),\n                flip_dx_ratio=0.5,\n                flip_dy_ratio=0.5),\n            classes=[\n                'car', 'truck', 'construction_vehicle', 'bus', 'trailer',\n                'barrier', 'motorcycle', 'bicycle', 'pedestrian',\n                'traffic_cone'\n            ],\n            is_train=False),\n        dict(\n            type='LoadPointsFromFile',\n            coord_type='LIDAR',\n            load_dim=5,\n            use_dim=5,\n            file_client_args=dict(backend='disk')),\n        dict(\n            type='MultiScaleFlipAug3D',\n            img_scale=(1333, 800),\n            pts_scale_ratio=1,\n            flip=False,\n            transforms=[\n                dict(\n                    type='DefaultFormatBundle3D',\n                    class_names=[\n                        'car', 'truck', 'construction_vehicle', 'bus',\n                        'trailer', 'barrier', 'motorcycle', 'bicycle',\n                        'pedestrian', 'traffic_cone'\n                    ],\n                    with_label=False),\n                dict(type='Collect3D', keys=['points', 'img_inputs'])\n            ])\n    ],\n    start=20)\ncheckpoint_config = dict(interval=1, max_keep_ckpts=5)\nlog_config = dict(\n    interval=1,\n    hooks=[dict(type='TextLoggerHook'),\n           dict(type='TensorboardLoggerHook')])\ndist_params = dict(backend='nccl')\nlog_level = 'INFO'\nwork_dir = './work_dirs/flashocc-r50'\nload_from = 'ckpts/bevdet-r50-cbgs.pth'\nresume_from = None\nworkflow = [('train', 1)]\nopencv_num_threads = 0\nmp_start_method = 'fork'\nplugin = True\nplugin_dir = 'projects/mmdet3d_plugin/'\ndata_config = dict(\n    cams=[\n        'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT', 'CAM_BACK_LEFT',\n        'CAM_BACK', 'CAM_BACK_RIGHT'\n    ],\n    Ncams=6,\n    
input_size=(256, 704),\n    src_size=(900, 1600),\n    resize=(-0.06, 0.11),\n    rot=(-5.4, 5.4),\n    flip=True,\n    crop_h=(0.0, 0.0),\n    resize_test=0.0)\ngrid_config = dict(\n    x=[-40, 40, 0.4],\n    y=[-40, 40, 0.4],\n    z=[-1, 5.4, 6.4],\n    depth=[1.0, 45.0, 0.5])\nvoxel_size = [0.1, 0.1, 0.2]\nnumC_Trans = 64\nmodel = dict(\n    type='BEVDetOCC',\n    img_backbone=dict(\n        type='ResNet',\n        depth=50,\n        num_stages=4,\n        out_indices=(2, 3),\n        frozen_stages=-1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        norm_eval=False,\n        with_cp=True,\n        style='pytorch'),\n    img_neck=dict(\n        type='CustomFPN',\n        in_channels=[1024, 2048],\n        out_channels=256,\n        num_outs=1,\n        start_level=0,\n        out_ids=[0]),\n    img_view_transformer=dict(\n        type='LSSViewTransformer',\n        grid_config=dict(\n            x=[-40, 40, 0.4],\n            y=[-40, 40, 0.4],\n            z=[-1, 5.4, 6.4],\n            depth=[1.0, 45.0, 0.5]),\n        input_size=(256, 704),\n        in_channels=256,\n        out_channels=64,\n        sid=False,\n        collapse_z=True,\n        downsample=16),\n    img_bev_encoder_backbone=dict(\n        type='CustomResNet', numC_input=64, num_channels=[128, 256, 512]),\n    img_bev_encoder_neck=dict(\n        type='FPN_LSS', in_channels=640, out_channels=256),\n    occ_head=dict(\n        type='BEVOCCHead2D',\n        in_dim=256,\n        out_dim=256,\n        Dz=16,\n        use_mask=True,\n        num_classes=18,\n        use_predicter=True,\n        class_balance=False,\n        loss_occ=dict(\n            type='CrossEntropyLoss',\n            use_sigmoid=False,\n            ignore_index=255,\n            loss_weight=1.0)))\nbda_aug_conf = dict(\n    rot_lim=(-0.0, 0.0),\n    scale_lim=(1.0, 1.0),\n    flip_dx_ratio=0.5,\n    flip_dy_ratio=0.5)\nshare_data_config = dict(\n    type='NuScenesDatasetOccpancy',\n    
data_root='data/nuscenes/',\n    classes=[\n        'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',\n        'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'\n    ],\n    modality=dict(\n        use_lidar=False,\n        use_camera=True,\n        use_radar=False,\n        use_map=False,\n        use_external=False),\n    stereo=False,\n    filter_empty_gt=False,\n    img_info_prototype='bevdet')\ntest_data_config = dict(\n    pipeline=[\n        dict(\n            type='PrepareImageInputs',\n            data_config=dict(\n                cams=[\n                    'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT',\n                    'CAM_BACK_LEFT', 'CAM_BACK', 'CAM_BACK_RIGHT'\n                ],\n                Ncams=6,\n                input_size=(256, 704),\n                src_size=(900, 1600),\n                resize=(-0.06, 0.11),\n                rot=(-5.4, 5.4),\n                flip=True,\n                crop_h=(0.0, 0.0),\n                resize_test=0.0),\n            sequential=False),\n        dict(\n            type='LoadAnnotationsBEVDepth',\n            bda_aug_conf=dict(\n                rot_lim=(-0.0, 0.0),\n                scale_lim=(1.0, 1.0),\n                flip_dx_ratio=0.5,\n                flip_dy_ratio=0.5),\n            classes=[\n                'car', 'truck', 'construction_vehicle', 'bus', 'trailer',\n                'barrier', 'motorcycle', 'bicycle', 'pedestrian',\n                'traffic_cone'\n            ],\n            is_train=False),\n        dict(\n            type='LoadPointsFromFile',\n            coord_type='LIDAR',\n            load_dim=5,\n            use_dim=5,\n            file_client_args=dict(backend='disk')),\n        dict(\n            type='MultiScaleFlipAug3D',\n            img_scale=(1333, 800),\n            pts_scale_ratio=1,\n            flip=False,\n            transforms=[\n                dict(\n                    type='DefaultFormatBundle3D',\n                    
class_names=[\n                        'car', 'truck', 'construction_vehicle', 'bus',\n                        'trailer', 'barrier', 'motorcycle', 'bicycle',\n                        'pedestrian', 'traffic_cone'\n                    ],\n                    with_label=False),\n                dict(type='Collect3D', keys=['points', 'img_inputs'])\n            ])\n    ],\n    ann_file='data/nuscenes/bevdetv2-nuscenes_infos_val.pkl',\n    type='NuScenesDatasetOccpancy',\n    data_root='data/nuscenes/',\n    classes=[\n        'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',\n        'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'\n    ],\n    modality=dict(\n        use_lidar=False,\n        use_camera=True,\n        use_radar=False,\n        use_map=False,\n        use_external=False),\n    stereo=False,\n    filter_empty_gt=False,\n    img_info_prototype='bevdet')\nkey = 'test'\noptimizer = dict(type='AdamW', lr=0.0001, weight_decay=0.01)\noptimizer_config = dict(grad_clip=dict(max_norm=5, norm_type=2))\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=200,\n    warmup_ratio=0.001,\n    step=[24])\nrunner = dict(type='EpochBasedRunner', max_epochs=24)\ncustom_hooks = [\n    dict(type='MEGVIIEMAHook', init_updates=10560, priority='NORMAL')\n]\ngpu_ids = range(0, 8)\n", "seed": 0, "exp_name": "flashocc-r50.py"}
+{"mode": "train", "epoch": 1, "iter": 1, "lr": 0.0, "memory": 32423, "data_time": 15.43241, "loss_occ": 3.00864, "loss": 3.00864, "grad_norm": 3.90007, "time": 659.74966}
+{"mode": "train", "epoch": 1, "iter": 2, "lr": 0.0, "memory": 32769, "data_time": 0.00516, "loss_occ": 3.01287, "loss": 3.01287, "grad_norm": 3.9387, "time": 4.70719}
+{"mode": "train", "epoch": 1, "iter": 3, "lr": 0.0, "memory": 32769, "data_time": 0.00255, "loss_occ": 3.015, "loss": 3.015, "grad_norm": 3.89853, "time": 1.13283}
+{"mode": "train", "epoch": 1, "iter": 4, "lr": 0.0, "memory": 32769, "data_time": 0.00234, "loss_occ": 3.00854, "loss": 3.00854, "grad_norm": 3.87413, "time": 1.12622}
+{"mode": "train", "epoch": 1, "iter": 5, "lr": 0.0, "memory": 32769, "data_time": 0.00279, "loss_occ": 3.0044, "loss": 3.0044, "grad_norm": 3.86394, "time": 1.12671}
+{"mode": "train", "epoch": 1, "iter": 6, "lr": 0.0, "memory": 32769, "data_time": 0.00295, "loss_occ": 3.00848, "loss": 3.00848, "grad_norm": 3.84233, "time": 1.12617}
+{"mode": "train", "epoch": 1, "iter": 7, "lr": 0.0, "memory": 32769, "data_time": 0.00263, "loss_occ": 3.0085, "loss": 3.0085, "grad_norm": 3.94733, "time": 1.12608}
+{"mode": "train", "epoch": 1, "iter": 8, "lr": 0.0, "memory": 32769, "data_time": 0.00604, "loss_occ": 3.00383, "loss": 3.00383, "grad_norm": 3.9215, "time": 1.13024}
+{"mode": "train", "epoch": 1, "iter": 9, "lr": 0.0, "memory": 32769, "data_time": 0.0036, "loss_occ": 2.99689, "loss": 2.99689, "grad_norm": 3.86593, "time": 1.1281}
+{"mode": "train", "epoch": 1, "iter": 10, "lr": 0.0, "memory": 32769, "data_time": 0.00266, "loss_occ": 2.9957, "loss": 2.9957, "grad_norm": 3.88205, "time": 1.12727}
--- a/docker-hub/FlashOCC/Flashocc/work_dirs/flashocc-r50/flashocc-r50.py
+++ b/docker-hub/FlashOCC/Flashocc/work_dirs/flashocc-r50/flashocc-r50.py
+point_cloud_range = [-51.2, -51.2, -5.0, 51.2, 51.2, 3.0]
+class_names = [
+    'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',
+    'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
+]
+dataset_type = 'NuScenesDatasetOccpancy'
+data_root = 'data/nuscenes/'
+input_modality = dict(
+    use_lidar=False,
+    use_camera=True,
+    use_radar=False,
+    use_map=False,
+    use_external=False)
+file_client_args = dict(backend='disk')
+train_pipeline = [
+    dict(
+        type='PrepareImageInputs',
+        is_train=True,
+        data_config=dict(
+            cams=[
+                'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT',
+                'CAM_BACK_LEFT', 'CAM_BACK', 'CAM_BACK_RIGHT'
+            ],
+            Ncams=6,
+            input_size=(256, 704),
+            src_size=(900, 1600),
+            resize=(-0.06, 0.11),
+            rot=(-5.4, 5.4),
+            flip=True,
+            crop_h=(0.0, 0.0),
+            resize_test=0.0),
+        sequential=False),
+    dict(
+        type='LoadAnnotationsBEVDepth',
+        bda_aug_conf=dict(
+            rot_lim=(-0.0, 0.0),
+            scale_lim=(1.0, 1.0),
+            flip_dx_ratio=0.5,
+            flip_dy_ratio=0.5),
+        classes=[
+            'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
+            'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
+        ],
+        is_train=True),
+    dict(type='LoadOccGTFromFile'),
+    dict(
+        type='LoadPointsFromFile',
+        coord_type='LIDAR',
+        load_dim=5,
+        use_dim=5,
+        file_client_args=dict(backend='disk')),
+    dict(
+        type='PointToMultiViewDepth',
+        downsample=1,
+        grid_config=dict(
+            x=[-40, 40, 0.4],
+            y=[-40, 40, 0.4],
+            z=[-1, 5.4, 6.4],
+            depth=[1.0, 45.0, 0.5])),
+    dict(
+        type='DefaultFormatBundle3D',
+        class_names=[
+            'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
+            'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
+        ]),
+    dict(
+        type='Collect3D',
+        keys=[
+            'img_inputs', 'gt_depth', 'voxel_semantics', 'mask_lidar',
+            'mask_camera'
+        ])
+]
+test_pipeline = [
+    dict(
+        type='PrepareImageInputs',
+        data_config=dict(
+            cams=[
+                'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT',
+                'CAM_BACK_LEFT', 'CAM_BACK', 'CAM_BACK_RIGHT'
+            ],
+            Ncams=6,
+            input_size=(256, 704),
+            src_size=(900, 1600),
+            resize=(-0.06, 0.11),
+            rot=(-5.4, 5.4),
+            flip=True,
+            crop_h=(0.0, 0.0),
+            resize_test=0.0),
+        sequential=False),
+    dict(
+        type='LoadAnnotationsBEVDepth',
+        bda_aug_conf=dict(
+            rot_lim=(-0.0, 0.0),
+            scale_lim=(1.0, 1.0),
+            flip_dx_ratio=0.5,
+            flip_dy_ratio=0.5),
+        classes=[
+            'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
+            'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
+        ],
+        is_train=False),
+    dict(
+        type='LoadPointsFromFile',
+        coord_type='LIDAR',
+        load_dim=5,
+        use_dim=5,
+        file_client_args=dict(backend='disk')),
+    dict(
+        type='MultiScaleFlipAug3D',
+        img_scale=(1333, 800),
+        pts_scale_ratio=1,
+        flip=False,
+        transforms=[
+            dict(
+                type='DefaultFormatBundle3D',
+                class_names=[
+                    'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
+                    'barrier', 'motorcycle', 'bicycle', 'pedestrian',
+                    'traffic_cone'
+                ],
+                with_label=False),
+            dict(type='Collect3D', keys=['points', 'img_inputs'])
+        ])
+]
+eval_pipeline = [
+    dict(
+        type='LoadPointsFromFile',
+        coord_type='LIDAR',
+        load_dim=5,
+        use_dim=5,
+        file_client_args=dict(backend='disk')),
+    dict(
+        type='LoadPointsFromMultiSweeps',
+        sweeps_num=10,
+        file_client_args=dict(backend='disk')),
+    dict(
+        type='DefaultFormatBundle3D',
+        class_names=[
+            'car', 'truck', 'trailer', 'bus', 'construction_vehicle',
+            'bicycle', 'motorcycle', 'pedestrian', 'traffic_cone', 'barrier'
+        ],
+        with_label=False),
+    dict(type='Collect3D', keys=['points'])
+]
+data = dict(
+    samples_per_gpu=24,
+    workers_per_gpu=24,
+    train=dict(
+        type='NuScenesDatasetOccpancy',
+        data_root='data/nuscenes/',
+        ann_file='data/nuscenes/bevdetv2-nuscenes_infos_train.pkl',
+        pipeline=[
+            dict(
+                type='PrepareImageInputs',
+                is_train=True,
+                data_config=dict(
+                    cams=[
+                        'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT',
+                        'CAM_BACK_LEFT', 'CAM_BACK', 'CAM_BACK_RIGHT'
+                    ],
+                    Ncams=6,
+                    input_size=(256, 704),
+                    src_size=(900, 1600),
+                    resize=(-0.06, 0.11),
+                    rot=(-5.4, 5.4),
+                    flip=True,
+                    crop_h=(0.0, 0.0),
+                    resize_test=0.0),
+                sequential=False),
+            dict(
+                type='LoadAnnotationsBEVDepth',
+                bda_aug_conf=dict(
+                    rot_lim=(-0.0, 0.0),
+                    scale_lim=(1.0, 1.0),
+                    flip_dx_ratio=0.5,
+                    flip_dy_ratio=0.5),
+                classes=[
+                    'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
+                    'barrier', 'motorcycle', 'bicycle', 'pedestrian',
+                    'traffic_cone'
+                ],
+                is_train=True),
+            dict(type='LoadOccGTFromFile'),
+            dict(
+                type='LoadPointsFromFile',
+                coord_type='LIDAR',
+                load_dim=5,
+                use_dim=5,
+                file_client_args=dict(backend='disk')),
+            dict(
+                type='PointToMultiViewDepth',
+                downsample=1,
+                grid_config=dict(
+                    x=[-40, 40, 0.4],
+                    y=[-40, 40, 0.4],
+                    z=[-1, 5.4, 6.4],
+                    depth=[1.0, 45.0, 0.5])),
+            dict(
+                type='DefaultFormatBundle3D',
+                class_names=[
+                    'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
+                    'barrier', 'motorcycle', 'bicycle', 'pedestrian',
+                    'traffic_cone'
+                ]),
+            dict(
+                type='Collect3D',
+                keys=[
+                    'img_inputs', 'gt_depth', 'voxel_semantics', 'mask_lidar',
+                    'mask_camera'
+                ])
+        ],
+        classes=[
+            'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
+            'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
+        ],
+        modality=dict(
+            use_lidar=False,
+            use_camera=True,
+            use_radar=False,
+            use_map=False,
+            use_external=False),
+        test_mode=False,
+        box_type_3d='LiDAR',
+        use_valid_flag=True,
+        stereo=False,
+        filter_empty_gt=False,
+        img_info_prototype='bevdet'),
+    val=dict(
+        type='NuScenesDatasetOccpancy',
+        data_root='data/nuscenes/',
+        ann_file='data/nuscenes/bevdetv2-nuscenes_infos_val.pkl',
+        pipeline=[
+            dict(
+                type='PrepareImageInputs',
+                data_config=dict(
+                    cams=[
+                        'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT',
+                        'CAM_BACK_LEFT', 'CAM_BACK', 'CAM_BACK_RIGHT'
+                    ],
+                    Ncams=6,
+                    input_size=(256, 704),
+                    src_size=(900, 1600),
+                    resize=(-0.06, 0.11),
+                    rot=(-5.4, 5.4),
+                    flip=True,
+                    crop_h=(0.0, 0.0),
+                    resize_test=0.0),
+                sequential=False),
+            dict(
+                type='LoadAnnotationsBEVDepth',
+                bda_aug_conf=dict(
+                    rot_lim=(-0.0, 0.0),
+                    scale_lim=(1.0, 1.0),
+                    flip_dx_ratio=0.5,
+                    flip_dy_ratio=0.5),
+                classes=[
+                    'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
+                    'barrier', 'motorcycle', 'bicycle', 'pedestrian',
+                    'traffic_cone'
+                ],
+                is_train=False),
+            dict(
+                type='LoadPointsFromFile',
+                coord_type='LIDAR',
+                load_dim=5,
+                use_dim=5,
+                file_client_args=dict(backend='disk')),
+            dict(
+                type='MultiScaleFlipAug3D',
+                img_scale=(1333, 800),
+                pts_scale_ratio=1,
+                flip=False,
+                transforms=[
+                    dict(
+                        type='DefaultFormatBundle3D',
+                        class_names=[
+                            'car', 'truck', 'construction_vehicle', 'bus',
+                            'trailer', 'barrier', 'motorcycle', 'bicycle',
+                            'pedestrian', 'traffic_cone'
+                        ],
+                        with_label=False),
+                    dict(type='Collect3D', keys=['points', 'img_inputs'])
+                ])
+        ],
+        classes=[
+            'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
+            'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
+        ],
+        modality=dict(
+            use_lidar=False,
+            use_camera=True,
+            use_radar=False,
+            use_map=False,
+            use_external=False),
+        test_mode=True,
+        box_type_3d='LiDAR',
+        stereo=False,
+        filter_empty_gt=False,
+        img_info_prototype='bevdet'),
+    test=dict(
+        type='NuScenesDatasetOccpancy',
+        data_root='data/nuscenes/',
+        ann_file='data/nuscenes/bevdetv2-nuscenes_infos_val.pkl',
+        pipeline=[
+            dict(
+                type='PrepareImageInputs',
+                data_config=dict(
+                    cams=[
+                        'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT',
+                        'CAM_BACK_LEFT', 'CAM_BACK', 'CAM_BACK_RIGHT'
+                    ],
+                    Ncams=6,
+                    input_size=(256, 704),
+                    src_size=(900, 1600),
+                    resize=(-0.06, 0.11),
+                    rot=(-5.4, 5.4),
+                    flip=True,
+                    crop_h=(0.0, 0.0),
+                    resize_test=0.0),
+                sequential=False),
+            dict(
+                type='LoadAnnotationsBEVDepth',
+                bda_aug_conf=dict(
+                    rot_lim=(-0.0, 0.0),
+                    scale_lim=(1.0, 1.0),
+                    flip_dx_ratio=0.5,
+                    flip_dy_ratio=0.5),
+                classes=[
+                    'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
+                    'barrier', 'motorcycle', 'bicycle', 'pedestrian',
+                    'traffic_cone'
+                ],
+                is_train=False),
+            dict(
+                type='LoadPointsFromFile',
+                coord_type='LIDAR',
+                load_dim=5,
+                use_dim=5,
+                file_client_args=dict(backend='disk')),
+            dict(
+                type='MultiScaleFlipAug3D',
+                img_scale=(1333, 800),
+                pts_scale_ratio=1,
+                flip=False,
+                transforms=[
+                    dict(
+                        type='DefaultFormatBundle3D',
+                        class_names=[
+                            'car', 'truck', 'construction_vehicle', 'bus',
+                            'trailer', 'barrier', 'motorcycle', 'bicycle',
+                            'pedestrian', 'traffic_cone'
+                        ],
+                        with_label=False),
+                    dict(type='Collect3D', keys=['points', 'img_inputs'])
+                ])
+        ],
+        classes=[
+            'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
+            'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
+        ],
+        modality=dict(
+            use_lidar=False,
+            use_camera=True,
+            use_radar=False,
+            use_map=False,
+            use_external=False),
+        test_mode=True,
+        box_type_3d='LiDAR',
+        stereo=False,
+        filter_empty_gt=False,
+        img_info_prototype='bevdet'))
+evaluation = dict(
+    interval=1,
+    pipeline=[
+        dict(
+            type='PrepareImageInputs',
+            data_config=dict(
+                cams=[
+                    'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT',
+                    'CAM_BACK_LEFT', 'CAM_BACK', 'CAM_BACK_RIGHT'
+                ],
+                Ncams=6,
+                input_size=(256, 704),
+                src_size=(900, 1600),
+                resize=(-0.06, 0.11),
+                rot=(-5.4, 5.4),
+                flip=True,
+                crop_h=(0.0, 0.0),
+                resize_test=0.0),
+            sequential=False),
+        dict(
+            type='LoadAnnotationsBEVDepth',
+            bda_aug_conf=dict(
+                rot_lim=(-0.0, 0.0),
+                scale_lim=(1.0, 1.0),
+                flip_dx_ratio=0.5,
+                flip_dy_ratio=0.5),
+            classes=[
+                'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
+                'barrier', 'motorcycle', 'bicycle', 'pedestrian',
+                'traffic_cone'
+            ],
+            is_train=False),
+        dict(
+            type='LoadPointsFromFile',
+            coord_type='LIDAR',
+            load_dim=5,
+            use_dim=5,
+            file_client_args=dict(backend='disk')),
+        dict(
+            type='MultiScaleFlipAug3D',
+            img_scale=(1333, 800),
+            pts_scale_ratio=1,
+            flip=False,
+            transforms=[
+                dict(
+                    type='DefaultFormatBundle3D',
+                    class_names=[
+                        'car', 'truck', 'construction_vehicle', 'bus',
+                        'trailer', 'barrier', 'motorcycle', 'bicycle',
+                        'pedestrian', 'traffic_cone'
+                    ],
+                    with_label=False),
+                dict(type='Collect3D', keys=['points', 'img_inputs'])
+            ])
+    ],
+    start=20)
+checkpoint_config = dict(interval=1, max_keep_ckpts=5)
+log_config = dict(
+    interval=1,
+    hooks=[dict(type='TextLoggerHook'),
+           dict(type='TensorboardLoggerHook')])
+dist_params = dict(backend='nccl')
+log_level = 'INFO'
+work_dir = './work_dirs/flashocc-r50'
+load_from = 'ckpts/bevdet-r50-cbgs.pth'
+resume_from = None
+workflow = [('train', 1)]
+opencv_num_threads = 0
+mp_start_method = 'fork'
+plugin = True
+plugin_dir = 'projects/mmdet3d_plugin/'
+data_config = dict(
+    cams=[
+        'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT', 'CAM_BACK_LEFT',
+        'CAM_BACK', 'CAM_BACK_RIGHT'
+    ],
+    Ncams=6,
+    input_size=(256, 704),
+    src_size=(900, 1600),
+    resize=(-0.06, 0.11),
+    rot=(-5.4, 5.4),
+    flip=True,
+    crop_h=(0.0, 0.0),
+    resize_test=0.0)
+grid_config = dict(
+    x=[-40, 40, 0.4],
+    y=[-40, 40, 0.4],
+    z=[-1, 5.4, 6.4],
+    depth=[1.0, 45.0, 0.5])
+voxel_size = [0.1, 0.1, 0.2]
+numC_Trans = 64
+model = dict(
+    type='BEVDetOCC',
+    img_backbone=dict(
+        type='ResNet',
+        depth=50,
+        num_stages=4,
+        out_indices=(2, 3),
+        frozen_stages=-1,
+        norm_cfg=dict(type='BN', requires_grad=True),
+        norm_eval=False,
+        with_cp=True,
+        style='pytorch'),
+    img_neck=dict(
+        type='CustomFPN',
+        in_channels=[1024, 2048],
+        out_channels=256,
+        num_outs=1,
+        start_level=0,
+        out_ids=[0]),
+    img_view_transformer=dict(
+        type='LSSViewTransformer',
+        grid_config=dict(
+            x=[-40, 40, 0.4],
+            y=[-40, 40, 0.4],
+            z=[-1, 5.4, 6.4],
+            depth=[1.0, 45.0, 0.5]),
+        input_size=(256, 704),
+        in_channels=256,
+        out_channels=64,
+        sid=False,
+        collapse_z=True,
+        downsample=16),
+    img_bev_encoder_backbone=dict(
+        type='CustomResNet', numC_input=64, num_channels=[128, 256, 512]),
+    img_bev_encoder_neck=dict(
+        type='FPN_LSS', in_channels=640, out_channels=256),
+    occ_head=dict(
+        type='BEVOCCHead2D',
+        in_dim=256,
+        out_dim=256,
+        Dz=16,
+        use_mask=True,
+        num_classes=18,
+        use_predicter=True,
+        class_balance=False,
+        loss_occ=dict(
+            type='CrossEntropyLoss',
+            use_sigmoid=False,
+            ignore_index=255,
+            loss_weight=1.0)))
+bda_aug_conf = dict(
+    rot_lim=(-0.0, 0.0),
+    scale_lim=(1.0, 1.0),
+    flip_dx_ratio=0.5,
+    flip_dy_ratio=0.5)
+share_data_config = dict(
+    type='NuScenesDatasetOccpancy',
+    data_root='data/nuscenes/',
+    classes=[
+        'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',
+        'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
+    ],
+    modality=dict(
+        use_lidar=False,
+        use_camera=True,
+        use_radar=False,
+        use_map=False,
+        use_external=False),
+    stereo=False,
+    filter_empty_gt=False,
+    img_info_prototype='bevdet')
+test_data_config = dict(
+    pipeline=[
+        dict(
+            type='PrepareImageInputs',
+            data_config=dict(
+                cams=[
+                    'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT',
+                    'CAM_BACK_LEFT', 'CAM_BACK', 'CAM_BACK_RIGHT'
+                ],
+                Ncams=6,
+                input_size=(256, 704),
+                src_size=(900, 1600),
+                resize=(-0.06, 0.11),
+                rot=(-5.4, 5.4),
+                flip=True,
+                crop_h=(0.0, 0.0),
+                resize_test=0.0),
+            sequential=False),
+        dict(
+            type='LoadAnnotationsBEVDepth',
+            bda_aug_conf=dict(
+                rot_lim=(-0.0, 0.0),
+                scale_lim=(1.0, 1.0),
+                flip_dx_ratio=0.5,
+                flip_dy_ratio=0.5),
+            classes=[
+                'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
+                'barrier', 'motorcycle', 'bicycle', 'pedestrian',
+                'traffic_cone'
+            ],
+            is_train=False),
+        dict(
+            type='LoadPointsFromFile',
+            coord_type='LIDAR',
+            load_dim=5,
+            use_dim=5,
+            file_client_args=dict(backend='disk')),
+        dict(
+            type='MultiScaleFlipAug3D',
+            img_scale=(1333, 800),
+            pts_scale_ratio=1,
+            flip=False,
+            transforms=[
+                dict(
+                    type='DefaultFormatBundle3D',
+                    class_names=[
+                        'car', 'truck', 'construction_vehicle', 'bus',
+                        'trailer', 'barrier', 'motorcycle', 'bicycle',
+                        'pedestrian', 'traffic_cone'
+                    ],
+                    with_label=False),
+                dict(type='Collect3D', keys=['points', 'img_inputs'])
+            ])
+    ],
+    ann_file='data/nuscenes/bevdetv2-nuscenes_infos_val.pkl',
+    type='NuScenesDatasetOccpancy',
+    data_root='data/nuscenes/',
+    classes=[
+        'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',
+        'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
+    ],
+    modality=dict(
+        use_lidar=False,
+        use_camera=True,
+        use_radar=False,
+        use_map=False,
+        use_external=False),
+    stereo=False,
+    filter_empty_gt=False,
+    img_info_prototype='bevdet')
+key = 'test'
+optimizer = dict(type='AdamW', lr=0.0001, weight_decay=0.01)
+optimizer_config = dict(grad_clip=dict(max_norm=5, norm_type=2))
+lr_config = dict(
+    policy='step',
+    warmup='linear',
+    warmup_iters=200,
+    warmup_ratio=0.001,
+    step=[24])
+runner = dict(type='EpochBasedRunner', max_epochs=24)
+custom_hooks = [
+    dict(type='MEGVIIEMAHook', init_updates=10560, priority='NORMAL')
+]
+gpu_ids = range(0, 8)
--- a/docker-hub/FlashOCC/Flashocc/work_dirs/flashocc-r50/tf_logs/events.out.tfevents.1775204673.bw61.849.0
+++ b/docker-hub/FlashOCC/Flashocc/work_dirs/flashocc-r50/tf_logs/events.out.tfevents.1775204673.bw61.849.0
--- a/docker-hub/FlashOCC/Flashocc/work_dirs/flashocc-r50/tf_logs/events.out.tfevents.1775204818.bw61.20636.0
+++ b/docker-hub/FlashOCC/Flashocc/work_dirs/flashocc-r50/tf_logs/events.out.tfevents.1775204818.bw61.20636.0
--- a/MapTR @ e03f097a
+++ b/MapTR @ e03f097a
+Subproject commit e03f097abef19e1ba3fed5f471a8d80fbfa0a064
--- a/mmdetection3d @ 962f0937
+++ b/mmdetection3d @ 962f0937
+Subproject commit 962f093736ffe55c089bc618842a8b8567318c8c
--- a/Sparse4D @ c41df4bb
+++ b/Sparse4D @ c41df4bb
+Subproject commit c41df4bbf7bc82490f11ff55173abfcb3fb91425
--- a/docker-hub/qwen2.5-vl/readme.md
+++ b/docker-hub/qwen2.5-vl/readme.md
@@ -10,7 +10,7 @@ git clone -b core_v0.12.0 --recurse-submodules http://10.16.6.30/dcutoolkit/deep
 cd dcu_megatron-core_v0.12.0/
 python setup.py install

-cd ../llama-factory
+cd ./llama-factory
 pip install -r requirements.txt
 ```


--- a/docker-hub/wan2.1/readme.md
+++ b/docker-hub/wan2.1/readme.md
@@ -4,7 +4,7 @@

 ```
 docker run -dit --network=host --name=wan21 --privileged --device=/dev/kfd --device=/dev/dri --ipc=host --shm-size=128G  --group-add video --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -u root --ulimit stack=-1:-1 --ulimit memlock=-1:-1 -v /opt/hyhal:/opt/hyhal:ro -v /public/opendas/DL_DATA/llm-models/:/models:ro  harbor.sourcefind.cn:5443/dcu/admin/base/pytorch:2.5.1-ubuntu22.04-dtk25.04.4-1230-py3.10-20260115
-cd musubi-tuner
+cd ./musubi-tuner
 pip install -e .
 ```