Commit 41b18fd8 authored by zhe chen

Use pre-commit to reformat code


parent ff20ea39
@@ -2,6 +1,7 @@ import math
import torch
from torch.utils.data import DistributedSampler as _DistributedSampler
from .sampler import SAMPLER
@@ -33,9 +34,9 @@ class DistributedSampler(_DistributedSampler):
assert len(indices) == self.total_size
# subsample
per_replicas = self.total_size // self.num_replicas
# indices = indices[self.rank:self.total_size:self.num_replicas]
indices = indices[self.rank * per_replicas:(self.rank + 1) * per_replicas]
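# each rank takes a contiguous block of per_replicas indices (the commented-out line above is the strided variant)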
assert len(indices) == self.num_samples
return iter(indices)
# Copyright (c) OpenMMLab. All rights reserved.
import math
@@ -6,9 +5,8 @@ import numpy as np
import torch
from mmcv.runner import get_dist_info
from torch.utils.data import Sampler
from .sampler import SAMPLER
import random
from IPython import embed
@SAMPLER.register_module()
@@ -107,4 +105,3 @@ class DistributedGroupSampler(Sampler):
def set_epoch(self, epoch):
self.epoch = epoch
from mmcv.utils.registry import Registry, build_from_cfg
SAMPLER = Registry('sampler')
def build_sampler(cfg, default_args):
return build_from_cfg(cfg, SAMPLER, default_args)
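A minimal usage sketch of the registry above (hypothetical config values; it assumes the DistributedSampler shown earlier is registered under SAMPLER and that a dataset object and rank/world-size variables exist):
# Hypothetical example: build a registered sampler from a config dict.
sampler_cfg = dict(type='DistributedSampler', shuffle=True)
sampler = build_sampler(sampler_cfg, default_args=dict(dataset=train_dataset, num_replicas=world_size, rank=rank))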
from .vovnet import VoVNet
__all__ = ['VoVNet']
from collections import OrderedDict
import torch
import torch.nn as nn
import torch.nn.functional as F
from mmcv.runner import BaseModule
from mmdet.models.builder import BACKBONES
from torch.nn.modules.batchnorm import _BatchNorm
VoVNet19_slim_dw_eSE = {
'stem': [64, 64, 64],
'stage_conv_ch': [64, 80, 96, 112],
'stage_out_ch': [112, 256, 384, 512],
"layer_per_block": 3,
"block_per_stage": [1, 1, 1, 1],
"eSE": True,
"dw": True
'layer_per_block': 3,
'block_per_stage': [1, 1, 1, 1],
'eSE': True,
'dw': True
}
VoVNet19_dw_eSE = {
'stem': [64, 64, 64],
"stage_conv_ch": [128, 160, 192, 224],
"stage_out_ch": [256, 512, 768, 1024],
"layer_per_block": 3,
"block_per_stage": [1, 1, 1, 1],
"eSE": True,
"dw": True
'stage_conv_ch': [128, 160, 192, 224],
'stage_out_ch': [256, 512, 768, 1024],
'layer_per_block': 3,
'block_per_stage': [1, 1, 1, 1],
'eSE': True,
'dw': True
}
VoVNet19_slim_eSE = {
@@ -35,57 +34,57 @@ VoVNet19_slim_eSE = {
'layer_per_block': 3,
'block_per_stage': [1, 1, 1, 1],
'eSE': True,
"dw": False
'dw': False
}
VoVNet19_eSE = {
'stem': [64, 64, 128],
"stage_conv_ch": [128, 160, 192, 224],
"stage_out_ch": [256, 512, 768, 1024],
"layer_per_block": 3,
"block_per_stage": [1, 1, 1, 1],
"eSE": True,
"dw": False
'stage_conv_ch': [128, 160, 192, 224],
'stage_out_ch': [256, 512, 768, 1024],
'layer_per_block': 3,
'block_per_stage': [1, 1, 1, 1],
'eSE': True,
'dw': False
}
VoVNet39_eSE = {
'stem': [64, 64, 128],
"stage_conv_ch": [128, 160, 192, 224],
"stage_out_ch": [256, 512, 768, 1024],
"layer_per_block": 5,
"block_per_stage": [1, 1, 2, 2],
"eSE": True,
"dw": False
'stage_conv_ch': [128, 160, 192, 224],
'stage_out_ch': [256, 512, 768, 1024],
'layer_per_block': 5,
'block_per_stage': [1, 1, 2, 2],
'eSE': True,
'dw': False
}
VoVNet57_eSE = {
'stem': [64, 64, 128],
"stage_conv_ch": [128, 160, 192, 224],
"stage_out_ch": [256, 512, 768, 1024],
"layer_per_block": 5,
"block_per_stage": [1, 1, 4, 3],
"eSE": True,
"dw": False
'stage_conv_ch': [128, 160, 192, 224],
'stage_out_ch': [256, 512, 768, 1024],
'layer_per_block': 5,
'block_per_stage': [1, 1, 4, 3],
'eSE': True,
'dw': False
}
VoVNet99_eSE = {
'stem': [64, 64, 128],
"stage_conv_ch": [128, 160, 192, 224],
"stage_out_ch": [256, 512, 768, 1024],
"layer_per_block": 5,
"block_per_stage": [1, 3, 9, 3],
"eSE": True,
"dw": False
'stage_conv_ch': [128, 160, 192, 224],
'stage_out_ch': [256, 512, 768, 1024],
'layer_per_block': 5,
'block_per_stage': [1, 3, 9, 3],
'eSE': True,
'dw': False
}
_STAGE_SPECS = {
"V-19-slim-dw-eSE": VoVNet19_slim_dw_eSE,
"V-19-dw-eSE": VoVNet19_dw_eSE,
"V-19-slim-eSE": VoVNet19_slim_eSE,
"V-19-eSE": VoVNet19_eSE,
"V-39-eSE": VoVNet39_eSE,
"V-57-eSE": VoVNet57_eSE,
"V-99-eSE": VoVNet99_eSE,
'V-19-slim-dw-eSE': VoVNet19_slim_dw_eSE,
'V-19-dw-eSE': VoVNet19_dw_eSE,
'V-19-slim-eSE': VoVNet19_slim_eSE,
'V-19-eSE': VoVNet19_eSE,
'V-39-eSE': VoVNet39_eSE,
'V-57-eSE': VoVNet57_eSE,
'V-99-eSE': VoVNet99_eSE,
}
@@ -117,7 +116,7 @@ def conv3x3(in_channels, out_channels, module_name, postfix, stride=1, groups=1,
"""3x3 convolution with padding"""
return [
(
f"{module_name}_{postfix}/conv",
f'{module_name}_{postfix}/conv',
nn.Conv2d(
in_channels,
out_channels,
@@ -128,8 +127,8 @@ def conv3x3(in_channels, out_channels, module_name, postfix, stride=1, groups=1,
bias=False,
),
),
(f"{module_name}_{postfix}/norm", nn.BatchNorm2d(out_channels)),
(f"{module_name}_{postfix}/relu", nn.ReLU(inplace=True)),
(f'{module_name}_{postfix}/norm', nn.BatchNorm2d(out_channels)),
(f'{module_name}_{postfix}/relu', nn.ReLU(inplace=True)),
]
@@ -137,7 +136,7 @@ def conv1x1(in_channels, out_channels, module_name, postfix, stride=1, groups=1,
"""1x1 convolution with padding"""
return [
(
f"{module_name}_{postfix}/conv",
f'{module_name}_{postfix}/conv',
nn.Conv2d(
in_channels,
out_channels,
@@ -148,8 +147,8 @@ def conv1x1(in_channels, out_channels, module_name, postfix, stride=1, groups=1,
bias=False,
),
),
(f"{module_name}_{postfix}/norm", nn.BatchNorm2d(out_channels)),
(f"{module_name}_{postfix}/relu", nn.ReLU(inplace=True)),
(f'{module_name}_{postfix}/norm', nn.BatchNorm2d(out_channels)),
(f'{module_name}_{postfix}/relu', nn.ReLU(inplace=True)),
]
@@ -179,7 +178,7 @@ class eSEModule(nn.Module):
class _OSA_module(nn.Module):
def __init__(
self, in_ch, stage_ch, concat_ch, layer_per_block, module_name, SE=False, identity=False, depthwise=False
):
super(_OSA_module, self).__init__()
@@ -192,7 +191,7 @@ class _OSA_module(nn.Module):
if self.depthwise and in_channel != stage_ch:
self.isReduced = True
self.conv_reduction = nn.Sequential(
OrderedDict(conv1x1(in_channel, stage_ch, "{}_reduction".format(module_name), "0"))
OrderedDict(conv1x1(in_channel, stage_ch, '{}_reduction'.format(module_name), '0'))
)
for i in range(layer_per_block):
if self.depthwise:
@@ -203,7 +202,7 @@ class _OSA_module(nn.Module):
# feature aggregation
in_channel = in_ch + layer_per_block * stage_ch
self.concat = nn.Sequential(OrderedDict(conv1x1(in_channel, concat_ch, module_name, "concat")))
self.concat = nn.Sequential(OrderedDict(conv1x1(in_channel, concat_ch, module_name, 'concat')))
self.ese = eSEModule(concat_ch)
@@ -232,24 +231,24 @@ class _OSA_module(nn.Module):
class _OSA_stage(nn.Sequential):
def __init__(
self, in_ch, stage_ch, concat_ch, block_per_stage, layer_per_block, stage_num, SE=False, depthwise=False
):
super(_OSA_stage, self).__init__()
if not stage_num == 2:
self.add_module("Pooling", nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True))
self.add_module('Pooling', nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True))
if block_per_stage != 1:
SE = False
module_name = f"OSA{stage_num}_1"
module_name = f'OSA{stage_num}_1'
self.add_module(
module_name, _OSA_module(in_ch, stage_ch, concat_ch, layer_per_block, module_name, SE, depthwise=depthwise)
)
for i in range(block_per_stage - 1):
if i != block_per_stage - 2: # last block
SE = False
module_name = f"OSA{stage_num}_{i + 2}"
module_name = f'OSA{stage_num}_{i + 2}'
self.add_module(
module_name,
_OSA_module(
@@ -267,7 +266,7 @@ class _OSA_stage(nn.Sequential):
@BACKBONES.register_module()
class VoVNet(BaseModule):
def __init__(self, spec_name, input_ch=3, out_features=None,
frozen_stages=-1, norm_eval=True, pretrained=None, init_cfg=None):
"""
Args:
@@ -285,32 +284,32 @@ class VoVNet(BaseModule):
self.init_cfg = dict(type='Pretrained', checkpoint=pretrained)
stage_specs = _STAGE_SPECS[spec_name]
stem_ch = stage_specs["stem"]
config_stage_ch = stage_specs["stage_conv_ch"]
config_concat_ch = stage_specs["stage_out_ch"]
block_per_stage = stage_specs["block_per_stage"]
layer_per_block = stage_specs["layer_per_block"]
SE = stage_specs["eSE"]
depthwise = stage_specs["dw"]
stem_ch = stage_specs['stem']
config_stage_ch = stage_specs['stage_conv_ch']
config_concat_ch = stage_specs['stage_out_ch']
block_per_stage = stage_specs['block_per_stage']
layer_per_block = stage_specs['layer_per_block']
SE = stage_specs['eSE']
depthwise = stage_specs['dw']
self._out_features = out_features
# Stem module
conv_type = dw_conv3x3 if depthwise else conv3x3
stem = conv3x3(input_ch, stem_ch[0], "stem", "1", 2)
stem += conv_type(stem_ch[0], stem_ch[1], "stem", "2", 1)
stem += conv_type(stem_ch[1], stem_ch[2], "stem", "3", 2)
self.add_module("stem", nn.Sequential((OrderedDict(stem))))
stem = conv3x3(input_ch, stem_ch[0], 'stem', '1', 2)
stem += conv_type(stem_ch[0], stem_ch[1], 'stem', '2', 1)
stem += conv_type(stem_ch[1], stem_ch[2], 'stem', '3', 2)
self.add_module('stem', nn.Sequential((OrderedDict(stem))))
current_stirde = 4
self._out_feature_strides = {"stem": current_stirde, "stage2": current_stirde}
self._out_feature_channels = {"stem": stem_ch[2]}
self._out_feature_strides = {'stem': current_stirde, 'stage2': current_stirde}
self._out_feature_channels = {'stem': stem_ch[2]}
stem_out_ch = [stem_ch[2]]
in_ch_list = stem_out_ch + config_concat_ch[:-1]
# OSA stages
self.stage_names = []
for i in range(4): # num_stages
name = "stage%d" % (i + 2) # stage 2 ... stage 5
name = 'stage%d' % (i + 2) # stage 2 ... stage 5
self.stage_names.append(name)
self.add_module(
name,
@@ -341,8 +340,8 @@ class VoVNet(BaseModule):
def forward(self, x):
outputs = {}
x = self.stem(x)
if "stem" in self._out_features:
outputs["stem"] = x
if 'stem' in self._out_features:
outputs['stem'] = x
for name in self.stage_names:
x = getattr(self, name)(x)
if name in self._out_features:
@@ -358,7 +357,7 @@ class VoVNet(BaseModule):
param.requires_grad = False
for i in range(1, self.frozen_stages + 1):
m = getattr(self, f'stage{i + 1}')
m.eval()
for param in m.parameters():
param.requires_grad = False
@@ -372,4 +371,4 @@ class VoVNet(BaseModule):
for m in self.modules():
# trick: eval have effect on BatchNorm only
if isinstance(m, _BatchNorm):
m.eval()
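A minimal usage sketch of the backbone defined above (the input size and requested stage names are hypothetical; the exact return container depends on forward() code elided from this diff):
# Hypothetical example: build a VoVNet-39 backbone that exposes the last two stages.
backbone = VoVNet('V-39-eSE', input_ch=3, out_features=['stage4', 'stage5'])
feats = backbone(torch.rand(1, 3, 224, 224))  # outputs for 'stage4' and 'stage5'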
from .hooks import GradChecker
from mmcv.runner.hooks.hook import HOOKS, Hook
from projects.mmdet3d_plugin.models.utils import run_time
@HOOKS.register_module()
@@ -7,7 +6,5 @@ class GradChecker(Hook):
def after_train_iter(self, runner):
for key, val in runner.model.named_parameters():
if val.grad is None and val.requires_grad:
print('WARNING: {key}\'s parameters are not used!!!!'.format(key=key))
from .adamw import AdamW2
@@ -4,8 +4,9 @@ except:
print('WARNING!!!, I recommend using torch>=1.8')
import torch
from mmcv.runner.optimizer.builder import OPTIMIZERS
from torch.optim.optimizer import Optimizer
@OPTIMIZERS.register_module()
class AdamW2(Optimizer):
@@ -38,15 +39,15 @@ class AdamW2(Optimizer):
def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8,
weight_decay=1e-2, amsgrad=False):
if not 0.0 <= lr:
raise ValueError("Invalid learning rate: {}".format(lr))
raise ValueError('Invalid learning rate: {}'.format(lr))
if not 0.0 <= eps:
raise ValueError("Invalid epsilon value: {}".format(eps))
raise ValueError('Invalid epsilon value: {}'.format(eps))
if not 0.0 <= betas[0] < 1.0:
raise ValueError("Invalid beta parameter at index 0: {}".format(betas[0]))
raise ValueError('Invalid beta parameter at index 0: {}'.format(betas[0]))
if not 0.0 <= betas[1] < 1.0:
raise ValueError("Invalid beta parameter at index 1: {}".format(betas[1]))
raise ValueError('Invalid beta parameter at index 1: {}'.format(betas[1]))
if not 0.0 <= weight_decay:
raise ValueError("Invalid weight_decay value: {}".format(weight_decay))
raise ValueError('Invalid weight_decay value: {}'.format(weight_decay))
defaults = dict(lr=lr, betas=betas, eps=eps,
weight_decay=weight_decay, amsgrad=amsgrad)
super(AdamW2, self).__init__(params, defaults)
@@ -109,7 +110,6 @@ class AdamW2(Optimizer):
if amsgrad:
max_exp_avg_sqs.append(state['max_exp_avg_sq'])
# update the steps for each param group update
state['step'] += 1
# record the step after step update
@@ -128,4 +128,4 @@ class AdamW2(Optimizer):
group['weight_decay'],
group['eps'])
return loss
from .bricks import run_time
from .grid_mask import GridMask
from .position_embedding import RelPositionEmbedding
from .positional_encoding import LearnedPositionalEncoding3D
from .visual import save_tensor
import functools
import time
from collections import defaultdict
import torch
time_maps = defaultdict(lambda: 0.)
count_maps = defaultdict(lambda: 0.)
def run_time(name):
def middle(fn):
def wrapper(*args, **kwargs):
@@ -11,10 +14,12 @@ def run_time(name):
start = time.time()
res = fn(*args, **kwargs)
torch.cuda.synchronize()
time_maps['%s : %s' % (name, fn.__name__)] += time.time() - start
count_maps['%s : %s' % (name, fn.__name__)] += 1
print('%s : %s takes up %f ' % (name, fn.__name__, time_maps['%s : %s' % (name, fn.__name__)] / count_maps[
'%s : %s' % (name, fn.__name__)]))
return res
return wrapper
return middle
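A short usage sketch of the run_time decorator above (the decorated function and its input are hypothetical; CUDA must be available because the wrapper calls torch.cuda.synchronize()):
# Hypothetical example: print the running-average wall time of a function on every call.
@run_time('backbone')
def extract_feat(img):
    return img.mean()
feat = extract_feat(torch.rand(8, 256, 32, 32).cuda())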
import numpy as np
import torch
import torch.nn as nn
from mmcv.runner import auto_fp16
from PIL import Image
class Grid(object):
def __init__(self, use_h, use_w, rotate=1, offset=False, ratio=0.5, mode=0, prob=1.):
self.use_h = use_h
self.use_w = use_w
self.rotate = rotate
self.offset = offset
self.ratio = ratio
self.mode = mode
self.st_prob = prob
self.prob = prob
@@ -25,50 +26,50 @@ class Grid(object):
w = img.size(2)
self.d1 = 2
self.d2 = min(h, w)
hh = int(1.5 * h)
ww = int(1.5 * w)
d = np.random.randint(self.d1, self.d2)
if self.ratio == 1:
self.l = np.random.randint(1, d)
else:
self.l = min(max(int(d * self.ratio + 0.5), 1), d - 1)
mask = np.ones((hh, ww), np.float32)
st_h = np.random.randint(d)
st_w = np.random.randint(d)
if self.use_h:
for i in range(hh // d):
s = d * i + st_h
t = min(s + self.l, hh)
mask[s:t, :] *= 0
if self.use_w:
for i in range(ww // d):
s = d * i + st_w
t = min(s + self.l, ww)
mask[:, s:t] *= 0
r = np.random.randint(self.rotate)
mask = Image.fromarray(np.uint8(mask))
mask = mask.rotate(r)
mask = np.asarray(mask)
mask = mask[(hh - h) // 2:(hh - h) // 2 + h, (ww - w) // 2:(ww - w) // 2 + w]
mask = torch.from_numpy(mask).float()
if self.mode == 1:
mask = 1 - mask
mask = mask.expand_as(img)
if self.offset:
offset = torch.from_numpy(2 * (np.random.rand(h, w) - 0.5)).float()
offset = (1 - mask) * offset
img = img * mask + offset
else:
img = img * mask
return img, label
class GridMask(nn.Module):
def __init__(self, use_h, use_w, rotate=1, offset=False, ratio=0.5, mode=0, prob=1.):
super(GridMask, self).__init__()
self.use_h = use_h
self.use_w = use_w
@@ -79,46 +80,48 @@ class GridMask(nn.Module):
self.st_prob = prob
self.prob = prob
self.fp16_enable = False
def set_prob(self, epoch, max_epoch):
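# ramp the probability of applying the mask linearly from 0 to st_prob over training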
self.prob = self.st_prob * epoch / max_epoch # + 1.#0.5
@auto_fp16()
def forward(self, x):
if np.random.rand() > self.prob or not self.training:
return x
n, c, h, w = x.size()
x = x.view(-1, h, w)
hh = int(1.5 * h)
ww = int(1.5 * w)
d = np.random.randint(2, h)
self.l = min(max(int(d * self.ratio + 0.5), 1), d - 1)
mask = np.ones((hh, ww), np.float32)
st_h = np.random.randint(d)
st_w = np.random.randint(d)
if self.use_h:
for i in range(hh // d):
s = d * i + st_h
t = min(s + self.l, hh)
mask[s:t, :] *= 0
if self.use_w:
for i in range(ww // d):
s = d * i + st_w
t = min(s + self.l, ww)
mask[:, s:t] *= 0
r = np.random.randint(self.rotate)
mask = Image.fromarray(np.uint8(mask))
mask = mask.rotate(r)
mask = np.asarray(mask)
mask = mask[(hh - h) // 2:(hh - h) // 2 + h, (ww - w) // 2:(ww - w) // 2 + w]
mask = torch.from_numpy(mask).to(x.dtype).cuda()
if self.mode == 1:
mask = 1 - mask
mask = mask.expand_as(x)
if self.offset:
offset = torch.from_numpy(2 * (np.random.rand(h, w) - 0.5)).to(x.dtype).cuda()
x = x * mask + offset * (1 - mask)
else:
x = x * mask
return x.view(n, c, h, w)
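A minimal usage sketch of GridMask (the tensor shape and hyper-parameters are hypothetical; the input is assumed to be a CUDA tensor because the mask is moved to the GPU):
# Hypothetical example: apply GridMask augmentation to a batch of images during training.
grid_mask = GridMask(True, True, rotate=1, offset=False, ratio=0.5, mode=1, prob=0.7)
images = torch.rand(2, 3, 224, 224).cuda()
augmented = grid_mask(images)  # same shape as the input, with a grid of cells masked out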
import math
import torch
import torch.nn as nn
class RelPositionEmbedding(nn.Module):
def __init__(self, num_pos_feats=64, pos_norm=True):
super().__init__()
self.num_pos_feats = num_pos_feats
self.fc = nn.Linear(4, self.num_pos_feats, bias=False)
# nn.init.orthogonal_(self.fc.weight)
# self.fc.weight.requires_grad = False
self.pos_norm = pos_norm
if self.pos_norm:
self.norm = nn.LayerNorm(self.num_pos_feats)
def forward(self, tensor):
# mask = nesttensor.mask
B, C, H, W = tensor.shape
# print('tensor.shape', tensor.shape)
y_range = (torch.arange(H) / float(H - 1)).to(tensor.device)
# y_axis = torch.stack((y_range, 1-y_range),dim=1)
y_axis = torch.stack((torch.cos(y_range * math.pi), torch.sin(y_range * math.pi)), dim=1)
y_axis = y_axis.reshape(H, 1, 2).repeat(1, W, 1).reshape(H * W, 2)
x_range = (torch.arange(W) / float(W - 1)).to(tensor.device)
# x_axis =torch.stack((x_range,1-x_range),dim=1)
x_axis = torch.stack((torch.cos(x_range * math.pi), torch.sin(x_range * math.pi)), dim=1)
x_axis = x_axis.reshape(1, W, 2).repeat(H, 1, 1).reshape(H * W, 2)
x_pos = torch.cat((y_axis, x_axis), dim=1)
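# x_pos: (H * W, 4) sine/cosine encodings of the normalized row and column positions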
@@ -30,5 +33,5 @@ class RelPositionEmbedding(nn.Module):
if self.pos_norm:
x_pos = self.norm(x_pos)
# print('xpos,', x_pos.max(),x_pos.min())
return x_pos
import torch
import torch.nn as nn
from mmcv.cnn.bricks.transformer import POSITIONAL_ENCODING
from mmcv.runner import BaseModule
@POSITIONAL_ENCODING.register_module()
class LearnedPositionalEncoding3D(BaseModule):
"""Position embedding with learnable embedding weights.
Args:
num_feats (int): The feature dimension for each position
along the x-axis, y-axis, or height-axis. The final returned dimension for
each position is 3 times this value.
row_num_embed (int, optional): The dictionary size of row embeddings.
Default 50.
col_num_embed (int, optional): The dictionary size of col embeddings.
Default 50.
init_cfg (dict or list[dict], optional): Initialization config dict.
"""
def __init__(self,
num_feats,
row_num_embed=50,
col_num_embed=50,
height_num_embed=50,
init_cfg=dict(type='Uniform', layer='Embedding')):
super(LearnedPositionalEncoding3D, self).__init__(init_cfg)
self.row_embed = nn.Embedding(row_num_embed, num_feats)
self.col_embed = nn.Embedding(col_num_embed, num_feats)
self.height_embed = nn.Embedding(height_num_embed, num_feats)
self.num_feats = num_feats
self.row_num_embed = row_num_embed
self.col_num_embed = col_num_embed
self.height_num_embed = height_num_embed
def forward(self, mask):
"""Forward function for `LearnedPositionalEncoding`.
Args:
mask (Tensor): ByteTensor mask. Non-zero values representing
ignored positions, while zero values means valid positions
for this image. Shape [bs, l, h, w].
Returns:
pos (Tensor): Returned position embedding with shape
[bs, num_feats*3, l, h, w].
"""
l, h, w = mask.shape[-3:]
x = torch.arange(w, device=mask.device)
y = torch.arange(h, device=mask.device)
z = torch.arange(l, device=mask.device)
x_embed = self.col_embed(x)
y_embed = self.row_embed(y)
z_embed = self.height_embed(z)
pos = torch.cat(
(x_embed.unsqueeze(0).unsqueeze(0).repeat(l, h, 1, 1),
y_embed.unsqueeze(1).unsqueeze(0).repeat(l, 1, w, 1),
z_embed.unsqueeze(1).unsqueeze(1).repeat(1, h, w, 1)), dim=-1).permute(3, 0, 1, 2).unsqueeze(0).repeat(
mask.shape[0], 1, 1, 1, 1)
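# pos: (bs, 3 * num_feats, l, h, w)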
return pos
def __repr__(self):
"""str: a string that describes the module"""
repr_str = self.__class__.__name__
repr_str += f'(num_feats={self.num_feats}, '
repr_str += f'row_num_embed={self.row_num_embed}, '
repr_str += f'col_num_embed={self.col_num_embed}, '
repr_str += f'height_num_embed={self.height_num_embed})'
return repr_str
import cv2
import matplotlib.pyplot as plt
import torch
import torchvision
from torchvision.utils import make_grid
def convert_color(img_path):
@@ -12,11 +12,11 @@ def convert_color(img_path):
plt.close()
def save_tensor(tensor, path, pad_value=254.0, ):
print('save_tensor', path)
tensor = tensor.to(torch.float).detach().cpu()
if tensor.type() == 'torch.BoolTensor':
tensor = tensor * 255
if len(tensor.shape) == 3:
tensor = tensor.unsqueeze(1)
tensor = make_grid(tensor, pad_value=pad_value, normalize=False).permute(1, 2, 0).numpy().copy()
@@ -3,31 +3,30 @@
# ---------------------------------------------
# Modified by Zhiqi Li
# ---------------------------------------------
from __future__ import division
import argparse
import copy
import mmcv
import os
import time
import torch
import warnings
from mmcv import Config, DictAction
from mmcv.runner import get_dist_info, init_dist
from os import path as osp
from mmcv.utils import TORCH_VERSION, digit_version
from mmdet import __version__ as mmdet_version
from mmdet3d import __version__ as mmdet3d_version
from mmdet3d.datasets import build_dataset
from mmdet3d.models import build_model
from mmdet3d.utils import collect_env, get_root_logger
from mmdet.apis import set_random_seed
from mmseg import __version__ as mmseg_version
# from mmdet3d.apis import train_model
def parse_args():
@@ -45,13 +44,13 @@ def parse_args():
'--gpus',
type=int,
help='number of gpus to use '
'(only applicable to non-distributed training)')
group_gpus.add_argument(
'--gpu-ids',
type=int,
nargs='+',
help='ids of gpus to use '
'(only applicable to non-distributed training)')
parser.add_argument('--seed', type=int, default=0, help='random seed')
parser.add_argument(
'--deterministic',
@@ -62,18 +61,18 @@ def parse_args():
nargs='+',
action=DictAction,
help='override some settings in the used config, the key-value pair '
'in xxx=yyy format will be merged into config file (deprecate), '
'change to --cfg-options instead.')
parser.add_argument(
'--cfg-options',
nargs='+',
action=DictAction,
help='override some settings in the used config, the key-value pair '
'in xxx=yyy format will be merged into config file. If the value to '
'be overwritten is a list, it should be like key="[a,b]" or key=a,b '
'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" '
'Note that the quotation marks are necessary and that no white space '
'is allowed.')
parser.add_argument(
'--launcher',
choices=['none', 'pytorch', 'slurm', 'mpi'],
@@ -134,7 +133,8 @@ def main():
print(_module_path)
plg_lib = importlib.import_module(_module_path)
from projects.mmdet3d_plugin.bevformer.apis.train import \
custom_train_model
# set cudnn_benchmark
if cfg.get('cudnn_benchmark', False):
torch.backends.cudnn.benchmark = True
@@ -155,7 +155,7 @@ def main():
else:
cfg.gpu_ids = range(1) if args.gpus is None else range(args.gpus)
if digit_version(TORCH_VERSION) == digit_version('1.8.1') and cfg.optimizer['type'] == 'AdamW':
cfg.optimizer['type'] = 'AdamW2' # fix bug in Adamw
if args.autoscale_lr:
# apply the linear scaling rule (https://arxiv.org/abs/1706.02677)
cfg.optimizer['lr'] = cfg.optimizer['lr'] * len(cfg.gpu_ids) / 8
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import json
from collections import defaultdict
import numpy as np
import seaborn as sns
from matplotlib import pyplot as plt
@@ -65,7 +66,7 @@ def plot_curve(log_dicts, args):
else:
# find the first epoch that do eval
x0 = min(epochs) + args.interval - \
min(epochs) % args.interval
xs = np.arange(x0, max(epochs) + 1, args.interval)
ys = []
for epoch in epochs[args.interval - 1::args.interval]:
@@ -85,7 +86,7 @@ def plot_curve(log_dicts, args):
xs = []
ys = []
num_iters_per_epoch = \
log_dict[epochs[args.interval - 1]]['iter'][-1]
for epoch in epochs[args.interval - 1::args.interval]:
iters = log_dict[epoch]['iter']
if log_dict[epoch]['mode'][-1] == 'val':
@@ -152,7 +153,7 @@ def add_time_parser(subparsers):
'--include-outliers',
action='store_true',
help='include the first value of every epoch when computing '
'the average time')
def parse_args():
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import sys
import time
import torch
from mmcv import Config
from mmcv.parallel import MMDataParallel
from mmcv.runner import load_checkpoint, wrap_fp16_model
sys.path.append('.')
from projects.mmdet3d_plugin.datasets.builder import build_dataloader
from projects.mmdet3d_plugin.datasets import custom_build_dataset
# from mmdet3d.datasets import build_dataloader, build_dataset
from mmdet3d.models import build_detector
# from tools.misc.fuse_conv_bn import fuse_module
def parse_args():
@@ -25,7 +28,7 @@ def parse_args():
'--fuse-conv-bn',
action='store_true',
help='Whether to fuse conv and bn, this will slightly increase'
'the inference speed')
args = parser.parse_args()
return args
@@ -59,7 +62,7 @@ def main():
wrap_fp16_model(model)
if args.checkpoint is not None:
load_checkpoint(model, args.checkpoint, map_location='cpu')
# if args.fuse_conv_bn:
# model = fuse_module(model)
model = MMDataParallel(model, device_ids=[0])
import torch
file_path = './ckpts/bevformer_v4.pth'
model = torch.load(file_path, map_location='cpu')
all = 0
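# sum the number of elements of every tensor in the checkpoint's state_dict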
for key in list(model['state_dict'].keys()):
all += model['state_dict'][key].nelement()
print(all)
# smaller 63374123
# v4 69140395
# Based on https://github.com/nutonomy/nuscenes-devkit
# ---------------------------------------------
# Modified by Zhiqi Li
# ---------------------------------------------
import mmcv
import matplotlib.pyplot as plt
import numpy as np
from matplotlib import rcParams
from matplotlib.axes import Axes
from nuscenes.eval.common.data_classes import EvalBoxes, EvalBox
from nuscenes.eval.detection.data_classes import DetectionBox
from nuscenes.eval.detection.render import visualize_sample
from nuscenes.eval.detection.utils import category_to_detection_name
from nuscenes.nuscenes import NuScenes
from nuscenes.utils.data_classes import LidarPointCloud, RadarPointCloud, Box
from nuscenes.utils.geometry_utils import view_points, box_in_image, BoxVisibility, transform_matrix
from PIL import Image
from pyquaternion import Quaternion
from tqdm import tqdm
from typing import Tuple, List, Iterable
cams = ['CAM_FRONT',
'CAM_FRONT_RIGHT',
'CAM_BACK_RIGHT',
'CAM_BACK',
'CAM_BACK_LEFT',
'CAM_FRONT_LEFT']
def render_annotation(
anntoken: str,
margin: float = 10,
view: np.ndarray = np.eye(4),
box_vis_level: BoxVisibility = BoxVisibility.ANY,
out_path: str = 'render.png',
extra_info: bool = False) -> None:
"""
Render selected annotation.
:param anntoken: Sample_annotation token.
:param margin: How many meters in each direction to include in LIDAR view.
:param view: LIDAR view point.
:param box_vis_level: If sample_data is an image, this sets required visibility for boxes.
:param out_path: Optional path to save the rendered figure to disk.
:param extra_info: Whether to render extra information below camera view.
"""
ann_record = nusc.get('sample_annotation', anntoken)
sample_record = nusc.get('sample', ann_record['sample_token'])
assert 'LIDAR_TOP' in sample_record['data'].keys(), 'Error: No LIDAR_TOP in data, unable to render.'
# Figure out which camera the object is fully visible in (this may return nothing).
boxes, cam = [], []
cams = [key for key in sample_record['data'].keys() if 'CAM' in key]
all_bboxes = []
select_cams = []
for cam in cams:
_, boxes, _ = nusc.get_sample_data(sample_record['data'][cam], box_vis_level=box_vis_level,
selected_anntokens=[anntoken])
if len(boxes) > 0:
all_bboxes.append(boxes)
select_cams.append(cam)
# We found an image that matches. Let's abort.
# assert len(boxes) > 0, 'Error: Could not find image where annotation is visible. ' \
# 'Try using e.g. BoxVisibility.ANY.'
# assert len(boxes) < 2, 'Error: Found multiple annotations. Something is wrong!'
num_cam = len(all_bboxes)
fig, axes = plt.subplots(1, num_cam + 1, figsize=(18, 9))
select_cams = [sample_record['data'][cam] for cam in select_cams]
print('bbox in cams:', select_cams)
# Plot LIDAR view.
lidar = sample_record['data']['LIDAR_TOP']
data_path, boxes, camera_intrinsic = nusc.get_sample_data(lidar, selected_anntokens=[anntoken])
LidarPointCloud.from_file(data_path).render_height(axes[0], view=view)
for box in boxes:
c = np.array(get_color(box.name)) / 255.0
box.render(axes[0], view=view, colors=(c, c, c))
corners = view_points(boxes[0].corners(), view, False)[:2, :]
axes[0].set_xlim([np.min(corners[0, :]) - margin, np.max(corners[0, :]) + margin])
axes[0].set_ylim([np.min(corners[1, :]) - margin, np.max(corners[1, :]) + margin])
axes[0].axis('off')
axes[0].set_aspect('equal')
# Plot CAMERA view.
for i in range(1, num_cam + 1):
cam = select_cams[i - 1]
data_path, boxes, camera_intrinsic = nusc.get_sample_data(cam, selected_anntokens=[anntoken])
im = Image.open(data_path)
axes[i].imshow(im)
axes[i].set_title(nusc.get('sample_data', cam)['channel'])
axes[i].axis('off')
axes[i].set_aspect('equal')
for box in boxes:
c = np.array(get_color(box.name)) / 255.0
box.render(axes[i], view=camera_intrinsic, normalize=True, colors=(c, c, c))
# Print extra information about the annotation below the camera view.
axes[i].set_xlim(0, im.size[0])
axes[i].set_ylim(im.size[1], 0)
if extra_info:
rcParams['font.family'] = 'monospace'
w, l, h = ann_record['size']
category = ann_record['category_name']
lidar_points = ann_record['num_lidar_pts']
radar_points = ann_record['num_radar_pts']
sample_data_record = nusc.get('sample_data', sample_record['data']['LIDAR_TOP'])
pose_record = nusc.get('ego_pose', sample_data_record['ego_pose_token'])
dist = np.linalg.norm(np.array(pose_record['translation']) - np.array(ann_record['translation']))
information = ' \n'.join(['category: {}'.format(category),
'',
'# lidar points: {0:>4}'.format(lidar_points),
'# radar points: {0:>4}'.format(radar_points),
'',
'distance: {:>7.3f}m'.format(dist),
'',
'width: {:>7.3f}m'.format(w),
'length: {:>7.3f}m'.format(l),
'height: {:>7.3f}m'.format(h)])
plt.annotate(information, (0, 0), (0, -20), xycoords='axes fraction', textcoords='offset points', va='top')
if out_path is not None:
plt.savefig(out_path)
def get_sample_data(sample_data_token: str,
box_vis_level: BoxVisibility = BoxVisibility.ANY,
selected_anntokens=None,
use_flat_vehicle_coordinates: bool = False):
"""
Returns the data path as well as all annotations related to that sample_data.
Note that the boxes are transformed into the current sensor's coordinate frame.
:param sample_data_token: Sample_data token.
:param box_vis_level: If sample_data is an image, this sets required visibility for boxes.
:param selected_anntokens: If provided only return the selected annotation.
:param use_flat_vehicle_coordinates: Instead of the current sensor's coordinate frame, use ego frame which is
aligned to z-plane in the world.
:return: (data_path, boxes, camera_intrinsic <np.array: 3, 3>)
"""
# Retrieve sensor & pose records
sd_record = nusc.get('sample_data', sample_data_token)
cs_record = nusc.get('calibrated_sensor', sd_record['calibrated_sensor_token'])
sensor_record = nusc.get('sensor', cs_record['sensor_token'])
pose_record = nusc.get('ego_pose', sd_record['ego_pose_token'])
data_path = nusc.get_sample_data_path(sample_data_token)
if sensor_record['modality'] == 'camera':
cam_intrinsic = np.array(cs_record['camera_intrinsic'])
imsize = (sd_record['width'], sd_record['height'])
else:
cam_intrinsic = None
imsize = None
# Retrieve all sample annotations and map to sensor coordinate system.
if selected_anntokens is not None:
boxes = list(map(nusc.get_box, selected_anntokens))
else:
boxes = nusc.get_boxes(sample_data_token)
# Make list of Box objects including coord system transforms.
box_list = []
for box in boxes:
if use_flat_vehicle_coordinates:
# Move box to ego vehicle coord system parallel to world z plane.
yaw = Quaternion(pose_record['rotation']).yaw_pitch_roll[0]
box.translate(-np.array(pose_record['translation']))
box.rotate(Quaternion(scalar=np.cos(yaw / 2), vector=[0, 0, np.sin(yaw / 2)]).inverse)
else:
# Move box to ego vehicle coord system.
box.translate(-np.array(pose_record['translation']))
box.rotate(Quaternion(pose_record['rotation']).inverse)
# Move box to sensor coord system.
box.translate(-np.array(cs_record['translation']))
box.rotate(Quaternion(cs_record['rotation']).inverse)
if sensor_record['modality'] == 'camera' and not \
box_in_image(box, cam_intrinsic, imsize, vis_level=box_vis_level):
continue
box_list.append(box)
return data_path, box_list, cam_intrinsic
def get_predicted_data(sample_data_token: str,
box_vis_level: BoxVisibility = BoxVisibility.ANY,
selected_anntokens=None,
use_flat_vehicle_coordinates: bool = False,
pred_anns=None
):
"""
Returns the data path as well as all annotations related to that sample_data.
Note that the boxes are transformed into the current sensor's coordinate frame.
:param sample_data_token: Sample_data token.
:param box_vis_level: If sample_data is an image, this sets required visibility for boxes.
:param selected_anntokens: If provided only return the selected annotation.
:param use_flat_vehicle_coordinates: Instead of the current sensor's coordinate frame, use ego frame which is
aligned to z-plane in the world.
:return: (data_path, boxes, camera_intrinsic <np.array: 3, 3>)
"""
# Retrieve sensor & pose records
sd_record = nusc.get('sample_data', sample_data_token)
cs_record = nusc.get('calibrated_sensor', sd_record['calibrated_sensor_token'])
sensor_record = nusc.get('sensor', cs_record['sensor_token'])
pose_record = nusc.get('ego_pose', sd_record['ego_pose_token'])
data_path = nusc.get_sample_data_path(sample_data_token)
if sensor_record['modality'] == 'camera':
cam_intrinsic = np.array(cs_record['camera_intrinsic'])
imsize = (sd_record['width'], sd_record['height'])
else:
cam_intrinsic = None
imsize = None
# Retrieve all sample annotations and map to sensor coordinate system.
# if selected_anntokens is not None:
# boxes = list(map(nusc.get_box, selected_anntokens))
# else:
# boxes = nusc.get_boxes(sample_data_token)
boxes = pred_anns
# Make list of Box objects including coord system transforms.
box_list = []
for box in boxes:
if use_flat_vehicle_coordinates:
# Move box to ego vehicle coord system parallel to world z plane.
yaw = Quaternion(pose_record['rotation']).yaw_pitch_roll[0]
box.translate(-np.array(pose_record['translation']))
box.rotate(Quaternion(scalar=np.cos(yaw / 2), vector=[0, 0, np.sin(yaw / 2)]).inverse)
else:
# Move box to ego vehicle coord system.
box.translate(-np.array(pose_record['translation']))
box.rotate(Quaternion(pose_record['rotation']).inverse)
# Move box to sensor coord system.
box.translate(-np.array(cs_record['translation']))
box.rotate(Quaternion(cs_record['rotation']).inverse)
if sensor_record['modality'] == 'camera' and not \
box_in_image(box, cam_intrinsic, imsize, vis_level=box_vis_level):
continue
box_list.append(box)
return data_path, box_list, cam_intrinsic
def lidiar_render(sample_token, data, out_path=None):
bbox_gt_list = []
bbox_pred_list = []
anns = nusc.get('sample', sample_token)['anns']
for ann in anns:
content = nusc.get('sample_annotation', ann)
try:
bbox_gt_list.append(DetectionBox(
sample_token=content['sample_token'],
translation=tuple(content['translation']),
size=tuple(content['size']),
rotation=tuple(content['rotation']),
velocity=nusc.box_velocity(content['token'])[:2],
ego_translation=(0.0, 0.0, 0.0) if 'ego_translation' not in content
else tuple(content['ego_translation']),
num_pts=-1 if 'num_pts' not in content else int(content['num_pts']),
detection_name=category_to_detection_name(content['category_name']),
detection_score=-1.0 if 'detection_score' not in content else float(content['detection_score']),
attribute_name=''))
except:
pass
bbox_anns = data['results'][sample_token]
for content in bbox_anns:
bbox_pred_list.append(DetectionBox(
sample_token=content['sample_token'],
translation=tuple(content['translation']),
size=tuple(content['size']),
rotation=tuple(content['rotation']),
velocity=tuple(content['velocity']),
ego_translation=(0.0, 0.0, 0.0) if 'ego_translation' not in content
else tuple(content['ego_translation']),
num_pts=-1 if 'num_pts' not in content else int(content['num_pts']),
detection_name=content['detection_name'],
detection_score=-1.0 if 'detection_score' not in content else float(content['detection_score']),
attribute_name=content['attribute_name']))
gt_annotations = EvalBoxes()
pred_annotations = EvalBoxes()
gt_annotations.add_boxes(sample_token, bbox_gt_list)
pred_annotations.add_boxes(sample_token, bbox_pred_list)
print('green is ground truth')
print('blue is the predicted result')
visualize_sample(nusc, sample_token, gt_annotations, pred_annotations, savepath=out_path+'_bev')
def get_color(category_name: str):
"""
Provides the default colors based on the category names.
This method works for the general nuScenes categories, as well as the nuScenes detection categories.
"""
a = ['noise', 'animal', 'human.pedestrian.adult', 'human.pedestrian.child', 'human.pedestrian.construction_worker',
'human.pedestrian.personal_mobility', 'human.pedestrian.police_officer', 'human.pedestrian.stroller',
'human.pedestrian.wheelchair', 'movable_object.barrier', 'movable_object.debris',
'movable_object.pushable_pullable', 'movable_object.trafficcone', 'static_object.bicycle_rack', 'vehicle.bicycle',
'vehicle.bus.bendy', 'vehicle.bus.rigid', 'vehicle.car', 'vehicle.construction', 'vehicle.emergency.ambulance',
'vehicle.emergency.police', 'vehicle.motorcycle', 'vehicle.trailer', 'vehicle.truck', 'flat.driveable_surface',
'flat.other', 'flat.sidewalk', 'flat.terrain', 'static.manmade', 'static.other', 'static.vegetation',
'vehicle.ego']
class_names = [
'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',
'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
]
#print(category_name)
if category_name == 'bicycle':
return nusc.colormap['vehicle.bicycle']
elif category_name == 'construction_vehicle':
return nusc.colormap['vehicle.construction']
elif category_name == 'traffic_cone':
return nusc.colormap['movable_object.trafficcone']
for key in nusc.colormap.keys():
if category_name in key:
return nusc.colormap[key]
return [0, 0, 0]
def render_sample_data(
sample_toekn: str,
with_anns: bool = True,
box_vis_level: BoxVisibility = BoxVisibility.ANY,
axes_limit: float = 40,
ax=None,
nsweeps: int = 1,
out_path: str = None,
underlay_map: bool = True,
use_flat_vehicle_coordinates: bool = True,
show_lidarseg: bool = False,
show_lidarseg_legend: bool = False,
filter_lidarseg_labels=None,
lidarseg_preds_bin_path: str = None,
verbose: bool = True,
show_panoptic: bool = False,
pred_data=None,
) -> None:
"""
Render sample data onto axis.
:param sample_data_token: Sample_data token.
:param with_anns: Whether to draw box annotations.
:param box_vis_level: If sample_data is an image, this sets required visibility for boxes.
:param axes_limit: Axes limit for lidar and radar (measured in meters).
:param ax: Axes onto which to render.
:param nsweeps: Number of sweeps for lidar and radar.
:param out_path: Optional path to save the rendered figure to disk.
:param underlay_map: When set to true, lidar data is plotted onto the map. This can be slow.
:param use_flat_vehicle_coordinates: Instead of the current sensor's coordinate frame, use ego frame which is
aligned to z-plane in the world. Note: Previously this method did not use flat vehicle coordinates, which
can lead to small errors when the vertical axis of the global frame and lidar are not aligned. The new
setting is more correct and rotates the plot by ~90 degrees.
:param show_lidarseg: When set to True, the lidar data is colored with the segmentation labels. When set
to False, the colors of the lidar data represent the distance from the center of the ego vehicle.
:param show_lidarseg_legend: Whether to display the legend for the lidarseg labels in the frame.
:param filter_lidarseg_labels: Only show lidar points which belong to the given list of classes. If None
or the list is empty, all classes will be displayed.
:param lidarseg_preds_bin_path: A path to the .bin file which contains the user's lidar segmentation
predictions for the sample.
:param verbose: Whether to display the image after it is rendered.
:param show_panoptic: When set to True, the lidar data is colored with the panoptic labels. When set
to False, the colors of the lidar data represent the distance from the center of the ego vehicle.
If show_lidarseg is True, show_panoptic will be set to False.
"""
lidiar_render(sample_toekn, pred_data, out_path=out_path)
sample = nusc.get('sample', sample_toekn)
# sample = data['results'][sample_token_list[0]][0]
cams = [
'CAM_FRONT_LEFT',
'CAM_FRONT',
'CAM_FRONT_RIGHT',
'CAM_BACK_LEFT',
'CAM_BACK',
'CAM_BACK_RIGHT',
]
if ax is None:
_, ax = plt.subplots(4, 3, figsize=(24, 18))
j = 0
for ind, cam in enumerate(cams):
sample_data_token = sample['data'][cam]
sd_record = nusc.get('sample_data', sample_data_token)
sensor_modality = sd_record['sensor_modality']
if sensor_modality in ['lidar', 'radar']:
assert False
elif sensor_modality == 'camera':
# Load boxes and image.
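# keep only predictions with detection_score above 0.2, then transform them into this camera's frame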
boxes = [Box(record['translation'], record['size'], Quaternion(record['rotation']),
name=record['detection_name'], token='predicted') for record in
pred_data['results'][sample_toekn] if record['detection_score'] > 0.2]
data_path, boxes_pred, camera_intrinsic = get_predicted_data(sample_data_token,
box_vis_level=box_vis_level, pred_anns=boxes)
_, boxes_gt, _ = nusc.get_sample_data(sample_data_token, box_vis_level=box_vis_level)
if ind == 3:
j += 1
ind = ind % 3
data = Image.open(data_path)
# mmcv.imwrite(np.array(data)[:,:,::-1], f'{cam}.png')
# Init axes.
# Show image.
ax[j, ind].imshow(data)
ax[j + 2, ind].imshow(data)
# Show boxes.
if with_anns:
for box in boxes_pred:
c = np.array(get_color(box.name)) / 255.0
box.render(ax[j, ind], view=camera_intrinsic, normalize=True, colors=(c, c, c))
for box in boxes_gt:
c = np.array(get_color(box.name)) / 255.0
box.render(ax[j + 2, ind], view=camera_intrinsic, normalize=True, colors=(c, c, c))
# Limit visible range.
ax[j, ind].set_xlim(0, data.size[0])
ax[j, ind].set_ylim(data.size[1], 0)
ax[j + 2, ind].set_xlim(0, data.size[0])
ax[j + 2, ind].set_ylim(data.size[1], 0)
else:
raise ValueError("Error: Unknown sensor modality!")
ax[j, ind].axis('off')
ax[j, ind].set_title('PRED: {} {labels_type}'.format(
sd_record['channel'], labels_type='(predictions)' if lidarseg_preds_bin_path else ''))
ax[j, ind].set_aspect('equal')
ax[j + 2, ind].axis('off')
ax[j + 2, ind].set_title('GT:{} {labels_type}'.format(
sd_record['channel'], labels_type='(predictions)' if lidarseg_preds_bin_path else ''))
ax[j + 2, ind].set_aspect('equal')
if out_path is not None:
plt.savefig(out_path+'_camera', bbox_inches='tight', pad_inches=0, dpi=200)
if verbose:
plt.show()
plt.close()
if __name__ == '__main__':
nusc = NuScenes(version='v1.0-trainval', dataroot='./data/nuscenes', verbose=True)
# render_annotation('7603b030b42a4b1caa8c443ccc1a7d52')
bevformer_results = mmcv.load('test/bevformer_base/Thu_Jun__9_16_22_37_2022/pts_bbox/results_nusc.json')
sample_token_list = list(bevformer_results['results'].keys())
for id in range(0, 10):
render_sample_data(sample_token_list[id], pred_data=bevformer_results, out_path=sample_token_list[id])
# Based on https://github.com/nutonomy/nuscenes-devkit
# ---------------------------------------------
# Modified by Zhiqi Li
# ---------------------------------------------
import mmcv
from nuscenes.eval.common.data_classes import EvalBoxes
from nuscenes.eval.detection.data_classes import DetectionBox
from nuscenes.eval.detection.render import visualize_sample
from nuscenes.eval.detection.utils import category_to_detection_name
from nuscenes.nuscenes import NuScenes
from nuscenes.utils.geometry_utils import (BoxVisibility, box_in_image,
view_points)
from PIL import Image
from pyquaternion import Quaternion
cams = ['CAM_FRONT',
'CAM_FRONT_RIGHT',
'CAM_BACK_RIGHT',
'CAM_BACK',
'CAM_BACK_LEFT',
'CAM_FRONT_LEFT']
import matplotlib.pyplot as plt
import numpy as np
from matplotlib import rcParams
from nuscenes.utils.data_classes import Box, LidarPointCloud
from PIL import Image
def render_annotation(
anntoken: str,
margin: float = 10,
view: np.ndarray = np.eye(4),
box_vis_level: BoxVisibility = BoxVisibility.ANY,
out_path: str = 'render.png',
extra_info: bool = False) -> None:
"""
Render selected annotation.
:param anntoken: Sample_annotation token.
:param margin: How many meters in each direction to include in LIDAR view.
:param view: LIDAR view point.
:param box_vis_level: If sample_data is an image, this sets required visibility for boxes.
:param out_path: Optional path to save the rendered figure to disk.
:param extra_info: Whether to render extra information below camera view.
"""
ann_record = nusc.get('sample_annotation', anntoken)
sample_record = nusc.get('sample', ann_record['sample_token'])
assert 'LIDAR_TOP' in sample_record['data'].keys(), 'Error: No LIDAR_TOP in data, unable to render.'
# Figure out which camera the object is fully visible in (this may return nothing).
boxes, cam = [], []
cams = [key for key in sample_record['data'].keys() if 'CAM' in key]
all_bboxes = []
select_cams = []
for cam in cams:
_, boxes, _ = nusc.get_sample_data(sample_record['data'][cam], box_vis_level=box_vis_level,
selected_anntokens=[anntoken])
if len(boxes) > 0:
all_bboxes.append(boxes)
select_cams.append(cam)
# We found an image that matches. Let's abort.
# assert len(boxes) > 0, 'Error: Could not find image where annotation is visible. ' \
# 'Try using e.g. BoxVisibility.ANY.'
# assert len(boxes) < 2, 'Error: Found multiple annotations. Something is wrong!'
num_cam = len(all_bboxes)
fig, axes = plt.subplots(1, num_cam + 1, figsize=(18, 9))
select_cams = [sample_record['data'][cam] for cam in select_cams]
print('bbox in cams:', select_cams)
# Plot LIDAR view.
lidar = sample_record['data']['LIDAR_TOP']
data_path, boxes, camera_intrinsic = nusc.get_sample_data(lidar, selected_anntokens=[anntoken])
LidarPointCloud.from_file(data_path).render_height(axes[0], view=view)
for box in boxes:
c = np.array(get_color(box.name)) / 255.0
box.render(axes[0], view=view, colors=(c, c, c))
corners = view_points(boxes[0].corners(), view, False)[:2, :]
axes[0].set_xlim([np.min(corners[0, :]) - margin, np.max(corners[0, :]) + margin])
axes[0].set_ylim([np.min(corners[1, :]) - margin, np.max(corners[1, :]) + margin])
axes[0].axis('off')
axes[0].set_aspect('equal')
# Plot CAMERA view.
for i in range(1, num_cam + 1):
cam = select_cams[i - 1]
data_path, boxes, camera_intrinsic = nusc.get_sample_data(cam, selected_anntokens=[anntoken])
im = Image.open(data_path)
axes[i].imshow(im)
axes[i].set_title(nusc.get('sample_data', cam)['channel'])
axes[i].axis('off')
axes[i].set_aspect('equal')
for box in boxes:
c = np.array(get_color(box.name)) / 255.0
box.render(axes[i], view=camera_intrinsic, normalize=True, colors=(c, c, c))
# Print extra information about the annotation below the camera view.
axes[i].set_xlim(0, im.size[0])
axes[i].set_ylim(im.size[1], 0)
if extra_info:
rcParams['font.family'] = 'monospace'
w, l, h = ann_record['size']
category = ann_record['category_name']
lidar_points = ann_record['num_lidar_pts']
radar_points = ann_record['num_radar_pts']
sample_data_record = nusc.get('sample_data', sample_record['data']['LIDAR_TOP'])
pose_record = nusc.get('ego_pose', sample_data_record['ego_pose_token'])
dist = np.linalg.norm(np.array(pose_record['translation']) - np.array(ann_record['translation']))
information = ' \n'.join(['category: {}'.format(category),
'',
'# lidar points: {0:>4}'.format(lidar_points),
'# radar points: {0:>4}'.format(radar_points),
'',
'distance: {:>7.3f}m'.format(dist),
'',
'width: {:>7.3f}m'.format(w),
'length: {:>7.3f}m'.format(l),
'height: {:>7.3f}m'.format(h)])
plt.annotate(information, (0, 0), (0, -20), xycoords='axes fraction', textcoords='offset points', va='top')
if out_path is not None:
plt.savefig(out_path)
def get_sample_data(sample_data_token: str,
box_vis_level: BoxVisibility = BoxVisibility.ANY,
selected_anntokens=None,
use_flat_vehicle_coordinates: bool = False):
"""
Returns the data path as well as all annotations related to that sample_data.
Note that the boxes are transformed into the current sensor's coordinate frame.
:param sample_data_token: Sample_data token.
:param box_vis_level: If sample_data is an image, this sets required visibility for boxes.
:param selected_anntokens: If provided only return the selected annotation.
:param use_flat_vehicle_coordinates: Instead of the current sensor's coordinate frame, use ego frame which is
aligned to z-plane in the world.
:return: (data_path, boxes, camera_intrinsic <np.array: 3, 3>)
"""
# Retrieve sensor & pose records
sd_record = nusc.get('sample_data', sample_data_token)
cs_record = nusc.get('calibrated_sensor', sd_record['calibrated_sensor_token'])
sensor_record = nusc.get('sensor', cs_record['sensor_token'])
pose_record = nusc.get('ego_pose', sd_record['ego_pose_token'])
data_path = nusc.get_sample_data_path(sample_data_token)
if sensor_record['modality'] == 'camera':
cam_intrinsic = np.array(cs_record['camera_intrinsic'])
imsize = (sd_record['width'], sd_record['height'])
else:
cam_intrinsic = None
imsize = None
# Retrieve all sample annotations and map to sensor coordinate system.
if selected_anntokens is not None:
boxes = list(map(nusc.get_box, selected_anntokens))
else:
boxes = nusc.get_boxes(sample_data_token)
# Make list of Box objects including coord system transforms.
box_list = []
for box in boxes:
if use_flat_vehicle_coordinates:
# Move box to ego vehicle coord system parallel to world z plane.
yaw = Quaternion(pose_record['rotation']).yaw_pitch_roll[0]
box.translate(-np.array(pose_record['translation']))
box.rotate(Quaternion(scalar=np.cos(yaw / 2), vector=[0, 0, np.sin(yaw / 2)]).inverse)
else:
# Move box to ego vehicle coord system.
box.translate(-np.array(pose_record['translation']))
box.rotate(Quaternion(pose_record['rotation']).inverse)
# Move box to sensor coord system.
box.translate(-np.array(cs_record['translation']))
box.rotate(Quaternion(cs_record['rotation']).inverse)
if sensor_record['modality'] == 'camera' and not \
box_in_image(box, cam_intrinsic, imsize, vis_level=box_vis_level):
continue
box_list.append(box)
return data_path, box_list, cam_intrinsic
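# A minimal usage sketch for get_sample_data (illustrative only): it assumes the global
# `nusc` object has already been constructed, as in the __main__ block below, and simply
# reports how many boxes survive the visibility filter for one sample_data token.
def _example_get_sample_data(sample_data_token):
    data_path, boxes, cam_intrinsic = get_sample_data(
        sample_data_token, box_vis_level=BoxVisibility.ANY)
    print('{} box(es) in the sensor frame for {}'.format(len(boxes), data_path))
    return data_path, boxes, cam_intrinsic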
def get_predicted_data(sample_data_token: str,
box_vis_level: BoxVisibility = BoxVisibility.ANY,
selected_anntokens=None,
use_flat_vehicle_coordinates: bool = False,
pred_anns=None
):
"""
Returns the data path as well as all predicted boxes related to that sample_data.
Note that the boxes are transformed into the current sensor's coordinate frame.
:param sample_data_token: Sample_data token.
:param box_vis_level: If sample_data is an image, this sets required visibility for boxes.
:param selected_anntokens: Unused here; kept for interface compatibility with get_sample_data.
:param use_flat_vehicle_coordinates: Instead of the current sensor's coordinate frame, use ego frame which is
aligned to z-plane in the world.
:param pred_anns: List of predicted Box objects to be transformed into the sensor coordinate frame.
:return: (data_path, boxes, camera_intrinsic <np.array: 3, 3>)
"""
# Retrieve sensor & pose records
sd_record = nusc.get('sample_data', sample_data_token)
cs_record = nusc.get('calibrated_sensor', sd_record['calibrated_sensor_token'])
sensor_record = nusc.get('sensor', cs_record['sensor_token'])
pose_record = nusc.get('ego_pose', sd_record['ego_pose_token'])
data_path = nusc.get_sample_data_path(sample_data_token)
if sensor_record['modality'] == 'camera':
cam_intrinsic = np.array(cs_record['camera_intrinsic'])
imsize = (sd_record['width'], sd_record['height'])
else:
cam_intrinsic = None
imsize = None
# Unlike get_sample_data, the boxes come from the caller-supplied predictions
# rather than from the ground-truth sample annotations.
boxes = pred_anns
# Make list of Box objects including coord system transforms.
box_list = []
for box in boxes:
if use_flat_vehicle_coordinates:
# Move box to ego vehicle coord system parallel to world z plane.
yaw = Quaternion(pose_record['rotation']).yaw_pitch_roll[0]
box.translate(-np.array(pose_record['translation']))
box.rotate(Quaternion(scalar=np.cos(yaw / 2), vector=[0, 0, np.sin(yaw / 2)]).inverse)
else:
# Move box to ego vehicle coord system.
box.translate(-np.array(pose_record['translation']))
box.rotate(Quaternion(pose_record['rotation']).inverse)
# Move box to sensor coord system.
box.translate(-np.array(cs_record['translation']))
box.rotate(Quaternion(cs_record['rotation']).inverse)
if sensor_record['modality'] == 'camera' and not \
box_in_image(box, cam_intrinsic, imsize, vis_level=box_vis_level):
continue
box_list.append(box)
return data_path, box_list, cam_intrinsic
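# A minimal usage sketch for get_predicted_data (illustrative only): `result_records` is
# assumed to hold entries in the nuScenes results_nusc.json format, i.e.
# data['results'][sample_token], and the 0.3 score threshold is an arbitrary example value.
def _example_get_predicted_data(sample_data_token, result_records, score_thr=0.3):
    pred_boxes = [Box(r['translation'], r['size'], Quaternion(r['rotation']),
                      name=r['detection_name'], token='predicted')
                  for r in result_records if r['detection_score'] > score_thr]
    # The returned boxes are already transformed into the selected sensor's frame.
    return get_predicted_data(sample_data_token,
                              box_vis_level=BoxVisibility.ANY,
                              pred_anns=pred_boxes)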
def lidar_render(sample_token, data, out_path=None):
bbox_gt_list = []
bbox_pred_list = []
anns = nusc.get('sample', sample_token)['anns']
for ann in anns:
content = nusc.get('sample_annotation', ann)
try:
bbox_gt_list.append(DetectionBox(
sample_token=content['sample_token'],
translation=tuple(content['translation']),
size=tuple(content['size']),
rotation=tuple(content['rotation']),
velocity=nusc.box_velocity(content['token'])[:2],
ego_translation=(0.0, 0.0, 0.0) if 'ego_translation' not in content
else tuple(content['ego_translation']),
num_pts=-1 if 'num_pts' not in content else int(content['num_pts']),
detection_name=category_to_detection_name(content['category_name']),
detection_score=-1.0 if 'detection_score' not in content else float(content['detection_score']),
attribute_name=''))
except Exception:
# Skip annotations that cannot be converted to a DetectionBox
# (e.g. categories without a detection-class mapping).
pass
bbox_anns = data['results'][sample_token]
for content in bbox_anns:
bbox_pred_list.append(DetectionBox(
sample_token=content['sample_token'],
translation=tuple(content['translation']),
size=tuple(content['size']),
rotation=tuple(content['rotation']),
velocity=tuple(content['velocity']),
ego_translation=(0.0, 0.0, 0.0) if 'ego_translation' not in content
else tuple(content['ego_translation']),
num_pts=-1 if 'num_pts' not in content else int(content['num_pts']),
detection_name=content['detection_name'],
detection_score=-1.0 if 'detection_score' not in content else float(content['detection_score']),
attribute_name=content['attribute_name']))
gt_annotations = EvalBoxes()
pred_annotations = EvalBoxes()
gt_annotations.add_boxes(sample_token, bbox_gt_list)
pred_annotations.add_boxes(sample_token, bbox_pred_list)
print('green is ground truth')
print('blue is the predicted result')
visualize_sample(nusc, sample_token, gt_annotations, pred_annotations, savepath=out_path + '_bev')
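# A minimal usage sketch for lidar_render (illustrative only): 'results_nusc.json' stands in
# for any detection results file in the nuScenes submission format, and the global `nusc`
# object must already exist. It writes a single BEV comparison figure to '<out_path>_bev'.
def _example_lidar_render(results_json, sample_token, out_path='bev_demo'):
    data = mmcv.load(results_json)
    lidar_render(sample_token, data, out_path=out_path)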
def get_color(category_name: str):
"""
Provides the default colors based on the category names.
This method works for the general nuScenes categories, as well as the nuScenes detection categories.
"""
a = ['noise', 'animal', 'human.pedestrian.adult', 'human.pedestrian.child', 'human.pedestrian.construction_worker',
'human.pedestrian.personal_mobility', 'human.pedestrian.police_officer', 'human.pedestrian.stroller',
'human.pedestrian.wheelchair', 'movable_object.barrier', 'movable_object.debris',
'movable_object.pushable_pullable', 'movable_object.trafficcone', 'static_object.bicycle_rack',
'vehicle.bicycle',
'vehicle.bus.bendy', 'vehicle.bus.rigid', 'vehicle.car', 'vehicle.construction', 'vehicle.emergency.ambulance',
'vehicle.emergency.police', 'vehicle.motorcycle', 'vehicle.trailer', 'vehicle.truck', 'flat.driveable_surface',
'flat.other', 'flat.sidewalk', 'flat.terrain', 'static.manmade', 'static.other', 'static.vegetation',
'vehicle.ego']
class_names = [
'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',
'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
]
# print(category_name)
if category_name == 'bicycle':
return nusc.colormap['vehicle.bicycle']
elif category_name == 'construction_vehicle':
return nusc.colormap['vehicle.construction']
elif category_name == 'traffic_cone':
return nusc.colormap['movable_object.trafficcone']
for key in nusc.colormap.keys():
if category_name in key:
return nusc.colormap[key]
return [0, 0, 0]
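# A small illustrative helper for get_color: the returned values are 0-255 RGB, so the
# rendering code in this file rescales them to the 0-1 range that matplotlib expects.
def _example_get_color(category_name='traffic_cone'):
    rgb = np.array(get_color(category_name)) / 255.0
    print('{} -> RGB in [0, 1]: {}'.format(category_name, rgb))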
def render_sample_data(
sample_token: str,
with_anns: bool = True,
box_vis_level: BoxVisibility = BoxVisibility.ANY,
axes_limit: float = 40,
ax=None,
nsweeps: int = 1,
out_path: str = None,
underlay_map: bool = True,
use_flat_vehicle_coordinates: bool = True,
show_lidarseg: bool = False,
show_lidarseg_legend: bool = False,
filter_lidarseg_labels=None,
lidarseg_preds_bin_path: str = None,
verbose: bool = True,
show_panoptic: bool = False,
pred_data=None,
) -> None:
"""
Render sample data onto axis.
:param sample_token: Sample token.
:param with_anns: Whether to draw box annotations.
:param box_vis_level: If sample_data is an image, this sets required visibility for boxes.
:param axes_limit: Axes limit for lidar and radar (measured in meters).
:param ax: Axes onto which to render.
:param nsweeps: Number of sweeps for lidar and radar.
:param out_path: Optional path to save the rendered figure to disk.
:param underlay_map: When set to true, lidar data is plotted onto the map. This can be slow.
:param use_flat_vehicle_coordinates: Instead of the current sensor's coordinate frame, use ego frame which is
aligned to z-plane in the world. Note: Previously this method did not use flat vehicle coordinates, which
can lead to small errors when the vertical axis of the global frame and lidar are not aligned. The new
setting is more correct and rotates the plot by ~90 degrees.
:param show_lidarseg: When set to True, the lidar data is colored with the segmentation labels. When set
to False, the colors of the lidar data represent the distance from the center of the ego vehicle.
:param show_lidarseg_legend: Whether to display the legend for the lidarseg labels in the frame.
:param filter_lidarseg_labels: Only show lidar points which belong to the given list of classes. If None
or the list is empty, all classes will be displayed.
:param lidarseg_preds_bin_path: A path to the .bin file which contains the user's lidar segmentation
predictions for the sample.
:param verbose: Whether to display the image after it is rendered.
:param show_panoptic: When set to True, the lidar data is colored with the panoptic labels. When set
to False, the colors of the lidar data represent the distance from the center of the ego vehicle.
If show_lidarseg is True, show_panoptic will be set to False.
:param pred_data: Detection results dict in the nuScenes results_nusc.json submission format.
"""
lidar_render(sample_token, pred_data, out_path=out_path)
sample = nusc.get('sample', sample_token)
# sample = data['results'][sample_token_list[0]][0]
cams = [
'CAM_FRONT_LEFT',
'CAM_FRONT',
'CAM_FRONT_RIGHT',
'CAM_BACK_LEFT',
'CAM_BACK',
'CAM_BACK_RIGHT',
]
if ax is None:
_, ax = plt.subplots(4, 3, figsize=(24, 18))
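# Figure layout: rows 0-1 of the 4x3 grid show the six camera views with predicted boxes,
# rows 2-3 repeat the same images with the ground-truth boxes.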
j = 0
for ind, cam in enumerate(cams):
sample_data_token = sample['data'][cam]
sd_record = nusc.get('sample_data', sample_data_token)
sensor_modality = sd_record['sensor_modality']
if sensor_modality in ['lidar', 'radar']:
assert False, 'only camera channels are rendered here; the lidar BEV is handled by lidar_render'
elif sensor_modality == 'camera':
# Load boxes and image.
boxes = [Box(record['translation'], record['size'], Quaternion(record['rotation']),
name=record['detection_name'], token='predicted') for record in
pred_data['results'][sample_token] if record['detection_score'] > 0.2]
data_path, boxes_pred, camera_intrinsic = get_predicted_data(sample_data_token,
box_vis_level=box_vis_level, pred_anns=boxes)
_, boxes_gt, _ = nusc.get_sample_data(sample_data_token, box_vis_level=box_vis_level)
if ind == 3:
j += 1
ind = ind % 3
data = Image.open(data_path)
# mmcv.imwrite(np.array(data)[:,:,::-1], f'{cam}.png')
# Init axes.
# Show image.
ax[j, ind].imshow(data)
ax[j + 2, ind].imshow(data)
# Show boxes.
if with_anns:
for box in boxes_pred:
c = np.array(get_color(box.name)) / 255.0
box.render(ax[j, ind], view=camera_intrinsic, normalize=True, colors=(c, c, c))
for box in boxes_gt:
c = np.array(get_color(box.name)) / 255.0
box.render(ax[j + 2, ind], view=camera_intrinsic, normalize=True, colors=(c, c, c))
# Limit visible range.
ax[j, ind].set_xlim(0, data.size[0])
ax[j, ind].set_ylim(data.size[1], 0)
ax[j + 2, ind].set_xlim(0, data.size[0])
ax[j + 2, ind].set_ylim(data.size[1], 0)
else:
raise ValueError('Error: Unknown sensor modality!')
ax[j, ind].axis('off')
ax[j, ind].set_title('PRED: {} {labels_type}'.format(
sd_record['channel'], labels_type='(predictions)' if lidarseg_preds_bin_path else ''))
ax[j, ind].set_aspect('equal')
ax[j + 2, ind].axis('off')
ax[j + 2, ind].set_title('GT: {} {labels_type}'.format(
sd_record['channel'], labels_type='(predictions)' if lidarseg_preds_bin_path else ''))
ax[j + 2, ind].set_aspect('equal')
if out_path is not None:
plt.savefig(out_path + '_camera', bbox_inches='tight', pad_inches=0, dpi=200)
if verbose:
plt.show()
plt.close()
if __name__ == '__main__':
nusc = NuScenes(version='v1.0-trainval', dataroot='./data/nuscenes', verbose=True)
# render_annotation('7603b030b42a4b1caa8c443ccc1a7d52')
bevformer_results = mmcv.load('test/bevformer_base/Thu_Jun__9_16_22_37_2022/pts_bbox/results_nusc.json')
sample_token_list = list(bevformer_results['results'].keys())
for idx in range(10):
render_sample_data(sample_token_list[idx], pred_data=bevformer_results, out_path=sample_token_list[idx])
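# Each iteration writes two figures named after the sample token: a BEV ground-truth vs.
# prediction plot ('<sample_token>_bev', saved via visualize_sample) and a camera grid
# ('<sample_token>_camera', saved by render_sample_data).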