将子模块转换为普通目录

19472568 · 雍大凯 · 51e55208 · 19472568 · 19472568 · 19472568
Commit 19472568 authored Apr 08, 2026 by 雍大凯
20 changed files
--- a/docker-hub/MapTRv2/MapTR/projects/mmdet3d_plugin/bevformer/modules/spatial_cross_attention.py
+++ b/docker-hub/MapTRv2/MapTR/projects/mmdet3d_plugin/bevformer/modules/spatial_cross_attention.py
+# ---------------------------------------------
+# Copyright (c) OpenMMLab. All rights reserved.
+# ---------------------------------------------
+#  Modified by Zhiqi Li
+# ---------------------------------------------
+from mmcv.ops.multi_scale_deform_attn import multi_scale_deformable_attn_pytorch
+import warnings
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from mmcv.cnn import xavier_init, constant_init
+from mmcv.cnn.bricks.registry import (ATTENTION,
+                                      TRANSFORMER_LAYER,
+                                      TRANSFORMER_LAYER_SEQUENCE)
+from mmcv.cnn.bricks.transformer import build_attention
+import math
+from mmcv.runner import force_fp32, auto_fp16
+from mmcv.runner.base_module import BaseModule, ModuleList, Sequential
+from mmcv.utils import ext_loader
+from .multi_scale_deformable_attn_function import MultiScaleDeformableAttnFunction_fp32, \
+    MultiScaleDeformableAttnFunction_fp16
+from projects.mmdet3d_plugin.models.utils.bricks import run_time
+ext_module = ext_loader.load_ext(
+    '_ext', ['ms_deform_attn_backward', 'ms_deform_attn_forward'])
+@ATTENTION.register_module()
+class SpatialCrossAttention(BaseModule):
+    """An attention module used in BEVFormer.
+    Args:
+        embed_dims (int): The embedding dimension of Attention.
+            Default: 256.
+        num_cams (int): The number of cameras
+        dropout (float): A Dropout layer on `inp_residual`.
+            Default: 0..
+        init_cfg (obj:`mmcv.ConfigDict`): The Config for initialization.
+            Default: None.
+        deformable_attention: (dict): The config for the deformable attention used in SCA.
+    """
+    def __init__(self,
+                 embed_dims=256,
+                 num_cams=6,
+                 pc_range=None,
+                 dropout=0.1,
+                 init_cfg=None,
+                 batch_first=False,
+                 deformable_attention=dict(
+                     type='MSDeformableAttention3D',
+                     embed_dims=256,
+                     num_levels=4),
+                 **kwargs
+                 ):
+        super(SpatialCrossAttention, self).__init__(init_cfg)
+        self.init_cfg = init_cfg
+        self.dropout = nn.Dropout(dropout)
+        self.pc_range = pc_range
+        self.fp16_enabled = False
+        self.deformable_attention = build_attention(deformable_attention)
+        self.embed_dims = embed_dims
+        self.num_cams = num_cams
+        self.output_proj = nn.Linear(embed_dims, embed_dims)
+        self.batch_first = batch_first
+        self.init_weight()
+    def init_weight(self):
+        """Default initialization for Parameters of Module."""
+        xavier_init(self.output_proj, distribution='uniform', bias=0.)
+    @force_fp32(apply_to=('query', 'key', 'value', 'query_pos', 'reference_points_cam'))
+    def forward(self,
+                query,
+                key,
+                value,
+                residual=None,
+                query_pos=None,
+                key_padding_mask=None,
+                reference_points=None,
+                spatial_shapes=None,
+                reference_points_cam=None,
+                bev_mask=None,
+                level_start_index=None,
+                flag='encoder',
+                **kwargs):
+        """Forward Function of Detr3DCrossAtten.
+        Args:
+            query (Tensor): Query of Transformer with shape
+                (num_query, bs, embed_dims).
+            key (Tensor): The key tensor with shape
+                `(num_key, bs, embed_dims)`.
+            value (Tensor): The value tensor with shape
+                `(num_key, bs, embed_dims)`. (B, N, C, H, W)
+            residual (Tensor): The tensor used for addition, with the
+                same shape as `x`. Default None. If None, `x` will be used.
+            query_pos (Tensor): The positional encoding for `query`.
+                Default: None.
+            key_pos (Tensor): The positional encoding for  `key`. Default
+                None.
+            reference_points (Tensor):  The normalized reference
+                points with shape (bs, num_query, 4),
+                all elements is range in [0, 1], top-left (0,0),
+                bottom-right (1, 1), including padding area.
+                or (N, Length_{query}, num_levels, 4), add
+                additional two dimensions is (w, h) to
+                form reference boxes.
+            key_padding_mask (Tensor): ByteTensor for `query`, with
+                shape [bs, num_key].
+            spatial_shapes (Tensor): Spatial shape of features in
+                different level. With shape  (num_levels, 2),
+                last dimension represent (h, w).
+            level_start_index (Tensor): The start index of each level.
+                A tensor has shape (num_levels) and can be represented
+                as [0, h_0*w_0, h_0*w_0+h_1*w_1, ...].
+        Returns:
+             Tensor: forwarded results with shape [num_query, bs, embed_dims].
+        """
+        if key is None:
+            key = query
+        if value is None:
+            value = key
+        if residual is None:
+            inp_residual = query
+            slots = torch.zeros_like(query)
+        if query_pos is not None:
+            query = query + query_pos
+        bs, num_query, _ = query.size()
+        D = reference_points_cam.size(3)
+        indexes = []
+        for i, mask_per_img in enumerate(bev_mask):
+            index_query_per_img = mask_per_img[0].sum(-1).nonzero().squeeze(-1)
+            indexes.append(index_query_per_img)
+        max_len = max([len(each) for each in indexes])
+        # each camera only interacts with its corresponding BEV queries. This step can  greatly save GPU memory.
+        queries_rebatch = query.new_zeros(
+            [bs, self.num_cams, max_len, self.embed_dims])
+        reference_points_rebatch = reference_points_cam.new_zeros(
+            [bs, self.num_cams, max_len, D, 2])
+        for j in range(bs):
+            for i, reference_points_per_img in enumerate(reference_points_cam):   
+                index_query_per_img = indexes[i]
+                queries_rebatch[j, i, :len(index_query_per_img)] = query[j, index_query_per_img]
+                reference_points_rebatch[j, i, :len(index_query_per_img)] = reference_points_per_img[j, index_query_per_img]
+        num_cams, l, bs, embed_dims = key.shape
+        key = key.permute(2, 0, 1, 3).reshape(
+            bs * self.num_cams, l, self.embed_dims)
+        value = value.permute(2, 0, 1, 3).reshape(
+            bs * self.num_cams, l, self.embed_dims)
+        queries = self.deformable_attention(query=queries_rebatch.view(bs*self.num_cams, max_len, self.embed_dims), key=key, value=value,
+                                            reference_points=reference_points_rebatch.view(bs*self.num_cams, max_len, D, 2), spatial_shapes=spatial_shapes,
+                                            level_start_index=level_start_index).view(bs, self.num_cams, max_len, self.embed_dims)
+        for j in range(bs):
+            for i, index_query_per_img in enumerate(indexes):
+                slots[j, index_query_per_img] += queries[j, i, :len(index_query_per_img)]
+        count = bev_mask.sum(-1) > 0
+        count = count.permute(1, 2, 0).sum(-1)
+        count = torch.clamp(count, min=1.0)
+        slots = slots / count[..., None]
+        slots = self.output_proj(slots)
+        return self.dropout(slots) + inp_residual
+@ATTENTION.register_module()
+class MSDeformableAttention3D(BaseModule):
+    """An attention module used in BEVFormer based on Deformable-Detr.
+    `Deformable DETR: Deformable Transformers for End-to-End Object Detection.
+    <https://arxiv.org/pdf/2010.04159.pdf>`_.
+    Args:
+        embed_dims (int): The embedding dimension of Attention.
+            Default: 256.
+        num_heads (int): Parallel attention heads. Default: 64.
+        num_levels (int): The number of feature map used in
+            Attention. Default: 4.
+        num_points (int): The number of sampling points for
+            each query in each head. Default: 4.
+        im2col_step (int): The step used in image_to_column.
+            Default: 64.
+        dropout (float): A Dropout layer on `inp_identity`.
+            Default: 0.1.
+        batch_first (bool): Key, Query and Value are shape of
+            (batch, n, embed_dim)
+            or (n, batch, embed_dim). Default to False.
+        norm_cfg (dict): Config dict for normalization layer.
+            Default: None.
+        init_cfg (obj:`mmcv.ConfigDict`): The Config for initialization.
+            Default: None.
+    """
+    def __init__(self,
+                 embed_dims=256,
+                 num_heads=8,
+                 num_levels=4,
+                 num_points=8,
+                 im2col_step=64,
+                 dropout=0.1,
+                 batch_first=True,
+                 norm_cfg=None,
+                 init_cfg=None):
+        super().__init__(init_cfg)
+        if embed_dims % num_heads != 0:
+            raise ValueError(f'embed_dims must be divisible by num_heads, '
+                             f'but got {embed_dims} and {num_heads}')
+        dim_per_head = embed_dims // num_heads
+        self.norm_cfg = norm_cfg
+        self.batch_first = batch_first
+        self.output_proj = None
+        self.fp16_enabled = False
+        # you'd better set dim_per_head to a power of 2
+        # which is more efficient in the CUDA implementation
+        def _is_power_of_2(n):
+            if (not isinstance(n, int)) or (n < 0):
+                raise ValueError(
+                    'invalid input for _is_power_of_2: {} (type: {})'.format(
+                        n, type(n)))
+            return (n & (n - 1) == 0) and n != 0
+        if not _is_power_of_2(dim_per_head):
+            warnings.warn(
+                "You'd better set embed_dims in "
+                'MultiScaleDeformAttention to make '
+                'the dimension of each attention head a power of 2 '
+                'which is more efficient in our CUDA implementation.')
+        self.im2col_step = im2col_step
+        self.embed_dims = embed_dims
+        self.num_levels = num_levels
+        self.num_heads = num_heads
+        self.num_points = num_points
+        self.sampling_offsets = nn.Linear(
+            embed_dims, num_heads * num_levels * num_points * 2)
+        self.attention_weights = nn.Linear(embed_dims,
+                                           num_heads * num_levels * num_points)
+        self.value_proj = nn.Linear(embed_dims, embed_dims)
+        self.init_weights()
+    def init_weights(self):
+        """Default initialization for Parameters of Module."""
+        constant_init(self.sampling_offsets, 0.)
+        thetas = torch.arange(
+            self.num_heads,
+            dtype=torch.float32) * (2.0 * math.pi / self.num_heads)
+        grid_init = torch.stack([thetas.cos(), thetas.sin()], -1)
+        grid_init = (grid_init /
+                     grid_init.abs().max(-1, keepdim=True)[0]).view(
+            self.num_heads, 1, 1,
+            2).repeat(1, self.num_levels, self.num_points, 1)
+        for i in range(self.num_points):
+            grid_init[:, :, i, :] *= i + 1
+        self.sampling_offsets.bias.data = grid_init.view(-1)
+        constant_init(self.attention_weights, val=0., bias=0.)
+        xavier_init(self.value_proj, distribution='uniform', bias=0.)
+        xavier_init(self.output_proj, distribution='uniform', bias=0.)
+        self._is_init = True
+    def forward(self,
+                query,
+                key=None,
+                value=None,
+                identity=None,
+                query_pos=None,
+                key_padding_mask=None,
+                reference_points=None,
+                spatial_shapes=None,
+                level_start_index=None,
+                **kwargs):
+        """Forward Function of MultiScaleDeformAttention.
+        Args:
+            query (Tensor): Query of Transformer with shape
+                ( bs, num_query, embed_dims).
+            key (Tensor): The key tensor with shape
+                `(bs, num_key,  embed_dims)`.
+            value (Tensor): The value tensor with shape
+                `(bs, num_key,  embed_dims)`.
+            identity (Tensor): The tensor used for addition, with the
+                same shape as `query`. Default None. If None,
+                `query` will be used.
+            query_pos (Tensor): The positional encoding for `query`.
+                Default: None.
+            key_pos (Tensor): The positional encoding for `key`. Default
+                None.
+            reference_points (Tensor):  The normalized reference
+                points with shape (bs, num_query, num_levels, 2),
+                all elements is range in [0, 1], top-left (0,0),
+                bottom-right (1, 1), including padding area.
+                or (N, Length_{query}, num_levels, 4), add
+                additional two dimensions is (w, h) to
+                form reference boxes.
+            key_padding_mask (Tensor): ByteTensor for `query`, with
+                shape [bs, num_key].
+            spatial_shapes (Tensor): Spatial shape of features in
+                different levels. With shape (num_levels, 2),
+                last dimension represents (h, w).
+            level_start_index (Tensor): The start index of each level.
+                A tensor has shape ``(num_levels, )`` and can be represented
+                as [0, h_0*w_0, h_0*w_0+h_1*w_1, ...].
+        Returns:
+             Tensor: forwarded results with shape [num_query, bs, embed_dims].
+        """
+        if value is None:
+            value = query
+        if identity is None:
+            identity = query
+        if query_pos is not None:
+            query = query + query_pos
+        if not self.batch_first:
+            # change to (bs, num_query ,embed_dims)
+            query = query.permute(1, 0, 2)
+            value = value.permute(1, 0, 2)
+        bs, num_query, _ = query.shape
+        bs, num_value, _ = value.shape
+        assert (spatial_shapes[:, 0] * spatial_shapes[:, 1]).sum() == num_value
+        value = self.value_proj(value)
+        if key_padding_mask is not None:
+            value = value.masked_fill(key_padding_mask[..., None], 0.0)
+        value = value.view(bs, num_value, self.num_heads, -1)
+        sampling_offsets = self.sampling_offsets(query).view(
+            bs, num_query, self.num_heads, self.num_levels, self.num_points, 2)
+        attention_weights = self.attention_weights(query).view(
+            bs, num_query, self.num_heads, self.num_levels * self.num_points)
+        attention_weights = attention_weights.softmax(-1)
+        attention_weights = attention_weights.view(bs, num_query,
+                                                   self.num_heads,
+                                                   self.num_levels,
+                                                   self.num_points)
+        if reference_points.shape[-1] == 2:
+            """
+            For each BEV query, it owns `num_Z_anchors` in 3D space that having different heights.
+            After proejcting, each BEV query has `num_Z_anchors` reference points in each 2D image.
+            For each referent point, we sample `num_points` sampling points.
+            For `num_Z_anchors` reference points,  it has overall `num_points * num_Z_anchors` sampling points.
+            """
+            offset_normalizer = torch.stack(
+                [spatial_shapes[..., 1], spatial_shapes[..., 0]], -1)
+            bs, num_query, num_Z_anchors, xy = reference_points.shape
+            reference_points = reference_points[:, :, None, None, None, :, :]
+            sampling_offsets = sampling_offsets / \
+                offset_normalizer[None, None, None, :, None, :]
+            bs, num_query, num_heads, num_levels, num_all_points, xy = sampling_offsets.shape
+            sampling_offsets = sampling_offsets.view(
+                bs, num_query, num_heads, num_levels, num_all_points // num_Z_anchors, num_Z_anchors, xy)
+            sampling_locations = reference_points + sampling_offsets
+            bs, num_query, num_heads, num_levels, num_points, num_Z_anchors, xy = sampling_locations.shape
+            assert num_all_points == num_points * num_Z_anchors
+            sampling_locations = sampling_locations.view(
+                bs, num_query, num_heads, num_levels, num_all_points, xy)
+        elif reference_points.shape[-1] == 4:
+            assert False
+        else:
+            raise ValueError(
+                f'Last dim of reference_points must be'
+                f' 2 or 4, but get {reference_points.shape[-1]} instead.')
+        #  sampling_locations.shape: bs, num_query, num_heads, num_levels, num_all_points, 2
+        #  attention_weights.shape: bs, num_query, num_heads, num_levels, num_all_points
+        #
+        if torch.cuda.is_available() and value.is_cuda:
+            if value.dtype == torch.float16:
+                MultiScaleDeformableAttnFunction = MultiScaleDeformableAttnFunction_fp32
+            else:
+                MultiScaleDeformableAttnFunction = MultiScaleDeformableAttnFunction_fp32
+            output = MultiScaleDeformableAttnFunction.apply(
+                value, spatial_shapes, level_start_index, sampling_locations,
+                attention_weights, self.im2col_step)
+        else:
+            output = multi_scale_deformable_attn_pytorch(
+                value, spatial_shapes, sampling_locations, attention_weights)
+        if not self.batch_first:
+            output = output.permute(1, 0, 2)
+        return output
+@ATTENTION.register_module()
+class MSIPM3D(BaseModule):
+    """An attention module used in BEVFormer based on Deformable-Detr.
+    `Deformable DETR: Deformable Transformers for End-to-End Object Detection.
+    <https://arxiv.org/pdf/2010.04159.pdf>`_.
+    Args:
+        embed_dims (int): The embedding dimension of Attention.
+            Default: 256.
+        num_heads (int): Parallel attention heads. Default: 64.
+        num_levels (int): The number of feature map used in
+            Attention. Default: 4.
+        num_points (int): The number of sampling points for
+            each query in each head. Default: 4.
+        im2col_step (int): The step used in image_to_column.
+            Default: 64.
+        dropout (float): A Dropout layer on `inp_identity`.
+            Default: 0.1.
+        batch_first (bool): Key, Query and Value are shape of
+            (batch, n, embed_dim)
+            or (n, batch, embed_dim). Default to False.
+        norm_cfg (dict): Config dict for normalization layer.
+            Default: None.
+        init_cfg (obj:`mmcv.ConfigDict`): The Config for initialization.
+            Default: None.
+    """
+    def __init__(self,
+                 embed_dims=256,
+                 num_heads=8,
+                 num_levels=4,
+                 num_points=8,
+                 im2col_step=64,
+                 dropout=0.1,
+                 batch_first=True,
+                 norm_cfg=None,
+                 init_cfg=None):
+        super().__init__(init_cfg)
+        if embed_dims % num_heads != 0:
+            raise ValueError(f'embed_dims must be divisible by num_heads, '
+                             f'but got {embed_dims} and {num_heads}')
+        dim_per_head = embed_dims // num_heads
+        self.norm_cfg = norm_cfg
+        self.batch_first = batch_first
+        self.output_proj = None
+        self.fp16_enabled = False
+        # you'd better set dim_per_head to a power of 2
+        # which is more efficient in the CUDA implementation
+        def _is_power_of_2(n):
+            if (not isinstance(n, int)) or (n < 0):
+                raise ValueError(
+                    'invalid input for _is_power_of_2: {} (type: {})'.format(
+                        n, type(n)))
+            return (n & (n - 1) == 0) and n != 0
+        if not _is_power_of_2(dim_per_head):
+            warnings.warn(
+                "You'd better set embed_dims in "
+                'MultiScaleDeformAttention to make '
+                'the dimension of each attention head a power of 2 '
+                'which is more efficient in our CUDA implementation.')
+        self.im2col_step = im2col_step
+        self.embed_dims = embed_dims
+        self.num_levels = num_levels
+        self.num_heads = num_heads
+        self.num_points = num_points
+        # self.sampling_offsets = nn.Linear(
+        #     embed_dims, num_heads * num_levels * num_points * 2)
+        # self.attention_weights = nn.Linear(embed_dims,
+        #                                    num_heads * num_levels * num_points)
+        self.value_proj = nn.Linear(embed_dims, embed_dims)
+        self.init_weights()
+    def init_weights(self):
+        """Default initialization for Parameters of Module."""
+        # constant_init(self.sampling_offsets, 0.)
+        thetas = torch.arange(
+            self.num_heads,
+            dtype=torch.float32) * (2.0 * math.pi / self.num_heads)
+        grid_init = torch.stack([thetas.cos(), thetas.sin()], -1)
+        grid_init = (grid_init /
+                     grid_init.abs().max(-1, keepdim=True)[0]).view(
+            self.num_heads, 1, 1,
+            2).repeat(1, self.num_levels, self.num_points, 1)
+        for i in range(self.num_points):
+            grid_init[:, :, i, :] *= i + 1
+        # self.sampling_offsets.bias.data = grid_init.view(-1)
+        self.fixed_sampling_offsets = nn.Parameter(grid_init.view(-1), requires_grad=False)
+        # constant_init(self.attention_weights, val=0., bias=0.)
+        xavier_init(self.value_proj, distribution='uniform', bias=0.)
+        xavier_init(self.output_proj, distribution='uniform', bias=0.)
+        self._is_init = True
+    def forward(self,
+                query,
+                key=None,
+                value=None,
+                identity=None,
+                query_pos=None,
+                key_padding_mask=None,
+                reference_points=None,
+                spatial_shapes=None,
+                level_start_index=None,
+                **kwargs):
+        """Forward Function of MultiScaleDeformAttention.
+        Args:
+            query (Tensor): Query of Transformer with shape
+                ( bs, num_query, embed_dims).
+            key (Tensor): The key tensor with shape
+                `(bs, num_key,  embed_dims)`.
+            value (Tensor): The value tensor with shape
+                `(bs, num_key,  embed_dims)`.
+            identity (Tensor): The tensor used for addition, with the
+                same shape as `query`. Default None. If None,
+                `query` will be used.
+            query_pos (Tensor): The positional encoding for `query`.
+                Default: None.
+            key_pos (Tensor): The positional encoding for `key`. Default
+                None.
+            reference_points (Tensor):  The normalized reference
+                points with shape (bs, num_query, num_levels, 2),
+                all elements is range in [0, 1], top-left (0,0),
+                bottom-right (1, 1), including padding area.
+                or (N, Length_{query}, num_levels, 4), add
+                additional two dimensions is (w, h) to
+                form reference boxes.
+            key_padding_mask (Tensor): ByteTensor for `query`, with
+                shape [bs, num_key].
+            spatial_shapes (Tensor): Spatial shape of features in
+                different levels. With shape (num_levels, 2),
+                last dimension represents (h, w).
+            level_start_index (Tensor): The start index of each level.
+                A tensor has shape ``(num_levels, )`` and can be represented
+                as [0, h_0*w_0, h_0*w_0+h_1*w_1, ...].
+        Returns:
+             Tensor: forwarded results with shape [num_query, bs, embed_dims].
+        """
+        if value is None:
+            value = query
+        if identity is None:
+            identity = query
+        if query_pos is not None:
+            query = query + query_pos
+        if not self.batch_first:
+            # change to (bs, num_query ,embed_dims)
+            query = query.permute(1, 0, 2)
+            value = value.permute(1, 0, 2)
+        bs, num_query, _ = query.shape
+        bs, num_value, _ = value.shape
+        assert (spatial_shapes[:, 0] * spatial_shapes[:, 1]).sum() == num_value
+        value = self.value_proj(value)
+        if key_padding_mask is not None:
+            value = value.masked_fill(key_padding_mask[..., None], 0.0)
+        value = value.view(bs, num_value, self.num_heads, -1)
+        sampling_offsets = self.fixed_sampling_offsets.view(
+            1, 1, self.num_heads, self.num_levels, self.num_points, 2).repeat(
+            bs, num_query, 1, 1, 1,1)
+        # attention_weights = self.attention_weights(query).view(
+        #     bs, num_query, self.num_heads, self.num_levels * self.num_points)
+        attention_weights = query.new_ones((bs, num_query, self.num_heads, self.num_levels * self.num_points))
+        attention_weights = attention_weights.softmax(-1)
+        # import pdb;pdb.set_trace()
+        attention_weights = attention_weights.view(bs, num_query,
+                                                   self.num_heads,
+                                                   self.num_levels,
+                                                   self.num_points)
+        if reference_points.shape[-1] == 2:
+            """
+            For each BEV query, it owns `num_Z_anchors` in 3D space that having different heights.
+            After proejcting, each BEV query has `num_Z_anchors` reference points in each 2D image.
+            For each referent point, we sample `num_points` sampling points.
+            For `num_Z_anchors` reference points,  it has overall `num_points * num_Z_anchors` sampling points.
+            """
+            offset_normalizer = torch.stack(
+                [spatial_shapes[..., 1], spatial_shapes[..., 0]], -1)
+            bs, num_query, num_Z_anchors, xy = reference_points.shape
+            reference_points = reference_points[:, :, None, None, None, :, :]
+            sampling_offsets = sampling_offsets / \
+                offset_normalizer[None, None, None, :, None, :]
+            bs, num_query, num_heads, num_levels, num_all_points, xy = sampling_offsets.shape
+            sampling_offsets = sampling_offsets.view(
+                bs, num_query, num_heads, num_levels, num_all_points // num_Z_anchors, num_Z_anchors, xy)
+            sampling_locations = reference_points + sampling_offsets
+            bs, num_query, num_heads, num_levels, num_points, num_Z_anchors, xy = sampling_locations.shape
+            assert num_all_points == num_points * num_Z_anchors
+            sampling_locations = sampling_locations.view(
+                bs, num_query, num_heads, num_levels, num_all_points, xy)
+        elif reference_points.shape[-1] == 4:
+            assert False
+        else:
+            raise ValueError(
+                f'Last dim of reference_points must be'
+                f' 2 or 4, but get {reference_points.shape[-1]} instead.')
+        #  sampling_locations.shape: bs, num_query, num_heads, num_levels, num_all_points, 2
+        #  attention_weights.shape: bs, num_query, num_heads, num_levels, num_all_points
+        #
+        if torch.cuda.is_available() and value.is_cuda:
+            if value.dtype == torch.float16:
+                MultiScaleDeformableAttnFunction = MultiScaleDeformableAttnFunction_fp32
+            else:
+                MultiScaleDeformableAttnFunction = MultiScaleDeformableAttnFunction_fp32
+            output = MultiScaleDeformableAttnFunction.apply(
+                value, spatial_shapes, level_start_index, sampling_locations,
+                attention_weights, self.im2col_step)
+        else:
+            output = multi_scale_deformable_attn_pytorch(
+                value, spatial_shapes, sampling_locations, attention_weights)
+        if not self.batch_first:
+            output = output.permute(1, 0, 2)
+        return output
--- a/docker-hub/MapTRv2/MapTR/projects/mmdet3d_plugin/bevformer/modules/temporal_self_attention.py
+++ b/docker-hub/MapTRv2/MapTR/projects/mmdet3d_plugin/bevformer/modules/temporal_self_attention.py
+# ---------------------------------------------
+# Copyright (c) OpenMMLab. All rights reserved.
+# ---------------------------------------------
+#  Modified by Zhiqi Li
+# ---------------------------------------------
+from projects.mmdet3d_plugin.models.utils.bricks import run_time
+from .multi_scale_deformable_attn_function import MultiScaleDeformableAttnFunction_fp32
+from mmcv.ops.multi_scale_deform_attn import multi_scale_deformable_attn_pytorch
+import warnings
+import torch
+import torch.nn as nn
+from mmcv.cnn import xavier_init, constant_init
+from mmcv.cnn.bricks.registry import ATTENTION
+import math
+from mmcv.runner.base_module import BaseModule, ModuleList, Sequential
+from mmcv.utils import (ConfigDict, build_from_cfg, deprecated_api_warning,
+                        to_2tuple)
+from mmcv.utils import ext_loader
+ext_module = ext_loader.load_ext(
+    '_ext', ['ms_deform_attn_backward', 'ms_deform_attn_forward'])
+@ATTENTION.register_module()
+class TemporalSelfAttention(BaseModule):
+    """An attention module used in BEVFormer based on Deformable-Detr.
+    `Deformable DETR: Deformable Transformers for End-to-End Object Detection.
+    <https://arxiv.org/pdf/2010.04159.pdf>`_.
+    Args:
+        embed_dims (int): The embedding dimension of Attention.
+            Default: 256.
+        num_heads (int): Parallel attention heads. Default: 64.
+        num_levels (int): The number of feature map used in
+            Attention. Default: 4.
+        num_points (int): The number of sampling points for
+            each query in each head. Default: 4.
+        im2col_step (int): The step used in image_to_column.
+            Default: 64.
+        dropout (float): A Dropout layer on `inp_identity`.
+            Default: 0.1.
+        batch_first (bool): Key, Query and Value are shape of
+            (batch, n, embed_dim)
+            or (n, batch, embed_dim). Default to True.
+        norm_cfg (dict): Config dict for normalization layer.
+            Default: None.
+        init_cfg (obj:`mmcv.ConfigDict`): The Config for initialization.
+            Default: None.
+        num_bev_queue (int): In this version, we only use one history BEV and one currenct BEV.
+         the length of BEV queue is 2.
+    """
+    def __init__(self,
+                 embed_dims=256,
+                 num_heads=8,
+                 num_levels=4,
+                 num_points=4,
+                 num_bev_queue=2,
+                 im2col_step=64,
+                 dropout=0.1,
+                 batch_first=True,
+                 norm_cfg=None,
+                 init_cfg=None):
+        """Create the offset, weight and projection layers of the module.
+        The offset and weight predictors consume the concatenation of
+        ``num_bev_queue`` embeddings, hence their input width of
+        ``embed_dims * num_bev_queue``.
+        Raises:
+            ValueError: If ``embed_dims`` is not divisible by ``num_heads``,
+                or if the per-head dimension is not a non-negative ``int``.
+        """
+        super().__init__(init_cfg)
+        if embed_dims % num_heads != 0:
+            raise ValueError(f'embed_dims must be divisible by num_heads, '
+                             f'but got {embed_dims} and {num_heads}')
+        head_dims = embed_dims // num_heads
+        self.norm_cfg = norm_cfg
+        self.dropout = nn.Dropout(dropout)
+        self.batch_first = batch_first
+        self.fp16_enabled = False
+        # A power-of-2 head width is not required, only recommended, because
+        # the CUDA kernel is more efficient for it.
+        def _is_power_of_2(n):
+            well_formed = isinstance(n, int) and n >= 0
+            if not well_formed:
+                raise ValueError(
+                    'invalid input for _is_power_of_2: {} (type: {})'.format(
+                        n, type(n)))
+            return n != 0 and (n & (n - 1)) == 0
+        head_dims_ok = _is_power_of_2(head_dims)
+        if not head_dims_ok:
+            warnings.warn(
+                "You'd better set embed_dims in "
+                'MultiScaleDeformAttention to make '
+                'the dimension of each attention head a power of 2 '
+                'which is more efficient in our CUDA implementation.')
+        self.embed_dims = embed_dims
+        self.num_heads = num_heads
+        self.num_levels = num_levels
+        self.num_points = num_points
+        self.num_bev_queue = num_bev_queue
+        self.im2col_step = im2col_step
+        # One (x, y) offset and one weight per queue entry, head, level
+        # and sampling point.
+        stacked_dims = embed_dims * self.num_bev_queue
+        samples_per_query = num_bev_queue * num_heads * num_levels * num_points
+        self.sampling_offsets = nn.Linear(stacked_dims, samples_per_query * 2)
+        self.attention_weights = nn.Linear(stacked_dims, samples_per_query)
+        self.value_proj = nn.Linear(embed_dims, embed_dims)
+        self.output_proj = nn.Linear(embed_dims, embed_dims)
+        self.init_weights()
+    def init_weights(self):
+        """Initialise the offset, weight and projection layers.
+        The sampling-offset weights are zeroed and the bias is set so that
+        each head initially looks in its own direction (evenly spaced angles
+        around the circle), with the k-th sampling point placed k steps along
+        that direction. Attention weights start at zero; the value and output
+        projections use Xavier-uniform initialisation with zero bias.
+        """
+        constant_init(self.sampling_offsets, 0.)
+        angle_step = 2.0 * math.pi / self.num_heads
+        angles = torch.arange(self.num_heads, dtype=torch.float32) * angle_step
+        directions = torch.stack([angles.cos(), angles.sin()], -1)
+        # Scale each direction so that its largest absolute component is 1.
+        largest_component = directions.abs().max(-1, keepdim=True)[0]
+        directions = directions / largest_component
+        bias_grid = directions.view(self.num_heads, 1, 1, 2).repeat(
+            1, self.num_levels * self.num_bev_queue, self.num_points, 1)
+        for step in range(1, self.num_points + 1):
+            bias_grid[:, :, step - 1, :] *= step
+        self.sampling_offsets.bias.data = bias_grid.view(-1)
+        constant_init(self.attention_weights, val=0., bias=0.)
+        for projection in (self.value_proj, self.output_proj):
+            xavier_init(projection, distribution='uniform', bias=0.)
+        self._is_init = True
+    def forward(self,
+                query,
+                key=None,
+                value=None,
+                identity=None,
+                query_pos=None,
+                key_padding_mask=None,
+                reference_points=None,
+                spatial_shapes=None,
+                level_start_index=None,
+                flag='decoder',
+                **kwargs):
+        """Run deformable attention over a two-entry BEV queue.
+        Shapes below are written for ``batch_first=True``; when
+        ``batch_first`` is False, ``query`` and ``value`` are permuted from
+        (n, bs, embed_dims) to batch-first layout on entry and the output is
+        permuted back before the residual is added.
+        Args:
+            query (Tensor): Query with shape (bs, num_query, embed_dims).
+            key (Tensor): Not referenced in this method body.
+            value (Tensor): Value tensor whose first dimension is reshaped to
+                ``bs * num_bev_queue``. If None, ``batch_first`` must be True
+                and ``query`` is stacked with itself to form the value.
+            identity (Tensor): The tensor added to the output as residual.
+                Default None. If None, ``query`` (before ``query_pos`` is
+                added) will be used.
+            query_pos (Tensor): The positional encoding for `query`.
+                Default: None.
+            key_padding_mask (Tensor): Boolean mask broadcast over the last
+                dimension of the projected value; masked positions are set
+                to 0. Default: None.
+            reference_points (Tensor): Reference points whose last dimension
+                is 2 (points; offsets are divided by the (w, h) of each
+                level) or 4 (boxes; offsets are scaled by the last two
+                channels). NOTE(review): the leading dimension presumably
+                has to broadcast against ``bs * num_bev_queue`` - confirm
+                against the caller.
+            spatial_shapes (Tensor): Spatial shape of features in
+                different levels. With shape (num_levels, 2),
+                last dimension represents (h, w). The sum of h * w must
+                equal the number of value tokens.
+            level_start_index (Tensor): The start index of each level.
+                Only passed to the CUDA kernel path.
+            flag (str): Not referenced in this method body.
+        Returns:
+            Tensor: ``dropout(output) + identity``; the attention output has
+                the same layout as ``query``.
+        Raises:
+            ValueError: If the last dimension of ``reference_points`` is
+                neither 2 nor 4.
+        """
+        if value is None:
+            # No history available: use the current query for both queue
+            # entries, giving a value of shape (bs*2, len_bev, c).
+            assert self.batch_first
+            bs, len_bev, c = query.shape
+            value = torch.stack([query, query], 1).reshape(bs*2, len_bev, c)
+            # value = torch.cat([query, query], 0)
+        if identity is None:
+            identity = query
+        if query_pos is not None:
+            query = query + query_pos
+        if not self.batch_first:
+            # change to (bs, num_query ,embed_dims)
+            query = query.permute(1, 0, 2)
+            value = value.permute(1, 0, 2)
+        bs,  num_query, embed_dims = query.shape
+        _, num_value, _ = value.shape
+        assert (spatial_shapes[:, 0] * spatial_shapes[:, 1]).sum() == num_value
+        assert self.num_bev_queue == 2
+        # Concatenate the first ``bs`` value entries with the query along the
+        # channel axis; this matches the ``embed_dims * num_bev_queue`` input
+        # width of the offset and weight predictors.
+        query = torch.cat([value[:bs], query], -1)
+        value = self.value_proj(value)
+        if key_padding_mask is not None:
+            value = value.masked_fill(key_padding_mask[..., None], 0.0)
+        # Split channels into heads: (bs*num_bev_queue, num_value, heads, -1).
+        value = value.reshape(bs*self.num_bev_queue,
+                              num_value, self.num_heads, -1)
+        sampling_offsets = self.sampling_offsets(query)
+        sampling_offsets = sampling_offsets.view(
+            bs, num_query, self.num_heads,  self.num_bev_queue, self.num_levels, self.num_points, 2)
+        # Softmax is taken jointly over levels and points, separately for
+        # every head and queue entry.
+        attention_weights = self.attention_weights(query).view(
+            bs, num_query,  self.num_heads, self.num_bev_queue, self.num_levels * self.num_points)
+        attention_weights = attention_weights.softmax(-1)
+        attention_weights = attention_weights.view(bs, num_query,
+                                                   self.num_heads,
+                                                   self.num_bev_queue,
+                                                   self.num_levels,
+                                                   self.num_points)
+        # Move the queue axis next to the batch axis and fold it in, so the
+        # weights and offsets line up with the (bs*num_bev_queue, ...) value.
+        attention_weights = attention_weights.permute(0, 3, 1, 2, 4, 5)\
+            .reshape(bs*self.num_bev_queue, num_query, self.num_heads, self.num_levels, self.num_points).contiguous()
+        sampling_offsets = sampling_offsets.permute(0, 3, 1, 2, 4, 5, 6)\
+            .reshape(bs*self.num_bev_queue, num_query, self.num_heads, self.num_levels, self.num_points, 2)
+        if reference_points.shape[-1] == 2:
+            # Offsets are divided by (w, h) of each level before being added
+            # to the reference points.
+            offset_normalizer = torch.stack(
+                [spatial_shapes[..., 1], spatial_shapes[..., 0]], -1)
+            sampling_locations = reference_points[:, :, None, :, None, :] \
+                + sampling_offsets \
+                / offset_normalizer[None, None, None, :, None, :]
+        elif reference_points.shape[-1] == 4:
+            # Box references: offsets are scaled by half of the last two
+            # channels and divided by the number of points.
+            sampling_locations = reference_points[:, :, None, :, None, :2] \
+                + sampling_offsets / self.num_points \
+                * reference_points[:, :, None, :, None, 2:] \
+                * 0.5
+        else:
+            raise ValueError(
+                f'Last dim of reference_points must be'
+                f' 2 or 4, but get {reference_points.shape[-1]} instead.')
+        if torch.cuda.is_available() and value.is_cuda:
+            # using fp16 deformable attention is unstable because it performs many sum operations
+            # Both branches therefore select the fp32 kernel on purpose.
+            if value.dtype == torch.float16:
+                MultiScaleDeformableAttnFunction = MultiScaleDeformableAttnFunction_fp32
+            else:
+                MultiScaleDeformableAttnFunction = MultiScaleDeformableAttnFunction_fp32
+            output = MultiScaleDeformableAttnFunction.apply(
+                value, spatial_shapes, level_start_index, sampling_locations,
+                attention_weights, self.im2col_step)
+        else:
+            # Pure PyTorch fallback when the value is not on a CUDA device.
+            output = multi_scale_deformable_attn_pytorch(
+                value, spatial_shapes, sampling_locations, attention_weights)
+        # output shape (bs*num_bev_queue, num_query, embed_dims)
+        # (bs*num_bev_queue, num_query, embed_dims)-> (num_query, embed_dims, bs*num_bev_queue)
+        output = output.permute(1, 2, 0)
+        # fuse history value and current value
+        # (num_query, embed_dims, bs*num_bev_queue)-> (num_query, embed_dims, bs, num_bev_queue)
+        output = output.view(num_query, embed_dims, bs, self.num_bev_queue)
+        # Average over the queue axis.
+        output = output.mean(-1)
+        # (num_query, embed_dims, bs)-> (bs, num_query, embed_dims)
+        output = output.permute(2, 0, 1)
+        output = self.output_proj(output)
+        if not self.batch_first:
+            output = output.permute(1, 0, 2)
+        return self.dropout(output) + identity
--- a/docker-hub/MapTRv2/MapTR/projects/mmdet3d_plugin/bevformer/modules/transformer.py
+++ b/docker-hub/MapTRv2/MapTR/projects/mmdet3d_plugin/bevformer/modules/transformer.py
+# ---------------------------------------------
+# Copyright (c) OpenMMLab. All rights reserved.
+# ---------------------------------------------
+#  Modified by Zhiqi Li
+# ---------------------------------------------
+import numpy as np
+import torch
+import torch.nn as nn
+from mmcv.cnn import xavier_init
+from mmcv.cnn.bricks.transformer import build_transformer_layer_sequence
+from mmcv.runner.base_module import BaseModule
+from mmdet.models.utils.builder import TRANSFORMER
+from torch.nn.init import normal_
+from projects.mmdet3d_plugin.models.utils.visual import save_tensor
+from mmcv.runner.base_module import BaseModule
+from torchvision.transforms.functional import rotate
+from .temporal_self_attention import TemporalSelfAttention
+from .spatial_cross_attention import MSDeformableAttention3D
+from .decoder import CustomMSDeformableAttention
+from projects.mmdet3d_plugin.models.utils.bricks import run_time
+from mmcv.runner import force_fp32, auto_fp16
+@TRANSFORMER.register_module()
+class PerceptionTransformer(BaseModule):
+    """Transformer that builds a BEV embedding and decodes queries from it.
+    ``get_bev_features`` runs ``self.encoder`` over flattened multi-camera,
+    multi-level image features to obtain a BEV embedding; ``forward``
+    additionally runs ``self.decoder`` on that embedding.
+    Args:
+        num_feature_levels (int): Number of rows of the learned level
+            embedding. Default: 4.
+        num_cams (int): Number of rows of the learned camera embedding.
+            Default: 6.
+        two_stage_num_proposals (int): Stored on the module; not otherwise
+            used inside this class. Default: 300.
+        encoder (dict): Config passed to
+            ``build_transformer_layer_sequence`` to build the encoder.
+        decoder (dict): Config passed to
+            ``build_transformer_layer_sequence`` to build the decoder.
+        embed_dims (int): Embedding width. Default: 256.
+        rotate_prev_bev (bool): Whether ``prev_bev`` is rotated per sample
+            by ``can_bus[-1]`` before encoding. Default: True.
+        use_shift (bool): Multiplied into the BEV shift, so False zeroes it.
+            Default: True.
+        use_can_bus (bool): Multiplied into the CAN-bus embedding added to
+            the BEV queries, so False disables it. Default: True.
+        can_bus_norm (bool): Whether a LayerNorm is appended to the CAN-bus
+            MLP. Default: True.
+        use_cams_embeds (bool): Whether the camera embedding is added to
+            the image features. Default: True.
+        rotate_center (list[int]): ``center`` argument passed to
+            ``rotate`` when rotating ``prev_bev``. Default: [100, 100].
+        **kwargs: Forwarded to ``BaseModule.__init__``.
+    """
+    def __init__(self,
+                 num_feature_levels=4,
+                 num_cams=6,
+                 two_stage_num_proposals=300,
+                 encoder=None,
+                 decoder=None,
+                 embed_dims=256,
+                 rotate_prev_bev=True,
+                 use_shift=True,
+                 use_can_bus=True,
+                 can_bus_norm=True,
+                 use_cams_embeds=True,
+                 rotate_center=[100, 100],
+                 **kwargs):
+        super(PerceptionTransformer, self).__init__(**kwargs)
+        self.encoder = build_transformer_layer_sequence(encoder)
+        self.decoder = build_transformer_layer_sequence(decoder)
+        self.embed_dims = embed_dims
+        self.num_feature_levels = num_feature_levels
+        self.num_cams = num_cams
+        self.fp16_enabled = False
+        self.rotate_prev_bev = rotate_prev_bev
+        self.use_shift = use_shift
+        self.use_can_bus = use_can_bus
+        self.can_bus_norm = can_bus_norm
+        self.use_cams_embeds = use_cams_embeds
+        self.two_stage_num_proposals = two_stage_num_proposals
+        # init_layers reads the attributes assigned above.
+        self.init_layers()
+        self.rotate_center = rotate_center
+    def init_layers(self):
+        """Create the learned embeddings, reference-point head and CAN-bus MLP."""
+        # torch.Tensor(...) allocates uninitialised storage; the values are
+        # filled in by init_weights.
+        self.level_embeds = nn.Parameter(torch.Tensor(
+            self.num_feature_levels, self.embed_dims))
+        self.cams_embeds = nn.Parameter(
+            torch.Tensor(self.num_cams, self.embed_dims))
+        # Maps a query positional embedding to 3 values per query.
+        self.reference_points = nn.Linear(self.embed_dims, 3)
+        # The CAN-bus vector is expected to have 18 entries.
+        self.can_bus_mlp = nn.Sequential(
+            nn.Linear(18, self.embed_dims // 2),
+            nn.ReLU(inplace=True),
+            nn.Linear(self.embed_dims // 2, self.embed_dims),
+            nn.ReLU(inplace=True),
+        )
+        if self.can_bus_norm:
+            self.can_bus_mlp.add_module('norm', nn.LayerNorm(self.embed_dims))
+    def init_weights(self):
+        """Initialize the transformer weights."""
+        # Xavier-uniform for every parameter with more than one dimension ...
+        for p in self.parameters():
+            if p.dim() > 1:
+                nn.init.xavier_uniform_(p)
+        # ... then let the deformable-attention modules re-apply their own
+        # initialisation on top of that.
+        for m in self.modules():
+            if isinstance(m, MSDeformableAttention3D) or isinstance(m, TemporalSelfAttention) \
+                    or isinstance(m, CustomMSDeformableAttention):
+                # Modules may expose either ``init_weight`` or
+                # ``init_weights``. NOTE(review): an AttributeError raised
+                # inside ``init_weight`` itself also triggers the fallback.
+                try:
+                    m.init_weight()
+                except AttributeError:
+                    m.init_weights()
+        normal_(self.level_embeds)
+        normal_(self.cams_embeds)
+        xavier_init(self.reference_points, distribution='uniform', bias=0.)
+        xavier_init(self.can_bus_mlp, distribution='uniform', bias=0.)
+    @auto_fp16(apply_to=('mlvl_feats', 'bev_queries', 'prev_bev', 'bev_pos'))
+    def get_bev_features(
+            self,
+            mlvl_feats,
+            bev_queries,
+            bev_h,
+            bev_w,
+            grid_length=[0.512, 0.512],
+            bev_pos=None,
+            prev_bev=None,
+            **kwargs):
+        """Encode multi-level camera features into a BEV embedding.
+        Args:
+            mlvl_feats (list(Tensor)): Per-level features, each unpacked as
+                (bs, num_cam, c, h, w).
+            bev_queries (Tensor): BEV queries; a batch axis is inserted at
+                dim 1 and repeated ``bs`` times.
+            bev_h (int): Height of the BEV grid.
+            bev_w (int): Width of the BEV grid.
+            grid_length (list[float]): Size of one BEV cell, read as
+                ``[grid_length_y, grid_length_x]``.
+            bev_pos (Tensor): BEV positional encoding; flattened from dim 2
+                and permuted so the flattened axis comes first. Must not be
+                None even though the default is None.
+            prev_bev (Tensor, optional): Previous BEV embedding. Rotated in
+                place per sample when ``rotate_prev_bev`` is set.
+            **kwargs: Must contain ``img_metas``, a per-sample list of dicts
+                with a ``can_bus`` entry; forwarded to the encoder.
+        Returns:
+            The output of ``self.encoder``.
+        """
+        bs = mlvl_feats[0].size(0)
+        bev_queries = bev_queries.unsqueeze(1).repeat(1, bs, 1)
+        bev_pos = bev_pos.flatten(2).permute(2, 0, 1)
+        # obtain rotation angle and shift with ego motion
+        # can_bus[0] / can_bus[1] are read as the x / y translation and
+        # can_bus[-2] as an angle that is converted from radians to degrees.
+        delta_x = np.array([each['can_bus'][0]
+                           for each in kwargs['img_metas']])
+        delta_y = np.array([each['can_bus'][1]
+                           for each in kwargs['img_metas']])
+        ego_angle = np.array(
+            [each['can_bus'][-2] / np.pi * 180 for each in kwargs['img_metas']])
+        grid_length_y = grid_length[0]
+        grid_length_x = grid_length[1]
+        translation_length = np.sqrt(delta_x ** 2 + delta_y ** 2)
+        translation_angle = np.arctan2(delta_y, delta_x) / np.pi * 180
+        bev_angle = ego_angle - translation_angle
+        # Express the translation as a fraction of the BEV grid extent.
+        shift_y = translation_length * \
+            np.cos(bev_angle / 180 * np.pi) / grid_length_y / bev_h
+        shift_x = translation_length * \
+            np.sin(bev_angle / 180 * np.pi) / grid_length_x / bev_w
+        # Multiplying by the boolean flag zeroes the shift when disabled.
+        shift_y = shift_y * self.use_shift
+        shift_x = shift_x * self.use_shift
+        shift = bev_queries.new_tensor(
+            [shift_x, shift_y]).permute(1, 0)  # xy, bs -> bs, xy
+        if prev_bev is not None:
+            # Bring prev_bev to (bev_h*bev_w, bs, c) if it arrives batch-first.
+            if prev_bev.shape[1] == bev_h * bev_w:
+                prev_bev = prev_bev.permute(1, 0, 2)
+            if self.rotate_prev_bev:
+                for i in range(bs):
+                    # num_prev_bev = prev_bev.size(1)
+                    rotation_angle = kwargs['img_metas'][i]['can_bus'][-1]
+                    # Reshape one sample to (c, bev_h, bev_w) so it can be
+                    # rotated like an image, then flatten it back.
+                    tmp_prev_bev = prev_bev[:, i].reshape(
+                        bev_h, bev_w, -1).permute(2, 0, 1)
+                    tmp_prev_bev = rotate(tmp_prev_bev, rotation_angle,
+                                          center=self.rotate_center)
+                    tmp_prev_bev = tmp_prev_bev.permute(1, 2, 0).reshape(
+                        bev_h * bev_w, 1, -1)
+                    # In-place write into prev_bev.
+                    prev_bev[:, i] = tmp_prev_bev[:, 0]
+        # add can bus signals
+        can_bus = bev_queries.new_tensor(
+            [each['can_bus'] for each in kwargs['img_metas']])  # [:, :]
+        can_bus = self.can_bus_mlp(can_bus)[None, :, :]
+        bev_queries = bev_queries + can_bus * self.use_can_bus
+        feat_flatten = []
+        spatial_shapes = []
+        for lvl, feat in enumerate(mlvl_feats):
+            bs, num_cam, c, h, w = feat.shape
+            spatial_shape = (h, w)
+            # (bs, num_cam, c, h*w) -> (num_cam, bs, h*w, c)
+            feat = feat.flatten(3).permute(1, 0, 3, 2)
+            if self.use_cams_embeds:
+                feat = feat + self.cams_embeds[:, None, None, :].to(feat.dtype)
+            feat = feat + self.level_embeds[None,
+                                            None, lvl:lvl + 1, :].to(feat.dtype)
+            spatial_shapes.append(spatial_shape)
+            feat_flatten.append(feat)
+        # Concatenate all levels along the flattened spatial axis.
+        feat_flatten = torch.cat(feat_flatten, 2)
+        spatial_shapes = torch.as_tensor(
+            spatial_shapes, dtype=torch.long, device=bev_pos.device)
+        # Start offset of each level: [0, h_0*w_0, h_0*w_0 + h_1*w_1, ...].
+        level_start_index = torch.cat((spatial_shapes.new_zeros(
+            (1,)), spatial_shapes.prod(1).cumsum(0)[:-1]))
+        feat_flatten = feat_flatten.permute(
+            0, 2, 1, 3)  # (num_cam, H*W, bs, embed_dims)
+        # The flattened features are passed as both key and value.
+        bev_embed = self.encoder(
+            bev_queries,
+            feat_flatten,
+            feat_flatten,
+            bev_h=bev_h,
+            bev_w=bev_w,
+            bev_pos=bev_pos,
+            spatial_shapes=spatial_shapes,
+            level_start_index=level_start_index,
+            prev_bev=prev_bev,
+            shift=shift,
+            **kwargs
+        )
+        return bev_embed
+    @auto_fp16(apply_to=('mlvl_feats', 'bev_queries', 'object_query_embed', 'prev_bev', 'bev_pos'))
+    def forward(self,
+                mlvl_feats,
+                bev_queries,
+                object_query_embed,
+                bev_h,
+                bev_w,
+                grid_length=[0.512, 0.512],
+                bev_pos=None,
+                reg_branches=None,
+                cls_branches=None,
+                prev_bev=None,
+                **kwargs):
+        """Encode BEV features and decode the object queries against them.
+        Args:
+            mlvl_feats (list(Tensor)): Input queries from
+                different level. Each element has shape
+                [bs, num_cams, embed_dims, h, w].
+            bev_queries (Tensor): (bev_h*bev_w, c)
+            object_query_embed (Tensor): The query embedding for decoder.
+                It is split along dim 1 into two chunks of width
+                ``embed_dims``: first the positional part, then the query.
+            bev_h (int): Height of the BEV grid.
+            bev_w (int): Width of the BEV grid.
+            grid_length (list[float]): Forwarded to ``get_bev_features``.
+            bev_pos (Tensor): (bs, embed_dims, bev_h, bev_w)
+            reg_branches (obj:`nn.ModuleList`): Regression heads, forwarded
+                to the decoder. Default to None.
+            cls_branches (obj:`nn.ModuleList`): Classification heads,
+                forwarded to the decoder. Default to None.
+            prev_bev (Tensor, optional): Forwarded to ``get_bev_features``.
+        Returns:
+            tuple[Tensor]: a 4-tuple containing
+                - bev_embed: BEV features after ``permute(1, 0, 2)``.
+                - inter_states: First output of the decoder.
+                - init_reference_out: The initial reference points
+                    (sigmoid of a linear projection of the positional
+                    query embedding), with 3 values per query.
+                - inter_references_out: Second output of the decoder.
+        """
+        bev_embed = self.get_bev_features(
+            mlvl_feats,
+            bev_queries,
+            bev_h,
+            bev_w,
+            grid_length=grid_length,
+            bev_pos=bev_pos,
+            prev_bev=prev_bev,
+            **kwargs)  # bev_embed shape: bs, bev_h*bev_w, embed_dims
+        bs = mlvl_feats[0].size(0)
+        query_pos, query = torch.split(
+            object_query_embed, self.embed_dims, dim=1)
+        # Share the same learned queries across the batch.
+        query_pos = query_pos.unsqueeze(0).expand(bs, -1, -1)
+        query = query.unsqueeze(0).expand(bs, -1, -1)
+        reference_points = self.reference_points(query_pos)
+        reference_points = reference_points.sigmoid()
+        init_reference_out = reference_points
+        # The decoder is fed sequence-first tensors.
+        query = query.permute(1, 0, 2)
+        query_pos = query_pos.permute(1, 0, 2)
+        bev_embed = bev_embed.permute(1, 0, 2)
+        # The BEV embedding is treated as a single feature level of size
+        # (bev_h, bev_w).
+        inter_states, inter_references = self.decoder(
+            query=query,
+            key=None,
+            value=bev_embed,
+            query_pos=query_pos,
+            reference_points=reference_points,
+            reg_branches=reg_branches,
+            cls_branches=cls_branches,
+            spatial_shapes=torch.tensor([[bev_h, bev_w]], device=query.device),
+            level_start_index=torch.tensor([0], device=query.device),
+            **kwargs)
+        inter_references_out = inter_references
+        return bev_embed, inter_states, init_reference_out, inter_references_out
--- a/docker-hub/MapTRv2/MapTR/projects/mmdet3d_plugin/bevformer/runner/__init__.py
+++ b/docker-hub/MapTRv2/MapTR/projects/mmdet3d_plugin/bevformer/runner/__init__.py
+from .epoch_based_runner import EpochBasedRunner_video
\ No newline at end of file
--- a/docker-hub/MapTRv2/MapTR/projects/mmdet3d_plugin/bevformer/runner/epoch_based_runner.py
+++ b/docker-hub/MapTRv2/MapTR/projects/mmdet3d_plugin/bevformer/runner/epoch_based_runner.py
+# Copyright (c) OpenMMLab. All rights reserved.
+# ---------------------------------------------
+#  Modified by Zhiqi Li
+# ---------------------------------------------
+import os.path as osp
+import torch
+import mmcv
+from mmcv.runner.base_runner import BaseRunner
+from mmcv.runner.epoch_based_runner import EpochBasedRunner
+from mmcv.runner.builder import RUNNERS
+from mmcv.runner.checkpoint import save_checkpoint
+from mmcv.runner.utils import get_host_info
+from pprint import pprint
+from mmcv.parallel.data_container import DataContainer
+@RUNNERS.register_module()
+class EpochBasedRunner_video(EpochBasedRunner):
+    """Epoch-based runner that trains on the last frame of a short clip.
+    Every frame except the last is run through ``eval_model`` under
+    ``torch.no_grad()`` to produce ``prev_bev``; only the last frame is
+    passed to ``model.train_step``. In pseudo-code::
+        input_sequence = [a, b, c]  # given a sequence of samples
+        prev_bev = None
+        for each in input_sequence[:-1]:
+            prev_bev = eval_model(each, prev_bev)  # inference only
+        model(input_sequence[-1], prev_bev)  # train the last sample
+    Args:
+        model: Model whose ``train_step`` is called on the last frame.
+        eval_model: Model whose ``val_step`` produces ``prev_bev`` for the
+            earlier frames. It is switched to eval mode on construction, so
+            it must not be None even though the default is None.
+        batch_processor: Must be None; ``run_iter`` rejects anything else.
+        keys (list[str]): Keys copied from the data batch into each
+            per-frame sample. ``'img_metas'`` is always included. The list
+            passed in is not modified.
+        optimizer, work_dir, logger, meta, max_iters, max_epochs: Forwarded
+            unchanged to ``EpochBasedRunner.__init__``.
+    """
+    def __init__(self,
+                 model,
+                 eval_model=None,
+                 batch_processor=None,
+                 optimizer=None,
+                 work_dir=None,
+                 logger=None,
+                 meta=None,
+                 keys=['gt_bboxes_3d', 'gt_labels_3d', 'img'],
+                 max_iters=None,
+                 max_epochs=None):
+        super().__init__(model,
+                         batch_processor,
+                         optimizer,
+                         work_dir,
+                         logger,
+                         meta,
+                         max_iters,
+                         max_epochs)
+        # Work on a copy: appending to ``keys`` in place would grow the
+        # shared default list (or the caller's list) on every construction.
+        self.keys = list(keys)
+        if 'img_metas' not in self.keys:
+            self.keys.append('img_metas')
+        self.eval_model = eval_model
+        self.eval_model.eval()
+    def run_iter(self, data_batch, train_mode, **kwargs):
+        """Run one training iteration over a clip of frames.
+        Args:
+            data_batch (dict): Batch whose ``'img'`` and ``'img_metas'``
+                entries are ``DataContainer`` objects holding one clip per
+                sample; the frame axis of ``'img'`` is dim 1.
+            train_mode (bool): Must be True; validation is not supported.
+            **kwargs: Forwarded to ``val_step`` and ``train_step``.
+        Raises:
+            AssertionError: If a batch processor is set, ``train_mode`` is
+                False, or ``'points'`` is listed in ``keys``.
+            TypeError: If ``train_step`` does not return a dict.
+        """
+        # Explicit raises instead of ``assert False`` so that the guards
+        # survive ``python -O``.
+        if self.batch_processor is not None:
+            raise AssertionError(
+                'batch_processor is not supported by EpochBasedRunner_video')
+        if not train_mode:
+            raise AssertionError(
+                'EpochBasedRunner_video only supports train_mode=True')
+        img_container = data_batch['img']
+        num_samples = img_container.data[0].size(1)
+        # Split the clip into one data dict per frame.
+        data_list = []
+        for i in range(num_samples):
+            data = {}
+            for key in self.keys:
+                if key == 'img':
+                    data['img'] = DataContainer(
+                        data=[img_container.data[0][:, i]],
+                        cpu_only=img_container.cpu_only,
+                        stack=True)
+                elif key == 'img_metas':
+                    metas_container = data_batch['img_metas']
+                    data['img_metas'] = DataContainer(
+                        data=[[each[i] for each in metas_container.data[0]]],
+                        cpu_only=metas_container.cpu_only)
+                elif key == 'points':
+                    raise AssertionError(
+                        "per-frame splitting of 'points' is not supported")
+                else:
+                    # Frame-independent entries are shared by every frame.
+                    data[key] = data_batch[key]
+            data_list.append(data)
+        # Roll the history frames through the frozen model; the first frame
+        # has no previous BEV, so no ``prev_bev`` entry is added for it.
+        prev_bev = None
+        with torch.no_grad():
+            for i in range(num_samples - 1):
+                if i > 0:
+                    data_list[i]['prev_bev'] = DataContainer(
+                        data=[prev_bev], cpu_only=False)
+                prev_bev = self.eval_model.val_step(
+                    data_list[i], self.optimizer, **kwargs)
+        # Only the last frame contributes gradients.
+        data_list[-1]['prev_bev'] = DataContainer(
+            data=[prev_bev], cpu_only=False)
+        outputs = self.model.train_step(
+            data_list[-1], self.optimizer, **kwargs)
+        if not isinstance(outputs, dict):
+            raise TypeError('"batch_processor()" or "model.train_step()"'
+                            'and "model.val_step()" must return a dict')
+        if 'log_vars' in outputs:
+            self.log_buffer.update(outputs['log_vars'], outputs['num_samples'])
+        self.outputs = outputs
\ No newline at end of file
--- a/docker-hub/MapTRv2/MapTR/projects/mmdet3d_plugin/core/bbox/assigners/__init__.py
+++ b/docker-hub/MapTRv2/MapTR/projects/mmdet3d_plugin/core/bbox/assigners/__init__.py
+from .hungarian_assigner_3d import HungarianAssigner3D
+__all__ = ['HungarianAssigner3D']
--- a/docker-hub/MapTRv2/MapTR/projects/mmdet3d_plugin/core/bbox/assigners/hungarian_assigner_3d.py
+++ b/docker-hub/MapTRv2/MapTR/projects/mmdet3d_plugin/core/bbox/assigners/hungarian_assigner_3d.py
+import torch
+from mmdet.core.bbox.builder import BBOX_ASSIGNERS
+from mmdet.core.bbox.assigners import AssignResult
+from mmdet.core.bbox.assigners import BaseAssigner
+from mmdet.core.bbox.match_costs import build_match_cost
+from mmdet.models.utils.transformer import inverse_sigmoid
+from projects.mmdet3d_plugin.core.bbox.util import normalize_bbox
+try:
+    from scipy.optimize import linear_sum_assignment
+except ImportError:
+    linear_sum_assignment = None
+@BBOX_ASSIGNERS.register_module()
+class HungarianAssigner3D(BaseAssigner):
+    """One-to-one Hungarian matcher between predictions and ground truth.
+    The matching cost is the sum of a classification cost and a regression
+    cost computed on the first 8 box channels. The ground-truth set has no
+    "no object" entry, so there are generally more predictions than
+    targets; predictions left unmatched are treated as background. Each
+    prediction ends up with
+    - 0: negative sample, no assigned gt
+    - positive integer: positive sample, index (1-based) of assigned gt
+    Args:
+        cls_cost (dict): Config of the classification match cost.
+            Default ``dict(type='ClassificationCost', weight=1.)``.
+        reg_cost (dict): Config of the box regression match cost.
+            Default ``dict(type='BBoxL1Cost', weight=1.0)``.
+        iou_cost (dict): Config of the IoU match cost. It is built but not
+            added to the matching cost in ``assign``.
+            Default ``dict(type='IoUCost', weight=0.0)``.
+        pc_range (list, optional): Passed to ``normalize_bbox`` together
+            with the ground-truth boxes. Default None.
+    """
+    def __init__(self,
+                 cls_cost=dict(type='ClassificationCost', weight=1.),
+                 reg_cost=dict(type='BBoxL1Cost', weight=1.0),
+                 iou_cost=dict(type='IoUCost', weight=0.0),
+                 pc_range=None):
+        self.pc_range = pc_range
+        self.cls_cost = build_match_cost(cls_cost)
+        self.reg_cost = build_match_cost(reg_cost)
+        self.iou_cost = build_match_cost(iou_cost)
+    def assign(self,
+               bbox_pred,
+               cls_pred,
+               gt_bboxes, 
+               gt_labels,
+               gt_bboxes_ignore=None,
+               eps=1e-7):
+        """Match every prediction to a ground-truth box or to background.
+        In the returned ``assigned_gt_inds``, -1 means don't care, 0 means
+        negative sample, and a positive number is the 1-based index of the
+        assigned gt. Steps:
+        1. mark every prediction as -1
+        2. compute the summed classification and regression costs
+        3. solve the assignment on CPU with ``linear_sum_assignment``
+        4. set everything to 0 (background), then write gt index + 1 and
+           the gt label for each matched prediction
+        Args:
+            bbox_pred (Tensor): Predicted boxes, one row per query. Only
+                the first 8 channels enter the regression cost.
+            cls_pred (Tensor): Predicted classification logits, shape
+                [num_query, num_class].
+            gt_bboxes (Tensor): Ground truth boxes, one row per gt; they
+                are normalized with ``normalize_bbox`` before matching.
+            gt_labels (Tensor): Label of `gt_bboxes`, shape (num_gt,).
+            gt_bboxes_ignore (Tensor, optional): Must be None.
+            eps (int | float, optional): Not used in this method body.
+                Default 1e-7.
+        Returns:
+            :obj:`AssignResult`: The assigned result.
+        Raises:
+            ImportError: If scipy is not installed.
+        """
+        assert gt_bboxes_ignore is None, \
+            'Only case when gt_bboxes_ignore is None is supported.'
+        num_gts = gt_bboxes.size(0)
+        num_bboxes = bbox_pred.size(0)
+        # Step 1: everything starts out as "don't care".
+        assigned_gt_inds = bbox_pred.new_full(
+            (num_bboxes, ), -1, dtype=torch.long)
+        assigned_labels = bbox_pred.new_full(
+            (num_bboxes, ), -1, dtype=torch.long)
+        nothing_to_match = num_gts == 0 or num_bboxes == 0
+        if nothing_to_match:
+            # Without any ground truth every prediction is background.
+            if num_gts == 0:
+                assigned_gt_inds.fill_(0)
+            return AssignResult(
+                num_gts, assigned_gt_inds, None, labels=assigned_labels)
+        # Step 2: classification cost plus regression cost.
+        cls_cost = self.cls_cost(cls_pred, gt_labels)
+        gt_normalized = normalize_bbox(gt_bboxes, self.pc_range)
+        reg_cost = self.reg_cost(bbox_pred[:, :8], gt_normalized[:, :8])
+        cost = (cls_cost + reg_cost).detach().cpu()
+        # Step 3: Hungarian matching on CPU.
+        if linear_sum_assignment is None:
+            raise ImportError('Please run "pip install scipy" '
+                              'to install scipy first.')
+        pred_idx, gt_idx = linear_sum_assignment(cost)
+        device = bbox_pred.device
+        pred_idx = torch.from_numpy(pred_idx).to(device)
+        gt_idx = torch.from_numpy(gt_idx).to(device)
+        # Step 4: background everywhere, then overwrite the matched pairs.
+        assigned_gt_inds.fill_(0)
+        assigned_gt_inds[pred_idx] = gt_idx + 1
+        assigned_labels[pred_idx] = gt_labels[gt_idx]
+        return AssignResult(
+            num_gts, assigned_gt_inds, None, labels=assigned_labels)
\ No newline at end of file
--- a/docker-hub/MapTRv2/MapTR/projects/mmdet3d_plugin/core/bbox/coders/__init__.py
+++ b/docker-hub/MapTRv2/MapTR/projects/mmdet3d_plugin/core/bbox/coders/__init__.py
+from .nms_free_coder import NMSFreeCoder, MapTRNMSFreeCoder
+__all__ = ['NMSFreeCoder', 'MapTRNMSFreeCoder']
--- a/docker-hub/MapTRv2/MapTR/projects/mmdet3d_plugin/core/bbox/coders/nms_free_coder.py
+++ b/docker-hub/MapTRv2/MapTR/projects/mmdet3d_plugin/core/bbox/coders/nms_free_coder.py
+import torch
+from mmdet.core.bbox import BaseBBoxCoder
+from mmdet.core.bbox.builder import BBOX_CODERS
+from projects.mmdet3d_plugin.core.bbox.util import denormalize_bbox
+import numpy as np
+from mmdet.core.bbox.transforms import bbox_xyxy_to_cxcywh, bbox_cxcywh_to_xyxy
+def denormalize_3d_pts(pts, pc_range):
+    """Map normalized 3D points back into the coordinate range ``pc_range``.
+    Each of the first three channels of the last dimension is rescaled as
+    ``value * (max - min) + min`` with the bounds of the matching axis.
+    Channels beyond the third are copied through unchanged.
+    Args:
+        pts (Tensor): Points with (x, y, z) in the leading channels of the
+            last dimension.
+        pc_range (list[float]): Bounds indexed as
+            (x_min, y_min, z_min, x_max, y_max, z_max).
+    Returns:
+        Tensor: A new tensor shaped like ``pts``; the input is not modified.
+    """
+    restored = pts.clone()
+    for axis in range(3):
+        lower, upper = pc_range[axis], pc_range[axis + 3]
+        restored[..., axis:axis + 1] = (
+            pts[..., axis:axis + 1] * (upper - lower) + lower)
+    return restored
+def normalize_3d_pts(pts, pc_range):
+    """Normalize 3D points by the extent of ``pc_range``.
+    The per-axis minimum is subtracted from the first three channels and
+    the result is divided by the (x, y, z) extents of the range.
+    Args:
+        pts (Tensor): Points with (x, y, z) in the last dimension.
+        pc_range (list[float]): Bounds indexed as
+            (x_min, y_min, z_min, x_max, y_max, z_max).
+    Returns:
+        Tensor: Normalized points; the input is not modified.
+    """
+    extent_y = pc_range[4] - pc_range[1]
+    extent_x = pc_range[3] - pc_range[0]
+    extent_z = pc_range[5] - pc_range[2]
+    shifted = pts.clone()
+    for axis in range(3):
+        shifted[..., axis:axis + 1] = pts[..., axis:axis + 1] - pc_range[axis]
+    # Same dtype/device as the input so the division needs no conversion.
+    extents = pts.new_tensor([extent_x, extent_y, extent_z])
+    return shifted / extents
+def normalize_2d_bbox(bboxes, pc_range):
+    """Convert xyxy boxes to cxcywh and normalize them by ``pc_range``.
+    The box centers are shifted by the x/y minimum of the range, then all
+    four channels are divided by (x_extent, y_extent, x_extent, y_extent).
+    Args:
+        bboxes (Tensor): Boxes in (x1, y1, x2, y2) form, as expected by
+            ``bbox_xyxy_to_cxcywh``.
+        pc_range (list[float]): Bounds; indices 0/1 are read as the x/y
+            minimum and 3/4 as the x/y maximum.
+    Returns:
+        Tensor: Normalized (cx, cy, w, h) boxes.
+    """
+    x_min, y_min = pc_range[0], pc_range[1]
+    extent_y = pc_range[4] - y_min
+    extent_x = pc_range[3] - x_min
+    boxes = bbox_xyxy_to_cxcywh(bboxes)
+    boxes[..., 0:1] = boxes[..., 0:1] - x_min
+    boxes[..., 1:2] = boxes[..., 1:2] - y_min
+    scale = bboxes.new_tensor([extent_x, extent_y, extent_x, extent_y])
+    return boxes / scale
+def normalize_2d_pts(pts, pc_range):
+    """Normalize 2D points by the x/y extent of ``pc_range``.
+    Args:
+        pts (Tensor): Points with (x, y) in the last dimension.
+        pc_range (list[float]): Bounds; indices 0/1 are read as the x/y
+            minimum and 3/4 as the x/y maximum.
+    Returns:
+        Tensor: ``(pts - min) / extent``; the input is not modified.
+    """
+    extent_y = pc_range[4] - pc_range[1]
+    extent_x = pc_range[3] - pc_range[0]
+    shifted = pts.clone()
+    for axis in range(2):
+        shifted[..., axis:axis + 1] = pts[..., axis:axis + 1] - pc_range[axis]
+    extents = pts.new_tensor([extent_x, extent_y])
+    return shifted / extents
+def denormalize_2d_bbox(bboxes, pc_range):
+    """Convert normalized cxcywh boxes to xyxy boxes inside ``pc_range``.
+    After the cxcywh -> xyxy conversion, the x channels (even indices) and
+    the y channels (odd indices) are rescaled as
+    ``value * (max - min) + min`` with the matching axis bounds.
+    Args:
+        bboxes (Tensor): Normalized boxes in (cx, cy, w, h) form, as
+            expected by ``bbox_cxcywh_to_xyxy``.
+        pc_range (list[float]): Bounds; indices 0/1 are read as the x/y
+            minimum and 3/4 as the x/y maximum.
+    Returns:
+        Tensor: Boxes in (x1, y1, x2, y2) form.
+    """
+    x_min, y_min = pc_range[0], pc_range[1]
+    corners = bbox_cxcywh_to_xyxy(bboxes)
+    corners[..., 0::2] = corners[..., 0::2] * (pc_range[3] - x_min) + x_min
+    corners[..., 1::2] = corners[..., 1::2] * (pc_range[4] - y_min) + y_min
+    return corners
+def denormalize_2d_pts(pts, pc_range):
+    """Map normalized 2D points back into the x/y range of ``pc_range``.
+    Args:
+        pts (Tensor): Points with (x, y) in the leading channels of the
+            last dimension; further channels are copied through unchanged.
+        pc_range (list[float]): Bounds; indices 0/1 are read as the x/y
+            minimum and 3/4 as the x/y maximum.
+    Returns:
+        Tensor: A new tensor shaped like ``pts``; the input is not modified.
+    """
+    restored = pts.clone()
+    for axis in range(2):
+        lower, upper = pc_range[axis], pc_range[axis + 3]
+        restored[..., axis:axis + 1] = (
+            pts[..., axis:axis + 1] * (upper - lower) + lower)
+    return restored
+@BBOX_CODERS.register_module()
+class NMSFreeCoder(BaseBBoxCoder):
+    """Bbox coder for NMS-free detector.
+    Args:
+        pc_range (list[float]): Range of point cloud.
+        voxel_size (list[float], optional): Stored on the coder; no method
+            of this class reads it. Default: None.
+        post_center_range (list[float]): Limit of the box center, laid out
+            as (x_min, y_min, z_min, x_max, y_max, z_max). Decoding raises
+            ``NotImplementedError`` when it is None. Default: None.
+        max_num (int): Max number to be kept. Default: 100.
+        score_threshold (float): Threshold to filter boxes based on score.
+            Default: None.
+        num_classes (int): Number of classes, used to split a flattened
+            score index into (query index, label). Default: 10.
+    """
+    def __init__(self,
+                 pc_range,
+                 voxel_size=None,
+                 post_center_range=None,
+                 max_num=100,
+                 score_threshold=None,
+                 num_classes=10):
+        self.pc_range = pc_range
+        self.voxel_size = voxel_size
+        self.post_center_range = post_center_range
+        self.max_num = max_num
+        self.score_threshold = score_threshold
+        self.num_classes = num_classes
+    def encode(self):
+        """Encoding is not implemented for this coder."""
+        pass
+    def decode_single(self, cls_scores, bbox_preds):
+        """Decode the predictions of a single sample.
+        Args:
+            cls_scores (Tensor): Classification logits of shape
+                [num_query, cls_out_channels]. The flattened top-k index is
+                split with ``num_classes``, so ``cls_out_channels`` is
+                expected to equal ``self.num_classes``.
+            bbox_preds (Tensor): Regression outputs in the normalized
+                format (cx, cy, w, l, cz, h, rot_sine, rot_cosine, vx, vy),
+                shape [num_query, code_size]; the velocity channels are
+                optional.
+        Returns:
+            dict: ``bboxes``, ``scores`` and ``labels`` of the kept boxes.
+        Raises:
+            NotImplementedError: If ``post_center_range`` is None.
+        """
+        cls_scores = cls_scores.sigmoid().view(-1)
+        # topk raises when asked for more entries than exist, so cap it.
+        max_num = min(self.max_num, cls_scores.numel())
+        scores, indexs = cls_scores.topk(max_num)
+        labels = indexs % self.num_classes
+        bbox_index = indexs // self.num_classes
+        bbox_preds = bbox_preds[bbox_index]
+        final_box_preds = denormalize_bbox(bbox_preds, self.pc_range)
+        final_scores = scores
+        final_preds = labels
+        # use score threshold
+        thresh_mask = None
+        if self.score_threshold is not None:
+            thresh_mask = final_scores > self.score_threshold
+            tmp_score = self.score_threshold
+            # Relax the threshold by 10% per step until something passes;
+            # once it drops below 0.01 keep every candidate instead.
+            while thresh_mask.sum() == 0:
+                tmp_score *= 0.9
+                if tmp_score < 0.01:
+                    thresh_mask = final_scores > -1
+                    break
+                thresh_mask = final_scores >= tmp_score
+        if self.post_center_range is None:
+            raise NotImplementedError(
+                'Need to reorganize output as a batch, only '
+                'support post_center_range is not None for now!')
+        # Build the range tensor locally: overwriting the attribute would
+        # re-wrap a tensor on every later call and pin it to one device.
+        center_range = torch.as_tensor(
+            self.post_center_range, device=scores.device)
+        mask = (final_box_preds[..., :3] >= center_range[:3]).all(1)
+        mask &= (final_box_preds[..., :3] <= center_range[3:]).all(1)
+        if thresh_mask is not None:
+            mask &= thresh_mask
+        predictions_dict = {
+            'bboxes': final_box_preds[mask],
+            'scores': final_scores[mask],
+            'labels': final_preds[mask]
+        }
+        return predictions_dict
+    def decode(self, preds_dicts):
+        """Decode the last decoder layer's predictions for a whole batch.
+        Args:
+            preds_dicts (dict): Must contain
+                ``all_cls_scores`` (Tensor): Outputs from the classification
+                head, shape [nb_dec, bs, num_query, cls_out_channels], and
+                ``all_bbox_preds`` (Tensor): Outputs from the regression
+                head, shape [nb_dec, bs, num_query, code_size].
+                Only the last entry along ``nb_dec`` is decoded.
+        Returns:
+            list[dict]: One ``decode_single`` result per sample.
+        """
+        all_cls_scores = preds_dicts['all_cls_scores'][-1]
+        all_bbox_preds = preds_dicts['all_bbox_preds'][-1]
+        batch_size = all_cls_scores.size()[0]
+        return [
+            self.decode_single(all_cls_scores[i], all_bbox_preds[i])
+            for i in range(batch_size)
+        ]
+@BBOX_CODERS.register_module()
+class MapTRNMSFreeCoder(BaseBBoxCoder):
+    """NMS-free coder for map elements predicted as a box plus a point set.
+    Args:
+        pc_range (list[float]): Range of point cloud.
+        z_cfg (dict): Only ``gt_z_flag`` is read here; when true the points
+            are denormalized as 3D points, otherwise as 2D points. The
+            default dict is shared between instances and must not be
+            mutated.
+        voxel_size (list[float], optional): Stored on the coder; no method
+            of this class reads it. Default: None.
+        post_center_range (list[float]): Limits for the decoded xyxy box:
+            the first four values are lower bounds and the remaining
+            values upper bounds for (x1, y1, x2, y2). Decoding raises
+            ``NotImplementedError`` when it is None. Default: None.
+        max_num (int): Max number to be kept. Default: 100.
+        score_threshold (float): Threshold to filter boxes based on score.
+            Default: None.
+        num_classes (int): Number of classes, used to split a flattened
+            score index into (query index, label). Default: 10.
+    """
+    def __init__(self,
+                 pc_range,
+                 z_cfg = dict(
+                    pred_z_flag=False,
+                    gt_z_flag=False,
+                 ),
+                 voxel_size=None,
+                 post_center_range=None,
+                 max_num=100,
+                 score_threshold=None,
+                 num_classes=10):
+        self.pc_range = pc_range
+        self.voxel_size = voxel_size
+        self.post_center_range = post_center_range
+        self.max_num = max_num
+        self.score_threshold = score_threshold
+        self.num_classes = num_classes
+        self.z_cfg = z_cfg
+    def encode(self):
+        """Encoding is not implemented for this coder."""
+        pass
+    def decode_single(self, cls_scores, bbox_preds, pts_preds):
+        """Decode the predictions of a single sample.
+        Args:
+            cls_scores (Tensor): Classification logits of shape
+                [num_query, cls_out_channels]. The flattened top-k index is
+                split with ``num_classes``, so ``cls_out_channels`` is
+                expected to equal ``self.num_classes``.
+            bbox_preds (Tensor): Normalized boxes in (cx, cy, w, h) form,
+                shape [num_query, 4]; they are converted to xyxy while
+                being denormalized.
+            pts_preds (Tensor): Normalized points of shape
+                [num_query, fixed_num_pts, 2], or with 3 coordinates when
+                ``z_cfg['gt_z_flag']`` is set.
+        Returns:
+            dict: ``bboxes``, ``scores``, ``labels`` and ``pts`` of the
+                kept predictions.
+        Raises:
+            NotImplementedError: If ``post_center_range`` is None.
+        """
+        cls_scores = cls_scores.sigmoid().view(-1)
+        # topk raises when asked for more entries than exist, so cap it.
+        max_num = min(self.max_num, cls_scores.numel())
+        scores, indexs = cls_scores.topk(max_num)
+        labels = indexs % self.num_classes
+        bbox_index = indexs // self.num_classes
+        bbox_preds = bbox_preds[bbox_index]
+        pts_preds = pts_preds[bbox_index]
+        final_box_preds = denormalize_2d_bbox(bbox_preds, self.pc_range)
+        # num_q, num_p, 2 (or 3 when z is predicted)
+        if self.z_cfg['gt_z_flag']:
+            final_pts_preds = denormalize_3d_pts(pts_preds, self.pc_range)
+        else:
+            final_pts_preds = denormalize_2d_pts(pts_preds, self.pc_range)
+        final_scores = scores
+        final_preds = labels
+        # use score threshold
+        thresh_mask = None
+        if self.score_threshold is not None:
+            thresh_mask = final_scores > self.score_threshold
+            tmp_score = self.score_threshold
+            # Relax the threshold by 10% per step until something passes;
+            # once it drops below 0.01 keep every candidate instead.
+            while thresh_mask.sum() == 0:
+                tmp_score *= 0.9
+                if tmp_score < 0.01:
+                    thresh_mask = final_scores > -1
+                    break
+                thresh_mask = final_scores >= tmp_score
+        if self.post_center_range is None:
+            raise NotImplementedError(
+                'Need to reorganize output as a batch, only '
+                'support post_center_range is not None for now!')
+        # Build the range tensor locally: overwriting the attribute would
+        # re-wrap a tensor on every later call and pin it to one device.
+        center_range = torch.as_tensor(
+            self.post_center_range, device=scores.device)
+        mask = (final_box_preds[..., :4] >= center_range[:4]).all(1)
+        mask &= (final_box_preds[..., :4] <= center_range[4:]).all(1)
+        if thresh_mask is not None:
+            mask &= thresh_mask
+        predictions_dict = {
+            'bboxes': final_box_preds[mask],
+            'scores': final_scores[mask],
+            'labels': final_preds[mask],
+            'pts': final_pts_preds[mask],
+        }
+        return predictions_dict
+    def decode(self, preds_dicts):
+        """Decode the last decoder layer's predictions for a whole batch.
+        Args:
+            preds_dicts (dict): Must contain ``all_cls_scores``,
+                ``all_bbox_preds`` and ``all_pts_preds``, each indexed as
+                [nb_dec, bs, num_query, ...]. Only the last entry along
+                ``nb_dec`` is decoded.
+        Returns:
+            list[dict]: One ``decode_single`` result per sample.
+        """
+        all_cls_scores = preds_dicts['all_cls_scores'][-1]
+        all_bbox_preds = preds_dicts['all_bbox_preds'][-1]
+        all_pts_preds = preds_dicts['all_pts_preds'][-1]
+        batch_size = all_cls_scores.size()[0]
+        return [
+            self.decode_single(
+                all_cls_scores[i], all_bbox_preds[i], all_pts_preds[i])
+            for i in range(batch_size)
+        ]
--- a/docker-hub/MapTRv2/MapTR/projects/mmdet3d_plugin/core/bbox/match_costs/__init__.py
+++ b/docker-hub/MapTRv2/MapTR/projects/mmdet3d_plugin/core/bbox/match_costs/__init__.py
+from mmdet.core.bbox.match_costs import build_match_cost
+from .match_cost import BBox3DL1Cost
+__all__ = ['build_match_cost', 'BBox3DL1Cost']
\ No newline at end of file
--- a/docker-hub/MapTRv2/MapTR/projects/mmdet3d_plugin/core/bbox/match_costs/match_cost.py
+++ b/docker-hub/MapTRv2/MapTR/projects/mmdet3d_plugin/core/bbox/match_costs/match_cost.py
+import torch
+from mmdet.core.bbox.match_costs.builder import MATCH_COST
+@MATCH_COST.register_module()
+class BBox3DL1Cost(object):
+    """Weighted pairwise L1 matching cost between two sets of box codes.
+     Args:
+         weight (int | float, optional): Factor the cost is multiplied by.
+             Default: 1.
+    """
+    def __init__(self, weight=1.):
+        self.weight = weight
+    def __call__(self, bbox_pred, gt_bboxes):
+        """Compute the weighted L1 distance for every (prediction, gt) pair.
+        Args:
+            bbox_pred (Tensor): Predicted box codes, one row per query.
+            gt_bboxes (Tensor): Ground-truth box codes, one row per target,
+                with the same code length as ``bbox_pred``.
+                NOTE(review): presumably both are normalized 3D box codes
+                produced by ``normalize_bbox`` -- confirm against the caller.
+        Returns:
+            torch.Tensor: ``torch.cdist(bbox_pred, gt_bboxes, p=1)`` scaled
+                by ``weight``.
+        """
+        pairwise_l1 = torch.cdist(bbox_pred, gt_bboxes, p=1)
+        weighted_cost = pairwise_l1 * self.weight
+        return weighted_cost
\ No newline at end of file
--- a/docker-hub/MapTRv2/MapTR/projects/mmdet3d_plugin/core/bbox/util.py
+++ b/docker-hub/MapTRv2/MapTR/projects/mmdet3d_plugin/core/bbox/util.py
+import torch 
+def normalize_bbox(bboxes, pc_range):
+    """Encode boxes into the regression format used by the detection head.
+    Sizes are log-encoded and the yaw angle is replaced by its sine and
+    cosine; the output channel order is
+    (cx, cy, log w, log l, cz, log h, sin, cos[, vx, vy]).
+    Args:
+        bboxes (Tensor): Boxes read as (cx, cy, cz, w, l, h, rot[, vx, vy])
+            along the last dimension.
+        pc_range (list[float]): Accepted for interface compatibility; not
+            used by this function.
+    Returns:
+        Tensor: Encoded boxes; the velocity channels are appended only when
+            the input has more than 7 channels.
+    """
+    center_x, center_y, center_z = (
+        bboxes[..., i:i + 1] for i in range(3))
+    log_w, log_l, log_h = (
+        bboxes[..., i:i + 1].log() for i in range(3, 6))
+    yaw = bboxes[..., 6:7]
+    fields = [center_x, center_y, log_w, log_l, center_z, log_h,
+              yaw.sin(), yaw.cos()]
+    if bboxes.size(-1) > 7:
+        fields.extend((bboxes[..., 7:8], bboxes[..., 8:9]))
+    return torch.cat(fields, dim=-1)
+def denormalize_bbox(normalized_bboxes, pc_range):
+    """Decode boxes from the regression format back to box parameters.
+    The input channels are read as
+    (cx, cy, log w, log l, cz, log h, sin, cos[, vx, vy]). Sizes are
+    exponentiated and the yaw is recovered with ``atan2(sin, cos)``.
+    Args:
+        normalized_bboxes (Tensor): Encoded boxes with any number of
+            leading dimensions.
+        pc_range (list[float]): Accepted for interface compatibility; not
+            used by this function.
+    Returns:
+        Tensor: Boxes as (cx, cy, cz, w, l, h, rot[, vx, vy]); the velocity
+            channels are appended only when the input has more than 8
+            channels.
+    """
+    # rotation
+    rot_sine = normalized_bboxes[..., 6:7]
+    rot_cosine = normalized_bboxes[..., 7:8]
+    rot = torch.atan2(rot_sine, rot_cosine)
+    # center in the bev
+    cx = normalized_bboxes[..., 0:1]
+    cy = normalized_bboxes[..., 1:2]
+    cz = normalized_bboxes[..., 4:5]
+    # size
+    w = normalized_bboxes[..., 2:3].exp()
+    l = normalized_bboxes[..., 3:4].exp()
+    h = normalized_bboxes[..., 5:6].exp()
+    fields = [cx, cy, cz, w, l, h, rot]
+    if normalized_bboxes.size(-1) > 8:
+        # velocity: slice the last dimension like every other field so that
+        # inputs with extra leading dimensions are handled as well
+        fields.append(normalized_bboxes[..., 8:9])
+        fields.append(normalized_bboxes[..., 9:10])
+    return torch.cat(fields, dim=-1)
\ No newline at end of file
--- a/docker-hub/MapTRv2/MapTR/projects/mmdet3d_plugin/core/evaluation/__init__.py
+++ b/docker-hub/MapTRv2/MapTR/projects/mmdet3d_plugin/core/evaluation/__init__.py
+from .eval_hooks import CustomDistEvalHook
\ No newline at end of file
--- a/docker-hub/MapTRv2/MapTR/projects/mmdet3d_plugin/core/evaluation/eval_hooks.py
+++ b/docker-hub/MapTRv2/MapTR/projects/mmdet3d_plugin/core/evaluation/eval_hooks.py
+# Note: Considering that MMCV's EvalHook updated its interface in V1.3.16,
+# in order to avoid strong version dependency, we did not directly
+# inherit EvalHook but BaseDistEvalHook.
+import bisect
+import os.path as osp
+import mmcv
+import torch.distributed as dist
+from mmcv.runner import DistEvalHook as BaseDistEvalHook
+from mmcv.runner import EvalHook as BaseEvalHook
+from torch.nn.modules.batchnorm import _BatchNorm
+from mmdet.core.evaluation.eval_hooks import DistEvalHook
+def _calc_dynamic_intervals(start_interval, dynamic_interval_list):
+    """Split ``(milestone, interval)`` pairs into two parallel lists.
+    Args:
+        start_interval (int): Interval that applies from milestone 0.
+        dynamic_interval_list (list[tuple]): ``(milestone, interval)``
+            pairs.
+    Returns:
+        tuple[list, list]: The milestones, prefixed with 0, and the
+            intervals, prefixed with ``start_interval``.
+    """
+    assert mmcv.is_list_of(dynamic_interval_list, tuple)
+    milestones = [0] + [pair[0] for pair in dynamic_interval_list]
+    intervals = [start_interval] + [pair[1] for pair in dynamic_interval_list]
+    return milestones, intervals
+class CustomDistEvalHook(BaseDistEvalHook):
+    """Distributed eval hook whose evaluation interval can change over time.
+    Args:
+        *args: Forwarded to ``BaseDistEvalHook``.
+        dynamic_intervals (list[tuple], optional): ``(milestone, interval)``
+            pairs. Once ``progress + 1`` reaches a milestone, the paired
+            interval replaces ``self.interval``. Default: None, which
+            leaves the interval fixed.
+        **kwargs: Forwarded to ``BaseDistEvalHook``.
+    """
+    def __init__(self, *args, dynamic_intervals=None,  **kwargs):
+        super(CustomDistEvalHook, self).__init__(*args, **kwargs)
+        self.use_dynamic_intervals = dynamic_intervals is not None
+        if self.use_dynamic_intervals:
+            # The interval configured on the parent hook applies from
+            # milestone 0 until the first user-supplied milestone.
+            self.dynamic_milestones, self.dynamic_intervals = \
+                _calc_dynamic_intervals(self.interval, dynamic_intervals)
+    def _decide_interval(self, runner):
+        """Update ``self.interval`` from the runner's current progress."""
+        if self.use_dynamic_intervals:
+            progress = runner.epoch if self.by_epoch else runner.iter
+            # bisect returns the count of milestones <= progress + 1, so
+            # step - 1 indexes the most recently reached milestone.
+            step = bisect.bisect(self.dynamic_milestones, (progress + 1))
+            # Dynamically modify the evaluation interval
+            self.interval = self.dynamic_intervals[step - 1]
+    def before_train_epoch(self, runner):
+        """Refresh the interval, then defer to the parent hook."""
+        self._decide_interval(runner)
+        super().before_train_epoch(runner)
+    def before_train_iter(self, runner):
+        """Refresh the interval, then defer to the parent hook."""
+        self._decide_interval(runner)
+        super().before_train_iter(runner)
+    def _do_evaluate(self, runner):
+        """perform evaluation and save ckpt."""
+        # Synchronization of BatchNorm's buffer (running_mean
+        # and running_var) is not supported in the DDP of pytorch,
+        # which may cause the inconsistent performance of models in
+        # different ranks, so we broadcast BatchNorm's buffers
+        # of rank 0 to other ranks to avoid this.
+        if self.broadcast_bn_buffer:
+            model = runner.model
+            for name, module in model.named_modules():
+                if isinstance(module,
+                              _BatchNorm) and module.track_running_stats:
+                    dist.broadcast(module.running_var, 0)
+                    dist.broadcast(module.running_mean, 0)
+        # The broadcast above runs on every call; only the test pass below
+        # is skipped when no evaluation is due.
+        if not self._should_evaluate(runner):
+            return
+        tmpdir = self.tmpdir
+        if tmpdir is None:
+            tmpdir = osp.join(runner.work_dir, '.eval_hook')
+        from projects.mmdet3d_plugin.bevformer.apis.test import custom_multi_gpu_test # imported lazily to avoid a circular import
+        results = custom_multi_gpu_test(
+            runner.model,
+            self.dataloader,
+            tmpdir=tmpdir,
+            gpu_collect=self.gpu_collect)
+        # Only rank 0 computes metrics and, if requested, saves the best
+        # checkpoint.
+        if runner.rank == 0:
+            print('\n')
+            runner.log_buffer.output['eval_iter_num'] = len(self.dataloader)
+            key_score = self.evaluate(runner, results)
+            if self.save_best:
+                self._save_ckpt(runner, key_score)
--- a/docker-hub/MapTRv2/MapTR/projects/mmdet3d_plugin/core/evaluation/kitti2waymo.py
+++ b/docker-hub/MapTRv2/MapTR/projects/mmdet3d_plugin/core/evaluation/kitti2waymo.py
+# Copyright (c) OpenMMLab. All rights reserved.
+r"""Adapted from `Waymo to KITTI converter
+    <https://github.com/caizhongang/waymo_kitti_converter>`_.
+"""
+try:
+    from waymo_open_dataset import dataset_pb2 as open_dataset
+    import mmcv
+    import numpy as np
+    import tensorflow as tf
+    from glob import glob
+    from os.path import join
+    from waymo_open_dataset import label_pb2
+    from waymo_open_dataset.protos import metrics_pb2
+except ImportError:
+    #pass
+    raise ImportError(
+        'Please run "pip install waymo-open-dataset-tf-2-1-0==1.2.0" '
+        'to install the official devkit first.')
+class KITTI2Waymo(object):
+    """KITTI predictions to Waymo converter.
+    This class serves as the converter to change predictions from KITTI to
+    Waymo format.
+    Args:
+        kitti_result_files (list[dict]): Predictions in KITTI format. Each
+            entry with a non-empty ``sample_idx`` is indexed by
+            ``str(sample_idx[0])``.
+        waymo_tfrecords_dir (str): Directory to load waymo raw data.
+        waymo_results_save_dir (str): Directory to save converted predictions
+            in waymo format (.bin files).
+        waymo_results_final_path (str): Path to save combined
+            predictions in waymo format (.bin file), like 'a/b/c.bin'.
+        prefix (str): Prefix of filename. In general, 0 for training, 1 for
+            validation and 2 for testing.
+        workers (int): Number of parallel processes; converted with
+            ``int()``. Default: 64.
+    """
+    def __init__(self,
+                 kitti_result_files,
+                 waymo_tfrecords_dir,
+                 waymo_results_save_dir,
+                 waymo_results_final_path,
+                 prefix,
+                 workers=64):
+        self.kitti_result_files = kitti_result_files
+        self.waymo_tfrecords_dir = waymo_tfrecords_dir
+        self.waymo_results_save_dir = waymo_results_save_dir
+        self.waymo_results_final_path = waymo_results_final_path
+        self.prefix = prefix
+        self.workers = int(workers)
+        # Maps the stringified first sample index of each result to its
+        # position in kitti_result_files; results without a sample index
+        # are left out.
+        self.name2idx = {}
+        for idx, result in enumerate(kitti_result_files):
+            if len(result['sample_idx']) > 0:
+                self.name2idx[str(result['sample_idx'][0])] = idx
+        # turn on eager execution for older tensorflow versions
+        if int(tf.__version__.split('.')[0]) < 2:
+            tf.enable_eager_execution()
+        # KITTI class name -> Waymo label type; any other class name raises
+        # KeyError in parse_objects.
+        self.k2w_cls_map = {
+            'Car': label_pb2.Label.TYPE_VEHICLE,
+            'Pedestrian': label_pb2.Label.TYPE_PEDESTRIAN,
+            'Sign': label_pb2.Label.TYPE_SIGN,
+            'Cyclist': label_pb2.Label.TYPE_CYCLIST,
+        }
+        # NOTE(review): presumably the axis permutation from the KITTI
+        # reference camera frame to the Waymo front camera frame -- confirm.
+        self.T_ref_to_front_cam = np.array([[0.0, 0.0, 1.0, 0.0],
+                                            [-1.0, 0.0, 0.0, 0.0],
+                                            [0.0, -1.0, 0.0, 0.0],
+                                            [0.0, 0.0, 0.0, 1.0]])
+        self.get_file_names()
+        self.create_folder()
+    def get_file_names(self):
+        """Get file names of waymo raw data."""
+        self.waymo_tfrecord_pathnames = sorted(
+            glob(join(self.waymo_tfrecords_dir, '*.tfrecord')))
+        print(len(self.waymo_tfrecord_pathnames), 'tfrecords found.')
+    def create_folder(self):
+        """Create folder for data conversion."""
+        mmcv.mkdir_or_exist(self.waymo_results_save_dir)
+    def parse_objects(self, kitti_result, T_k2w, context_name,
+                      frame_timestamp_micros):
+        """Parse one prediction with several instances in kitti format and
+        convert them to `Object` proto.
+        Args:
+            kitti_result (dict): Predictions in kitti format.
+                - name (np.ndarray): Class labels of predictions.
+                - dimensions (np.ndarray): Box sizes; columns 0, 1, 2 are
+                  read as length, height, width.
+                - location (np.ndarray): Bottom center of boxes (x, y, z).
+                - rotation_y (np.ndarray): Orientation of boxes.
+                - score (np.ndarray): Scores of predictions.
+            T_k2w (np.ndarray): Transformation matrix from kitti to waymo.
+            context_name (str): Context name of the frame.
+            frame_timestamp_micros (int): Frame timestamp.
+        Returns:
+            :obj:`Objects`: Predictions in waymo dataset Objects proto, one
+                entry per instance in ``kitti_result['name']``.
+        """
+        def parse_one_object(instance_idx):
+            """Parse one instance in kitti format and convert them to `Object`
+            proto.
+            Args:
+                instance_idx (int): Index of the instance to be converted.
+            Returns:
+                :obj:`Object`: Predicted instance in waymo dataset
+                    Object proto.
+            """
+            cls = kitti_result['name'][instance_idx]
+            # All numeric fields are rounded to 4 decimals before use.
+            length = round(kitti_result['dimensions'][instance_idx, 0], 4)
+            height = round(kitti_result['dimensions'][instance_idx, 1], 4)
+            width = round(kitti_result['dimensions'][instance_idx, 2], 4)
+            x = round(kitti_result['location'][instance_idx, 0], 4)
+            y = round(kitti_result['location'][instance_idx, 1], 4)
+            z = round(kitti_result['location'][instance_idx, 2], 4)
+            rotation_y = round(kitti_result['rotation_y'][instance_idx], 4)
+            score = round(kitti_result['score'][instance_idx], 4)
+            # y: downwards; move box origin from bottom center (kitti) to
+            # true center (waymo)
+            y -= height / 2
+            # frame transformation: kitti -> waymo
+            x, y, z = self.transform(T_k2w, x, y, z)
+            # different conventions
+            heading = -(rotation_y + np.pi / 2)
+            # Wrap the heading into [-pi, pi].
+            while heading < -np.pi:
+                heading += 2 * np.pi
+            while heading > np.pi:
+                heading -= 2 * np.pi
+            box = label_pb2.Label.Box()
+            box.center_x = x
+            box.center_y = y
+            box.center_z = z
+            box.length = length
+            box.width = width
+            box.height = height
+            box.heading = heading
+            o = metrics_pb2.Object()
+            o.object.box.CopyFrom(box)
+            o.object.type = self.k2w_cls_map[cls]
+            o.score = score
+            o.context_name = context_name
+            o.frame_timestamp_micros = frame_timestamp_micros
+            return o
+        objects = metrics_pb2.Objects()
+        for instance_idx in range(len(kitti_result['name'])):
+            o = parse_one_object(instance_idx)
+            objects.objects.append(o)
+        return objects
+    def convert_one(self, file_idx):
+        """Convert action for single file.
+        Every frame of the tfrecord is written to its own ``.bin`` file in
+        ``waymo_results_save_dir``; frames without a matching prediction
+        get an empty ``Objects`` message.
+        Args:
+            file_idx (int): Index of the file to be converted.
+        """
+        file_pathname = self.waymo_tfrecord_pathnames[file_idx]
+        file_data = tf.data.TFRecordDataset(file_pathname, compression_type='')
+        for frame_num, frame_data in enumerate(file_data):
+            frame = open_dataset.Frame()
+            frame.ParseFromString(bytearray(frame_data.numpy()))
+            # Lookup key: prefix + 3-digit file index + 3-digit frame index.
+            filename = f'{self.prefix}{file_idx:03d}{frame_num:03d}'
+            # NOTE(review): if a frame has no calibration with name == 1,
+            # T_front_cam_to_vehicle is unbound on the first frame and
+            # carries over from the previous frame afterwards -- confirm
+            # this cannot happen in the data.
+            for camera in frame.context.camera_calibrations:
+                # FRONT = 1, see dataset.proto for details
+                if camera.name == 1:
+                    T_front_cam_to_vehicle = np.array(
+                        camera.extrinsic.transform).reshape(4, 4)
+            T_k2w = T_front_cam_to_vehicle @ self.T_ref_to_front_cam
+            context_name = frame.context.name
+            frame_timestamp_micros = frame.timestamp_micros
+            if filename in self.name2idx:
+                kitti_result = \
+                    self.kitti_result_files[self.name2idx[filename]]
+                objects = self.parse_objects(kitti_result, T_k2w, context_name,
+                                             frame_timestamp_micros)
+            else:
+                print(filename, 'not found.(bevformer)')
+                objects = metrics_pb2.Objects()
+            with open(
+                    join(self.waymo_results_save_dir, f'{filename}.bin'),
+                    'wb') as f:
+                f.write(objects.SerializeToString())
+    def convert(self):
+        """Convert action.
+        Converts every tfrecord in parallel, then merges all per-frame
+        ``.bin`` files found in ``waymo_results_save_dir`` into
+        ``waymo_results_final_path``.
+        """
+        print('Start converting ...')
+        mmcv.track_parallel_progress(self.convert_one, range(len(self)),
+                                     self.workers)
+        print('\nFinished ...')
+        # combine all files into one .bin
+        # NOTE(review): the glob picks up every .bin in the directory,
+        # including files left over from earlier runs -- confirm the
+        # directory is clean before converting.
+        pathnames = sorted(glob(join(self.waymo_results_save_dir, '*.bin')))
+        combined = self.combine(pathnames)
+        with open(self.waymo_results_final_path, 'wb') as f:
+            f.write(combined.SerializeToString())
+    def __len__(self):
+        """Length of the filename list."""
+        return len(self.waymo_tfrecord_pathnames)
+    def transform(self, T, x, y, z):
+        """Transform the coordinates with matrix T.
+        Args:
+            T (np.ndarray): Transformation matrix, applied to the
+                homogeneous point (x, y, z, 1).
+            x(float): Coordinate in x axis.
+            y(float): Coordinate in y axis.
+            z(float): Coordinate in z axis.
+        Returns:
+            list: Coordinates after transformation.
+        """
+        pt_bef = np.array([x, y, z, 1.0]).reshape(4, 1)
+        pt_aft = np.matmul(T, pt_bef)
+        return pt_aft[:3].flatten().tolist()
+    def combine(self, pathnames):
+        """Combine predictions in waymo format for each sample together.
+        Args:
+            pathnames (list[str]): Paths of the per-frame prediction files
+                to read.
+        Returns:
+            :obj:`Objects`: Combined predictions in Objects proto.
+        """
+        combined = metrics_pb2.Objects()
+        for pathname in pathnames:
+            objects = metrics_pb2.Objects()
+            with open(pathname, 'rb') as f:
+                objects.ParseFromString(f.read())
+            for o in objects.objects:
+                combined.objects.append(o)
+        return combined
\ No newline at end of file
--- a/docker-hub/MapTRv2/MapTR/projects/mmdet3d_plugin/datasets/__init__.py
+++ b/docker-hub/MapTRv2/MapTR/projects/mmdet3d_plugin/datasets/__init__.py
+from .nuscenes_dataset import CustomNuScenesDataset
+from .builder import custom_build_dataset
+from .nuscenes_map_dataset import CustomNuScenesLocalMapDataset
+from .av2_map_dataset import CustomAV2LocalMapDataset
+from .nuscenes_offlinemap_dataset import CustomNuScenesOfflineLocalMapDataset
+from .av2_offlinemap_dataset import CustomAV2OfflineLocalMapDataset
+__all__ = [
+    'CustomNuScenesDataset','CustomNuScenesLocalMapDataset'
+]
--- a/docker-hub/MapTRv2/MapTR/projects/mmdet3d_plugin/datasets/av2_map_dataset.py
+++ b/docker-hub/MapTRv2/MapTR/projects/mmdet3d_plugin/datasets/av2_map_dataset.py
+import copy
+import numpy as np
+from mmdet.datasets import DATASETS
+from mmdet3d.datasets import NuScenesDataset
+import mmcv
+import os
+from os import path as osp
+from mmdet.datasets import DATASETS
+import torch
+import numpy as np
+from nuscenes.eval.common.utils import quaternion_yaw, Quaternion
+from .nuscnes_eval import NuScenesEval_custom
+from projects.mmdet3d_plugin.models.utils.visual import save_tensor
+from mmcv.parallel import DataContainer as DC
+import random
+from .nuscenes_dataset import CustomNuScenesDataset
+from nuscenes.map_expansion.map_api import NuScenesMap, NuScenesMapExplorer
+from nuscenes.eval.common.utils import quaternion_yaw, Quaternion
+from shapely import affinity, ops
+from shapely.geometry import Polygon, LineString, box, MultiPolygon, MultiLineString
+from mmdet.datasets.pipelines import to_tensor
+import json
+from pathlib import Path
+from av2.datasets.sensor.av2_sensor_dataloader import AV2SensorDataLoader
+from av2.map.lane_segment import LaneMarkType, LaneSegment
+from av2.map.map_api import ArgoverseStaticMap
+from av2.geometry.se3 import SE3
+import av2.geometry.interpolate as interp_utils
+class LiDARInstanceLines(object):
+    """Line instance in LIDAR coordinates
+    """
+    def __init__(self, 
+                 instance_line_list, 
+                 sample_dist=1,
+                 num_samples=250,
+                 padding=False,
+                 fixed_num=-1,
+                 padding_value=-10000,
+                 patch_size=None):
+        assert isinstance(instance_line_list, list)
+        assert patch_size is not None
+        if len(instance_line_list) != 0:
+            assert isinstance(instance_line_list[0], LineString)
+        self.patch_size = patch_size
+        self.max_x = self.patch_size[1] / 2
+        self.max_y = self.patch_size[0] / 2
+        self.sample_dist = sample_dist
+        self.num_samples = num_samples
+        self.padding = padding
+        self.fixed_num = fixed_num
+        self.padding_value = padding_value
+        self.instance_list = instance_line_list
+    @property
+    def start_end_points(self):
+        """
+        return torch.Tensor([N,4]), in xstart, ystart, xend, yend form
+        """
+        assert len(self.instance_list) != 0
+        instance_se_points_list = []
+        for instance in self.instance_list:
+            se_points = []
+            se_points.extend(instance.coords[0])
+            se_points.extend(instance.coords[-1])
+            instance_se_points_list.append(se_points)
+        instance_se_points_array = np.array(instance_se_points_list)
+        instance_se_points_tensor = to_tensor(instance_se_points_array)
+        instance_se_points_tensor = instance_se_points_tensor.to(
+                                dtype=torch.float32)
+        instance_se_points_tensor[:,0] = torch.clamp(instance_se_points_tensor[:,0], min=-self.max_x,max=self.max_x)
+        instance_se_points_tensor[:,1] = torch.clamp(instance_se_points_tensor[:,1], min=-self.max_y,max=self.max_y)
+        instance_se_points_tensor[:,2] = torch.clamp(instance_se_points_tensor[:,2], min=-self.max_x,max=self.max_x)
+        instance_se_points_tensor[:,3] = torch.clamp(instance_se_points_tensor[:,3], min=-self.max_y,max=self.max_y)
+        return instance_se_points_tensor
+    @property
+    def bbox(self):
+        """
+        return torch.Tensor([N,4]), in xmin, ymin, xmax, ymax form
+        """
+        assert len(self.instance_list) != 0
+        instance_bbox_list = []
+        for instance in self.instance_list:
+            # bounds is bbox: [xmin, ymin, xmax, ymax]
+            instance_bbox_list.append(instance.bounds)
+        instance_bbox_array = np.array(instance_bbox_list)
+        instance_bbox_tensor = to_tensor(instance_bbox_array)
+        instance_bbox_tensor = instance_bbox_tensor.to(
+                            dtype=torch.float32)
+        instance_bbox_tensor[:,0] = torch.clamp(instance_bbox_tensor[:,0], min=-self.max_x,max=self.max_x)
+        instance_bbox_tensor[:,1] = torch.clamp(instance_bbox_tensor[:,1], min=-self.max_y,max=self.max_y)
+        instance_bbox_tensor[:,2] = torch.clamp(instance_bbox_tensor[:,2], min=-self.max_x,max=self.max_x)
+        instance_bbox_tensor[:,3] = torch.clamp(instance_bbox_tensor[:,3], min=-self.max_y,max=self.max_y)
+        return instance_bbox_tensor
+    @property
+    def fixed_num_sampled_points(self):
+        """
+        return torch.Tensor([N,fixed_num,2]), in xmin, ymin, xmax, ymax form
+            N means the num of instances
+        """
+        assert len(self.instance_list) != 0
+        instance_points_list = []
+        for instance in self.instance_list:
+            # instance_array = np.array(list(instance.coords))
+            # interpolated_instance = interp_utils.interp_arc(t=self.fixed_num, points=instance_array)
+            distances = np.linspace(0, instance.length, self.fixed_num)
+            sampled_points = np.array([list(instance.interpolate(distance).coords) for distance in distances])
+            if instance.has_z:
+                sampled_points = sampled_points.reshape(-1,3)
+            else:
+                sampled_points = sampled_points.reshape(-1,2)
+            # import pdb;pdb.set_trace()
+            instance_points_list.append(sampled_points)
+        instance_points_array = np.array(instance_points_list)
+        instance_points_tensor = to_tensor(instance_points_array)
+        instance_points_tensor = instance_points_tensor.to(
+                            dtype=torch.float32)
+        instance_points_tensor[:,:,0] = torch.clamp(instance_points_tensor[:,:,0], min=-self.max_x,max=self.max_x)
+        instance_points_tensor[:,:,1] = torch.clamp(instance_points_tensor[:,:,1], min=-self.max_y,max=self.max_y)
+        return instance_points_tensor
+    @property
+    def fixed_num_sampled_points_ambiguity(self):
+        """
+        return torch.Tensor([N,fixed_num,3]), in xmin, ymin, xmax, ymax form
+            N means the num of instances
+        """
+        assert len(self.instance_list) != 0
+        instance_points_list = []
+        for instance in self.instance_list:
+            distances = np.linspace(0, instance.length, self.fixed_num)
+            if instance.has_z:
+                sampled_points = np.array([list(instance.interpolate(distance).coords) for distance in distances]).reshape(-1, 3)
+            else:
+                sampled_points = np.array([list(instance.interpolate(distance).coords) for distance in distances]).reshape(-1, 2)
+            instance_points_list.append(sampled_points)
+        instance_points_array = np.array(instance_points_list)
+        instance_points_tensor = to_tensor(instance_points_array)
+        instance_points_tensor = instance_points_tensor.to(
+                            dtype=torch.float32)
+        instance_points_tensor[:,:,0] = torch.clamp(instance_points_tensor[:,:,0], min=-self.max_x,max=self.max_x)
+        instance_points_tensor[:,:,1] = torch.clamp(instance_points_tensor[:,:,1], min=-self.max_y,max=self.max_y)
+        instance_points_tensor = instance_points_tensor if is_3d else instance_points_tensor[:,:,:2]
+        instance_points_tensor = instance_points_tensor.unsqueeze(1)
+        return instance_points_tensor
+    @property
+    def fixed_num_sampled_points_torch(self):
+        """
+        return torch.Tensor([N,fixed_num,2]), in xmin, ymin, xmax, ymax form
+            N means the num of instances
+        """
+        assert len(self.instance_list) != 0
+        instance_points_list = []
+        for instance in self.instance_list:
+            # distances = np.linspace(0, instance.length, self.fixed_num)
+            # sampled_points = np.array([list(instance.interpolate(distance).coords) for distance in distances]).reshape(-1, 2)
+            poly_pts = to_tensor(np.array(list(instance.coords)))
+            poly_pts = poly_pts.unsqueeze(0).permute(0,2,1)
+            sampled_pts = torch.nn.functional.interpolate(poly_pts,size=(self.fixed_num),mode='linear',align_corners=True)
+            sampled_pts = sampled_pts.permute(0,2,1).squeeze(0)
+            instance_points_list.append(sampled_pts)
+        # instance_points_array = np.array(instance_points_list)
+        # instance_points_tensor = to_tensor(instance_points_array)
+        instance_points_tensor = torch.stack(instance_points_list,dim=0)
+        instance_points_tensor = instance_points_tensor.to(
+                            dtype=torch.float32)
+        instance_points_tensor[:,:,0] = torch.clamp(instance_points_tensor[:,:,0], min=-self.max_x,max=self.max_x)
+        instance_points_tensor[:,:,1] = torch.clamp(instance_points_tensor[:,:,1], min=-self.max_y,max=self.max_y)
+        return instance_points_tensor
+    @property
+    def shift_fixed_num_sampled_points(self):
+        """
+        return  [instances_num, num_shifts, fixed_num, 2]
+        """
+        fixed_num_sampled_points = self.fixed_num_sampled_points
+        instances_list = []
+        is_poly = False
+        # is_line = False
+        # import pdb;pdb.set_trace()
+        for fixed_num_pts in fixed_num_sampled_points:
+            # [fixed_num, 2]
+            is_poly = fixed_num_pts[0].equal(fixed_num_pts[-1])
+            fixed_num = fixed_num_pts.shape[0]
+            shift_pts_list = []
+            if is_poly:
+                # import pdb;pdb.set_trace()
+                for shift_right_i in range(fixed_num):
+                    shift_pts_list.append(fixed_num_pts.roll(shift_right_i,0))
+            else:
+                shift_pts_list.append(fixed_num_pts)
+                shift_pts_list.append(fixed_num_pts.flip(0))
+            shift_pts = torch.stack(shift_pts_list,dim=0)
+            shift_pts[:,:,0] = torch.clamp(shift_pts[:,:,0], min=-self.max_x,max=self.max_x)
+            shift_pts[:,:,1] = torch.clamp(shift_pts[:,:,1], min=-self.max_y,max=self.max_y)
+            if not is_poly:
+                padding = torch.full([fixed_num-shift_pts.shape[0],fixed_num,shift_pts.shape[-1]], self.padding_value)
+                shift_pts = torch.cat([shift_pts,padding],dim=0)
+                # padding = np.zeros((self.num_samples - len(sampled_points), 2))
+                # sampled_points = np.concatenate([sampled_points, padding], axis=0)
+            instances_list.append(shift_pts)
+        instances_tensor = torch.stack(instances_list, dim=0)
+        instances_tensor = instances_tensor.to(
+                            dtype=torch.float32)
+        return instances_tensor
+    @property
+    def shift_fixed_num_sampled_points_v1(self):
+        """
+        return  [instances_num, num_shifts, fixed_num, 2]
+        """
+        fixed_num_sampled_points = self.fixed_num_sampled_points
+        instances_list = []
+        is_poly = False
+        # is_line = False
+        # import pdb;pdb.set_trace()
+        for fixed_num_pts in fixed_num_sampled_points:
+            # [fixed_num, 2]
+            is_poly = fixed_num_pts[0].equal(fixed_num_pts[-1])
+            pts_num = fixed_num_pts.shape[0]
+            shift_num = pts_num - 1
+            if is_poly:
+                pts_to_shift = fixed_num_pts[:-1,:]
+            shift_pts_list = []
+            if is_poly:
+                for shift_right_i in range(shift_num):
+                    shift_pts_list.append(pts_to_shift.roll(shift_right_i,0))
+            else:
+                shift_pts_list.append(fixed_num_pts)
+                shift_pts_list.append(fixed_num_pts.flip(0))
+            shift_pts = torch.stack(shift_pts_list,dim=0)
+            if is_poly:
+                _, _, num_coords = shift_pts.shape
+                tmp_shift_pts = shift_pts.new_zeros((shift_num, pts_num, num_coords))
+                tmp_shift_pts[:,:-1,:] = shift_pts
+                tmp_shift_pts[:,-1,:] = shift_pts[:,0,:]
+                shift_pts = tmp_shift_pts
+            shift_pts[:,:,0] = torch.clamp(shift_pts[:,:,0], min=-self.max_x,max=self.max_x)
+            shift_pts[:,:,1] = torch.clamp(shift_pts[:,:,1], min=-self.max_y,max=self.max_y)
+            if not is_poly:
+                padding = torch.full([shift_num-shift_pts.shape[0],pts_num,shift_pts.shape[-1]], self.padding_value)
+                shift_pts = torch.cat([shift_pts,padding],dim=0)
+                # padding = np.zeros((self.num_samples - len(sampled_points), 2))
+                # sampled_points = np.concatenate([sampled_points, padding], axis=0)
+            instances_list.append(shift_pts)
+        instances_tensor = torch.stack(instances_list, dim=0)
+        instances_tensor = instances_tensor.to(
+                            dtype=torch.float32)
+        return instances_tensor
+    @property
+    def shift_fixed_num_sampled_points_v2(self):
+        """
+        return  [instances_num, num_shifts, fixed_num, 2]
+        """
+        assert len(self.instance_list) != 0
+        instances_list = []
+        for instance in self.instance_list:
+            distances = np.linspace(0, instance.length, self.fixed_num)
+            poly_pts = np.array(list(instance.coords))
+            start_pts = poly_pts[0]
+            end_pts = poly_pts[-1]
+            is_poly = np.equal(start_pts, end_pts)
+            is_poly = is_poly.all()
+            shift_pts_list = []
+            pts_num, coords_num = poly_pts.shape
+            shift_num = pts_num - 1
+            final_shift_num = self.fixed_num - 1
+            if is_poly:
+                pts_to_shift = poly_pts[:-1,:]
+                for shift_right_i in range(shift_num):
+                    shift_pts = np.roll(pts_to_shift,shift_right_i,axis=0)
+                    pts_to_concat = shift_pts[0]
+                    pts_to_concat = np.expand_dims(pts_to_concat,axis=0)
+                    shift_pts = np.concatenate((shift_pts,pts_to_concat),axis=0)
+                    shift_instance = LineString(shift_pts)
+                    shift_sampled_points = np.array([list(shift_instance.interpolate(distance).coords) for distance in distances]).reshape(-1, coords_num)
+                    shift_pts_list.append(shift_sampled_points)
+                # import pdb;pdb.set_trace()
+            else:
+                sampled_points = np.array([list(instance.interpolate(distance).coords) for distance in distances]).reshape(-1, coords_num)
+                flip_sampled_points = np.flip(sampled_points, axis=0)
+                shift_pts_list.append(sampled_points)
+                shift_pts_list.append(flip_sampled_points)
+            multi_shifts_pts = np.stack(shift_pts_list,axis=0)
+            shifts_num,_,_ = multi_shifts_pts.shape
+            if shifts_num > final_shift_num:
+                index = np.random.choice(multi_shifts_pts.shape[0], final_shift_num, replace=False)
+                multi_shifts_pts = multi_shifts_pts[index]
+            multi_shifts_pts_tensor = to_tensor(multi_shifts_pts)
+            multi_shifts_pts_tensor = multi_shifts_pts_tensor.to(
+                            dtype=torch.float32)
+            multi_shifts_pts_tensor[:,:,0] = torch.clamp(multi_shifts_pts_tensor[:,:,0], min=-self.max_x,max=self.max_x)
+            multi_shifts_pts_tensor[:,:,1] = torch.clamp(multi_shifts_pts_tensor[:,:,1], min=-self.max_y,max=self.max_y)
+            # if not is_poly:
+            if multi_shifts_pts_tensor.shape[0] < final_shift_num:
+                padding = torch.full([final_shift_num-multi_shifts_pts_tensor.shape[0],self.fixed_num,multi_shifts_pts_tensor.shape[-1]], self.padding_value)
+                multi_shifts_pts_tensor = torch.cat([multi_shifts_pts_tensor,padding],dim=0)
+            instances_list.append(multi_shifts_pts_tensor)
+        instances_tensor = torch.stack(instances_list, dim=0)
+        instances_tensor = instances_tensor.to(
+                            dtype=torch.float32)
+        return instances_tensor
+    @property
+    def shift_fixed_num_sampled_points_v3(self):
+        """
+        return  [instances_num, num_shifts, fixed_num, 2]
+        """
+        assert len(self.instance_list) != 0
+        instances_list = []
+        for instance in self.instance_list:
+            distances = np.linspace(0, instance.length, self.fixed_num)
+            poly_pts = np.array(list(instance.coords))
+            start_pts = poly_pts[0]
+            end_pts = poly_pts[-1]
+            is_poly = np.equal(start_pts, end_pts)
+            is_poly = is_poly.all()
+            shift_pts_list = []
+            pts_num, coords_num = poly_pts.shape
+            shift_num = pts_num - 1
+            final_shift_num = self.fixed_num - 1
+            if is_poly:
+                pts_to_shift = poly_pts[:-1,:]
+                for shift_right_i in range(shift_num):
+                    shift_pts = np.roll(pts_to_shift,shift_right_i,axis=0)
+                    pts_to_concat = shift_pts[0]
+                    pts_to_concat = np.expand_dims(pts_to_concat,axis=0)
+                    shift_pts = np.concatenate((shift_pts,pts_to_concat),axis=0)
+                    shift_instance = LineString(shift_pts)
+                    shift_sampled_points = np.array([list(shift_instance.interpolate(distance).coords) for distance in distances]).reshape(-1, coords_num)
+                    shift_pts_list.append(shift_sampled_points)
+                flip_pts_to_shift = np.flip(pts_to_shift, axis=0)
+                for shift_right_i in range(shift_num):
+                    shift_pts = np.roll(flip_pts_to_shift,shift_right_i,axis=0)
+                    pts_to_concat = shift_pts[0]
+                    pts_to_concat = np.expand_dims(pts_to_concat,axis=0)
+                    shift_pts = np.concatenate((shift_pts,pts_to_concat),axis=0)
+                    shift_instance = LineString(shift_pts)
+                    shift_sampled_points = np.array([list(shift_instance.interpolate(distance).coords) for distance in distances]).reshape(-1, coords_num)
+                    shift_pts_list.append(shift_sampled_points)
+                # import pdb;pdb.set_trace()
+            else:
+                sampled_points = np.array([list(instance.interpolate(distance).coords) for distance in distances]).reshape(-1, coords_num)
+                flip_sampled_points = np.flip(sampled_points, axis=0)
+                shift_pts_list.append(sampled_points)
+                shift_pts_list.append(flip_sampled_points)
+            multi_shifts_pts = np.stack(shift_pts_list,axis=0)
+            shifts_num,_,_ = multi_shifts_pts.shape
+            # import pdb;pdb.set_trace()
+            if shifts_num > 2*final_shift_num:
+                index = np.random.choice(shift_num, final_shift_num, replace=False)
+                flip0_shifts_pts = multi_shifts_pts[index]
+                flip1_shifts_pts = multi_shifts_pts[index+shift_num]
+                multi_shifts_pts = np.concatenate((flip0_shifts_pts,flip1_shifts_pts),axis=0)
+            multi_shifts_pts_tensor = to_tensor(multi_shifts_pts)
+            multi_shifts_pts_tensor = multi_shifts_pts_tensor.to(
+                            dtype=torch.float32)
+            multi_shifts_pts_tensor[:,:,0] = torch.clamp(multi_shifts_pts_tensor[:,:,0], min=-self.max_x,max=self.max_x)
+            multi_shifts_pts_tensor[:,:,1] = torch.clamp(multi_shifts_pts_tensor[:,:,1], min=-self.max_y,max=self.max_y)
+            # if not is_poly:
+            if multi_shifts_pts_tensor.shape[0] < 2*final_shift_num:
+                padding = torch.full([final_shift_num*2-multi_shifts_pts_tensor.shape[0],self.fixed_num,multi_shifts_pts_tensor.shape[-1]], self.padding_value)
+                multi_shifts_pts_tensor = torch.cat([multi_shifts_pts_tensor,padding],dim=0)
+            instances_list.append(multi_shifts_pts_tensor)
+        instances_tensor = torch.stack(instances_list, dim=0)
+        instances_tensor = instances_tensor.to(
+                            dtype=torch.float32)
+        return instances_tensor
+    @property
+    def shift_fixed_num_sampled_points_v4(self):
+        """
+        return  [instances_num, num_shifts, fixed_num, 2]
+        """
+        fixed_num_sampled_points = self.fixed_num_sampled_points
+        instances_list = []
+        is_poly = False
+        # is_line = False
+        # import pdb;pdb.set_trace()
+        for fixed_num_pts in fixed_num_sampled_points:
+            # [fixed_num, 2]
+            is_poly = fixed_num_pts[0].equal(fixed_num_pts[-1])
+            pts_num = fixed_num_pts.shape[0]
+            shift_num = pts_num - 1
+            shift_pts_list = []
+            if is_poly:
+                pts_to_shift = fixed_num_pts[:-1,:]
+                for shift_right_i in range(shift_num):
+                    shift_pts_list.append(pts_to_shift.roll(shift_right_i,0))
+                flip_pts_to_shift = pts_to_shift.flip(0)
+                for shift_right_i in range(shift_num):
+                    shift_pts_list.append(flip_pts_to_shift.roll(shift_right_i,0))
+            else:
+                shift_pts_list.append(fixed_num_pts)
+                shift_pts_list.append(fixed_num_pts.flip(0))
+            shift_pts = torch.stack(shift_pts_list,dim=0)
+            if is_poly:
+                _, _, num_coords = shift_pts.shape
+                tmp_shift_pts = shift_pts.new_zeros((shift_num*2, pts_num, num_coords))
+                tmp_shift_pts[:,:-1,:] = shift_pts
+                tmp_shift_pts[:,-1,:] = shift_pts[:,0,:]
+                shift_pts = tmp_shift_pts
+            shift_pts[:,:,0] = torch.clamp(shift_pts[:,:,0], min=-self.max_x,max=self.max_x)
+            shift_pts[:,:,1] = torch.clamp(shift_pts[:,:,1], min=-self.max_y,max=self.max_y)
+            if not is_poly:
+                padding = torch.full([shift_num*2-shift_pts.shape[0],pts_num,shift_pts.shape[-1]], self.padding_value)
+                shift_pts = torch.cat([shift_pts,padding],dim=0)
+                # padding = np.zeros((self.num_samples - len(sampled_points), 2))
+                # sampled_points = np.concatenate([sampled_points, padding], axis=0)
+            instances_list.append(shift_pts)
+        instances_tensor = torch.stack(instances_list, dim=0)
+        instances_tensor = instances_tensor.to(
+                            dtype=torch.float32)
+        return instances_tensor
+    @property
+    def shift_fixed_num_sampled_points_torch(self):
+        """
+        return  [instances_num, num_shifts, fixed_num, 2]
+        """
+        fixed_num_sampled_points = self.fixed_num_sampled_points_torch
+        instances_list = []
+        is_poly = False
+        # is_line = False
+        # import pdb;pdb.set_trace()
+        for fixed_num_pts in fixed_num_sampled_points:
+            # [fixed_num, 2]
+            is_poly = fixed_num_pts[0].equal(fixed_num_pts[-1])
+            fixed_num = fixed_num_pts.shape[0]
+            shift_pts_list = []
+            if is_poly:
+                # import pdb;pdb.set_trace()
+                for shift_right_i in range(fixed_num):
+                    shift_pts_list.append(fixed_num_pts.roll(shift_right_i,0))
+            else:
+                shift_pts_list.append(fixed_num_pts)
+                shift_pts_list.append(fixed_num_pts.flip(0))
+            shift_pts = torch.stack(shift_pts_list,dim=0)
+            shift_pts[:,:,0] = torch.clamp(shift_pts[:,:,0], min=-self.max_x,max=self.max_x)
+            shift_pts[:,:,1] = torch.clamp(shift_pts[:,:,1], min=-self.max_y,max=self.max_y)
+            if not is_poly:
+                padding = torch.full([fixed_num-shift_pts.shape[0],fixed_num,shift_pts.shape[-1]], self.padding_value)
+                shift_pts = torch.cat([shift_pts,padding],dim=0)
+                # padding = np.zeros((self.num_samples - len(sampled_points), 2))
+                # sampled_points = np.concatenate([sampled_points, padding], axis=0)
+            instances_list.append(shift_pts)
+        instances_tensor = torch.stack(instances_list, dim=0)
+        instances_tensor = instances_tensor.to(
+                            dtype=torch.float32)
+        return instances_tensor
+    # @property
+    # def polyline_points(self):
+    #     """
+    #     return [[x0,y0],[x1,y1],...]
+    #     """
+    #     assert len(self.instance_list) != 0
+    #     for instance in self.instance_list:
+class VectorizedAV2LocalMap(object):
+    # Integer label for each map-element class name. The three divider
+    # spellings share label 0; a label of -1 marks an element that
+    # gen_vectorized_samples leaves out of its result.
+    CLASS2LABEL = {
+        'road_divider': 0,
+        'lane_divider': 0,
+        'divider': 0,
+        'ped_crossing': 1,
+        'boundary': 2,
+        'others': -1
+    }
+    def __init__(self,
+                 dataroot,
+                 patch_size,
+                 test_mode=False,
+                 map_classes=['divider','ped_crossing','boundary'],
+                 line_classes=['road_divider', 'lane_divider'],
+                 ped_crossing_classes=['ped_crossing'],
+                 contour_classes=['road_segment', 'lane'],
+                 sample_dist=1,
+                 num_samples=250,
+                 padding=False,
+                 fixed_ptsnum_per_line=-1,
+                 padding_value=-10000,):
+        '''
+        Args:
+            fixed_ptsnum_per_line = -1 : no fixed num
+        '''
+        super().__init__()
+        # self.data_root = dataroot
+        self.test_mode = test_mode
+        if self.test_mode:
+            self.data_root = osp.join(dataroot, "val")
+        else:
+            self.data_root = osp.join(dataroot, "train")
+        self.loader = AV2SensorDataLoader(data_dir=Path(dataroot), labels_dir=Path(dataroot))
+        self.vec_classes = map_classes
+        self.line_classes = line_classes
+        self.ped_crossing_classes = ped_crossing_classes
+        self.polygon_classes = contour_classes
+        self.patch_size = patch_size
+        self.sample_dist = sample_dist
+        self.num_samples = num_samples
+        self.padding = padding
+        self.fixed_num = fixed_ptsnum_per_line
+        self.padding_value = padding_value
+    def gen_vectorized_samples(self, location, map_elements, lidar2global_translation, lidar2global_rotation):
+        '''
+        use lidar2global to get gt map layers
+        av2 lidar2global the same as ego2global
+        location the same as log_id
+        '''
+        # avm = ArgoverseStaticMap.from_map_dir(log_map_dirpath, build_raster=False)
+        map_pose = lidar2global_translation[:2]
+        rotation = Quaternion._from_matrix(lidar2global_rotation)
+        patch_box = (map_pose[0], map_pose[1], self.patch_size[0], self.patch_size[1])
+        patch_angle = quaternion_yaw(rotation) / np.pi * 180
+        # import pdb;pdb.set_trace()
+        vectors = []
+        city_SE2_ego = SE3(lidar2global_rotation, lidar2global_translation)
+        ego_SE3_city = city_SE2_ego.inverse()
+        for vec_class in self.vec_classes:
+            if vec_class == 'divider':
+                line_geom = self.get_map_divider_geom(patch_box, patch_angle, map_elements[vec_class], ego_SE3_city)
+                line_instances_list = self.line_geoms_to_instances(line_geom)     
+                for divider in line_instances_list:
+                    vectors.append((divider, self.CLASS2LABEL.get('divider', -1)))
+            elif vec_class == 'ped_crossing':
+                ped_geom = self.get_map_ped_geom(patch_box, patch_angle, map_elements[vec_class], ego_SE3_city)
+                ped_instance_list = self.ped_poly_geoms_to_instances(ped_geom)
+                for instance in ped_instance_list:
+                    vectors.append((instance, self.CLASS2LABEL.get('ped_crossing', -1)))
+            elif vec_class == 'boundary':
+                polygon_geom = self.get_map_boundary_geom(patch_box, patch_angle, map_elements[vec_class], ego_SE3_city)
+                poly_bound_list = self.bound_poly_geoms_to_instances(polygon_geom)
+                for bound in poly_bound_list:
+                    vectors.append((bound, self.CLASS2LABEL.get('boundary', -1)))
+            else:
+                raise ValueError(f'WRONG vec_class: {vec_class}')
+        # filter out -1
+        filtered_vectors = []
+        gt_pts_loc_3d = []
+        gt_pts_num_3d = []
+        gt_labels = []
+        gt_instance = []
+        for instance, type in vectors:
+            if type != -1:
+                gt_instance.append(instance)
+                gt_labels.append(type)
+        # import pdb;pdb.set_trace()
+        gt_instance = LiDARInstanceLines(gt_instance,self.sample_dist,
+                        self.num_samples, self.padding, self.fixed_num,self.padding_value, patch_size=self.patch_size)
+        anns_results = dict(
+            gt_vecs_pts_loc=gt_instance,
+            gt_vecs_label=gt_labels,
+        )
+        # import pdb;pdb.set_trace()
+        return anns_results
+    def proc_polygon(self, polygon, ego_SE3_city):
+        # import pdb;pdb.set_trace()
+        interiors = []
+        exterior_cityframe = np.array(list(polygon.exterior.coords))
+        exterior_egoframe = ego_SE3_city.transform_point_cloud(exterior_cityframe)
+        for inter in polygon.interiors:
+            inter_cityframe = np.array(list(inter.coords))
+            inter_egoframe = ego_SE3_city.transform_point_cloud(inter_cityframe)
+            interiors.append(inter_egoframe[:,:2])
+        new_polygon = Polygon(exterior_egoframe[:,:2], interiors)
+        return new_polygon
+    def get_map_boundary_geom(self, patch_box, patch_angle, avm, ego_SE3_city):
+        map_boundary_geom = []
+        patch_x = patch_box[0]
+        patch_y = patch_box[1]
+        patch = NuScenesMapExplorer.get_patch_coord(patch_box, patch_angle)
+        # import pdb;pdb.set_trace()
+        polygon_list = []
+        for da in avm:
+            exterior_coords = da
+            # import pdb;pdb.set_trace()
+            interiors = []
+            # import pdb;pdb.set_trace()
+            is_polygon =  np.array_equal(exterior_coords[0],exterior_coords[-1])
+            if is_polygon:
+                polygon = Polygon(exterior_coords, interiors)
+            else:    
+                import pdb;pdb.set_trace()
+                polygon = LineString(exterior_coords)
+                raise ValueError(f'WRONG type: line in boundary')
+            if is_polygon:
+                if polygon.is_valid:
+                    new_polygon = polygon.intersection(patch)
+                    if not new_polygon.is_empty:
+                        # import pdb;pdb.set_trace()
+                        if new_polygon.geom_type is 'Polygon':
+                            if not new_polygon.is_valid:
+                                continue
+                            new_polygon = self.proc_polygon(new_polygon,ego_SE3_city)
+                            if not new_polygon.is_valid:
+                                continue
+                        elif new_polygon.geom_type is 'MultiPolygon':
+                            polygons = []
+                            for single_polygon in new_polygon.geoms:
+                                if not single_polygon.is_valid or single_polygon.is_empty:
+                                    continue
+                                new_single_polygon = self.proc_polygon(single_polygon,ego_SE3_city)
+                                if not new_single_polygon.is_valid:
+                                    continue
+                                polygons.append(new_single_polygon)
+                            if len(polygons) == 0:
+                                continue
+                            new_polygon = MultiPolygon(polygons)
+                            if not new_polygon.is_valid:
+                                continue
+                        else:
+                            raise ValueError('{} is not valid'.format(new_polygon.geom_type))
+                        if new_polygon.geom_type is 'Polygon':
+                            new_polygon = MultiPolygon([new_polygon])
+                        polygon_list.append(new_polygon)
+            else:
+                raise ValueError(f'WRONG type: line in boundary')
+        map_boundary_geom.append(('boundary',polygon_list))
+        return map_boundary_geom
+    def get_map_ped_geom(self, patch_box, patch_angle, avm, ego_SE3_city):
+        map_ped_geom = []
+        patch_x = patch_box[0]
+        patch_y = patch_box[1]
+        patch = NuScenesMapExplorer.get_patch_coord(patch_box, patch_angle)
+        # import pdb;pdb.set_trace()
+        polygon_list = []
+        for pc in avm:
+            exterior_coords = pc
+            interiors = []
+            polygon = Polygon(exterior_coords, interiors)
+            if polygon.is_valid:
+                new_polygon = polygon.intersection(patch)
+                if not new_polygon.is_empty:
+                    if new_polygon.geom_type is 'Polygon':
+                        if not new_polygon.is_valid:
+                            continue
+                        new_polygon = self.proc_polygon(new_polygon,ego_SE3_city)
+                        if not new_polygon.is_valid:
+                            continue
+                    elif new_polygon.geom_type is 'MultiPolygon':
+                        polygons = []
+                        for single_polygon in new_polygon.geoms:
+                            if not single_polygon.is_valid or single_polygon.is_empty:
+                                continue
+                            new_single_polygon = self.proc_polygon(single_polygon,ego_SE3_city)
+                            if not new_single_polygon.is_valid:
+                                continue
+                            polygons.append(new_single_polygon)
+                        if len(polygons) == 0:
+                            continue
+                        new_polygon = MultiPolygon(polygons)
+                        if not new_polygon.is_valid:
+                            continue
+                    else:
+                        raise ValueError('{} is not valid'.format(new_polygon.geom_type))
+                    if new_polygon.geom_type is 'Polygon':
+                        new_polygon = MultiPolygon([new_polygon])
+                    polygon_list.append(new_polygon)
+        map_ped_geom.append(('ped_crossing',polygon_list))
+        return map_ped_geom
+    def proc_line(self, line,ego_SE3_city):
+        # import pdb;pdb.set_trace()
+        new_line_pts_cityframe = np.array(list(line.coords))
+        new_line_pts_egoframe = ego_SE3_city.transform_point_cloud(new_line_pts_cityframe)
+        line = LineString(new_line_pts_egoframe[:,:2]) #TODO
+        return line
+    def get_map_divider_geom(self, patch_box, patch_angle, avm, ego_SE3_city):
+        map_divider_geom = []
+        patch_x = patch_box[0]
+        patch_y = patch_box[1]
+        patch = NuScenesMapExplorer.get_patch_coord(patch_box, patch_angle)
+        line_list = []
+        for ls in avm:
+            line = LineString(ls)
+            if line.is_empty:  # Skip lines without nodes.
+                continue
+            new_line = line.intersection(patch)
+            if not new_line.is_empty:
+                # import pdb;pdb.set_trace()
+                if new_line.geom_type == 'MultiLineString':
+                    for single_line in new_line.geoms:
+                        if single_line.is_empty:
+                            continue
+                        single_line = self.proc_line(single_line,ego_SE3_city)
+                        line_list.append(single_line)
+                else:
+                    new_line = self.proc_line(new_line, ego_SE3_city)
+                    line_list.append(new_line)
+        map_divider_geom.append(('divider',line_list))
+        return map_divider_geom
+    def _one_type_line_geom_to_instances(self, line_geom):
+        line_instances = []
+        for line in line_geom:
+            if not line.is_empty:
+                if line.geom_type == 'MultiLineString':
+                    for single_line in line.geoms:
+                        line_instances.append(single_line)
+                elif line.geom_type == 'LineString':
+                    line_instances.append(line)
+                else:
+                    raise NotImplementedError
+        return line_instances
+    def ped_poly_geoms_to_instances(self, ped_geom):
+        ped = ped_geom[0][1]
+        # union_segments = ops.unary_union(ped)
+        # union_segments = MultiPolygon(ped)
+        max_x = self.patch_size[1] / 2
+        max_y = self.patch_size[0] / 2
+        # local_patch = box(-max_x + 0.2, -max_y + 0.2, max_x - 0.2, max_y - 0.2)
+        local_patch = box(-max_x - 0.2, -max_y - 0.2, max_x + 0.2, max_y + 0.2)
+        exteriors = []
+        interiors = []
+        # if union_segments.geom_type != 'MultiPolygon':
+        #     union_segments = MultiPolygon([union_segments])
+        for segments in ped:
+            if segments.geom_type != 'MultiPolygon':
+                segments = MultiPolygon([segments])
+            for poly in segments.geoms:
+                exteriors.append(poly.exterior)
+                for inter in poly.interiors:
+                    interiors.append(inter)
+        results = []
+        for ext in exteriors:
+            if ext.is_ccw:
+                ext.coords = list(ext.coords)[::-1]
+            lines = ext.intersection(local_patch)
+            if isinstance(lines, MultiLineString):
+                lines = ops.linemerge(lines)
+            results.append(lines)
+        for inter in interiors:
+            if not inter.is_ccw:
+                inter.coords = list(inter.coords)[::-1]
+            lines = inter.intersection(local_patch)
+            if isinstance(lines, MultiLineString):
+                lines = ops.linemerge(lines)
+            results.append(lines)
+        return self._one_type_line_geom_to_instances(results)
+    def bound_poly_geoms_to_instances(self, polygon_geom):
+        # roads = polygon_geom[0][1]
+        # lanes = polygon_geom[1][1]
+        # union_roads = ops.unary_union(roads)
+        # union_lanes = ops.unary_union(lanes)
+        # union_segments = ops.unary_union([union_roads, union_lanes])
+        # import pdb;pdb.set_trace()
+        bounds = polygon_geom[0][1]
+        union_segments = ops.unary_union(bounds)
+        max_x = self.patch_size[1] / 2
+        max_y = self.patch_size[0] / 2
+        local_patch = box(-max_x + 0.2, -max_y + 0.2, max_x - 0.2, max_y - 0.2)
+        exteriors = []
+        interiors = []
+        if union_segments.geom_type != 'MultiPolygon':
+            union_segments = MultiPolygon([union_segments])
+        for poly in union_segments.geoms:
+            exteriors.append(poly.exterior)
+            for inter in poly.interiors:
+                interiors.append(inter)
+        results = []
+        for ext in exteriors:
+            if ext.is_ccw:
+                ext.coords = list(ext.coords)[::-1]
+            lines = ext.intersection(local_patch)
+            if isinstance(lines, MultiLineString):
+                lines = ops.linemerge(lines)
+            results.append(lines)
+        for inter in interiors:
+            if not inter.is_ccw:
+                inter.coords = list(inter.coords)[::-1]
+            lines = inter.intersection(local_patch)
+            if isinstance(lines, MultiLineString):
+                lines = ops.linemerge(lines)
+            results.append(lines)
+        return self._one_type_line_geom_to_instances(results)
+    def line_geoms_to_instances(self, line_geom):
+        lines = line_geom[0][1]
+        multiline = MultiLineString(lines)
+        union_lines = ops.unary_union(multiline)
+        if union_lines.geom_type == 'LineString':
+            return self._one_type_line_geom_to_instances([union_lines])
+        before_num = len(union_lines.geoms)
+        # import pdb;pdb.set_trace()
+        merged_lines = ops.linemerge(union_lines)
+        if merged_lines.geom_type == 'LineString':
+            return self._one_type_line_geom_to_instances([merged_lines])
+        after_num = len(merged_lines.geoms)
+        # import pdb;pdb.set_trace()
+        while after_num != before_num:
+            before_num = len(merged_lines.geoms)
+            merged_lines = ops.unary_union(merged_lines)
+            if merged_lines.geom_type == 'LineString':
+                break
+            merged_lines = ops.linemerge(merged_lines)
+            if merged_lines.geom_type == 'LineString':
+                break
+            after_num = len(merged_lines.geoms)
+        return self._one_type_line_geom_to_instances([merged_lines])
+    def sample_pts_from_line(self, line):
+        if self.fixed_num < 0:
+            distances = np.arange(0, line.length, self.sample_dist)
+            sampled_points = np.array([list(line.interpolate(distance).coords) for distance in distances]).reshape(-1, 2)
+        else:
+            # fixed number of points, so distance is line.length / self.fixed_num
+            distances = np.linspace(0, line.length, self.fixed_num)
+            sampled_points = np.array([list(line.interpolate(distance).coords) for distance in distances]).reshape(-1, 2)
+        num_valid = len(sampled_points)
+        if not self.padding or self.fixed_num > 0:
+            # fixed num sample can return now!
+            return sampled_points, num_valid
+        # fixed distance sampling need padding!
+        num_valid = len(sampled_points)
+        if self.fixed_num < 0:
+            if num_valid < self.num_samples:
+                padding = np.zeros((self.num_samples - len(sampled_points), 2))
+                sampled_points = np.concatenate([sampled_points, padding], axis=0)
+            else:
+                sampled_points = sampled_points[:self.num_samples, :]
+                num_valid = self.num_samples
+        return sampled_points, num_valid
+@DATASETS.register_module()
+class CustomAV2LocalMapDataset(CustomNuScenesDataset):
+    r"""AV2 local map dataset built on ``CustomNuScenesDataset``.
+    This dataset adds static map elements.
+    """
+    MAPCLASSES = ('divider',)
+    def __init__(self,
+                 map_ann_file=None, 
+                 queue_length=4, 
+                 code_size=2,
+                 bev_size=(200, 200), 
+                 pc_range=[-51.2, -51.2, -5.0, 51.2, 51.2, 3.0],
+                 overlap_test=False, 
+                 fixed_ptsnum_per_line=-1,
+                 eval_use_same_gt_sample_num_flag=False,
+                 padding_value=-10000,
+                 map_classes=None,
+                 *args, 
+                 **kwargs):
+        super().__init__(*args, **kwargs)
+        self.map_ann_file = map_ann_file
+        self.code_size = code_size
+        self.queue_length = queue_length
+        self.overlap_test = overlap_test
+        self.bev_size = bev_size
+        self.MAPCLASSES = self.get_map_classes(map_classes)
+        self.NUM_MAPCLASSES = len(self.MAPCLASSES)
+        self.pc_range = pc_range
+        patch_h = pc_range[4]-pc_range[1]
+        patch_w = pc_range[3]-pc_range[0]
+        self.patch_size = (patch_h, patch_w)
+        self.padding_value = padding_value
+        self.fixed_num = fixed_ptsnum_per_line
+        self.eval_use_same_gt_sample_num_flag = eval_use_same_gt_sample_num_flag
+        self.vector_map = VectorizedAV2LocalMap(kwargs['data_root'], 
+                            patch_size=self.patch_size, test_mode=self.test_mode, 
+                            map_classes=self.MAPCLASSES, 
+                            fixed_ptsnum_per_line=fixed_ptsnum_per_line,
+                            padding_value=self.padding_value)
+        self.is_vis_on_test = False
+    def load_annotations(self, ann_file):
+        """Load annotations from ann_file.
+        Args:
+            ann_file (str): Path of the annotation file.
+        Returns:
+            list[dict]: List of annotations sorted by timestamps.
+        """
+        # import pdb;pdb.set_trace()
+        data = mmcv.load(ann_file)
+        # import pdb;pdb.set_trace()
+        data_infos = list(sorted(data['samples'], key=lambda e: e['timestamp']))
+        data_infos = data_infos[::self.load_interval]
+        # data_infos = [ data_info.update(dict(token= str(data_info['timestamp']+data_info['log_id'])))  for data_info in data_infos]
+        self.id2map = data['id2map']
+        self.metadata = None
+        self.version = None
+        return data_infos
+    @classmethod
+    def get_map_classes(cls, map_classes=None):
+        """Get class names of current dataset.
+        Args:
+            classes (Sequence[str] | str | None): If classes is None, use
+                default CLASSES defined by builtin dataset. If classes is a
+                string, take it as a file name. The file contains the name of
+                classes where each line contains one class name. If classes is
+                a tuple or list, override the CLASSES defined by the dataset.
+        Return:
+            list[str]: A list of class names.
+        """
+        if map_classes is None:
+            return cls.MAPCLASSES
+        if isinstance(map_classes, str):
+            # take it as a file path
+            class_names = mmcv.list_from_file(map_classes)
+        elif isinstance(map_classes, (tuple, list)):
+            class_names = map_classes
+        else:
+            raise ValueError(f'Unsupported type {type(map_classes)} of map classes.')
+        return class_names
+    def vectormap_pipeline(self, example, input_dict):
+        '''
+        `example` type: <class 'dict'>
+            keys: 'img_metas', 'gt_bboxes_3d', 'gt_labels_3d', 'img';
+                  all keys type is 'DataContainer';
+                  'img_metas' cpu_only=True, type is dict, others are false;
+                  'gt_labels_3d' shape torch.size([num_samples]), stack=False,
+                                padding_value=0, cpu_only=False
+                  'gt_bboxes_3d': stack=False, cpu_only=True
+        '''
+        # import pdb;pdb.set_trace()
+        location = input_dict['log_id']
+        e2g_translation = input_dict['e2g_translation']
+        e2g_rotation = input_dict['e2g_rotation']
+        map_elements = self.id2map[location]
+        anns_results = self.vector_map.gen_vectorized_samples(location, map_elements, e2g_translation, e2g_rotation)
+        '''
+        anns_results, type: dict
+            'gt_vecs_pts_loc': list[num_vecs], vec with num_points*2 coordinates
+            'gt_vecs_pts_num': list[num_vecs], vec with num_points
+            'gt_vecs_label': list[num_vecs], vec with cls index
+        '''
+        gt_vecs_label = to_tensor(anns_results['gt_vecs_label'])
+        if isinstance(anns_results['gt_vecs_pts_loc'], LiDARInstanceLines):
+            gt_vecs_pts_loc = anns_results['gt_vecs_pts_loc']
+        else:
+            gt_vecs_pts_loc = to_tensor(anns_results['gt_vecs_pts_loc'])
+            try:
+                gt_vecs_pts_loc = gt_vecs_pts_loc.flatten(1).to(dtype=torch.float32)
+            except:
+                # empty tensor, will be passed in train, 
+                # but we preserve it for test
+                # import pdb;pdb.set_trace()
+                gt_vecs_pts_loc = gt_vecs_pts_loc
+        # import ipdb;ipdb.set_trace()
+        example['gt_labels_3d'] = DC(gt_vecs_label, cpu_only=False)
+        example['gt_bboxes_3d'] = DC(gt_vecs_pts_loc, cpu_only=True)
+        # import pdb;pdb.set_trace()
+        # if self.is_vis_on_test:
+        #     lidar2global_translation = to_tensor(lidar2global_translation)
+        #     example['lidar2global_translation'] = DC(lidar2global_translation, cpu_only=True)
+        # else:
+        # example['img_metas'].data['lidar2global_translation'] = lidar2global_translation
+        return example
+    def prepare_train_data(self, index):
+        """
+        Training data preparation.
+        Args:
+            index (int): Index for accessing the target data.
+        Returns:
+            dict: Training data dict of the corresponding index.
+        """
+        data_queue = []
+        # temporal aug
+        prev_indexs_list = list(range(index-self.queue_length, index))
+        random.shuffle(prev_indexs_list)
+        prev_indexs_list = sorted(prev_indexs_list[1:], reverse=True)
+        ##
+        input_dict = self.get_data_info(index)
+        if input_dict is None:
+            return None
+        frame_idx = input_dict['timestamp']
+        scene_token = input_dict['log_id']
+        self.pre_pipeline(input_dict)
+        example = self.pipeline(input_dict)
+        # import pdb;pdb.set_trace()
+        example = self.vectormap_pipeline(example,input_dict)
+        if self.filter_empty_gt and \
+                (example is None or ~(example['gt_labels_3d']._data != -1).any()):
+            return None
+        data_queue.insert(0, example)
+        return self.union2one(data_queue)
+    def union2one(self, queue):
+        """
+        convert sample queue into one single sample.
+        """
+        imgs_list = [each['img'].data for each in queue]
+        metas_map = {}
+        prev_pos = None
+        prev_angle = None
+        for i, each in enumerate(queue):
+            metas_map[i] = each['img_metas'].data
+            if i == 0:
+                metas_map[i]['prev_bev'] = False
+                prev_pos = copy.deepcopy(metas_map[i]['can_bus'][:3])
+                prev_angle = copy.deepcopy(metas_map[i]['can_bus'][-1])
+                metas_map[i]['can_bus'][:3] = 0
+                metas_map[i]['can_bus'][-1] = 0
+            else:
+                metas_map[i]['prev_bev'] = True
+                tmp_pos = copy.deepcopy(metas_map[i]['can_bus'][:3])
+                tmp_angle = copy.deepcopy(metas_map[i]['can_bus'][-1])
+                metas_map[i]['can_bus'][:3] -= prev_pos
+                metas_map[i]['can_bus'][-1] -= prev_angle
+                prev_pos = copy.deepcopy(tmp_pos)
+                prev_angle = copy.deepcopy(tmp_angle)
+        queue[-1]['img'] = DC(torch.stack(imgs_list),
+                              cpu_only=False, stack=True)
+        queue[-1]['img_metas'] = DC(metas_map, cpu_only=True)
+        queue = queue[-1]
+        return queue
+    def get_data_info(self, index):
+        """Get data info according to the given index.
+        Args:
+            index (int): Index of the sample data to get.
+        Returns:
+            dict: Data information that will be passed to the data \
+                preprocessing pipelines. It includes the following keys:
+                - sample_idx (str): Sample index.
+                - pts_filename (str): Filename of point clouds.
+                - sweeps (list[dict]): Infos of sweeps.
+                - timestamp (float): Sample timestamp.
+                - img_filename (str, optional): Image filename.
+                - lidar2img (list[np.ndarray], optional): Transformations \
+                    from lidar to different cameras.
+                - ann_info (dict): Annotation info.
+        """
+        info = self.data_infos[index]
+        # standard protocal modified from SECOND.Pytorch
+        input_dict = dict(
+            timestamp=info['timestamp'],
+            e2g_translation=info['e2g_translation'],
+            e2g_rotation=info['e2g_rotation'],
+            log_id=info['log_id'],
+            scene_token=info['log_id'],
+        )
+        if self.modality['use_camera']:
+            image_paths = []
+            cam_intrinsics = []
+            lidar2img_rts = []
+            lidar2cam_rts = []
+            cam_types = []
+            for cam_type, cam_info in info['cams'].items():
+                image_paths.append(cam_info['img_fpath'])
+                # camera intrinsics
+                camera_intrinsics = np.eye(4).astype(np.float32)
+                camera_intrinsics[:3, :3] = cam_info["intrinsics"]
+                # input_dict["camera_intrinsics"].append(camera_intrinsics)
+                # ego2img, ego = lidar
+                lidar2cam_rt = cam_info['extrinsics']
+                intrinsic = cam_info['intrinsics']
+                viewpad = np.eye(4)
+                viewpad[:intrinsic.shape[0], :intrinsic.shape[1]] = intrinsic
+                lidar2img_rt = (viewpad @ lidar2cam_rt)
+                lidar2img_rts.append(lidar2img_rt)
+                lidar2cam_rts.append(lidar2cam_rt)
+                cam_intrinsics.append(viewpad)
+                cam_types.append(cam_type)
+            input_dict.update(
+                dict(
+                    img_filename=image_paths,
+                    lidar2img=lidar2img_rts,
+                    cam_intrinsic=cam_intrinsics,
+                    lidar2cam=lidar2cam_rts,
+                    cam_type=cam_types,
+                ))
+        if not self.test_mode:
+            # annos = self.get_ann_info(index)
+            input_dict['ann_info'] = dict()
+        translation = input_dict['e2g_translation']
+        can_bus = np.ones(18)
+        # can_bus.extend(translation.tolist())
+        can_bus[:3] = translation
+        rotation = Quaternion._from_matrix(input_dict['e2g_rotation'])
+        can_bus[3:7] = rotation
+        patch_angle = quaternion_yaw(rotation) / np.pi * 180
+        if patch_angle < 0:
+            patch_angle += 360
+        can_bus[-2] = patch_angle / 180 * np.pi
+        can_bus[-1] = patch_angle
+        input_dict['can_bus'] = can_bus
+        # import pdb;pdb.set_trace()
+        return input_dict
+    def prepare_test_data(self, index):
+        """Prepare data for testing.
+        Args:
+            index (int): Index for accessing the target data.
+        Returns:
+            dict: Testing data dict of the corresponding index.
+        """
+        input_dict = self.get_data_info(index)
+        self.pre_pipeline(input_dict)
+        example = self.pipeline(input_dict)
+        if self.is_vis_on_test:
+            example = self.vectormap_pipeline(example, input_dict)
+        return example
+    def __getitem__(self, idx):
+        """Get item from infos according to the given index.
+        Returns:
+            dict: Data dictionary of the corresponding index.
+        """
+        if self.test_mode:
+            return self.prepare_test_data(idx)
+        while True:
+            data = self.prepare_train_data(idx)
+            if data is None:
+                idx = self._rand_another(idx)
+                continue
+            return data
+    def _format_gt(self):
+        gt_annos = []
+        # import pdb;pdb.set_trace()
+        print('Start to convert gt map format...')
+        assert self.map_ann_file is not None
+        if (not os.path.exists(self.map_ann_file)) :
+            dataset_length = len(self)
+            prog_bar = mmcv.ProgressBar(dataset_length)
+            mapped_class_names = self.MAPCLASSES
+            for sample_id in range(dataset_length):
+                sample_token = self.data_infos[sample_id]['token']
+                gt_anno = {}
+                gt_anno['sample_token'] = sample_token
+                # gt_sample_annos = []
+                gt_sample_dict = {}
+                gt_sample_dict = self.vectormap_pipeline(gt_sample_dict, self.data_infos[sample_id])
+                gt_labels = gt_sample_dict['gt_labels_3d'].data.numpy()
+                gt_vecs = gt_sample_dict['gt_bboxes_3d'].data.instance_list
+                # import pdb;pdb.set_trace()
+                gt_vec_list = []
+                for i, (gt_label, gt_vec) in enumerate(zip(gt_labels, gt_vecs)):
+                    name = mapped_class_names[gt_label]
+                    anno = dict(
+                        pts=np.array(list(gt_vec.coords))[:,:self.code_size],
+                        pts_num=len(list(gt_vec.coords)),
+                        cls_name=name,
+                        type=gt_label,
+                    )
+                    gt_vec_list.append(anno)
+                gt_anno['vectors']=gt_vec_list
+                gt_annos.append(gt_anno)
+                prog_bar.update()
+            nusc_submissions = {
+                'GTs': gt_annos
+            }
+            print('\n GT anns writes to', self.map_ann_file)
+            mmcv.dump(nusc_submissions, self.map_ann_file)
+        else:
+            print(f'{self.map_ann_file} exist, not update')
+    def _format_bbox(self, results, jsonfile_prefix=None):
+        """Convert the results to the standard format.
+        Args:
+            results (list[dict]): Testing results of the dataset.
+            jsonfile_prefix (str): The prefix of the output jsonfile.
+                You can specify the output directory/filename by
+                modifying the jsonfile_prefix. Default: None.
+        Returns:
+            str: Path of the output json file.
+        """
+        assert self.map_ann_file is not None
+        pred_annos = []
+        mapped_class_names = self.MAPCLASSES
+        # import pdb;pdb.set_trace()
+        print('Start to convert map detection format...')
+        for sample_id, det in enumerate(mmcv.track_iter_progress(results)):
+            pred_anno = {}
+            vecs = output_to_vecs(det)
+            sample_token = self.data_infos[sample_id]['token']
+            pred_anno['sample_token'] = sample_token
+            pred_vec_list=[]
+            for i, vec in enumerate(vecs):
+                name = mapped_class_names[vec['label']]
+                anno = dict(
+                    # sample_token=sample_token,
+                    pts=vec['pts'],
+                    pts_num=len(vec['pts']),
+                    cls_name=name,
+                    type=vec['label'],
+                    confidence_level=vec['score'])
+                pred_vec_list.append(anno)
+                # annos.append(nusc_anno)
+            # nusc_annos[sample_token] = annos
+            pred_anno['vectors'] = pred_vec_list
+            pred_annos.append(pred_anno)
+        if not os.path.exists(self.map_ann_file):
+            self._format_gt()
+        else:
+            print(f'{self.map_ann_file} exist, not update')
+        # with open(self.map_ann_file,'r') as f:
+        #     GT_anns = json.load(f)
+        # gt_annos = GT_anns['GTs']
+        nusc_submissions = {
+            'meta': self.modality,
+            'results': pred_annos,
+            # 'GTs': gt_annos
+        }
+        mmcv.mkdir_or_exist(jsonfile_prefix)
+        res_path = osp.join(jsonfile_prefix, 'nuscmap_results.json')
+        print('Results writes to', res_path)
+        mmcv.dump(nusc_submissions, res_path)
+        return res_path
+    def to_gt_vectors(self,
+                      gt_dict):
+        # import pdb;pdb.set_trace()
+        gt_labels = gt_dict['gt_labels_3d'].data
+        gt_instances = gt_dict['gt_bboxes_3d'].data.instance_list
+        gt_vectors = []
+        for gt_instance, gt_label in zip(gt_instances, gt_labels):
+            pts, pts_num = sample_pts_from_line(gt_instance, patch_size=self.patch_size)
+            gt_vectors.append({
+                'pts': pts,
+                'pts_num': pts_num,
+                'type': int(gt_label)
+            })
+        vector_num_list = {}
+        for i in range(self.NUM_MAPCLASSES):
+            vector_num_list[i] = []
+        for vec in gt_vectors:
+            if vector['pts_num'] >= 2:
+                vector_num_list[vector['type']].append((LineString(vector['pts'][:vector['pts_num']]), vector.get('confidence_level', 1)))
+        return gt_vectors
+    def _evaluate_single(self,
+                         result_path,
+                         logger=None,
+                         metric='chamfer',
+                         result_name='pts_bbox'):
+        """Evaluation for a single model in nuScenes protocol.
+        Args:
+            result_path (str): Path of the result file.
+            logger (logging.Logger | str | None): Logger used for printing
+                related information during evaluation. Default: None.
+            metric (str): Metric name used for evaluation. Default: 'bbox'.
+            result_name (str): Result name in the metric prefix.
+                Default: 'pts_bbox'.
+        Returns:
+            dict: Dictionary of evaluation details.
+        """
+        from projects.mmdet3d_plugin.datasets.map_utils.mean_ap import eval_map
+        from projects.mmdet3d_plugin.datasets.map_utils.mean_ap import format_res_gt_by_classes
+        result_path = osp.abspath(result_path)
+        # import pdb;pdb.set_trace()
+        detail = dict()
+        print('Formating results & gts by classes')
+        with open(result_path,'r') as f:
+            pred_results = json.load(f)
+        gen_results = pred_results['results']
+        with open(self.map_ann_file,'r') as ann_f:
+            gt_anns = json.load(ann_f)
+        annotations = gt_anns['GTs']
+        cls_gens, cls_gts = format_res_gt_by_classes(result_path,
+                                                     gen_results,
+                                                     annotations,
+                                                     cls_names=self.MAPCLASSES,
+                                                     num_pred_pts_per_instance=self.fixed_num,
+                                                     eval_use_same_gt_sample_num_flag=self.eval_use_same_gt_sample_num_flag,
+                                                     pc_range=self.pc_range)
+        metrics = metric if isinstance(metric, list) else [metric]
+        allowed_metrics = ['chamfer', 'iou']
+        for metric in metrics:
+            if metric not in allowed_metrics:
+                raise KeyError(f'metric {metric} is not supported')
+        for metric in metrics:
+            print('-*'*10+f'use metric:{metric}'+'-*'*10)
+            if metric == 'chamfer':
+                thresholds = [0.5,1.0,1.5]
+            elif metric == 'iou':
+                thresholds= np.linspace(.5, 0.95, int(np.round((0.95 - .5) / .05)) + 1, endpoint=True)
+            cls_aps = np.zeros((len(thresholds),self.NUM_MAPCLASSES))
+            for i, thr in enumerate(thresholds):
+                print('-*'*10+f'threshhold:{thr}'+'-*'*10)
+                mAP, cls_ap = eval_map(
+                                gen_results,
+                                annotations,
+                                cls_gens,
+                                cls_gts,
+                                threshold=thr,
+                                cls_names=self.MAPCLASSES,
+                                logger=logger,
+                                num_pred_pts_per_instance=self.fixed_num,
+                                pc_range=self.pc_range,
+                                metric=metric)
+                for j in range(self.NUM_MAPCLASSES):
+                    cls_aps[i, j] = cls_ap[j]['ap']
+            for i, name in enumerate(self.MAPCLASSES):
+                print('{}: {}'.format(name, cls_aps.mean(0)[i]))
+                detail['AV2Map_{}/{}_AP'.format(metric,name)] =  cls_aps.mean(0)[i]
+            print('map: {}'.format(cls_aps.mean(0).mean()))
+            detail['AV2Map_{}/mAP'.format(metric)] = cls_aps.mean(0).mean()
+            for i, name in enumerate(self.MAPCLASSES):
+                for j, thr in enumerate(thresholds):
+                    if metric == 'chamfer':
+                        detail['AV2Map_{}/{}_AP_thr_{}'.format(metric,name,thr)]=cls_aps[j][i]
+                    elif metric == 'iou':
+                        if thr == 0.5 or thr == 0.75:
+                            detail['AV2Map_{}/{}_AP_thr_{}'.format(metric,name,thr)]=cls_aps[j][i]
+        return detail
+    def evaluate(self,
+                 results,
+                 metric='bbox',
+                 logger=None,
+                 jsonfile_prefix=None,
+                 result_names=['pts_bbox'],
+                 show=False,
+                 out_dir=None,
+                 pipeline=None):
+        """Format ``results``, evaluate them and return the merged metrics.
+        Args:
+            results (list[dict]): Testing results of the dataset.
+            metric (str | list[str]): Metrics to be evaluated; forwarded
+                unchanged to ``self._evaluate_single``.
+            logger (logging.Logger | str | None): Accepted for interface
+                compatibility; this method does not use it.
+            jsonfile_prefix (str | None): Prefix of the result files,
+                forwarded to ``self.format_results``. Default: None.
+            result_names (list[str]): Keys of the formatted result files
+                that are evaluated when ``format_results`` returns a dict.
+                Default: ['pts_bbox'].
+            show (bool): Whether to visualize.
+                Default: False.
+            out_dir (str): Path to save the visualization results.
+                Default: None.
+            pipeline (list[dict], optional): raw data loading for showing.
+                Default: None.
+        Returns:
+            dict[str, float]: Results of each evaluation metric. When several
+                result names are evaluated their metrics are merged into one
+                dict (later names win on key collisions).
+        Raises:
+            TypeError: If ``format_results`` returns neither a dict nor a str.
+        """
+        result_files, tmp_dir = self.format_results(results, jsonfile_prefix)
+        try:
+            if isinstance(result_files, dict):
+                results_dict = dict()
+                for name in result_names:
+                    print('Evaluating bboxes of {}'.format(name))
+                    ret_dict = self._evaluate_single(result_files[name], metric=metric)
+                    # Merge inside the loop so every evaluated name contributes,
+                    # not only the last one.
+                    results_dict.update(ret_dict)
+            elif isinstance(result_files, str):
+                results_dict = self._evaluate_single(result_files, metric=metric)
+            else:
+                raise TypeError(
+                    'result_files must be a dict or a str, but got {}'.format(
+                        type(result_files).__name__))
+        finally:
+            # Release the temporary directory even when evaluation fails.
+            if tmp_dir is not None:
+                tmp_dir.cleanup()
+        if show:
+            self.show(results, out_dir, pipeline=pipeline)
+        return results_dict
+def output_to_vecs(detection):
+    """Split one detection dict into a list of per-instance dicts.
+    Args:
+        detection (dict): Must provide ``boxes_3d``, ``scores_3d``,
+            ``labels_3d`` and ``pts_3d``; each value is converted with
+            ``.numpy()`` and indexed along its first axis.
+    Returns:
+        list[dict]: One dict per row of ``boxes_3d`` with the keys
+            ``bbox``, ``label``, ``score`` and ``pts``.
+    """
+    boxes, point_sets, class_ids, confidences = (
+        detection[key].numpy()
+        for key in ('boxes_3d', 'pts_3d', 'labels_3d', 'scores_3d'))
+    # The number of boxes drives the iteration, as in a plain index loop.
+    return [
+        dict(
+            bbox=boxes[row],  # xyxy
+            label=class_ids[row],
+            score=confidences[row],
+            pts=point_sets[row],
+        )
+        for row in range(boxes.shape[0])
+    ]
+def sample_pts_from_line(line,
+                         fixed_num=-1,
+                         sample_dist=1,
+                         normalize=False,
+                         patch_size=None,
+                         padding=False,
+                         num_samples=250,):
+    """Sample points along ``line`` at a fixed spacing or a fixed count.
+    Args:
+        line: Geometry exposing ``length``, ``has_z`` and
+            ``interpolate(distance).coords`` (e.g. a shapely LineString).
+        fixed_num (int): If negative, sample every ``sample_dist`` along the
+            line; otherwise sample ``fixed_num`` evenly spaced points.
+        sample_dist (float): Spacing used when ``fixed_num`` is negative.
+        normalize (bool): If True, divide x by ``patch_size[1]`` and y by
+            ``patch_size[0]``. The division is applied exactly once.
+        patch_size (sequence): Required when ``normalize`` is True.
+        padding (bool): If True and ``fixed_num`` is not positive, the result
+            is zero-padded or truncated to ``num_samples`` rows.
+        num_samples (int): Row count of the padded result.
+    Returns:
+        tuple: ``(points, num_valid)``. Without padding (or with a positive
+            ``fixed_num``) ``points`` keeps every coordinate column; on the
+            padded path only the first two columns are returned.
+            ``num_valid`` is the number of rows that are real samples.
+    """
+    if fixed_num < 0:
+        distances = np.arange(0, line.length, sample_dist)
+    else:
+        # fixed number of points, so distance is line.length / fixed_num
+        distances = np.linspace(0, line.length, fixed_num)
+    coords_dim = 3 if line.has_z else 2
+    sampled_points = np.array(
+        [list(line.interpolate(distance).coords) for distance in distances]
+    ).reshape(-1, coords_dim)
+    if normalize:
+        sampled_points[:, :2] = sampled_points[:, :2] / np.array([patch_size[1], patch_size[0]])
+    num_valid = len(sampled_points)
+    if not padding or fixed_num > 0:
+        # fixed num sample can return now!
+        return sampled_points, num_valid
+    # fixed distance sampling need padding!
+    if fixed_num < 0:
+        if num_valid < num_samples:
+            # Zero rows are unaffected by normalisation, so padding after the
+            # single normalisation above is sufficient.
+            pad_rows = np.zeros((num_samples - num_valid, sampled_points.shape[-1]))
+            sampled_points = np.concatenate([sampled_points, pad_rows], axis=0)
+        else:
+            sampled_points = sampled_points[:num_samples, :]
+            num_valid = num_samples
+    return sampled_points[:, :2], num_valid
\ No newline at end of file
--- a/docker-hub/MapTRv2/MapTR/projects/mmdet3d_plugin/datasets/av2_offlinemap_dataset.py
+++ b/docker-hub/MapTRv2/MapTR/projects/mmdet3d_plugin/datasets/av2_offlinemap_dataset.py
+import copy
+import numpy as np
+from mmdet.datasets import DATASETS
+from mmdet3d.datasets import NuScenesDataset
+import mmcv
+import os
+from os import path as osp
+from mmdet.datasets import DATASETS
+import torch
+import numpy as np
+from nuscenes.eval.common.utils import quaternion_yaw, Quaternion
+from projects.mmdet3d_plugin.models.utils.visual import save_tensor
+from mmcv.parallel import DataContainer as DC
+import random
+from .nuscenes_dataset import CustomNuScenesDataset
+from nuscenes.map_expansion.map_api import NuScenesMap, NuScenesMapExplorer
+from nuscenes.eval.common.utils import quaternion_yaw, Quaternion
+from shapely import affinity, ops
+from shapely.geometry import Polygon, LineString, box, MultiPolygon, MultiLineString
+from mmdet.datasets.pipelines import to_tensor
+import json
+from pathlib import Path
+from av2.datasets.sensor.av2_sensor_dataloader import AV2SensorDataLoader
+from av2.map.lane_segment import LaneMarkType, LaneSegment
+from av2.map.map_api import ArgoverseStaticMap
+from av2.geometry.se3 import SE3
+import av2.geometry.interpolate as interp_utils
+import cv2
+def perspective(cam_coords, proj_mat):
+    """Project homogeneous points with ``proj_mat`` and return pixel coordinates.
+    Args:
+        cam_coords: Array of column vectors; ``proj_mat @ cam_coords`` must
+            be defined and yield at least three rows.
+        proj_mat: Projection matrix applied from the left.
+    Returns:
+        Array with one ``(u, v)`` row per point whose projected third
+        coordinate is strictly positive; other points are dropped.
+    """
+    projected = proj_mat @ cam_coords
+    in_front = projected[2, :] > 0
+    projected = projected[:, in_front]
+    # Small epsilon keeps the perspective divide finite for tiny depths.
+    depth = projected[2, :] + 1e-7
+    uv = projected[:2, :] / depth
+    return uv.transpose(1, 0)
+class LiDARInstanceLines(object):
+    """Line instance in LIDAR coordinates.
+    Wraps a list of line geometries plus their labels and exposes several
+    tensor views of them (end points, bounding boxes, fixed-count samples and
+    "shifted" equivalents of each sample sequence). Every view clamps x to
+    ``[-max_x, max_x]``, y to ``[-max_y, max_y]`` and, when a z column is
+    present, z to ``[min_z, max_z]``.
+    """
+    def __init__(self,
+                 instance_line_list,
+                 instance_labels,
+                 sample_dist=1,
+                 num_samples=250,
+                 padding=False,
+                 fixed_num=-1,
+                 padding_value=-10000,
+                 patch_size=None,
+                 code_size=2,
+                 min_z=-5,
+                 max_z=3,):
+        """Store the instances and the sampling / clamping configuration.
+        Args:
+            instance_line_list (list): Line geometries; when non-empty the
+                first element must be a ``LineString``.
+            instance_labels (sequence): One label per instance (read by
+                ``shift_fixed_num_sampled_points_v2``).
+            sample_dist, num_samples, padding: Stored as attributes; not
+                read by any method of this class.
+            fixed_num (int): Number of points sampled per instance.
+            padding_value: Fill value for padded shift slots.
+            patch_size (sequence): Required. ``patch_size[1] / 2`` bounds x
+                and ``patch_size[0] / 2`` bounds y.
+            code_size (int): Number of coordinate columns kept by the
+                properties that slice their output.
+            min_z, max_z: Clamp range for the z column.
+        """
+        assert isinstance(instance_line_list, list)
+        assert patch_size is not None
+        if len(instance_line_list) != 0:
+            assert isinstance(instance_line_list[0], LineString)
+        self.patch_size = patch_size
+        self.max_x = self.patch_size[1] / 2
+        self.max_y = self.patch_size[0] / 2
+        self.sample_dist = sample_dist
+        self.num_samples = num_samples
+        self.padding = padding
+        self.fixed_num = fixed_num
+        self.padding_value = padding_value
+        self.instance_list = instance_line_list
+        self.code_size = code_size
+        self.min_z = min_z
+        self.max_z = max_z
+        self.instance_labels = instance_labels
+    def _clamp_to_patch(self, points):
+        """Clamp the coordinate columns of ``points`` in place and return it."""
+        points[..., 0] = torch.clamp(points[..., 0], min=-self.max_x, max=self.max_x)
+        points[..., 1] = torch.clamp(points[..., 1], min=-self.max_y, max=self.max_y)
+        # 2D lines yield two columns only; indexing column 2 would raise.
+        if points.shape[-1] > 2:
+            points[..., 2] = torch.clamp(points[..., 2], min=self.min_z, max=self.max_z)
+        return points
+    @staticmethod
+    def _interpolate_along(line, distances, coords_num):
+        """Return the points of ``line`` at ``distances`` as [len(distances), coords_num]."""
+        return np.array(
+            [list(line.interpolate(distance).coords) for distance in distances]
+        ).reshape(-1, coords_num)
+    def _sample_fixed_num(self, instance):
+        """Sample ``self.fixed_num`` evenly spaced points along ``instance``."""
+        distances = np.linspace(0, instance.length, self.fixed_num)
+        return self._interpolate_along(instance, distances, 3 if instance.has_z else 2)
+    def _stack_fixed_num_samples(self):
+        """Sample every instance and return a clamped float32 [N, fixed_num, C] tensor."""
+        instance_points_array = np.array(
+            [self._sample_fixed_num(instance) for instance in self.instance_list])
+        instance_points_tensor = torch.from_numpy(instance_points_array).to(dtype=torch.float32)
+        return self._clamp_to_patch(instance_points_tensor)
+    def _pad_shifts(self, shift_pts, target_num):
+        """Append ``padding_value`` slots so ``shift_pts`` has ``target_num`` shifts."""
+        padding = torch.full(
+            [target_num - shift_pts.shape[0], shift_pts.shape[1], shift_pts.shape[-1]],
+            self.padding_value)
+        return torch.cat([shift_pts, padding], dim=0)
+    def _rolled_or_flipped_shifts(self, sampled_points):
+        """Build shifts from pre-sampled points.
+        A sequence whose first and last points are equal gets one roll per
+        point; any other sequence gets itself and its reverse, padded up to
+        the same number of shifts.
+        """
+        instances_list = []
+        for fixed_num_pts in sampled_points:
+            is_poly = fixed_num_pts[0].equal(fixed_num_pts[-1])
+            fixed_num = fixed_num_pts.shape[0]
+            if is_poly:
+                shift_pts_list = [fixed_num_pts.roll(shift_right_i, 0) for shift_right_i in range(fixed_num)]
+            else:
+                shift_pts_list = [fixed_num_pts, fixed_num_pts.flip(0)]
+            shift_pts = self._clamp_to_patch(torch.stack(shift_pts_list, dim=0))
+            if not is_poly:
+                shift_pts = self._pad_shifts(shift_pts, fixed_num)
+            instances_list.append(shift_pts)
+        return torch.stack(instances_list, dim=0).to(dtype=torch.float32)
+    def _closed_ring_shifts(self, both_directions):
+        """Build shifts that keep closed sequences closed.
+        For a closed sequence the duplicated last point is dropped, the rest
+        is rolled (optionally also in reversed order) and each roll is closed
+        again by repeating its first point. Open sequences get themselves and
+        their reverse, padded to the same number of shifts.
+        """
+        instances_list = []
+        for fixed_num_pts in self.fixed_num_sampled_points:
+            is_poly = fixed_num_pts[0].equal(fixed_num_pts[-1])
+            pts_num = fixed_num_pts.shape[0]
+            shift_num = pts_num - 1
+            target_num = shift_num * 2 if both_directions else shift_num
+            if is_poly:
+                pts_to_shift = fixed_num_pts[:-1, :]
+                shift_pts_list = [pts_to_shift.roll(shift_right_i, 0) for shift_right_i in range(shift_num)]
+                if both_directions:
+                    flip_pts_to_shift = pts_to_shift.flip(0)
+                    shift_pts_list.extend(
+                        flip_pts_to_shift.roll(shift_right_i, 0) for shift_right_i in range(shift_num))
+                open_shifts = torch.stack(shift_pts_list, dim=0)
+                shift_pts = open_shifts.new_zeros((target_num, pts_num, open_shifts.shape[-1]))
+                shift_pts[:, :-1, :] = open_shifts
+                shift_pts[:, -1, :] = open_shifts[:, 0, :]
+            else:
+                shift_pts = torch.stack([fixed_num_pts, fixed_num_pts.flip(0)], dim=0)
+            shift_pts = self._clamp_to_patch(shift_pts)
+            if not is_poly:
+                shift_pts = self._pad_shifts(shift_pts, target_num)
+            instances_list.append(shift_pts)
+        return torch.stack(instances_list, dim=0).to(dtype=torch.float32)
+    @classmethod
+    def _resample_rolled_ring(cls, ring_pts, distances, coords_num):
+        """Roll the open vertex ring once per vertex, close it and resample each roll."""
+        resampled = []
+        for shift_right_i in range(len(ring_pts)):
+            shift_pts = np.roll(ring_pts, shift_right_i, axis=0)
+            shift_pts = np.concatenate((shift_pts, shift_pts[:1]), axis=0)
+            shift_instance = LineString(shift_pts)
+            resampled.append(cls._interpolate_along(shift_instance, distances, coords_num))
+        return resampled
+    def _finalize_shifts(self, multi_shifts_pts, target_num):
+        """Convert to a clamped float32 tensor and pad up to ``target_num`` shifts."""
+        multi_shifts_pts_tensor = torch.from_numpy(multi_shifts_pts).to(dtype=torch.float32)
+        multi_shifts_pts_tensor = self._clamp_to_patch(multi_shifts_pts_tensor)
+        if multi_shifts_pts_tensor.shape[0] < target_num:
+            multi_shifts_pts_tensor = self._pad_shifts(multi_shifts_pts_tensor, target_num)
+        return multi_shifts_pts_tensor
+    @property
+    def start_end_points(self):
+        """
+        return torch.Tensor([N,4]), in xstart, ystart, xend, yend form
+        """
+        assert len(self.instance_list) != 0
+        instance_se_points_list = []
+        for instance in self.instance_list:
+            # Keep x/y only so 3D lines still produce the documented 4 columns
+            # and every column is clamped with the matching bound.
+            se_points = list(instance.coords[0][:2]) + list(instance.coords[-1][:2])
+            instance_se_points_list.append(se_points)
+        instance_se_points_tensor = torch.from_numpy(
+            np.array(instance_se_points_list)).to(dtype=torch.float32)
+        instance_se_points_tensor[:, 0] = torch.clamp(instance_se_points_tensor[:, 0], min=-self.max_x, max=self.max_x)
+        instance_se_points_tensor[:, 1] = torch.clamp(instance_se_points_tensor[:, 1], min=-self.max_y, max=self.max_y)
+        instance_se_points_tensor[:, 2] = torch.clamp(instance_se_points_tensor[:, 2], min=-self.max_x, max=self.max_x)
+        instance_se_points_tensor[:, 3] = torch.clamp(instance_se_points_tensor[:, 3], min=-self.max_y, max=self.max_y)
+        return instance_se_points_tensor
+    @property
+    def bbox(self):
+        """
+        return torch.Tensor([N,4]), in xmin, ymin, xmax, ymax form
+        """
+        assert len(self.instance_list) != 0
+        # bounds is bbox: [xmin, ymin, xmax, ymax]
+        instance_bbox_array = np.array([instance.bounds for instance in self.instance_list])
+        instance_bbox_tensor = torch.from_numpy(instance_bbox_array).to(dtype=torch.float32)
+        instance_bbox_tensor[:, 0] = torch.clamp(instance_bbox_tensor[:, 0], min=-self.max_x, max=self.max_x)
+        instance_bbox_tensor[:, 1] = torch.clamp(instance_bbox_tensor[:, 1], min=-self.max_y, max=self.max_y)
+        instance_bbox_tensor[:, 2] = torch.clamp(instance_bbox_tensor[:, 2], min=-self.max_x, max=self.max_x)
+        instance_bbox_tensor[:, 3] = torch.clamp(instance_bbox_tensor[:, 3], min=-self.max_y, max=self.max_y)
+        return instance_bbox_tensor
+    @property
+    def fixed_num_sampled_points(self):
+        """
+        return torch.Tensor([N,fixed_num,C]) of clamped sample points
+            N means the num of instances, C is 3 for lines with z else 2
+        """
+        assert len(self.instance_list) != 0
+        return self._stack_fixed_num_samples()
+    @property
+    def fixed_num_sampled_points_ambiguity(self):
+        """
+        return torch.Tensor([N,1,fixed_num,code_size]) of clamped sample points
+            N means the num of instances
+        """
+        assert len(self.instance_list) != 0
+        instance_points_tensor = self._stack_fixed_num_samples()
+        # Keep code_size coordinate columns (2 -> x/y, 3 -> x/y/z), matching
+        # shift_fixed_num_sampled_points_v2.
+        instance_points_tensor = instance_points_tensor[..., :self.code_size]
+        return instance_points_tensor.unsqueeze(1)
+    @property
+    def fixed_num_sampled_points_torch(self):
+        """
+        return torch.Tensor([N,fixed_num,C]) of clamped points obtained by
+            linearly interpolating over the vertex index of each instance
+            N means the num of instances
+        """
+        assert len(self.instance_list) != 0
+        instance_points_list = []
+        for instance in self.instance_list:
+            poly_pts = torch.from_numpy(np.array(list(instance.coords)))
+            # interpolate expects [batch, channels, length]
+            poly_pts = poly_pts.unsqueeze(0).permute(0, 2, 1)
+            sampled_pts = torch.nn.functional.interpolate(poly_pts, size=(self.fixed_num), mode='linear', align_corners=True)
+            instance_points_list.append(sampled_pts.permute(0, 2, 1).squeeze(0))
+        instance_points_tensor = torch.stack(instance_points_list, dim=0).to(dtype=torch.float32)
+        return self._clamp_to_patch(instance_points_tensor)
+    @property
+    def shift_fixed_num_sampled_points(self):
+        """
+        return  [instances_num, num_shifts, fixed_num, C]
+        """
+        return self._rolled_or_flipped_shifts(self.fixed_num_sampled_points)
+    @property
+    def shift_fixed_num_sampled_points_v1(self):
+        """
+        return  [instances_num, num_shifts, fixed_num, C]
+        """
+        return self._closed_ring_shifts(both_directions=False)
+    @property
+    def shift_fixed_num_sampled_points_v2(self):
+        """
+        return  [instances_num, num_shifts, fixed_num, code_size]
+        """
+        assert len(self.instance_list) != 0
+        final_shift_num = self.fixed_num - 1
+        instances_list = []
+        for idx, instance in enumerate(self.instance_list):
+            instance_label = self.instance_labels[idx]
+            distances = np.linspace(0, instance.length, self.fixed_num)
+            poly_pts = np.array(list(instance.coords))
+            is_poly = np.equal(poly_pts[0], poly_pts[-1]).all()
+            _, coords_num = poly_pts.shape
+            if instance_label == 3:
+                # Label 3 keeps its single original ordering.
+                shift_pts_list = [self._interpolate_along(instance, distances, coords_num)]
+            elif is_poly:
+                shift_pts_list = self._resample_rolled_ring(poly_pts[:-1, :], distances, coords_num)
+            else:
+                sampled_points = self._interpolate_along(instance, distances, coords_num)
+                shift_pts_list = [sampled_points, np.flip(sampled_points, axis=0)]
+            multi_shifts_pts = np.stack(shift_pts_list, axis=0)
+            if multi_shifts_pts.shape[0] > final_shift_num:
+                # Too many shifts: keep a random subset without replacement.
+                index = np.random.choice(multi_shifts_pts.shape[0], final_shift_num, replace=False)
+                multi_shifts_pts = multi_shifts_pts[index]
+            instances_list.append(self._finalize_shifts(multi_shifts_pts, final_shift_num))
+        instances_tensor = torch.stack(instances_list, dim=0).to(dtype=torch.float32)
+        return instances_tensor[..., :self.code_size]
+    @property
+    def shift_fixed_num_sampled_points_v3(self):
+        """
+        return  [instances_num, num_shifts, fixed_num, C]
+        """
+        assert len(self.instance_list) != 0
+        final_shift_num = self.fixed_num - 1
+        instances_list = []
+        for instance in self.instance_list:
+            distances = np.linspace(0, instance.length, self.fixed_num)
+            poly_pts = np.array(list(instance.coords))
+            is_poly = np.equal(poly_pts[0], poly_pts[-1]).all()
+            pts_num, coords_num = poly_pts.shape
+            shift_num = pts_num - 1
+            if is_poly:
+                pts_to_shift = poly_pts[:-1, :]
+                # Forward rolls first, then the rolls of the reversed ring.
+                shift_pts_list = self._resample_rolled_ring(pts_to_shift, distances, coords_num)
+                shift_pts_list += self._resample_rolled_ring(np.flip(pts_to_shift, axis=0), distances, coords_num)
+            else:
+                sampled_points = self._interpolate_along(instance, distances, coords_num)
+                shift_pts_list = [sampled_points, np.flip(sampled_points, axis=0)]
+            multi_shifts_pts = np.stack(shift_pts_list, axis=0)
+            if multi_shifts_pts.shape[0] > 2 * final_shift_num:
+                # Pick the same random roll offsets from both halves.
+                index = np.random.choice(shift_num, final_shift_num, replace=False)
+                multi_shifts_pts = np.concatenate(
+                    (multi_shifts_pts[index], multi_shifts_pts[index + shift_num]), axis=0)
+            instances_list.append(self._finalize_shifts(multi_shifts_pts, 2 * final_shift_num))
+        return torch.stack(instances_list, dim=0).to(dtype=torch.float32)
+    @property
+    def shift_fixed_num_sampled_points_v4(self):
+        """
+        return  [instances_num, num_shifts, fixed_num, C]
+        """
+        return self._closed_ring_shifts(both_directions=True)
+    @property
+    def shift_fixed_num_sampled_points_torch(self):
+        """
+        return  [instances_num, num_shifts, fixed_num, C]
+        """
+        return self._rolled_or_flipped_shifts(self.fixed_num_sampled_points_torch)
+class VectorizedAV2LocalMap(object):
+    # Map-class name -> integer label. gen_vectorized_samples looks names up
+    # with ``.get(vec_class, -1)``, so unknown names share the 'others' label,
+    # and its visible branch skips instances labelled -1.
+    CLASS2LABEL = {
+        'divider': 0,
+        'ped_crossing': 1,
+        'boundary': 2,
+        'centerline': 3,
+        'others': -1
+    }
+    def __init__(self,
+                 canvas_size,
+                 patch_size,
+                 map_classes=None,
+                 sample_dist=1,
+                 num_samples=250,
+                 padding=False,
+                 fixed_ptsnum_per_line=-1,
+                 padding_value=-10000,
+                 code_size=2,
+                 min_z=-2,
+                 max_z=2,
+                 thickness=3,
+                 aux_seg=None):
+        '''
+        Args:
+            canvas_size: (rows, cols) of the rasterised mask; together with
+                patch_size it defines scale_x / scale_y.
+            patch_size: extent of the map patch, indexed like canvas_size
+                (index 0 pairs with y, index 1 with x).
+            map_classes: class names to vectorise.
+                Default: ['divider', 'ped_crossing', 'boundary'].
+            sample_dist, num_samples, padding, padding_value, code_size:
+                stored unchanged as attributes.
+            fixed_ptsnum_per_line = -1 : no fixed num
+            min_z, max_z: stored as ``self.min_z`` / ``self.max_z``.
+            thickness: line thickness stored for mask drawing.
+            aux_seg: auxiliary segmentation config. Default:
+                dict(use_aux_seg=False, bev_seg=False, pv_seg=False,
+                     seg_classes=1, feat_down_sample=32).
+        '''
+        super().__init__()
+        # Build the defaults per instance so that mutating one object's
+        # config can never leak into another instance.
+        if map_classes is None:
+            map_classes = ['divider', 'ped_crossing', 'boundary']
+        if aux_seg is None:
+            aux_seg = dict(
+                use_aux_seg=False,
+                bev_seg=False,
+                pv_seg=False,
+                seg_classes=1,
+                feat_down_sample=32)
+        self.vec_classes = map_classes
+        self.sample_dist = sample_dist
+        self.num_samples = num_samples
+        self.padding = padding
+        self.fixed_num = fixed_ptsnum_per_line
+        self.padding_value = padding_value
+        # for semantic mask
+        self.patch_size = patch_size
+        self.canvas_size = canvas_size
+        self.thickness = thickness
+        self.scale_x = self.canvas_size[1] / self.patch_size[1]
+        self.scale_y = self.canvas_size[0] / self.patch_size[0]
+        self.aux_seg = aux_seg
+        self.code_size = code_size
+        # Previously accepted but dropped; keep them available on the instance.
+        self.min_z = min_z
+        self.max_z = max_z
+    def gen_vectorized_samples(self, map_annotation, example=None, feat_down_sample=32):
+        '''
+        use lidar2global to get gt map layers
+        '''
+        # avm = ArgoverseStaticMap.from_map_dir(log_map_dirpath, build_raster=False)
+        vectors = []
+        for vec_class in self.vec_classes:
+            instance_list = map_annotation[vec_class]
+            for instance in instance_list:
+                if instance.shape[0] < 2:
+                    # print('class : {}, instance : {}, instance_list : {}'.format(vec_class, instance, instance_list))
+                    continue
+                vectors.append((LineString(np.array(instance)), self.CLASS2LABEL.get(vec_class, -1)))
+        filtered_vectors = []
+        gt_pts_loc_3d = []
+        gt_pts_num_3d = []
+        gt_labels = []
+        gt_instance = []
+        # import ipdb;ipdb.set_trace()
+        if self.aux_seg['use_aux_seg']:
+            if self.aux_seg['seg_classes'] == 1:
+                if self.aux_seg['bev_seg']:
+                    gt_semantic_mask = np.zeros((1, self.canvas_size[0], self.canvas_size[1]), dtype=np.uint8)
+                else:
+                    gt_semantic_mask = None
+                # import ipdb;ipdb.set_trace()
+                if self.aux_seg['pv_seg']:
+                    num_cam  = len(example['img_metas'].data['pad_shape'])
+                    img_shape = example['img_metas'].data['pad_shape'][0]
+                    # import ipdb;ipdb.set_trace()
+                    gt_pv_semantic_mask = np.zeros((num_cam, 1, img_shape[0] // feat_down_sample, img_shape[1] // feat_down_sample), dtype=np.uint8)
+                    lidar2img = example['img_metas'].data['lidar2img']
+                    scale_factor = np.eye(4)
+                    scale_factor[0, 0] *= 1/32
+                    scale_factor[1, 1] *= 1/32
+                    lidar2feat = [scale_factor @ l2i for l2i in lidar2img]
+                else:
+                    gt_pv_semantic_mask = None
+                for instance, instance_type in vectors:
+                    if instance_type != -1:
+                        gt_instance.append(instance)
+                        gt_labels.append(instance_type)
+                        if instance.geom_type == 'LineString':
+                            if self.aux_seg['bev_seg']:
+                                self.line_ego_to_mask(instance, gt_semantic_mask[0], color=1, thickness=self.thickness)
+                            if self.aux_seg['pv_seg']:
+                                for cam_index in range(num_cam):
+                                    self.line_ego_to_pvmask(instance, gt_pv_semantic_mask[cam_index][0], lidar2feat[cam_index],color=1, thickness=self.aux_seg['pv_thickness'])
+                        else:
+                            print(instance.geom_type)
+            else:
+                if self.aux_seg['bev_seg']:
+                    gt_semantic_mask = np.zeros((len(self.vec_classes), self.canvas_size[0], self.canvas_size[1]), dtype=np.uint8)
+                else:
+                    gt_semantic_mask = None
+                if self.aux_seg['pv_seg']:
+                    num_cam  = len(example['img_metas'].data['pad_shape'])
+                    gt_pv_semantic_mask = np.zeros((num_cam, len(self.vec_classes), img_shape[0] // feat_down_sample, img_shape[1] // feat_down_sample), dtype=np.uint8)
+                    lidar2img = example['img_metas'].data['lidar2img']
+                    scale_factor = np.eye(4)
+                    scale_factor[0, 0] *= 1/32
+                    scale_factor[1, 1] *= 1/32
+                    lidar2feat = [scale_factor @ l2i for l2i in lidar2img]
+                else:
+                    gt_pv_semantic_mask = None
+                for instance, instance_type in vectors:
+                    if instance_type != -1:
+                        gt_instance.append(instance)
+                        gt_labels.append(instance_type)
+                        if instance.geom_type == 'LineString':
+                            if self.aux_seg['bev_seg']:
+                                self.line_ego_to_mask(instance, gt_semantic_mask[instance_type], color=1, thickness=self.thickness)
+                            if self.aux_seg['pv_seg']:
+                                for cam_index in range(num_cam):
+                                    self.line_ego_to_pvmask(instance, gt_pv_semantic_mask[cam_index][instance_type], lidar2feat[cam_index],color=1, thickness=self.aux_seg['pv_thickness'])
+                        else:
+                            print(instance.geom_type)
+        else:
+            for instance, instance_type in vectors:
+                if instance_type != -1:
+                    gt_instance.append(instance)
+                    gt_labels.append(instance_type)
+            gt_semantic_mask=None
+            gt_pv_semantic_mask=None
+        gt_instance = LiDARInstanceLines(gt_instance, gt_labels, self.sample_dist,
+                        self.num_samples, self.padding, self.fixed_num,self.padding_value, patch_size=self.patch_size, code_size=self.code_size)
+        anns_results = dict(
+            gt_vecs_pts_loc=gt_instance,
+            gt_vecs_label=gt_labels,
+            gt_semantic_mask=gt_semantic_mask,
+            gt_pv_semantic_mask=gt_pv_semantic_mask,
+        )
+        return anns_results
+    def line_ego_to_pvmask(self,
+                          line_ego, 
+                          mask, 
+                          lidar2feat,
+                          color=1, 
+                          thickness=1):
+        distances = np.linspace(0, line_ego.length, 200)
+        coords = np.array([list(line_ego.interpolate(distance).coords) for distance in distances]).reshape(-1, 3)
+        pts_num = coords.shape[0]
+        ones = np.ones((pts_num,1))
+        lidar_coords = np.concatenate([coords,ones], axis=1).transpose(1,0)
+        pix_coords = perspective(lidar_coords, lidar2feat)
+        cv2.polylines(mask, np.int32([pix_coords]), False, color=color, thickness=thickness)
+    def line_ego_to_mask(self, 
+                         line_ego, 
+                         mask, 
+                         color=1, 
+                         thickness=3):
+        ''' Rasterize a single line to mask.
+        Args:
+            line_ego (LineString): line
+            mask (array): semantic mask to paint on
+            color (int): positive label, default: 1
+            thickness (int): thickness of rasterized lines, default: 3
+        '''
+        trans_x = self.canvas_size[1] / 2
+        trans_y = self.canvas_size[0] / 2
+        line_ego = affinity.scale(line_ego, self.scale_x, self.scale_y, origin=(0, 0))
+        line_ego = affinity.affine_transform(line_ego, [1.0, 0.0, 0.0, 1.0, trans_x, trans_y])
+        # print(np.array(list(line_ego.coords), dtype=np.int32).shape)
+        coords = np.array(list(line_ego.coords), dtype=np.int32)[:, :2]
+        coords = coords.reshape((-1, 2))
+        assert len(coords) >= 2
+        cv2.polylines(mask, np.int32([coords]), False, color=color, thickness=thickness)
+@DATASETS.register_module()
+class CustomAV2OfflineLocalMapDataset(CustomNuScenesDataset):
+    r"""AV2 offline local-map dataset built on ``CustomNuScenesDataset``.
+    This dataset adds static map elements to each sample.
+    """
+    MAPCLASSES = ('divider',)
+    def __init__(self,
+                 map_ann_file=None, 
+                 queue_length=4, 
+                 z_cfg = dict(
+                    pred_z_flag=True,
+                    gt_z_flag=True,
+                 ),
+                 bev_size=(200, 200), 
+                 pc_range=[-51.2, -51.2, -5.0, 51.2, 51.2, 3.0],
+                 overlap_test=False, 
+                 fixed_ptsnum_per_line=-1,
+                 eval_use_same_gt_sample_num_flag=False,
+                 padding_value=-10000,
+                 map_classes=None,
+                 aux_seg = dict(
+                    use_aux_seg=False,
+                    bev_seg=False,
+                    pv_seg=False,
+                    seg_classes=1,
+                    feat_down_sample=32,
+                 ),
+                 *args, 
+                 **kwargs):
+        super().__init__(*args, **kwargs)
+        self.map_ann_file = map_ann_file
+        self.z_cfg = z_cfg
+        if z_cfg['gt_z_flag']:
+            self.code_size = 3
+        else:
+            self.code_size = 2
+        self.queue_length = queue_length
+        self.overlap_test = overlap_test
+        self.bev_size = bev_size
+        self.MAPCLASSES = self.get_map_classes(map_classes)
+        self.NUM_MAPCLASSES = len(self.MAPCLASSES)
+        self.pc_range = pc_range
+        patch_h = pc_range[4]-pc_range[1]
+        patch_w = pc_range[3]-pc_range[0]
+        self.patch_size = (patch_h, patch_w)
+        self.min_z = pc_range[2]
+        self.max_z = pc_range[5]
+        self.padding_value = padding_value
+        self.fixed_num = fixed_ptsnum_per_line
+        self.eval_use_same_gt_sample_num_flag = eval_use_same_gt_sample_num_flag
+        self.aux_seg = aux_seg
+        self.vector_map = VectorizedAV2LocalMap(canvas_size=bev_size,
+                                                patch_size=self.patch_size, 
+                                                map_classes=self.MAPCLASSES, 
+                                                fixed_ptsnum_per_line=fixed_ptsnum_per_line,
+                                                padding_value=self.padding_value,
+                                                code_size=self.code_size,
+                                                min_z=self.min_z,
+                                                max_z=self.max_z,
+                                                aux_seg=aux_seg)
+        self.is_vis_on_test = False
+    def load_annotations(self, ann_file):
+        """Load annotations from ann_file.
+        Args:
+            ann_file (str): Path of the annotation file.
+        Returns:
+            list[dict]: List of annotations sorted by timestamps.
+        """
+        data = mmcv.load(ann_file, file_format='pkl')
+        # import pdb;pdb.set_trace()
+        data_infos = list(sorted(data['samples'], key=lambda e: e['timestamp']))
+        data_infos = data_infos[::self.load_interval]
+        # data_infos = [ data_info.update(dict(token= str(data_info['timestamp']+data_info['log_id'])))  for data_info in data_infos]
+        self.metadata = None
+        self.version = None
+        return data_infos
+    @classmethod
+    def get_map_classes(cls, map_classes=None):
+        """Get class names of current dataset.
+        Args:
+            classes (Sequence[str] | str | None): If classes is None, use
+                default CLASSES defined by builtin dataset. If classes is a
+                string, take it as a file name. The file contains the name of
+                classes where each line contains one class name. If classes is
+                a tuple or list, override the CLASSES defined by the dataset.
+        Return:
+            list[str]: A list of class names.
+        """
+        if map_classes is None:
+            return cls.MAPCLASSES
+        if isinstance(map_classes, str):
+            # take it as a file path
+            class_names = mmcv.list_from_file(map_classes)
+        elif isinstance(map_classes, (tuple, list)):
+            class_names = map_classes
+        else:
+            raise ValueError(f'Unsupported type {type(map_classes)} of map classes.')
+        return class_names
+    def vectormap_pipeline(self, example, input_dict):
+        '''
+        `example` type: <class 'dict'>
+            keys: 'img_metas', 'gt_bboxes_3d', 'gt_labels_3d', 'img';
+                  all keys type is 'DataContainer';
+                  'img_metas' cpu_only=True, type is dict, others are false;
+                  'gt_labels_3d' shape torch.size([num_samples]), stack=False,
+                                padding_value=0, cpu_only=False
+                  'gt_bboxes_3d': stack=False, cpu_only=True
+        '''
+        # import ipdb;ipdb.set_trace()
+        anns_results = self.vector_map.gen_vectorized_samples(input_dict['annotation'] if 'annotation' in input_dict.keys() else input_dict['ann_info'],
+                     example=example, feat_down_sample=self.aux_seg['feat_down_sample'])
+        '''
+        anns_results, type: dict
+            'gt_vecs_pts_loc': list[num_vecs], vec with num_points*2 coordinates
+            'gt_vecs_pts_num': list[num_vecs], vec with num_points
+            'gt_vecs_label': list[num_vecs], vec with cls index
+        '''
+        gt_vecs_label = to_tensor(anns_results['gt_vecs_label'])
+        if isinstance(anns_results['gt_vecs_pts_loc'], LiDARInstanceLines):
+            gt_vecs_pts_loc = anns_results['gt_vecs_pts_loc']
+        else:
+            gt_vecs_pts_loc = to_tensor(anns_results['gt_vecs_pts_loc'])
+            try:
+                gt_vecs_pts_loc = gt_vecs_pts_loc.flatten(1).to(dtype=torch.float32)
+            except:
+                # empty tensor, will be passed in train, 
+                # but we preserve it for test
+                gt_vecs_pts_loc = gt_vecs_pts_loc
+        example['gt_labels_3d'] = DC(gt_vecs_label, cpu_only=False)
+        example['gt_bboxes_3d'] = DC(gt_vecs_pts_loc, cpu_only=True)
+        # import pdb;pdb.set_trace()
+        # if self.is_vis_on_test:
+        #     lidar2global_translation = to_tensor(lidar2global_translation)
+        #     example['lidar2global_translation'] = DC(lidar2global_translation, cpu_only=True)
+        # else:
+        # example['img_metas'].data['lidar2global_translation'] = lidar2global_translation
+        if anns_results['gt_semantic_mask'] is not None:
+            example['gt_seg_mask'] = DC(to_tensor(anns_results['gt_semantic_mask']), cpu_only=False)
+        if anns_results['gt_pv_semantic_mask'] is not None:
+            example['gt_pv_seg_mask'] = DC(to_tensor(anns_results['gt_pv_semantic_mask']), cpu_only=False) 
+        return example
+    def prepare_train_data(self, index):
+        """
+        Training data preparation.
+        Args:
+            index (int): Index for accessing the target data.
+        Returns:
+            dict: Training data dict of the corresponding index.
+        """
+        data_queue = []
+        # temporal aug
+        prev_indexs_list = list(range(index-self.queue_length, index))
+        random.shuffle(prev_indexs_list)
+        prev_indexs_list = sorted(prev_indexs_list[1:], reverse=True)
+        ##
+        input_dict = self.get_data_info(index)
+        if input_dict is None:
+            return None
+        frame_idx = input_dict['timestamp']
+        scene_token = input_dict['log_id']
+        self.pre_pipeline(input_dict)
+        example = self.pipeline(input_dict)
+        example = self.vectormap_pipeline(example,input_dict)
+        if self.filter_empty_gt and \
+                (example is None or ~(example['gt_labels_3d']._data != -1).any()):
+            return None
+        # self.vis_gt(example)
+        data_queue.insert(0, example)
+        return self.union2one(data_queue)
+    def union2one(self, queue):
+        """
+        convert sample queue into one single sample.
+        """
+        imgs_list = [each['img'].data for each in queue]
+        metas_map = {}
+        prev_pos = None
+        prev_angle = None
+        for i, each in enumerate(queue):
+            metas_map[i] = each['img_metas'].data
+            if i == 0:
+                metas_map[i]['prev_bev'] = False
+                prev_pos = copy.deepcopy(metas_map[i]['can_bus'][:3])
+                prev_angle = copy.deepcopy(metas_map[i]['can_bus'][-1])
+                metas_map[i]['can_bus'][:3] = 0
+                metas_map[i]['can_bus'][-1] = 0
+            else:
+                metas_map[i]['prev_bev'] = True
+                tmp_pos = copy.deepcopy(metas_map[i]['can_bus'][:3])
+                tmp_angle = copy.deepcopy(metas_map[i]['can_bus'][-1])
+                metas_map[i]['can_bus'][:3] -= prev_pos
+                metas_map[i]['can_bus'][-1] -= prev_angle
+                prev_pos = copy.deepcopy(tmp_pos)
+                prev_angle = copy.deepcopy(tmp_angle)
+        queue[-1]['img'] = DC(torch.stack(imgs_list),
+                              cpu_only=False, stack=True)
+        queue[-1]['img_metas'] = DC(metas_map, cpu_only=True)
+        queue = queue[-1]
+        return queue
+    def get_data_info(self, index):
+        """Get data info according to the given index.
+        Args:
+            index (int): Index of the sample data to get.
+        Returns:
+            dict: Data information that will be passed to the data
+                preprocessing pipelines. It includes the following keys:
+                - timestamp: Sample timestamp.
+                - pts_filename / lidar_path: Both set to ``info['lidar_path']``.
+                - ego2global_translation / ego2global_rotation: Ego pose.
+                - log_id / scene_token: Both set to ``info['log_id']``.
+                - ann_info: ``info['annotation']``.
+                - can_bus (np.ndarray): Length-18 vector, see below.
+                When ``self.modality['use_camera']`` is true it also has:
+                - img_filename (list): Image paths, one per camera.
+                - lidar2img (list): Per-camera ``viewpad @ extrinsics``.
+                - camera_intrinsics (list): Per-camera 4x4 padded intrinsics.
+                - ego2cam (list): Per-camera extrinsics.
+                - camera2ego (list): Per-camera inverse of the extrinsics.
+                - cam_type (list): Camera names.
+                - camego2global (list): Per-camera 4x4 torch tensors.
+                - lidar2ego (np.ndarray): 4x4 identity.
+        """
+        info = self.data_infos[index]
+        # standard protocol modified from SECOND.Pytorch
+        input_dict = dict(
+            timestamp=info['timestamp'],
+            pts_filename=info['lidar_path'],
+            lidar_path=info['lidar_path'],
+            ego2global_translation=info['e2g_translation'],
+            ego2global_rotation=info['e2g_rotation'],
+            log_id=info['log_id'],
+            scene_token=info['log_id'],
+        )
+        if self.modality['use_camera']:
+            image_paths = []
+            cam_intrinsics = []
+            ego2img_rts = []
+            ego2cam_rts = []
+            cam_types = []
+            cam2ego_rts = []
+            input_dict["camego2global"] = []
+            for cam_type, cam_info in info['cams'].items():
+                image_paths.append(cam_info['img_fpath'])
+                # camera intrinsics
+                # NOTE(review): this float32 copy is not used below; the list
+                # that is returned is filled from `viewpad` instead.
+                camera_intrinsics = np.eye(4).astype(np.float32)
+                camera_intrinsics[:3, :3] = cam_info["intrinsics"]
+                # input_dict["camera_intrinsics"].append(camera_intrinsics)
+                # ego2img, ego = lidar
+                ego2cam_rt = cam_info['extrinsics']
+                # camera-to-ego is the matrix inverse of the extrinsics
+                cam2ego_rts.append(np.matrix(ego2cam_rt).I)
+                intrinsic = cam_info['intrinsics']
+                # pad the intrinsics into the top-left corner of a 4x4 identity
+                viewpad = np.eye(4)
+                viewpad[:intrinsic.shape[0], :intrinsic.shape[1]] = intrinsic
+                ego2img_rt = (viewpad @ ego2cam_rt)
+                ego2img_rts.append(ego2img_rt)
+                ego2cam_rts.append(ego2cam_rt)
+                cam_intrinsics.append(viewpad)
+                cam_types.append(cam_type)
+                # 4x4 transform built from this camera's own e2g rotation and
+                # translation entries
+                camego2global = np.eye(4, dtype=np.float32)
+                camego2global[:3, :3] = cam_info['e2g_rotation']
+                camego2global[:3, 3] = cam_info['e2g_translation']
+                camego2global = torch.from_numpy(camego2global)
+                input_dict["camego2global"].append(camego2global)
+            lidar2ego = np.eye(4).astype(np.float32)
+            input_dict.update(
+                dict(
+                    img_filename=image_paths,
+                    lidar2img=ego2img_rts,  # lidar and ego are treated as the same coordinate frame
+                    camera_intrinsics=cam_intrinsics,
+                    ego2cam=ego2cam_rts,
+                    camera2ego=cam2ego_rts,
+                    cam_type=cam_types,
+                    lidar2ego=lidar2ego,
+                ))
+        # if not self.test_mode:
+        #     # annos = self.get_ann_info(index)
+        #     input_dict['ann_info'] = dict()
+        input_dict['ann_info'] = info['annotation']
+        translation = input_dict['ego2global_translation']
+        # can_bus layout: [0:3] translation, [3:7] rotation, [-2] yaw in
+        # radians, [-1] yaw in degrees; every other entry stays at 1.
+        can_bus = np.ones(18)
+        # can_bus.extend(translation.tolist())
+        can_bus[:3] = translation
+        # presumably converts the rotation matrix to a quaternion - confirm
+        # against the Quaternion class imported by this file
+        rotation = Quaternion._from_matrix(input_dict['ego2global_rotation'])
+        can_bus[3:7] = rotation
+        patch_angle = quaternion_yaw(rotation) / np.pi * 180
+        # wrap negative angles by adding 360 degrees
+        if patch_angle < 0:
+            patch_angle += 360
+        can_bus[-2] = patch_angle / 180 * np.pi
+        can_bus[-1] = patch_angle
+        input_dict['can_bus'] = can_bus
+        # import pdb;pdb.set_trace()
+        return input_dict
+    def prepare_test_data(self, index):
+        """Prepare data for testing.
+        Args:
+            index (int): Index for accessing the target data.
+        Returns:
+            dict: Testing data dict of the corresponding index.
+        """
+        input_dict = self.get_data_info(index)
+        self.pre_pipeline(input_dict)
+        example = self.pipeline(input_dict)
+        if self.is_vis_on_test:
+            example = self.vectormap_pipeline(example, input_dict)
+        return example
+    def __getitem__(self, idx):
+        """Get item from infos according to the given index.
+        Returns:
+            dict: Data dictionary of the corresponding index.
+        """
+        if self.test_mode:
+            return self.prepare_test_data(idx)
+        while True:
+            data = self.prepare_train_data(idx)
+            if data is None:
+                idx = self._rand_another(idx)
+                continue
+            return data
+    def _format_gt(self):
+        gt_annos = []
+        # import ipdb;ipdb.set_trace()
+        print('Start to convert gt map format...')
+        assert self.map_ann_file is not None
+        if (not os.path.exists(self.map_ann_file)) :
+            dataset_length = len(self)
+            prog_bar = mmcv.ProgressBar(dataset_length)
+            mapped_class_names = self.MAPCLASSES
+            for sample_id in range(dataset_length):
+                sample_token = self.data_infos[sample_id]['token']
+                gt_anno = {}
+                gt_anno['sample_token'] = sample_token
+                # gt_sample_annos = []
+                gt_sample_dict = {}
+                gt_sample_dict = self.vectormap_pipeline(gt_sample_dict, self.data_infos[sample_id])
+                gt_labels = gt_sample_dict['gt_labels_3d'].data.numpy()
+                gt_vecs = gt_sample_dict['gt_bboxes_3d'].data.instance_list
+                # import pdb;pdb.set_trace()
+                gt_vec_list = []
+                for i, (gt_label, gt_vec) in enumerate(zip(gt_labels, gt_vecs)):
+                    name = mapped_class_names[gt_label]
+                    anno = dict(
+                        pts=np.array(list(gt_vec.coords))[:,:self.code_size],
+                        pts_num=len(list(gt_vec.coords)),
+                        cls_name=name,
+                        type=gt_label,
+                    )
+                    gt_vec_list.append(anno)
+                gt_anno['vectors']=gt_vec_list
+                gt_annos.append(gt_anno)
+                prog_bar.update()
+            nusc_submissions = {
+                'GTs': gt_annos
+            }
+            print('\n GT anns writes to', self.map_ann_file)
+            mmcv.dump(nusc_submissions, self.map_ann_file)
+        else:
+            print(f'{self.map_ann_file} exist, not update')
+    def _format_bbox(self, results, jsonfile_prefix=None):
+        """Convert the results to the standard format.
+        Args:
+            results (list[dict]): Testing results of the dataset.
+            jsonfile_prefix (str): The prefix of the output jsonfile.
+                You can specify the output directory/filename by
+                modifying the jsonfile_prefix. Default: None.
+        Returns:
+            str: Path of the output json file.
+        """
+        assert self.map_ann_file is not None
+        pred_annos = []
+        mapped_class_names = self.MAPCLASSES
+        # import ipdb;ipdb.set_trace()
+        print('Start to convert map detection format...')
+        for sample_id, det in enumerate(mmcv.track_iter_progress(results)):
+            pred_anno = {}
+            vecs = output_to_vecs(det)
+            sample_token = self.data_infos[sample_id]['token']
+            pred_anno['sample_token'] = sample_token
+            pred_vec_list=[]
+            for i, vec in enumerate(vecs):
+                name = mapped_class_names[vec['label']]
+                anno = dict(
+                    # sample_token=sample_token,
+                    pts=vec['pts'],
+                    pts_num=len(vec['pts']),
+                    cls_name=name,
+                    type=vec['label'],
+                    confidence_level=vec['score'])
+                pred_vec_list.append(anno)
+                # annos.append(nusc_anno)
+            # nusc_annos[sample_token] = annos
+            pred_anno['vectors'] = pred_vec_list
+            pred_annos.append(pred_anno)
+        if not os.path.exists(self.map_ann_file):
+            self._format_gt()
+        else:
+            print(f'{self.map_ann_file} exist, not update')
+        # with open(self.map_ann_file,'r') as f:
+        #     GT_anns = json.load(f)
+        # gt_annos = GT_anns['GTs']
+        nusc_submissions = {
+            'meta': self.modality,
+            'results': pred_annos,
+            # 'GTs': gt_annos
+        }
+        mmcv.mkdir_or_exist(jsonfile_prefix)
+        res_path = osp.join(jsonfile_prefix, 'av2map_results.json')
+        print('Results writes to', res_path)
+        mmcv.dump(nusc_submissions, res_path)
+        return res_path
+    def to_gt_vectors(self,
+                      gt_dict):
+        # import pdb;pdb.set_trace()
+        gt_labels = gt_dict['gt_labels_3d'].data
+        gt_instances = gt_dict['gt_bboxes_3d'].data.instance_list
+        gt_vectors = []
+        for gt_instance, gt_label in zip(gt_instances, gt_labels):
+            pts, pts_num = sample_pts_from_line(gt_instance, patch_size=self.patch_size)
+            gt_vectors.append({
+                'pts': pts,
+                'pts_num': pts_num,
+                'type': int(gt_label)
+            })
+        vector_num_list = {}
+        for i in range(self.NUM_MAPCLASSES):
+            vector_num_list[i] = []
+        for vec in gt_vectors:
+            if vector['pts_num'] >= 2:
+                vector_num_list[vector['type']].append((LineString(vector['pts'][:vector['pts_num']]), vector.get('confidence_level', 1)))
+        return gt_vectors
+    def _evaluate_single(self,
+                         result_path,
+                         logger=None,
+                         metric='chamfer',
+                         result_name='pts_bbox'):
+        """Evaluate one result file against the GT map annotations.
+        Args:
+            result_path (str): Path of the result file (read as JSON).
+            logger (logging.Logger | str | None): Logger forwarded to
+                ``eval_map``. Default: None.
+            metric (str | list[str]): Metric name(s); each must be 'chamfer'
+                or 'iou'. Default: 'chamfer'.
+            result_name (str): Result name in the metric prefix; not used in
+                this method. Default: 'pts_bbox'.
+        Returns:
+            dict: Dictionary of evaluation details, keyed by
+                ``'AV2Map_<metric>/...'``.
+        Raises:
+            KeyError: If a metric is not one of the allowed metrics.
+        """
+        from projects.mmdet3d_plugin.datasets.map_utils.mean_ap import eval_map
+        from projects.mmdet3d_plugin.datasets.map_utils.mean_ap import format_res_gt_by_classes
+        result_path = osp.abspath(result_path)
+        # import pdb;pdb.set_trace()
+        detail = dict()
+        print('Formating results & gts by classes')
+        with open(result_path,'r') as f:
+            pred_results = json.load(f)
+        gen_results = pred_results['results']
+        # The GT file is read as JSON as well.
+        with open(self.map_ann_file,'r') as ann_f:
+            gt_anns = json.load(ann_f)
+        annotations = gt_anns['GTs']
+        cls_gens, cls_gts = format_res_gt_by_classes(result_path,
+                                                     gen_results,
+                                                     annotations,
+                                                     cls_names=self.MAPCLASSES,
+                                                     num_pred_pts_per_instance=self.fixed_num,
+                                                     eval_use_same_gt_sample_num_flag=self.eval_use_same_gt_sample_num_flag,
+                                                     pc_range=self.pc_range,
+                                                     code_size=self.code_size)
+        metrics = metric if isinstance(metric, list) else [metric]
+        allowed_metrics = ['chamfer', 'iou']
+        # Validate every metric up front so nothing is evaluated on bad input.
+        for metric in metrics:
+            if metric not in allowed_metrics:
+                raise KeyError(f'metric {metric} is not supported')
+        for metric in metrics:
+            print('-*'*10+f'use metric:{metric}'+'-*'*10)
+            if metric == 'chamfer':
+                thresholds = [0.5,1.0,1.5]
+            elif metric == 'iou':
+                # 10 evenly spaced thresholds from 0.5 to 0.95
+                thresholds= np.linspace(.5, 0.95, int(np.round((0.95 - .5) / .05)) + 1, endpoint=True)
+            # cls_aps[i, j]: AP of class j at threshold i
+            cls_aps = np.zeros((len(thresholds),self.NUM_MAPCLASSES))
+            for i, thr in enumerate(thresholds):
+                print('-*'*10+f'threshhold:{thr}'+'-*'*10)
+                mAP, cls_ap = eval_map(
+                                gen_results,
+                                annotations,
+                                cls_gens,
+                                cls_gts,
+                                threshold=thr,
+                                cls_names=self.MAPCLASSES,
+                                logger=logger,
+                                num_pred_pts_per_instance=self.fixed_num,
+                                pc_range=self.pc_range,
+                                metric=metric,
+                                code_size=self.code_size)
+                for j in range(self.NUM_MAPCLASSES):
+                    cls_aps[i, j] = cls_ap[j]['ap']
+            # Per-class AP averaged over thresholds, then the mean over classes.
+            for i, name in enumerate(self.MAPCLASSES):
+                print('{}: {}'.format(name, cls_aps.mean(0)[i]))
+                detail['AV2Map_{}/{}_AP'.format(metric,name)] =  cls_aps.mean(0)[i]
+            print('map: {}'.format(cls_aps.mean(0).mean()))
+            detail['AV2Map_{}/mAP'.format(metric)] = cls_aps.mean(0).mean()
+            # Per-threshold entries: all thresholds for chamfer, only those
+            # comparing equal to 0.5 or 0.75 for iou.
+            for i, name in enumerate(self.MAPCLASSES):
+                for j, thr in enumerate(thresholds):
+                    if metric == 'chamfer':
+                        detail['AV2Map_{}/{}_AP_thr_{}'.format(metric,name,thr)]=cls_aps[j][i]
+                    elif metric == 'iou':
+                        if thr == 0.5 or thr == 0.75:
+                            detail['AV2Map_{}/{}_AP_thr_{}'.format(metric,name,thr)]=cls_aps[j][i]
+        return detail
+    def evaluate(self,
+                 results,
+                 metric='bbox',
+                 logger=None,
+                 jsonfile_prefix=None,
+                 result_names=['pts_bbox'],
+                 show=False,
+                 out_dir=None,
+                 pipeline=None):
+        """Evaluation in nuScenes protocol.
+        Args:
+            results (list[dict]): Testing results of the dataset.
+            metric (str | list[str]): Metrics to be evaluated.
+            logger (logging.Logger | str | None): Logger used for printing
+                related information during evaluation. Default: None.
+            jsonfile_prefix (str | None): The prefix of json files. It includes
+                the file path and the prefix of filename, e.g., "a/b/prefix".
+                If not specified, a temp file will be created. Default: None.
+            show (bool): Whether to visualize.
+                Default: False.
+            out_dir (str): Path to save the visualization results.
+                Default: None.
+            pipeline (list[dict], optional): raw data loading for showing.
+                Default: None.
+        Returns:
+            dict[str, float]: Results of each evaluation metric.
+        """
+        result_files, tmp_dir = self.format_results(results, jsonfile_prefix)
+        if isinstance(result_files, dict):
+            results_dict = dict()
+            for name in result_names:
+                print('Evaluating bboxes of {}'.format(name))
+                ret_dict = self._evaluate_single(result_files[name], metric=metric)
+            results_dict.update(ret_dict)
+        elif isinstance(result_files, str):
+            results_dict = self._evaluate_single(result_files, metric=metric)
+        if tmp_dir is not None:
+            tmp_dir.cleanup()
+        if show:
+            self.show(results, out_dir, pipeline=pipeline)
+        return results_dict
+def output_to_vecs(detection):
+    """Split one detection dict into a list of per-instance dicts.
+    Args:
+        detection (dict): Must provide the keys 'boxes_3d', 'scores_3d',
+            'labels_3d' and 'pts_3d'; each value is converted with
+            ``.numpy()``.
+    Returns:
+        list[dict]: One entry per row of 'boxes_3d', holding the keys
+            'bbox' (annotated as xyxy by the original author), 'label',
+            'score' and 'pts'.
+    """
+    boxes, confidences, classes, points = (
+        detection[key].numpy()
+        for key in ('boxes_3d', 'scores_3d', 'labels_3d', 'pts_3d'))
+    # Index by the number of boxes so a shorter companion array still
+    # raises IndexError instead of being silently truncated.
+    return [
+        dict(bbox=boxes[idx],
+             label=classes[idx],
+             score=confidences[idx],
+             pts=points[idx])
+        for idx in range(boxes.shape[0])
+    ]
+def sample_pts_from_line(line,
+                         fixed_num=-1,
+                         sample_dist=1,
+                         normalize=False,
+                         patch_size=None,
+                         padding=False,
+                         num_samples=250,):
+    """Sample points along ``line`` by fixed spacing or fixed count.
+    Args:
+        line: Object exposing ``length``, ``has_z`` and
+            ``interpolate(distance).coords`` (presumably a shapely
+            LineString - confirm against callers).
+        fixed_num (int): If negative, sample every ``sample_dist`` along
+            the line; otherwise sample exactly ``fixed_num`` evenly spaced
+            points. Default: -1.
+        sample_dist (float): Spacing used when ``fixed_num < 0``.
+            Default: 1.
+        normalize (bool): Divide the first two coordinates by
+            ``(patch_size[1], patch_size[0])``. Default: False.
+        patch_size (sequence | None): Required when ``normalize`` is True.
+        padding (bool): For fixed-spacing sampling, zero-pad or truncate
+            the result to ``num_samples`` rows. Default: False.
+        num_samples (int): Row count of the padded output. Default: 250.
+    Returns:
+        tuple: ``(sampled_points, num_valid)``. Without padding (or with
+            ``fixed_num > 0``) all sampled coordinates are returned; on
+            the padding path only the first two columns are returned.
+            ``num_valid`` is the number of real (non-padding) rows.
+    """
+    if fixed_num < 0:
+        distances = np.arange(0, line.length, sample_dist)
+    else:
+        # fixed number of points, so the spacing is derived from the length
+        distances = np.linspace(0, line.length, fixed_num)
+    num_coords = 3 if line.has_z else 2
+    sampled_points = np.array(
+        [list(line.interpolate(distance).coords) for distance in distances]
+    ).reshape(-1, num_coords)
+    if normalize:
+        # Normalise exactly once; the previous code divided a second time
+        # on the padding path, shrinking the coordinates twice.
+        sampled_points[:, :2] = sampled_points[:, :2] / np.array(
+            [patch_size[1], patch_size[0]])
+    num_valid = len(sampled_points)
+    if not padding or fixed_num > 0:
+        # fixed num sample can return now!
+        return sampled_points, num_valid
+    # fixed distance sampling needs padding / truncation to num_samples
+    if fixed_num < 0:
+        if num_valid < num_samples:
+            # Separate name so the boolean ``padding`` flag is not shadowed.
+            pad_rows = np.zeros(
+                (num_samples - num_valid, sampled_points.shape[-1]))
+            sampled_points = np.concatenate([sampled_points, pad_rows], axis=0)
+        else:
+            sampled_points = sampled_points[:num_samples, :]
+            num_valid = num_samples
+    # num_valid keeps counting real points only; it is no longer
+    # overwritten with the padded length.
+    return sampled_points[:, :2], num_valid
\ No newline at end of file
--- a/docker-hub/MapTRv2/MapTR/projects/mmdet3d_plugin/datasets/builder.py
+++ b/docker-hub/MapTRv2/MapTR/projects/mmdet3d_plugin/datasets/builder.py
+# Copyright (c) OpenMMLab. All rights reserved.
+import copy
+import platform
+import random
+from functools import partial
+import numpy as np
+from mmcv.parallel import collate
+from mmcv.runner import get_dist_info
+from mmcv.utils import Registry, build_from_cfg
+from torch.utils.data import DataLoader
+from mmdet.datasets.samplers import GroupSampler
+from projects.mmdet3d_plugin.datasets.samplers.group_sampler import DistributedGroupSampler
+from projects.mmdet3d_plugin.datasets.samplers.distributed_sampler import DistributedSampler
+from projects.mmdet3d_plugin.datasets.samplers.sampler import build_sampler
+def build_dataloader(dataset,
+                     samples_per_gpu,
+                     workers_per_gpu,
+                     num_gpus=1,
+                     dist=True,
+                     shuffle=True,
+                     seed=None,
+                     shuffler_sampler=None,
+                     nonshuffler_sampler=None,
+                     **kwargs):
+    """Create a PyTorch ``DataLoader`` over ``dataset``.
+    With ``dist=True`` the sampler is created through ``build_sampler`` and
+    batch size / worker count are taken per GPU. Otherwise a warning is
+    printed and both values are multiplied by ``num_gpus``.
+    Args:
+        dataset (Dataset): A PyTorch dataset.
+        samples_per_gpu (int): Batch size of each GPU.
+        workers_per_gpu (int): Data-loading subprocesses per GPU.
+        num_gpus (int): Number of GPUs. Only used when ``dist`` is False.
+        dist (bool): Distributed training/test or not. Default: True.
+        shuffle (bool): Selects the shuffling or non-shuffling sampler.
+            Default: True.
+        seed (int | None): Forwarded to the distributed sampler and, when
+            not None, used to seed each worker. Default: None.
+        shuffler_sampler (dict | None): Sampler config used when
+            ``dist and shuffle``; falls back to
+            ``dict(type='DistributedGroupSampler')``.
+        nonshuffler_sampler (dict | None): Sampler config used when
+            ``dist and not shuffle``; falls back to
+            ``dict(type='DistributedSampler')``.
+        kwargs: Extra keyword arguments passed to ``DataLoader``.
+    Returns:
+        DataLoader: A PyTorch dataloader.
+    """
+    rank, world_size = get_dist_info()
+    if dist:
+        if shuffle:
+            # Default group sampler shuffles so that images on each GPU
+            # come from the same group.
+            sampler_cfg = shuffler_sampler
+            if sampler_cfg is None:
+                sampler_cfg = dict(type='DistributedGroupSampler')
+            sampler_args = dict(
+                dataset=dataset,
+                samples_per_gpu=samples_per_gpu,
+                num_replicas=world_size,
+                rank=rank,
+                seed=seed)
+        else:
+            sampler_cfg = nonshuffler_sampler
+            if sampler_cfg is None:
+                sampler_cfg = dict(type='DistributedSampler')
+            sampler_args = dict(
+                dataset=dataset,
+                num_replicas=world_size,
+                rank=rank,
+                shuffle=shuffle,
+                seed=seed)
+        sampler = build_sampler(sampler_cfg, sampler_args)
+        batch_size, num_workers = samples_per_gpu, workers_per_gpu
+    else:
+        print('WARNING!!!!, Only can be used for obtain inference speed!!!!')
+        sampler = None
+        if shuffle:
+            sampler = GroupSampler(dataset, samples_per_gpu)
+        batch_size = num_gpus * samples_per_gpu
+        num_workers = num_gpus * workers_per_gpu
+    # Workers are only seeded when the caller supplied a seed.
+    init_fn = None
+    if seed is not None:
+        init_fn = partial(
+            worker_init_fn, num_workers=num_workers, rank=rank, seed=seed)
+    return DataLoader(
+        dataset,
+        batch_size=batch_size,
+        sampler=sampler,
+        num_workers=num_workers,
+        collate_fn=partial(collate, samples_per_gpu=samples_per_gpu),
+        pin_memory=True,
+        worker_init_fn=init_fn,
+        **kwargs)
+def worker_init_fn(worker_id, num_workers, rank, seed):
+    """Seed NumPy and ``random`` for one dataloader worker.
+    The seed used is ``num_workers * rank + worker_id + seed``.
+    """
+    derived_seed = seed + worker_id + rank * num_workers
+    for reseed in (np.random.seed, random.seed):
+        reseed(derived_seed)
+# Copyright (c) OpenMMLab. All rights reserved.
+import platform
+from mmcv.utils import Registry, build_from_cfg
+from mmdet.datasets import DATASETS
+from mmdet.datasets.builder import _concat_dataset
+if platform.system() != 'Windows':
+    # Lift the soft open-file limit to at least 4096 without exceeding the
+    # hard limit; see https://github.com/pytorch/pytorch/issues/973
+    import resource
+    rlimit = resource.getrlimit(resource.RLIMIT_NOFILE)
+    base_soft_limit, hard_limit = rlimit
+    soft_limit = max(4096, base_soft_limit)
+    if hard_limit < soft_limit:
+        soft_limit = hard_limit
+    resource.setrlimit(resource.RLIMIT_NOFILE, (soft_limit, hard_limit))
+# Registry instance created under the name 'Object sampler'.
+OBJECTSAMPLERS = Registry('Object sampler')
+def custom_build_dataset(cfg, default_args=None):
+    """Build a dataset, recursing through wrapper-dataset configs.
+    Args:
+        cfg (dict | list | tuple): Dataset config. A list/tuple is built
+            element-wise and concatenated. A dict whose 'type' is
+            'ConcatDataset', 'RepeatDataset', 'ClassBalancedDataset' or
+            'CBGSDataset' has its inner dataset(s) built recursively and
+            wrapped accordingly.
+        default_args (dict | None): Passed on to every nested build.
+    Returns:
+        The constructed dataset (or dataset wrapper).
+    """
+    from mmdet3d.datasets.dataset_wrappers import CBGSDataset
+    from mmdet.datasets.dataset_wrappers import (ClassBalancedDataset,
+                                                 ConcatDataset, RepeatDataset)
+    def _build(sub_cfg):
+        # Recurse with the same default arguments.
+        return custom_build_dataset(sub_cfg, default_args)
+    if isinstance(cfg, (list, tuple)):
+        return ConcatDataset([_build(item) for item in cfg])
+    wrapper_type = cfg['type']
+    if wrapper_type == 'ConcatDataset':
+        return ConcatDataset(
+            [_build(item) for item in cfg['datasets']],
+            cfg.get('separate_eval', True))
+    if wrapper_type == 'RepeatDataset':
+        return RepeatDataset(_build(cfg['dataset']), cfg['times'])
+    if wrapper_type == 'ClassBalancedDataset':
+        return ClassBalancedDataset(
+            _build(cfg['dataset']), cfg['oversample_thr'])
+    if wrapper_type == 'CBGSDataset':
+        return CBGSDataset(_build(cfg['dataset']))
+    if isinstance(cfg.get('ann_file'), (list, tuple)):
+        # Several annotation files are delegated to mmdet's helper.
+        return _concat_dataset(cfg, default_args)
+    return build_from_cfg(cfg, DATASETS, default_args)
--- a/docker-hub/MapTRv2/MapTR/projects/mmdet3d_plugin/datasets/map_utils/__init__.py
+++ b/docker-hub/MapTRv2/MapTR/projects/mmdet3d_plugin/datasets/map_utils/__init__.py
+# from .CD_loss import MyChamferDistance
\ No newline at end of file