Commit f3b13cad authored by yeshenglong1

Update README.md

parent 0797920d
import copy
import torch
import torch.nn as nn
from mmcv.cnn import Linear, bias_init_with_prob, build_activation_layer
from mmcv.cnn.bricks.transformer import build_positional_encoding
from mmcv.runner import force_fp32
from mmdet.models import HEADS, build_head, build_loss
from mmdet.models.utils import build_transformer
from mmdet.models.utils.transformer import inverse_sigmoid
from .base_map_head import BaseMapHead
import numpy as np
from ..augmentation.sythesis_det import NoiseSythesis


@HEADS.register_module(force=True)
class DGHead(BaseMapHead):

    def __init__(self,
                 det_net_cfg=dict(),
                 gen_net_cfg=dict(),
                 loss_vert=dict(),
                 loss_face=dict(),
                 max_num_vertices=90,
                 top_p_gen_model=0.9,
                 sync_cls_avg_factor=True,
                 augmentation=False,
                 augmentation_kwargs=None,
                 joint_training=False,
                 **kwargs):
        super().__init__()
        # Heads
        self.det_net = build_head(det_net_cfg)
        self.gen_net = build_head(gen_net_cfg)
        self.coord_dim = self.gen_net.coord_dim
        # Loss params
        self.bg_cls_weight = 1.0
        self.sync_cls_avg_factor = sync_cls_avg_factor
        self.max_num_vertices = max_num_vertices
        self.top_p_gen_model = top_p_gen_model
        self.fp16_enabled = False
        self.augmentation = None
        if augmentation:
            augmentation_kwargs.update({'canvas_size': gen_net_cfg.canvas_size})
            self.augmentation = NoiseSythesis(**augmentation_kwargs)
        self.joint_training = joint_training

    def forward(self, batch, img_metas=None, **kwargs):
        '''
        Args:
        Returns:
            outs (Dict):
        '''
        if self.training:
            return self.forward_train(batch, **kwargs)
        else:
            return self.inference(batch, **kwargs)

    def forward_train(self, batch: dict, context: dict, only_det=False, **kwargs):
        '''We use a teacher-forcing strategy.'''
        bbox_dict = self.det_net(context=context)
        outs = dict(
            bbox=bbox_dict,
        )
        losses_dict, det_match_idxs, det_match_gt_idxs = \
            self.loss_det(batch, outs)
        if only_det:
            return outs, losses_dict
        if self.augmentation is not None:
            polylines, bbox_flat = \
                self.augmentation(batch['gen'], simple_aug=True)
            if bbox_flat is None:
                bbox_flat = batch['gen']['bbox_flat']
            gen_input = dict(
                lines_bs_idx=batch['gen']['lines_bs_idx'],
                lines_cls=batch['gen']['lines_cls'],
                bbox_flat=bbox_flat,
                polylines=polylines,
                polyline_masks=batch['gen']['polyline_masks'],
            )
        else:
            gen_input = batch['gen']
        if self.joint_training:
            # for downstream polyline
            if 'lines' in bbox_dict[-1]:
                # for fixed anchor
                pred_bbox = bbox_dict[-1]['lines'].detach()
            elif 'bboxs' in bbox_dict[-1]:
                # for rpv
                pred_bbox = bbox_dict[-1]['bboxs'].detach()
            else:
                raise NotImplementedError
            # change back to the original gt order
            det_match_idx = det_match_idxs[-1]
            det_match_gt_idx = det_match_gt_idxs[-1]
            _bboxs = []
            for i, (match_idx, bbox) in enumerate(zip(det_match_idx, pred_bbox)):
                _bboxs.append(bbox[match_idx])
                _bboxs[-1] = _bboxs[-1][torch.argsort(det_match_gt_idx[i])]
            _bboxs = torch.cat(_bboxs, dim=0)
            # quantize the data
            _bboxs = \
                torch.round(_bboxs).type(torch.int32)
            # gen_input['bbox_flat'] = _bboxs
            remain_idx = torch.randperm(_bboxs.shape[0])[:int(_bboxs.shape[0] * 0.2)]
            # for data efficiency
            for k in gen_input.keys():
                if k == 'bbox_flat':
                    gen_input[k] = torch.cat((_bboxs, gen_input[k][remain_idx]), dim=0)
                else:
                    gen_input[k] = torch.cat((gen_input[k], gen_input[k][remain_idx]), dim=0)
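            # Note (added for clarity): under joint training the generator is
            # conditioned on the detector's matched, re-quantized boxes, while
            # remain_idx also replays roughly 20% of the ground-truth-conditioned
            # rows; every tensor in gen_input (and, below, in batch['gen'])
            # grows by the same appended rows, so indices stay aligned.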
        if isinstance(context['bev_embeddings'], tuple):
            context['bev_embeddings'] = context['bev_embeddings'][0]
        poly_dict = self.gen_net(gen_input, context=context)
        outs.update(dict(
            polylines=poly_dict,
        ))
        if self.joint_training:
            for k in batch['gen'].keys():
                batch['gen'][k] = \
                    torch.cat((batch['gen'][k], batch['gen'][k][remain_idx]), dim=0)
        gen_losses_dict = \
            self.loss_gen(batch, outs)
        losses_dict.update(gen_losses_dict)
        return outs, losses_dict

    def loss_det(self, gt: dict, pred: dict):
        loss_dict = {}
        # det
        det_loss_dict, det_match_idx, det_match_gt_idx = \
            self.det_net.loss(gt['det'], pred['bbox'])
        for k, v in det_loss_dict.items():
            loss_dict['det_' + k] = v
        return loss_dict, det_match_idx, det_match_gt_idx

    def loss_gen(self, gt: dict, pred: dict):
        loss_dict = {}
        # gen
        gen_loss_dict = self.gen_net.loss(gt['gen'], pred['polylines'])
        for k, v in gen_loss_dict.items():
            loss_dict['gen_' + k] = v
        return loss_dict

    def loss(self, gt: dict, pred: dict):
        pass

    @torch.no_grad()
    def inference(self, batch: dict = {}, context: dict = {}, gt_condition=False, **kwargs):
        '''
        num_samples_batch: number of samples per batch (batch size)
        '''
        outs = {}
        bbox_dict = self.det_net(context=context)
        bbox_dict = self.det_net.post_process(bbox_dict)
        outs.update(bbox_dict)
        if len(outs['lines_bs_idx']) == 0:
            return None
        if isinstance(context['bev_embeddings'], tuple):
            context['bev_embeddings'] = context['bev_embeddings'][0]
        poly_dict = self.gen_net(outs,
                                 context=context,
                                 # max_sample_length=self.max_num_vertices,
                                 max_sample_length=64,
                                 top_p=self.top_p_gen_model,
                                 gt_condition=gt_condition)
        outs.update(poly_dict)
        return outs

    def post_process(self, preds: dict, tokens, gts: dict = None, **kwargs):
        '''
        Args:
            XXX
        Outs:
            XXX
        '''
        range_size = self.gen_net.canvas_size.cpu().numpy()
        coord_dim = self.gen_net.coord_dim
        gen_net_name = self.gen_net.name if hasattr(self.gen_net, 'name') else 'gen'
        ret_list = []
        for batch_idx in range(len(tokens)):
            ret_dict_single = {}
            # bbox
            det_gt = None
            if gts is not None:
                det_gt, rec_groundtruth = pack_groundtruth(
                    batch_idx, gts, tokens, range_size, gen_net_name,
                    coord_dim=coord_dim)
            bbox_res = {
                # 'bboxes': preds['bbox'][batch_idx].detach().cpu().numpy(),
                # 'det_gt': det_gt,
                'token': tokens[batch_idx],
                'scores': preds['scores'][batch_idx].detach().cpu().numpy(),
                'labels': preds['labels'][batch_idx].detach().cpu().numpy(),
            }
            ret_dict_single.update(bbox_res)
            # for gen results
            batch2seq = np.nonzero(
                preds['lines_bs_idx'].cpu().numpy() == batch_idx)[0]
            ret_dict_single.update({
                'nline': len(batch2seq),
                'vectors': []
            })
            for i in batch2seq:
                pre = preds['polylines'][i].detach().cpu().numpy()
                pre_msk = preds['polyline_masks'][i].detach().cpu().numpy()
                valid_idx = np.nonzero(pre_msk)[0][:-1]
                # From [200, 1] to [199, 0] to (1, 0)
                line = (pre[valid_idx].reshape(-1, coord_dim) - 1) / (range_size - 1)
                ret_dict_single['vectors'].append(line)
            # if gts is not None:
            #     ret_dict_single['groundTruth'] = rec_groundtruth
            ret_list.append(ret_dict_single)
        return ret_list


def pack_groundtruth(batch_idx, gts, tokens, range_size, gen_net_name='gen', coord_dim=2):
    if 'keypoints' in gts['det']:
        gt_bbox = \
            gts['det']['keypoints'][batch_idx].detach().cpu().numpy()
    else:
        gt_bbox = \
            gts['det']['bbox'][batch_idx].detach().cpu().numpy()
    det_gt = {
        'labels': gts['det']['class_label'][batch_idx].detach().cpu().numpy(),
        'bboxes': gt_bbox,
    }
    batch2seq = np.nonzero(
        gts['gen']['lines_bs_idx'].cpu().numpy() == batch_idx)[0]
    ret_groundtruth = {
        'token': tokens[batch_idx],
        'nline': len(batch2seq),
        'labels': gts['gen']['lines_cls'][batch2seq].detach().cpu().numpy(),
        'lines': [],
    }
    for i in batch2seq:
        gt_line = \
            gts['gen']['polylines'].detach().cpu().numpy()[i]
        gt_msk = gts['gen']['polyline_masks'].detach().cpu().numpy()[i]
        if gen_net_name == 'gen_gmm':
            valid_idx = np.nonzero(gt_msk)[0]
        else:
            valid_idx = np.nonzero(gt_msk)[0][:-1]
        # From [200, 1] to [199, 0] to (1, 0)
        line = (gt_line[valid_idx].reshape(-1, coord_dim) - 1) / (range_size - 1)
        ret_groundtruth['lines'].append(line)
    return det_gt, ret_groundtruth
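

# Taken together, DGHead is a two-stage pipeline: det_net proposes map-element
# boxes/keypoints from BEV features, and gen_net autoregressively decodes a
# polyline per kept detection. A minimal usage sketch (illustrative only:
# `head` is assumed to be a built DGHead and `batch` a collated training
# sample; the tensor shape below is made up):
#
#     context = dict(bev_embeddings=torch.randn(2, 128, 50, 25))  # (B, C, H, W)
#     head.train()
#     outs, losses = head(batch, context=context)   # -> forward_train
#     head.eval()
#     results = head(batch={}, context=context)     # -> inference; returns None
#                                                   #    if no line is kept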


import copy
import torch
import torch.nn as nn
import torch.nn.functional as F
from mmcv.cnn import Conv2d, Linear
from mmcv.runner import force_fp32
from torch.distributions.categorical import Categorical
from mmdet.core import (multi_apply, build_assigner, build_sampler,
                        reduce_mean)
from mmdet.models import HEADS
from .detr_bbox import DETRBboxHead
from mmdet.models.utils.transformer import inverse_sigmoid
from mmdet.models import build_loss
from mmcv.cnn import Linear, build_activation_layer, bias_init_with_prob
from mmcv.cnn.bricks.transformer import build_positional_encoding
from mmdet.models.utils import build_transformer


@HEADS.register_module(force=True)
class MapElementDetector(nn.Module):

    def __init__(self,
                 canvas_size=(400, 200),
                 discrete_output=False,
                 separate_detect=False,
                 mode='xyxy',
                 bbox_size=None,
                 coord_dim=2,
                 kp_coord_dim=2,
                 num_classes=3,
                 in_channels=128,
                 num_query=100,
                 max_lines=50,
                 score_thre=0.2,
                 num_reg_fcs=2,
                 num_points=100,
                 iterative=False,
                 patch_size=None,
                 sync_cls_avg_factor=True,
                 transformer: dict = None,
                 positional_encoding: dict = None,
                 loss_cls: dict = None,
                 loss_reg: dict = None,
                 train_cfg: dict = None):
        super().__init__()
        assigner = train_cfg['assigner']
        self.assigner = build_assigner(assigner)
        # DETR sampling=False, so use PseudoSampler
        sampler_cfg = dict(type='PseudoSampler')
        self.sampler = build_sampler(sampler_cfg, context=self)
        self.train_cfg = train_cfg
        self.max_lines = max_lines
        self.score_thre = score_thre
        self.num_query = num_query
        self.in_channels = in_channels
        self.num_classes = num_classes
        self.num_points = num_points
        # branch
        # if loss_cls.use_sigmoid:
        if loss_cls['use_sigmoid']:
            self.cls_out_channels = num_classes
        else:
            self.cls_out_channels = num_classes + 1
        self.iterative = iterative
        self.num_reg_fcs = num_reg_fcs
        self._build_transformer(transformer, positional_encoding)
        # loss params
        self.loss_cls = build_loss(loss_cls)
        self.bg_cls_weight = 0.1
        if self.loss_cls.use_sigmoid:
            self.bg_cls_weight = 0.0
        self.sync_cls_avg_factor = sync_cls_avg_factor
        self.reg_loss = build_loss(loss_reg)
        self.separate_detect = separate_detect
        self.discrete_output = discrete_output
        self.bbox_size = 3 if mode == 'sce' else 2
        if bbox_size is not None:
            self.bbox_size = bbox_size
        self.coord_dim = coord_dim  # for xyz
        self.kp_coord_dim = kp_coord_dim
        self.register_buffer('canvas_size', torch.tensor(canvas_size))
        # add reg and cls heads for each decoder layer
        self._init_layers()
        self._init_branch()
        self.init_weights()
        self._init_embedding()

    def _init_layers(self):
        """Initialize some layers."""
        self.input_proj = Conv2d(
            self.in_channels, self.embed_dims, kernel_size=1)
        # query_pos_embed & query_embed
        self.query_embedding = nn.Embedding(self.num_query,
                                            self.embed_dims)

    def _init_embedding(self):
        self.label_embed = nn.Embedding(
            self.num_classes, self.embed_dims)
        self.img_coord_embed = nn.Linear(2, self.embed_dims)
        # query_pos_embed & query_embed
        self.query_embedding = nn.Embedding(self.num_query,
                                            self.embed_dims * 2)
        # for bbox parameters xstart, ystart, xend, yend
        self.bbox_embedding = nn.Embedding(self.bbox_size,
                                           self.embed_dims * 2)

    def _init_branch(self):
        """Initialize classification branch and regression branch of head."""
        fc_cls = Linear(self.embed_dims * self.bbox_size, self.cls_out_channels)
        # fc_cls = Linear(self.embed_dims, self.cls_out_channels)
        reg_branch = []
        for _ in range(self.num_reg_fcs):
            reg_branch.append(Linear(self.embed_dims, self.embed_dims))
            reg_branch.append(nn.LayerNorm(self.embed_dims))
            reg_branch.append(nn.ReLU())
        if self.discrete_output:
            reg_branch.append(nn.Linear(
                self.embed_dims, max(self.canvas_size), bias=True))
        else:
            reg_branch.append(nn.Linear(
                self.embed_dims, self.coord_dim, bias=True))
        reg_branch = nn.Sequential(*reg_branch)
        # add sigmoid or not

        def _get_clones(module, N):
            return nn.ModuleList([copy.deepcopy(module) for _ in range(N)])

        num_pred = self.transformer.decoder.num_layers
        if self.iterative:
            fc_cls = _get_clones(fc_cls, num_pred)
            reg_branch = _get_clones(reg_branch, num_pred)
        else:
            reg_branch = nn.ModuleList(
                [reg_branch for _ in range(num_pred)])
            fc_cls = nn.ModuleList(
                [fc_cls for _ in range(num_pred)])
        self.pre_branches = nn.ModuleDict([
            ('cls', fc_cls),
            ('reg', reg_branch),
        ])

    def init_weights(self):
        """Initialize weights of the DeformDETR head."""
        for p in self.input_proj.parameters():
            if p.dim() > 1:
                nn.init.xavier_uniform_(p)
        self.transformer.init_weights()
        # init prediction branches
        for k, v in self.pre_branches.items():
            for param in v.parameters():
                if param.dim() > 1:
                    nn.init.xavier_uniform_(param)
        # focal loss init
        if self.loss_cls.use_sigmoid:
            bias_init = bias_init_with_prob(0.01)
            # for the last layer
            if isinstance(self.pre_branches['cls'], nn.ModuleList):
                for m in self.pre_branches['cls']:
                    nn.init.constant_(m.bias, bias_init)
            else:
                m = self.pre_branches['cls']
                nn.init.constant_(m.bias, bias_init)

    def _build_transformer(self, transformer, positional_encoding):
        # transformer
        self.act_cfg = transformer.get('act_cfg',
                                       dict(type='ReLU', inplace=True))
        self.activate = build_activation_layer(self.act_cfg)
        self.positional_encoding = build_positional_encoding(
            positional_encoding)
        self.transformer = build_transformer(transformer)
        self.embed_dims = self.transformer.embed_dims

    def _prepare_context(self, context):
        """Prepare class label and vertex context."""
        global_context_embedding = None
        image_embeddings = context['bev_embeddings']
        image_embeddings = self.input_proj(
            image_embeddings)  # only changes the feature size
        # Pass images through the encoder
        device = image_embeddings.device
        # Add a 2D coordinate-grid embedding
        B, C, H, W = image_embeddings.shape
        Ws = torch.linspace(-1., 1., W)
        Hs = torch.linspace(-1., 1., H)
        image_coords = torch.stack(
            torch.meshgrid(Hs, Ws), dim=-1).to(device)
        image_coord_embeddings = self.img_coord_embed(image_coords)
        image_embeddings += image_coord_embeddings[None].permute(0, 3, 1, 2)
        # Keep the spatial (B, C, H, W) layout for the transformer input
        sequential_context_embeddings = image_embeddings.reshape(
            B, C, H, W)
        return (global_context_embedding, sequential_context_embeddings)
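    # Shape walk-through of the grid embedding above (illustrative numbers):
    # with H=2, W=3, torch.meshgrid(Hs, Ws) yields two (2, 3) grids, stacked
    # into image_coords of shape (2, 3, 2); img_coord_embed maps the last dim
    # to C, and permute(0, 3, 1, 2) gives (1, C, 2, 3), which broadcast-adds
    # onto the (B, C, H, W) feature map.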

    def forward(self, context, img_metas=None, multi_scale=False):
        '''
        Args:
            bev_feature (List[Tensor]): shape [B, C, H, W]
                feature in bev view
            img_metas
        Outs:
            preds_dict (Dict):
                all_cls_scores (Tensor): Classification score of all
                    decoder layers, has shape
                    [nb_dec, bs, num_query, cls_out_channels].
                all_lines_preds (Tensor):
                    [nb_dec, bs, num_query, num_points, 2].
        '''
        (global_context_embedding, sequential_context_embeddings) = \
            self._prepare_context(context)
        x = sequential_context_embeddings
        B, C, H, W = x.shape
        query_embedding = self.query_embedding.weight[None, :, None].repeat(
            B, 1, self.bbox_size, 1)
        bbox_embed = self.bbox_embedding.weight
        query_embedding = query_embedding + bbox_embed[None, None]
        query_embedding = query_embedding.view(B, -1, C * 2)
        img_masks = x.new_zeros((B, H, W))
        pos_embed = self.positional_encoding(img_masks)
        # outs_dec: [nb_dec, bs, num_query, embed_dim]
        hs, init_reference, inter_references = self.transformer(
            [x, ],
            [img_masks.type(torch.bool)],
            query_embedding,
            [pos_embed],
            reg_branches=self.reg_branches if self.iterative else None,  # noqa:E501
            cls_branches=None,  # noqa:E501
        )
        outs_dec = hs.permute(0, 2, 1, 3)
        outputs = []
        for i, query_feat in enumerate(outs_dec):
            if i == 0:
                reference = init_reference
            else:
                reference = inter_references[i - 1]
            outputs.append(self.get_prediction(i, query_feat, reference))
        return outputs

    def get_prediction(self, level, query_feat, reference):
        bs, num_query, h = query_feat.shape
        query_feat = query_feat.view(bs, -1, self.bbox_size, h)
        ocls = self.pre_branches['cls'][level](query_feat.flatten(-2))
        # ocls = ocls.mean(-2)
        reference = inverse_sigmoid(reference)
        reference = reference.view(bs, -1, self.bbox_size, self.coord_dim)
        tmp = self.pre_branches['reg'][level](query_feat)
        tmp[..., :self.kp_coord_dim] = \
            tmp[..., :self.kp_coord_dim] + reference[..., :self.kp_coord_dim]
        lines = tmp.sigmoid()  # bs, num_query, self.bbox_size, 2
        lines = lines * self.canvas_size[:self.coord_dim]
        lines = lines.flatten(-2)
        return dict(
            lines=lines,  # [bs, num_query, bbox_size*2]
            scores=ocls,  # [bs, num_query, num_class]
            embeddings=query_feat,  # [bs, num_query, bbox_size, h]
        )
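    # Note (added for clarity): each bbox corner is its own decoder token, so
    # the transformer runs with num_query * bbox_size queries; get_prediction
    # folds the sequence back to (bs, num_query, bbox_size, h) and classifies
    # each box from its flattened corner features.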

    @force_fp32(apply_to=('score_pred', 'lines_pred', 'gt_lines'))
    def _get_target_single(self,
                           score_pred,
                           lines_pred,
                           gt_labels,
                           gt_lines,
                           gt_bboxes_ignore=None):
        """
        Compute regression and classification targets for one image.
        Outputs from a single decoder layer of a single feature level are used.
        Args:
            score_pred (Tensor): Box score logits from a single decoder layer
                for one image. Shape [num_query, cls_out_channels].
            lines_pred (Tensor):
                shape [num_query, num_points, 2].
            gt_lines (Tensor):
                shape [num_gt, num_points, 2].
            gt_labels (torch.LongTensor):
                shape [num_gt, ].
        Returns:
            tuple[Tensor]: a tuple containing the following for one image.
                - labels (LongTensor): Labels of each image.
                    shape [num_query, 1]
                - label_weights (Tensor): Label weights of each image.
                    shape [num_query, 1]
                - lines_target (Tensor): Lines targets of each image.
                    shape [num_query, num_points, 2]
                - lines_weights (Tensor): Lines weights of each image.
                    shape [num_query, num_points, 2]
                - pos_inds (Tensor): Sampled positive indices for each image.
                - neg_inds (Tensor): Sampled negative indices for each image.
        """
        num_pred_lines = len(lines_pred)
        # assigner and sampler
        assign_result = self.assigner.assign(preds=dict(lines=lines_pred,
                                                        scores=score_pred),
                                             gts=dict(lines=gt_lines,
                                                      labels=gt_labels),
                                             gt_bboxes_ignore=gt_bboxes_ignore)
        sampling_result = self.sampler.sample(
            assign_result, lines_pred, gt_lines)
        pos_inds = sampling_result.pos_inds
        neg_inds = sampling_result.neg_inds
        pos_gt_inds = sampling_result.pos_assigned_gt_inds
        # label targets; 0: foreground, 1: background
        if self.separate_detect:
            labels = gt_lines.new_full((num_pred_lines, ), 1, dtype=torch.long)
        else:
            labels = gt_lines.new_full(
                (num_pred_lines, ), self.num_classes, dtype=torch.long)
        labels[pos_inds] = gt_labels[sampling_result.pos_assigned_gt_inds]
        label_weights = gt_lines.new_ones(num_pred_lines)
        # bbox targets: lines_pred's last dimension is the vocabulary,
        # and the ground truth does not have this dimension.
        if self.discrete_output:
            lines_target = torch.zeros_like(lines_pred[..., 0]).long()
            lines_weights = torch.zeros_like(lines_pred[..., 0])
        else:
            lines_target = torch.zeros_like(lines_pred)
            lines_weights = torch.zeros_like(lines_pred)
        lines_target[pos_inds] = sampling_result.pos_gt_bboxes.type(
            lines_target.dtype)
        lines_weights[pos_inds] = 1.0
        n = lines_weights.sum(-1, keepdim=True)
        lines_weights = lines_weights / n.masked_fill(n == 0, 1)
        return (labels, label_weights, lines_target, lines_weights,
                pos_inds, neg_inds, pos_gt_inds)

    # @force_fp32(apply_to=('preds', 'gts'))
    def get_targets(self, preds, gts, gt_bboxes_ignore_list=None):
        """
        Compute regression and classification targets for a batch of images.
        Outputs from a single decoder layer of a single feature level are used.
        Args:
            cls_scores_list (list[Tensor]): Box score logits from a single
                decoder layer for each image with shape [num_query,
                cls_out_channels].
            lines_preds_list (list[Tensor]): [num_query, num_points, 2].
            gt_lines_list (list[Tensor]): Ground truth lines for each image
                with shape (num_gts, num_points, 2).
            gt_labels_list (list[Tensor]): Ground truth class indices for each
                image with shape (num_gts, ).
            gt_bboxes_ignore_list (list[Tensor], optional): Bounding
                boxes which can be ignored for each image. Default None.
        Returns:
            tuple: a tuple containing the following targets.
                - labels_list (list[Tensor]): Labels for all images.
                - label_weights_list (list[Tensor]): Label weights for all
                  images.
                - lines_targets_list (list[Tensor]): Lines targets for all
                  images.
                - lines_weights_list (list[Tensor]): Lines weights for all
                  images.
                - num_total_pos (int): Number of positive samples in all
                  images.
                - num_total_neg (int): Number of negative samples in all
                  images.
        """
        assert gt_bboxes_ignore_list is None, \
            'Only supports gt_bboxes_ignore set to None.'
        # format the inputs
        if self.separate_detect:
            bbox = [b[m] for b, m in zip(gts['bbox'], gts['bbox_mask'])]
            class_label = torch.zeros_like(gts['bbox_mask']).long()
            class_label = [b[m] for b, m in zip(class_label, gts['bbox_mask'])]
        else:
            class_label = gts['class_label']
            bbox = gts['bbox']
        if self.discrete_output:
            lines_pred = preds['lines'].logits
        else:
            lines_pred = preds['lines']
        bbox = [b.float() for b in bbox]
        (labels_list, label_weights_list,
         lines_targets_list, lines_weights_list,
         pos_inds_list, neg_inds_list, pos_gt_inds_list) = multi_apply(
             self._get_target_single,
             preds['scores'], lines_pred,
             class_label, bbox,
             gt_bboxes_ignore=gt_bboxes_ignore_list)
        num_total_pos = sum(inds.numel() for inds in pos_inds_list)
        num_total_neg = sum(inds.numel() for inds in neg_inds_list)
        new_gts = dict(
            labels=labels_list,
            label_weights=label_weights_list,
            bboxs=lines_targets_list,
            bboxs_weights=lines_weights_list,
        )
        return new_gts, num_total_pos, num_total_neg, pos_inds_list, pos_gt_inds_list

    # @force_fp32(apply_to=('preds', 'gts'))
    def loss_single(self,
                    preds: dict,
                    gts: dict,
                    gt_bboxes_ignore_list=None,
                    reduction='none'):
        """
        Loss function for outputs from a single decoder layer of a single
        feature level.
        Args:
            cls_scores (Tensor): Box score logits from a single decoder layer
                for all images. Shape [bs, num_query, cls_out_channels].
            lines_preds (Tensor):
                shape [bs, num_query, num_points, 2].
            gt_lines_list (list[Tensor]):
                with shape (num_gts, num_points, 2).
            gt_labels_list (list[Tensor]): Ground truth class indices for each
                image with shape (num_gts, ).
            gt_bboxes_ignore_list (list[Tensor], optional): Bounding
                boxes which can be ignored for each image. Default None.
        Returns:
            dict[str, Tensor]: A dictionary of loss components for outputs from
                a single decoder layer.
        """
        # Get targets for each sample
        new_gts, num_total_pos, num_total_neg, pos_inds_list, pos_gt_inds_list = \
            self.get_targets(preds, gts, gt_bboxes_ignore_list)
        # Batch all the data
        for k, v in new_gts.items():
            new_gts[k] = torch.stack(v, dim=0)
        # construct weighted avg_factor to match the official DETR repo
        cls_avg_factor = num_total_pos * 1.0 + \
            num_total_neg * self.bg_cls_weight
        if self.sync_cls_avg_factor:
            cls_avg_factor = reduce_mean(
                preds['scores'].new_tensor([cls_avg_factor]))
        cls_avg_factor = max(cls_avg_factor, 1)
        # Classification loss
        if self.separate_detect:
            loss_cls = self.bce_loss(
                preds['scores'], new_gts['labels'], new_gts['label_weights'],
                cls_avg_factor)
        else:
            # the loss expects the class dim in the second position, so
            # flatten the predictions accordingly.
            cls_scores = preds['scores'].reshape(-1, self.cls_out_channels)
            cls_labels = new_gts['labels'].reshape(-1)
            cls_weights = new_gts['label_weights'].reshape(-1)
            loss_cls = self.loss_cls(
                cls_scores, cls_labels, cls_weights, avg_factor=cls_avg_factor)
        # Compute the average number of gt boxes across all gpus, for
        # normalization purposes
        num_total_pos = loss_cls.new_tensor([num_total_pos])
        num_total_pos = torch.clamp(reduce_mean(num_total_pos), min=1).item()
        # position NLL loss
        if self.discrete_output:
            loss_reg = -(preds['lines'].log_prob(new_gts['bboxs']) *
                         new_gts['bboxs_weights']).sum() / num_total_pos
        else:
            loss_reg = self.reg_loss(
                preds['lines'], new_gts['bboxs'], new_gts['bboxs_weights'],
                avg_factor=num_total_pos)
        loss_dict = dict(
            cls=loss_cls,
            reg=loss_reg,
        )
        return loss_dict, pos_inds_list, pos_gt_inds_list

    @force_fp32(apply_to=('gt_lines_list', 'preds_dicts'))
    def loss(self,
             gts: dict,
             preds_dicts: dict,
             gt_bboxes_ignore=None,
             reduction='mean'):
        """
        Loss function.
        Args:
            gt_lines_list (list[Tensor]): Ground truth lines for each image
                with shape (num_gts, num_points, 2).
            gt_labels_list (list[Tensor]): Ground truth class indices for each
                image with shape (num_gts, ).
            preds_dicts:
                all_cls_scores (Tensor): Classification score of all
                    decoder layers, has shape
                    [nb_dec, bs, num_query, cls_out_channels].
                all_lines_preds (Tensor):
                    [nb_dec, bs, num_query, num_points, 2].
            gt_bboxes_ignore (list[Tensor], optional): Bounding boxes
                which can be ignored for each image. Default None.
        Returns:
            dict[str, Tensor]: A dictionary of loss components.
        """
        assert gt_bboxes_ignore is None, \
            f'{self.__class__.__name__} only supports ' \
            f'gt_bboxes_ignore set to None.'
        # There may be outputs from multiple decoder layers.
        losses, pos_inds_lists, pos_gt_inds_lists = multi_apply(
            self.loss_single,
            preds_dicts,
            gts=gts,
            gt_bboxes_ignore_list=gt_bboxes_ignore,
            reduction=reduction)
        # Format the losses
        loss_dict = dict()
        # loss from the last decoder layer
        for k, v in losses[-1].items():
            loss_dict[k] = v
        # losses from the other decoder layers
        num_dec_layer = 0
        for loss in losses[:-1]:
            for k, v in loss.items():
                loss_dict[f'd{num_dec_layer}.{k}'] = v
            num_dec_layer += 1
        return loss_dict, pos_inds_lists, pos_gt_inds_lists

    def post_process(self, preds_dicts: list, **kwargs):
        '''
        Args:
            preds_dicts:
                scores (Tensor): Classification score of all
                    decoder layers, has shape
                    [nb_dec, bs, num_query, cls_out_channels].
                lines (Tensor):
                    [nb_dec, bs, num_query, bbox parameters(4)].
        Outs:
            ret_list (List[Dict]) with length as bs:
                list of result dicts for each sample in the batch
                XXX
        '''
        preds = preds_dicts[-1]
        batched_cls_scores = preds['scores']
        batched_lines_preds = preds['lines']
        batch_size = batched_cls_scores.size(0)
        device = batched_cls_scores.device
        result_dict = {
            'bbox': [],
            'scores': [],
            'labels': [],
            'bbox_flat': [],
            'lines_cls': [],
            'lines_bs_idx': [],
        }
        for i in range(batch_size):
            cls_scores = batched_cls_scores[i]
            det_preds = batched_lines_preds[i]
            max_num = self.max_lines
            if self.loss_cls.use_sigmoid:
                cls_scores = cls_scores.sigmoid()
                scores, valid_idx = cls_scores.view(-1).topk(max_num)
                det_labels = valid_idx % self.num_classes
                valid_idx = valid_idx // self.num_classes
                det_preds = det_preds[valid_idx]
            else:
                scores, det_labels = F.softmax(cls_scores, dim=-1)[..., :-1].max(-1)
                scores, valid_idx = scores.topk(max_num)
                det_preds = det_preds[valid_idx]
                det_labels = det_labels[valid_idx]
            nline = len(valid_idx)
            result_dict['bbox'].append(det_preds)
            result_dict['scores'].append(scores)
            result_dict['labels'].append(det_labels)
            result_dict['lines_bs_idx'].extend([i] * nline)
        # for downstream polyline
        _bboxs = torch.cat(result_dict['bbox'], dim=0)
        # quantize the data
        result_dict['bbox_flat'] = torch.round(_bboxs).type(torch.int32)
        result_dict['lines_cls'] = torch.cat(
            result_dict['labels'], dim=0).long()
        result_dict['lines_bs_idx'] = torch.tensor(
            result_dict['lines_bs_idx'], device=device).long()
        return result_dict
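

# The sigmoid branch above selects boxes by flattening the
# (num_query, num_classes) score grid; a small worked example of that index
# arithmetic (values made up):
#
#     scores = torch.tensor([[0.1, 0.9, 0.2],    # query 0
#                            [0.8, 0.3, 0.4]])   # query 1
#     top, flat_idx = scores.view(-1).topk(2)    # top=[0.9, 0.8], flat_idx=[1, 3]
#     labels = flat_idx % 3                      # -> [1, 0] (class within query)
#     query_idx = flat_idx // 3                  # -> [0, 1] (owning query)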


import math
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.distributions.categorical import Categorical
from mmdet.models import HEADS
from .detgen_utils.causal_trans import (CausalTransformerDecoder,
                                        CausalTransformerDecoderLayer)
from .detgen_utils.utils import (dequantize_verts, generate_square_subsequent_mask,
                                 quantize_verts, top_k_logits, top_p_logits)
from mmcv.runner import force_fp32, auto_fp16


@HEADS.register_module(force=True)
class PolylineGenerator(nn.Module):
    """
    Autoregressive generative model of n-gon meshes.
    Operates on sets of input vertices as well as flattened face sequences with
    new-face and stopping tokens:
        [f_0^0, f_0^1, f_0^2, NEW, f_1^0, f_1^1, ..., STOP]
    Input vertices are encoded using a Transformer encoder.
    Input face sequences are embedded and tagged with learned position
    indicators, as well as their corresponding vertex embeddings. A transformer
    decoder outputs a pointer which is compared to each vertex embedding to
    obtain a distribution over vertex indices.
    """
def __init__(self, def __init__(self,
in_channels, in_channels,
encoder_config, encoder_config,
decoder_config, decoder_config,
class_conditional=True, class_conditional=True,
num_classes=55, num_classes=55,
decoder_cross_attention=True, decoder_cross_attention=True,
use_discrete_vertex_embeddings=True, use_discrete_vertex_embeddings=True,
condition_points_num=3, condition_points_num=3,
coord_dim=2, coord_dim=2,
canvas_size=(400, 200), canvas_size=(400, 200),
max_seq_length=500, max_seq_length=500,
name='gen_model'): name='gen_model'):
"""Initializes FaceModel. """Initializes FaceModel.
Args: Args:
encoder_config: Dictionary with TransformerEncoder config. encoder_config: Dictionary with TransformerEncoder config.
decoder_config: Dictionary with TransformerDecoder config. decoder_config: Dictionary with TransformerDecoder config.
class_conditional: If True, then condition on learned class embeddings. class_conditional: If True, then condition on learned class embeddings.
num_classes: Number of classes to condition on. num_classes: Number of classes to condition on.
decoder_cross_attention: If True, the use cross attention from decoder decoder_cross_attention: If True, the use cross attention from decoder
querys into encoder outputs. querys into encoder outputs.
use_discrete_vertex_embeddings: If True, use discrete vertex embeddings. use_discrete_vertex_embeddings: If True, use discrete vertex embeddings.
max_seq_length: Maximum face sequence length. Used for learned position max_seq_length: Maximum face sequence length. Used for learned position
embeddings. embeddings.
name: Name of variable scope name: Name of variable scope
""" """
super(PolylineGenerator, self).__init__() super(PolylineGenerator, self).__init__()
self.embedding_dim = decoder_config['layer_config']['d_model'] self.embedding_dim = decoder_config['layer_config']['d_model']
self.class_conditional = class_conditional self.class_conditional = class_conditional
self.num_classes = num_classes self.num_classes = num_classes
self.max_seq_length = max_seq_length self.max_seq_length = max_seq_length
self.decoder_cross_attention = decoder_cross_attention self.decoder_cross_attention = decoder_cross_attention
self.use_discrete_vertex_embeddings = use_discrete_vertex_embeddings self.use_discrete_vertex_embeddings = use_discrete_vertex_embeddings
self.condition_points_num = condition_points_num self.condition_points_num = condition_points_num
self.fp16_enabled = False self.fp16_enabled = False
self.coord_dim = coord_dim # if we use xyz else 2 when we use xy self.coord_dim = coord_dim # if we use xyz else 2 when we use xy
self.kp_coord_dim = coord_dim if coord_dim==2 else 2 # XXX self.kp_coord_dim = coord_dim if coord_dim==2 else 2 # XXX
self.register_buffer('canvas_size', torch.tensor(canvas_size)) self.register_buffer('canvas_size', torch.tensor(canvas_size))
# initialize the model # initialize the model
self._project_to_logits = nn.Linear( self._project_to_logits = nn.Linear(
self.embedding_dim, self.embedding_dim,
max(canvas_size) + 1, # + 1 for stopping token. use_bias=True, max(canvas_size) + 1, # + 1 for stopping token. use_bias=True,
) )
self.input_proj = nn.Conv2d( self.input_proj = nn.Conv2d(
in_channels, self.embedding_dim, kernel_size=1) in_channels, self.embedding_dim, kernel_size=1)
decoder_layer = CausalTransformerDecoderLayer( decoder_layer = CausalTransformerDecoderLayer(
**decoder_config.pop('layer_config')) **decoder_config.pop('layer_config'))
self.decoder = CausalTransformerDecoder( self.decoder = CausalTransformerDecoder(
decoder_layer, **decoder_config) decoder_layer, **decoder_config)
self._init_embedding() self._init_embedding()
self.init_weights() self.init_weights()
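    # A minimal construction sketch (illustrative only; the real configs live
    # in the project's config files, and the CausalTransformerDecoderLayer
    # kwargs may differ). It shows the nesting __init__ expects: 'layer_config'
    # is popped from decoder_config and the rest is forwarded to the decoder.
    #
    #   gen_net = PolylineGenerator(
    #       in_channels=128,
    #       encoder_config=dict(),
    #       decoder_config=dict(
    #           layer_config=dict(d_model=256, nhead=8),
    #           num_layers=6),
    #       coord_dim=2,
    #       canvas_size=(400, 200))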
    def _init_embedding(self):
        if self.class_conditional:
            self.label_embed = nn.Embedding(
                self.num_classes, self.embedding_dim)
        self.coord_embed = nn.Embedding(self.coord_dim, self.embedding_dim)
        self.pos_embeddings = nn.Embedding(
            self.max_seq_length, self.embedding_dim)
        # indicates the role of each condition position, e.g. whether it is
        # the start or the end point of the line
        self.bbox_context_embed = \
            nn.Embedding(self.condition_points_num, self.embedding_dim)
        self.img_coord_embed = nn.Linear(2, self.embedding_dim)
        # initialize the vertex embedding
        if self.use_discrete_vertex_embeddings:
            self.vertex_embed = nn.Embedding(
                max(self.canvas_size) + 1, self.embedding_dim)
        else:
            self.vertex_embed = nn.Linear(1, self.embedding_dim)
    def init_weights(self):
        for p in self.parameters():
            if p.dim() > 1:
                nn.init.xavier_uniform_(p)

    def _embed_kps(self, bbox):
        bbox_len = bbox.shape[-1]
        # Bbox context
        bbox_embedding = self.bbox_context_embed(
            (torch.arange(bbox_len, device=bbox.device) / self.kp_coord_dim).floor().long())
        # Coord indicators (x, y)
        coord_embeddings = self.coord_embed(
            torch.arange(bbox_len, device=bbox.device) % self.kp_coord_dim)
        # Discrete vertex value embeddings
        vert_embeddings = self.vertex_embed(bbox)
        return vert_embeddings + (bbox_embedding + coord_embeddings)[None]
    def _prepare_context(self, batch, context):
        """Prepare class label and vertex context."""
        global_context_embedding = None
        if self.class_conditional:
            global_context_embedding = self.label_embed(batch['lines_cls'])
        bbox_embeddings = self._embed_kps(batch['bbox_flat'])
        if global_context_embedding is not None:
            global_context_embedding = torch.cat(
                [global_context_embedding[:, None], bbox_embeddings], dim=1)
        # Pass images through encoder
        image_embeddings = assign_bev(
            context['bev_embeddings'], batch['lines_bs_idx'])
        image_embeddings = self.input_proj(image_embeddings)
        device = image_embeddings.device
        # Add 2D coordinate grid embedding
        H, W = image_embeddings.shape[2:]
        Ws = torch.linspace(-1., 1., W)
        Hs = torch.linspace(-1., 1., H)
        image_coords = torch.stack(
            torch.meshgrid(Hs, Ws), dim=-1).to(device)
        image_coord_embeddings = self.img_coord_embed(image_coords)
        image_embeddings += image_coord_embeddings[None].permute(0, 3, 1, 2)
        # Reshape spatial grid to sequence
        B = image_embeddings.shape[0]
        sequential_context_embeddings = image_embeddings.reshape(
            B, self.embedding_dim, -1).permute(0, 2, 1)
        return (global_context_embedding,
                sequential_context_embeddings)
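    # Shape walkthrough for _prepare_context (assuming BEV features of spatial
    # size H x W): input_proj maps [B, C, H, W] -> [B, D, H, W]; a learned
    # embedding of the normalized (h, w) coordinate grid is added per location;
    # the grid is then flattened to a sequence of shape [B, H*W, D] that the
    # decoder cross-attends into.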
    def _embed_inputs(self, seqs, condition_embedding=None):
        """Embeds vertex sequences and adds positional and coordinate embeddings.
        Args:
            seqs: B, seqlen=vlen*coord_dim
            condition_embedding: B, [c,xs,ys,xe,ye](5), h
        Returns:
            embeddings: B, condition_len+seqlen, h
        """
        B, seq_len = seqs.shape[:2]
        # Position embeddings
        pos_embeddings = self.pos_embeddings(
            (torch.arange(seq_len, device=seqs.device) / self.coord_dim).floor().long())  # seq_len, h
        # Coord indicators (x, y, z (optional))
        coord_embeddings = self.coord_embed(
            torch.arange(seq_len, device=seqs.device) % self.coord_dim)
        # Discrete vertex value embeddings
        vert_embeddings = self.vertex_embed(seqs)
        # Aggregate embeddings
        embeddings = vert_embeddings + \
            (coord_embeddings + pos_embeddings)[None]
        embeddings = torch.cat([condition_embedding, embeddings], dim=1)
        return embeddings
    def forward(self, batch: dict, **kwargs):
        """
        Pass the batch through the polyline model and get log probabilities.
        Args:
            batch: Dictionary containing:
                'polylines': int tensor of shape [batch_size, seq_length] with
                    flattened, quantized vertex sequences.
                'polyline_masks': tensor of shape [batch_size, seq_length]
                    that masks padded elements in 'polylines'.
                'lines_cls', 'bbox_flat', 'lines_bs_idx': conditioning inputs,
                    see _prepare_context.
        """
        if self.training:
            return self.forward_train(batch, **kwargs)
        else:
            return self.inference(batch, **kwargs)
    def sperate_forward(self, batch, context, **kwargs):
        # Split the batch into a short-sequence chunk and a long-sequence
        # chunk so that each chunk is padded only to its own maximum length.
        polyline_length = batch['polyline_masks'].sum(-1)
        c1, c2, revert_idx, size = get_chunk_idx(polyline_length)
        sizes = [size, polyline_length.max()]
        polyline_logits = []
        for c_idx, size in zip([c1, c2], sizes):
            new_batch = assign_batch(batch, c_idx, size)
            _poly_logits = self._forward_train(new_batch, context, **kwargs)
            polyline_logits.append(_poly_logits)
        # pad the short chunk back to the full length, then restore the
        # original sample order (this could be sped up further)
        for i, (_poly_logits, size) in enumerate(zip(polyline_logits, sizes)):
            if size < sizes[1]:
                _poly_logits = F.pad(_poly_logits, (0, 0, 0, sizes[1] - size), "constant", 0)
                polyline_logits[i] = _poly_logits
        polyline_logits = torch.cat(polyline_logits, 0)
        polyline_logits = polyline_logits[revert_idx]
        cat_dist = Categorical(logits=polyline_logits)
        return {'polylines': cat_dist}
    def forward_train(self, batch: dict, context: dict, **kwargs):
        """
        Returns:
            pred_dist: Categorical predictive distribution with batch shape
                [batch_size, seq_length].
        """
        # We use the gt vertices (teacher forcing). sperate_forward chunks the
        # batch by sequence length instead of running _forward_train on the
        # fully padded batch.
        return self.sperate_forward(batch, context, **kwargs)
    def _forward_train(self, batch: dict, context: dict, **kwargs):
        """
        Returns:
            pred_dist: Categorical predictive distribution with batch shape
                [batch_size, seq_length].
        """
        # we use the gt vertices
        global_context, seq_context = self._prepare_context(
            batch, context)
        logits = self.body(
            # Last element not used for preds
            batch['polylines'][:, :-1],
            global_context_embedding=global_context,
            sequential_context_embeddings=seq_context,
            return_logits=True,
            is_training=self.training)
        return logits
    @force_fp32(apply_to=('global_context_embedding', 'sequential_context_embeddings', 'cache'))
    def body(self,
             seqs,
             global_context_embedding=None,
             sequential_context_embeddings=None,
             temperature=1.,
             top_k=0,
             top_p=1.,
             cache=None,
             return_logits=False,
             is_training=True):
        """
        Outputs a categorical distribution over vertex indices.
        Body of the polyline model.
        """
        # Embed inputs
        condition_len = global_context_embedding.shape[1]
        decoder_inputs = self._embed_inputs(
            seqs, global_context_embedding)
        # Pass through Transformer decoder.
        # Our memory-efficient decoder only supports the sequence-first layout.
        decoder_inputs = decoder_inputs.transpose(0, 1)
        if sequential_context_embeddings is not None:
            sequential_context_embeddings = sequential_context_embeddings.transpose(
                0, 1)
        causal_msk = None
        if is_training:
            causal_msk = generate_square_subsequent_mask(
                decoder_inputs.shape[0], condition_len=condition_len, device=decoder_inputs.device)
        decoder_outputs, cache = self.decoder(
            tgt=decoder_inputs,
            cache=cache,
            memory=sequential_context_embeddings,
            causal_mask=causal_msk,
        )
        decoder_outputs = decoder_outputs.transpose(0, 1)
        # keep only the positions that predict the output sequence
        decoder_outputs = decoder_outputs[:, condition_len - 1:]
        # Get logits and optionally process for sampling
        logits = self._project_to_logits(decoder_outputs)
        # y mask: y positions may only emit values within canvas_size[1],
        # and never the stopping token (index 0)
        _vert_mask = torch.arange(logits.shape[-1], device=logits.device)
        vertices_mask_y = (_vert_mask < self.canvas_size[1] + 1)
        vertices_mask_y[0] = False
        logits[:, 1::self.coord_dim] = logits[:, 1::self.coord_dim] * \
            vertices_mask_y - ~vertices_mask_y * 1e9
        if self.coord_dim > 2:
            # z mask: likewise, z positions never emit the stopping token
            _vert_mask = torch.arange(logits.shape[-1], device=logits.device)
            vertices_mask_z = (_vert_mask < self.canvas_size[2] + 1)
            vertices_mask_z[0] = False
            logits[:, 2::self.coord_dim] = logits[:, 2::self.coord_dim] * \
                vertices_mask_z - ~vertices_mask_z * 1e9
        logits = logits / temperature
        logits = top_k_logits(logits, top_k)
        logits = top_p_logits(logits, top_p)
        if return_logits:
            return logits
        cat_dist = Categorical(logits=logits)
        return cat_dist, cache
    @force_fp32(apply_to=('pred', ))
    def loss(self, gt: dict, pred: dict):
        weight = gt['polyline_weights']
        mask = gt['polyline_masks']
        # masked, weighted negative log-likelihood over the token sequence
        loss = -torch.sum(
            pred['polylines'].log_prob(gt['polylines']) * mask * weight) / weight.sum()
        return {'seq': loss}
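    # A minimal sketch of the loss on toy tensors (illustrative only):
    #   dist = Categorical(logits=torch.randn(1, 4, 401))   # B=1, seq=4
    #   gt = {'polylines': torch.tensor([[7, 3, 0, 0]]),
    #         'polyline_masks': torch.tensor([[1., 1., 1., 0.]]),
    #         'polyline_weights': torch.tensor([[1., 1., 1., 0.]])}
    #   loss = -(dist.log_prob(gt['polylines']) * gt['polyline_masks']
    #            * gt['polyline_weights']).sum() / gt['polyline_weights'].sum()
    # Padded positions contribute nothing; the stop token (0) is supervised
    # wherever its mask and weight are nonzero.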
    def inference(self,
                  batch: dict,
                  context: dict,
                  max_sample_length=None,
                  temperature=1.,
                  top_k=0,
                  top_p=1.,
                  only_return_complete=False,
                  gt_condition=False,
                  **kwargs):
        """Sample from the polyline model using caching.
        Args:
            batch, context: conditioning inputs; see _prepare_context for
                details.
            max_sample_length: Maximum length of sampled vertex sequences.
                Sequences that do not complete are truncated.
            temperature: Scalar softmax temperature > 0.
            top_k: Number of tokens to keep for top-k sampling.
            top_p: Proportion of probability mass to keep for top-p sampling.
            only_return_complete: If True, only return completed samples.
                Otherwise return all samples along with a completed indicator.
        Returns:
            outputs: Output dictionary with fields:
                'completed': Boolean tensor of shape [num_samples]. True if
                    the corresponding sample completed within
                    max_sample_length.
                'polylines': Tensor of sampled token sequences, zero-padded.
                'polyline_masks': Boolean mask of the valid positions in each
                    sample.
        """
        # prepare the conditional variables
        global_context, seq_context = self._prepare_context(
            batch, context)
        device = global_context.device
        batch_size = global_context.shape[0]
        # Loop sampling with caching
        samples = torch.empty(
            [batch_size, 0], dtype=torch.int32, device=device)
        max_sample_length = max_sample_length or self.max_seq_length
        seq_len = max_sample_length * self.coord_dim + 1
        cache = None
        decoded_tokens = \
            torch.zeros((batch_size, seq_len),
                        device=device, dtype=torch.long)
        remain_idx = torch.arange(batch_size, device=device)
        for i in range(seq_len):
            # Loop body for the autoregressive calculation.
            pred_dist, cache = self.body(
                samples,
                global_context_embedding=global_context,
                sequential_context_embeddings=seq_context,
                cache=cache,
                temperature=temperature,
                top_k=top_k,
                top_p=top_p,
                is_training=False)
            samples = pred_dist.sample()
            decoded_tokens[remain_idx, i] = samples[:, -1]
            # Stopping condition: break once every sample has emitted a 0.
            if not (decoded_tokens[:, :i + 1] != 0).all(-1).any():
                break
            # drop finished samples: a newly emitted 0 is the stopping token
            valid_idx = (samples[:, -1] != 0).nonzero(as_tuple=True)[0]
            remain_idx = remain_idx[valid_idx]
            cache = cache[:, :, valid_idx]
            global_context = global_context[valid_idx]
            seq_context = seq_context[valid_idx]
            samples = samples[valid_idx]
        decoded_tokens = decoded_tokens[:, :i + 1]
        outputs = self.post_process(decoded_tokens, seq_len,
                                    device, only_return_complete)
        return outputs
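    # Sampling bookkeeping sketch (illustrative): with batch_size=3, if sample
    # 1 emits 0 at step i, remain_idx goes [0, 1, 2] -> [0, 2]; subsequent
    # steps run only on the two unfinished samples, while decoded_tokens keeps
    # writing into the original row positions via remain_idx.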
    def post_process(self, polyline,
                     max_seq_len=None,
                     device=None,
                     only_return_complete=True):
        '''
        format the predictions and compute the validity mask
        '''
        # Record completed samples (those that emitted a stopping token)
        complete_samples = (polyline == 0).any(-1)
        # Find the valid sequence length
        sample_seq_length = polyline.shape[-1]
        _polyline_mask = torch.arange(sample_seq_length)[None].to(device)
        # Incomplete samples keep the full length.
        valid_polyline_len = torch.full_like(polyline[:, 0], sample_seq_length)
        zero_inds = (polyline == 0).type(torch.int32).argmax(-1)
        # Real length for completed samples (stopping token included)
        valid_polyline_len[complete_samples] = zero_inds[complete_samples] + 1
        polyline_mask = _polyline_mask < valid_polyline_len[:, None]
        # Mask tokens beyond the stopping token with zeros
        polyline = polyline * polyline_mask
        # Pad to maximum size with zeros
        pad_size = max_seq_len - sample_seq_length
        polyline = F.pad(polyline, (0, pad_size))
        # XXX: filtering by `only_return_complete` is currently disabled;
        # all samples are returned together with the 'completed' indicator.
        outputs = {
            'completed': complete_samples,
            'polylines': polyline,
            'polyline_masks': polyline_mask,
        }
        return outputs
def find_best_sperate_plan(idx, array):
    # roughly the padding saved by cutting the sorted lengths after position
    # `idx`: the sequences before the cut no longer need to be padded up to
    # the global maximum (array[-1]) but only to array[idx]
    h = array[-1] - array[idx]
    w = idx
    cost = h * w
    return cost

def get_chunk_idx(polyline_length):
    # sort by length and pick the split point that maximizes the padding saved
    _polyline_length, polyline_length_idx = torch.sort(polyline_length)
    costs = []
    for i in range(len(_polyline_length)):
        cost = find_best_sperate_plan(i, _polyline_length)
        costs.append(cost)
    seperate_point = torch.stack(costs).argmax()
    chunk1 = polyline_length_idx[:seperate_point + 1]
    chunk2 = polyline_length_idx[seperate_point + 1:]
    revert_idx = torch.argsort(polyline_length_idx)
    return chunk1, chunk2, revert_idx, _polyline_length[seperate_point]
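# Usage sketch (illustrative): for lengths [3, 40, 5, 38] the sorted lengths
# are [3, 5, 38, 40] and the saved-padding scores are [0, 35, 4, 0], so the
# split lands after the short sequences: chunk1 holds the indices of [3, 5]
# (padded to 5) and chunk2 those of [38, 40] (padded to 40); revert_idx
# restores the original sample order after the chunks are concatenated.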
def assign_bev(feat, idx):
    return feat[idx]

def assign_batch(batch, idx, size):
    new_batch = {}
    for k, v in batch.items():
        new_batch[k] = v[idx]
        if new_batch[k].ndim > 1:
            new_batch[k] = new_batch[k][:, :size]
    return new_batch
import torch
from torch import nn as nn
from torch.nn import functional as F
from mmdet.models.losses import l1_loss
from mmdet.models.losses.utils import weighted_loss
import mmcv
from mmdet.models.builder import LOSSES
@mmcv.jit(derivate=True, coderize=True)
@weighted_loss
def smooth_l1_loss(pred, target, beta=1.0):
    """Smooth L1 loss.
    Args:
        pred (torch.Tensor): The prediction.
        target (torch.Tensor): The learning target of the prediction.
        beta (float, optional): The threshold in the piecewise function.
            Defaults to 1.0.
    Returns:
        torch.Tensor: Calculated loss
    """
    assert beta > 0
    if target.numel() == 0:
        return pred.sum() * 0
    assert pred.size() == target.size()
    diff = torch.abs(pred - target)
    loss = torch.where(diff < beta, 0.5 * diff * diff / beta,
                       diff - 0.5 * beta)
    return loss
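# Quick numeric check (illustrative): with beta=1.0,
#   diff = 0.5  -> loss = 0.5 * 0.5**2 / 1.0 = 0.125   (quadratic region)
#   diff = 2.0  -> loss = 2.0 - 0.5 * 1.0   = 1.5      (linear region)
# so the loss is quadratic for |pred - target| < beta and linear beyond it.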
@LOSSES.register_module()
class LinesLoss(nn.Module):

    def __init__(self, reduction='mean', loss_weight=1.0, beta=0.5):
        """
        Smooth L1 loss over line coordinates.
        Args:
            reduction (str, optional): The method to reduce the loss.
                Options are "none", "mean" and "sum".
            loss_weight (float, optional): The weight of loss.
            beta (float, optional): The threshold between the quadratic and
                linear regions of the smooth L1 loss.
        """
        super(LinesLoss, self).__init__()
        self.reduction = reduction
        self.loss_weight = loss_weight
        self.beta = beta

    def forward(self,
                pred,
                target,
                weight=None,
                avg_factor=None,
                reduction_override=None):
        """Forward function.
        Args:
            pred (torch.Tensor): The prediction.
                shape: [bs, ...]
            target (torch.Tensor): The learning target of the prediction.
                shape: [bs, ...]
            weight (torch.Tensor, optional): The weight of loss for each
                prediction. Defaults to None. It is useful when not all
                predictions are valid.
            avg_factor (int, optional): Average factor that is used to average
                the loss. Defaults to None.
            reduction_override (str, optional): The reduction method used to
                override the original reduction method of the loss.
                Defaults to None.
        """
        assert reduction_override in (None, 'none', 'mean', 'sum')
        reduction = (
            reduction_override if reduction_override else self.reduction)
        loss = smooth_l1_loss(
            pred, target, weight, reduction=reduction, avg_factor=avg_factor, beta=self.beta)
        return loss * self.loss_weight
@mmcv.jit(derivate=True, coderize=True)
@weighted_loss
def bce(pred, label, class_weight=None):
    """
    Binary cross entropy with logits.
    pred: B, nquery, npts
    label: B, nquery, npts
    """
    if label.numel() == 0:
        return pred.sum() * 0
    assert pred.size() == label.size()
    loss = F.binary_cross_entropy_with_logits(
        pred, label.float(), pos_weight=class_weight, reduction='none')
    return loss

@LOSSES.register_module()
class MasksLoss(nn.Module):

    def __init__(self, reduction='mean', loss_weight=1.0):
        super(MasksLoss, self).__init__()
        self.reduction = reduction
        self.loss_weight = loss_weight

    def forward(self,
                pred,
                target,
                weight=None,
                avg_factor=None,
                reduction_override=None):
        """Forward function.
        Args:
            pred/target (torch.Tensor): mask logits and binary labels of the
                same shape; weight, avg_factor and reduction_override follow
                the usual mmdet loss conventions.
        """
        assert reduction_override in (None, 'none', 'mean', 'sum')
        reduction = (
            reduction_override if reduction_override else self.reduction)
        loss = bce(pred, target, weight, reduction=reduction,
                   avg_factor=avg_factor)
        return loss * self.loss_weight
@mmcv.jit(derivate=True, coderize=True)
@weighted_loss
def ce(pred, label, class_weight=None):
    """
    Cross entropy.
    pred: B*nquery, npts
    label: B*nquery,
    """
    if label.numel() == 0:
        return pred.sum() * 0
    loss = F.cross_entropy(
        pred, label, weight=class_weight, reduction='none')
    return loss

@LOSSES.register_module()
class LenLoss(nn.Module):

    def __init__(self, reduction='mean', loss_weight=1.0):
        super(LenLoss, self).__init__()
        self.reduction = reduction
        self.loss_weight = loss_weight

    def forward(self,
                pred,
                target,
                weight=None,
                avg_factor=None,
                reduction_override=None):
        """Forward function.
        Args:
            pred/target (torch.Tensor): length logits and integer length
                labels; weight, avg_factor and reduction_override follow the
                usual mmdet loss conventions.
        """
        assert reduction_override in (None, 'none', 'mean', 'sum')
        reduction = (
            reduction_override if reduction_override else self.reduction)
        loss = ce(pred, target, weight, reduction=reduction,
                  avg_factor=avg_factor)
        return loss * self.loss_weight
from abc import ABCMeta, abstractmethod

import torch.nn as nn
from mmcv.runner import auto_fp16
from mmcv.utils import print_log
from mmdet.utils import get_root_logger
from mmdet3d.models.builder import DETECTORS

MAPPERS = DETECTORS

class BaseMapper(nn.Module, metaclass=ABCMeta):
    """Base class for mappers."""

    def __init__(self):
        super(BaseMapper, self).__init__()
        self.fp16_enabled = False

    @property
    def with_neck(self):
        """bool: whether the detector has a neck"""
        return hasattr(self, 'neck') and self.neck is not None

    # TODO: these properties need to be carefully handled
    # for both single stage & two stage detectors
    @property
    def with_shared_head(self):
        """bool: whether the detector has a shared head in the RoI Head"""
        return hasattr(self, 'roi_head') and self.roi_head.with_shared_head

    @property
    def with_bbox(self):
        """bool: whether the detector has a bbox head"""
        return ((hasattr(self, 'roi_head') and self.roi_head.with_bbox)
                or (hasattr(self, 'bbox_head') and self.bbox_head is not None))

    @property
    def with_mask(self):
        """bool: whether the detector has a mask head"""
        return ((hasattr(self, 'roi_head') and self.roi_head.with_mask)
                or (hasattr(self, 'mask_head') and self.mask_head is not None))

    # @abstractmethod
    def extract_feat(self, imgs):
        """Extract features from images."""
        pass
    def forward_train(self, *args, **kwargs):
        pass

    # @abstractmethod
    def simple_test(self, img, img_metas, **kwargs):
        pass

    # @abstractmethod
    def aug_test(self, imgs, img_metas, **kwargs):
        """Test function with test time augmentation."""
        pass

    def init_weights(self, pretrained=None):
        """Initialize the weights in the detector.
        Args:
            pretrained (str, optional): Path to pre-trained weights.
                Defaults to None.
        """
        if pretrained is not None:
            logger = get_root_logger()
            print_log(f'load model from: {pretrained}', logger=logger)

    def forward_test(self, *args, **kwargs):
        """Dispatch test calls.
        Test-time augmentation (`aug_test`) is not wired up yet, so all test
        calls go through `simple_test`.
        """
        return self.simple_test(*args, **kwargs)
    # @auto_fp16(apply_to=('img', ))
    def forward(self, *args, return_loss=True, **kwargs):
        """Calls either :func:`forward_train` or :func:`forward_test` depending
        on whether ``return_loss`` is ``True``.
        Note this setting will change the expected inputs. When
        ``return_loss=True``, img and img_meta are single-nested (i.e. Tensor
        and List[dict]), and when ``return_loss=False``, img and img_meta
        should be double nested (i.e. List[Tensor], List[List[dict]]), with
        the outer list indicating test time augmentations.
        """
        if return_loss:
            return self.forward_train(*args, **kwargs)
        else:
            kwargs.pop('rescale', None)
            return self.forward_test(*args, **kwargs)
    def train_step(self, data_dict, optimizer):
        """The iteration step during training.
        This method defines an iteration step during training, except for the
        back propagation and optimizer updating, which are done in an optimizer
        hook. Note that in some complicated cases or models, the whole process
        including back propagation and optimizer updating is also defined in
        this method, such as GAN.
        Args:
            data_dict (dict): The output of dataloader.
            optimizer (:obj:`torch.optim.Optimizer` | dict): The optimizer of
                runner is passed to ``train_step()``. This argument is unused
                and reserved.
        Returns:
            dict: It should contain at least 3 keys: ``loss``, ``log_vars``, \
                ``num_samples``.
                - ``loss`` is a tensor for back propagation, which can be a \
                    weighted sum of multiple losses.
                - ``log_vars`` contains all the variables to be sent to the
                    logger.
                - ``num_samples`` indicates the batch size (when the model is \
                    DDP, it means the batch size on each GPU), which is used for \
                    averaging the logs.
        """
        loss, log_vars, num_samples = self(**data_dict)
        outputs = dict(
            loss=loss, log_vars=log_vars, num_samples=num_samples)
        return outputs

    def val_step(self, data, optimizer):
        """The iteration step during validation.
        This method shares the same signature as :func:`train_step`, but used
        during val epochs. Note that the evaluation after training epochs is
        not implemented with this method, but an evaluation hook.
        """
        loss, log_vars, num_samples = self(**data)
        outputs = dict(
            loss=loss, log_vars=log_vars, num_samples=num_samples)
        return outputs
    def show_result(self,
                    **kwargs):
        img = None
        return img
import mmcv
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.utils.rnn import pad_sequence
from torchvision.models.resnet import resnet18, resnet50

from mmdet3d.models.builder import (build_backbone, build_head,
                                    build_neck)
from .base_mapper import BaseMapper, MAPPERS
@MAPPERS.register_module()
class VectorMapNet(BaseMapper):

    def __init__(self,
                 backbone_cfg=dict(),
                 head_cfg=dict(
                     vert_net_cfg=dict(),
                     face_net_cfg=dict(),
                 ),
                 neck_input_channels=128,
                 neck_cfg=None,
                 with_auxiliary_head=False,
                 only_det=False,
                 train_cfg=None,
                 test_cfg=None,
                 pretrained=None,
                 model_name=None, **kwargs):
        super(VectorMapNet, self).__init__()
        # Attributes
        self.model_name = model_name
        self.last_epoch = None
        self.only_det = only_det
        self.backbone = build_backbone(backbone_cfg)
        if neck_cfg is not None:
            self.neck_neck = build_backbone(neck_cfg.backbone)
            self.neck_neck.conv1 = nn.Conv2d(
                neck_input_channels, 64, kernel_size=7, stride=2, padding=3, bias=False)
            self.neck_project = build_neck(neck_cfg.neck)
            self.neck = self.multiscale_neck
        else:
            trunk = resnet18(pretrained=False, zero_init_residual=True)
            self.neck = nn.Sequential(
                nn.Conv2d(neck_input_channels, 64, kernel_size=(7, 7), stride=(
                    2, 2), padding=(3, 3), bias=False),
                nn.BatchNorm2d(64),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(kernel_size=3, stride=2, padding=1,
                             dilation=1, ceil_mode=False),
                trunk.layer1,
                nn.Conv2d(64, 128, kernel_size=1, bias=False),
            )
        # BEV
        if hasattr(self.backbone, 'bev_w'):
            self.bev_w = self.backbone.bev_w
            self.bev_h = self.backbone.bev_h
        self.head = build_head(head_cfg)

    def multiscale_neck(self, bev_embedding):
        multi_feat = self.neck_neck(bev_embedding)
        multi_feat = self.neck_project(multi_feat)
        return multi_feat
    def forward_train(self, img, polys, points=None, img_metas=None, **kwargs):
        '''
        Args:
            img: torch.Tensor of shape [B, N, 3, H, W]
                N: number of cams
            polys: list[list[Tuple(lines, length, label)]]
                - lines: np.array of shape [num_points, 2].
                - length: int
                - label: int
                len(polys) = batch_size
                len(polys[_b]) = num of lines in sample _b
            img_metas:
                img_metas['lidar2img']: [B, N, 4, 4]
        Out:
            loss, log_vars, num_sample
        '''
        # prepare labels and images
        batch, img, img_metas, valid_idx, points = self.batch_data(
            polys, img, img_metas, img.device, points)
        # corner case: if no sample in the batch contains any vectors, fall
        # back to the batch cached from the previous step so training does
        # not fail
        if self.last_epoch is None:
            self.last_epoch = [batch, img, img_metas, valid_idx, points]
        if len(valid_idx) == 0:
            batch, img, img_metas, valid_idx, points = self.last_epoch
        else:
            del self.last_epoch
            self.last_epoch = [batch, img, img_metas, valid_idx, points]
        # Backbone
        _bev_feats = self.backbone(img, img_metas=img_metas, points=points)
        img_shape = \
            [_bev_feats.shape[2:] for i in range(_bev_feats.shape[0])]
        # Neck
        bev_feats = self.neck(_bev_feats)
        preds_dict, losses_dict = \
            self.head(batch,
                      context={
                          'bev_embeddings': bev_feats,
                          'batch_input_shape': _bev_feats.shape[2:],
                          'img_shape': img_shape,
                          'raw_bev_embeddings': _bev_feats},
                      only_det=self.only_det)
        # format outputs
        loss = 0
        for name, var in losses_dict.items():
            loss = loss + var
        # update the log
        log_vars = {k: v.item() for k, v in losses_dict.items()}
        log_vars.update({'total': loss.item()})
        num_sample = img.size(0)
        return loss, log_vars, num_sample
    @torch.no_grad()
    def forward_test(self, img, polys=None, points=None, img_metas=None, **kwargs):
        '''
        inference pipeline
        '''
        # prepare labels and images
        token = []
        for img_meta in img_metas:
            token.append(img_meta['token'])
        _bev_feats = self.backbone(img, img_metas, points=points)
        img_shape = [_bev_feats.shape[2:] for i in range(_bev_feats.shape[0])]
        # Neck
        bev_feats = self.neck(_bev_feats)
        context = {'bev_embeddings': bev_feats,
                   'batch_input_shape': _bev_feats.shape[2:],
                   'img_shape': img_shape,  # XXX
                   'raw_bev_embeddings': _bev_feats}
        preds_dict = self.head(batch={},
                               context=context,
                               condition_on_det=True,
                               gt_condition=False,
                               only_det=self.only_det)
        # hard-coded guard: the head returns None when nothing is detected
        if preds_dict is None:
            return [None]
        results_list = self.head.post_process(preds_dict, token, only_det=self.only_det)
        return results_list
    def batch_data(self, polys, imgs, img_metas, device, points=None):
        # filter out samples that contain no vectors
        valid_idx = [i for i in range(len(polys)) if len(polys[i])]
        imgs = imgs[valid_idx]
        img_metas = [img_metas[i] for i in valid_idx]
        polys = [polys[i] for i in valid_idx]
        if points is not None:
            points = [points[i] for i in valid_idx]
            points = self.batch_points(points)
        if len(valid_idx) == 0:
            return None, None, None, valid_idx, None
        batch = {}
        batch['det'] = format_det(polys, device)
        batch['gen'] = format_gen(polys, device)
        return batch, imgs, img_metas, valid_idx, points

    def batch_points(self, points):
        pad_points = pad_sequence(points, batch_first=True)
        points_mask = torch.zeros_like(pad_points[:, :, 0]).bool()
        for i in range(len(points)):
            valid_num = points[i].shape[0]
            points_mask[i][:valid_num] = True
        return (pad_points, points_mask)
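# Illustrative behaviour of batch_points (not part of the pipeline): for two
# point clouds with 3 and 5 points, pad_sequence gives pad_points of shape
# [2, 5, C], and points_mask becomes [[True, True, True, False, False],
# [True, True, True, True, True]], marking the real points in each sample.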
def format_det(polys, device):
    batch = {
        'class_label': [],
        'batch_idx': [],  # kept for interface compatibility; not populated here
        'bbox': [],
    }
    for batch_idx, poly in enumerate(polys):
        keypoint_label = torch.from_numpy(poly['det_label']).to(device)
        keypoint = torch.from_numpy(poly['keypoint']).to(device)
        batch['class_label'].append(keypoint_label)
        batch['bbox'].append(keypoint)
    return batch
def format_gen(polys, device):
    polylines, polyline_masks, polyline_weights = [], [], []
    bbox, line_cls, line_bs_idx = [], [], []
    for batch_idx, poly in enumerate(polys):
        # convert to cuda tensors
        for k in poly.keys():
            if isinstance(poly[k], np.ndarray):
                poly[k] = torch.from_numpy(poly[k]).to(device)
            else:
                poly[k] = [torch.from_numpy(v).to(device) for v in poly[k]]
        line_cls += poly['gen_label']
        line_bs_idx += [batch_idx] * len(poly['gen_label'])
        # conditioning inputs
        bbox += poly['qkeypoint']
        # outputs
        polylines += poly['polylines']
        polyline_masks += poly['polyline_masks']
        polyline_weights += poly['polyline_weights']
    batch = {}
    batch['lines_bs_idx'] = torch.tensor(
        line_bs_idx, dtype=torch.long, device=device)
    batch['lines_cls'] = torch.tensor(
        line_cls, dtype=torch.long, device=device)
    batch['bbox_flat'] = torch.stack(bbox, 0)
    # padding
    batch['polylines'] = pad_sequence(polylines, batch_first=True)
    batch['polyline_masks'] = pad_sequence(polyline_masks, batch_first=True)
    batch['polyline_weights'] = pad_sequence(polyline_weights, batch_first=True)
    return batch
from .deformable_transformer import DeformableDetrTransformer_, DeformableDetrTransformerDecoder_
from .base_transformer import PlaceHolderEncoder
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from mmcv.cnn import xavier_init, constant_init
from mmcv.cnn.bricks.registry import (ATTENTION,
                                      TRANSFORMER_LAYER_SEQUENCE)
from mmcv.cnn.bricks.transformer import (MultiScaleDeformableAttention,
                                         TransformerLayerSequence,
                                         build_transformer_layer_sequence)
from mmcv.runner.base_module import BaseModule
from mmdet.models.utils.builder import TRANSFORMER

@TRANSFORMER_LAYER_SEQUENCE.register_module()
class PlaceHolderEncoder(nn.Module):

    def __init__(self, *args, embed_dims=None, **kwargs):
        super(PlaceHolderEncoder, self).__init__()
        self.embed_dims = embed_dims

    def forward(self, *args, query=None, **kwargs):
        return query
# Copyright (c) OpenMMLab. All rights reserved.
import math
import warnings

import torch
import torch.nn as nn
from mmcv.cnn import build_activation_layer, build_norm_layer, xavier_init
from mmcv.cnn.bricks.registry import (TRANSFORMER_LAYER,
                                      TRANSFORMER_LAYER_SEQUENCE)
from mmcv.cnn.bricks.transformer import (BaseTransformerLayer,
                                         TransformerLayerSequence,
                                         build_transformer_layer_sequence)
from mmcv.runner.base_module import BaseModule
from torch.nn.init import normal_

from mmdet.models.utils.builder import TRANSFORMER
from mmdet.models.utils.transformer import Transformer

try:
    from mmcv.ops.multi_scale_deform_attn import MultiScaleDeformableAttention
except ImportError:
    warnings.warn(
        '`MultiScaleDeformableAttention` in MMCV has been moved to '
        '`mmcv.ops.multi_scale_deform_attn`, please update your MMCV')
    from mmcv.cnn.bricks.transformer import MultiScaleDeformableAttention

from .fp16_dattn import MultiScaleDeformableAttentionFp16
def inverse_sigmoid(x, eps=1e-5): def inverse_sigmoid(x, eps=1e-5):
"""Inverse function of sigmoid. """Inverse function of sigmoid.
Args: Args:
x (Tensor): The tensor to do the x (Tensor): The tensor to do the
inverse. inverse.
eps (float): EPS avoid numerical eps (float): EPS avoid numerical
overflow. Defaults 1e-5. overflow. Defaults 1e-5.
Returns: Returns:
Tensor: The x has passed the inverse Tensor: The x has passed the inverse
function of sigmoid, has same function of sigmoid, has same
shape with input. shape with input.
""" """
x = x.clamp(min=0, max=1) x = x.clamp(min=0, max=1)
x1 = x.clamp(min=eps) x1 = x.clamp(min=eps)
x2 = (1 - x).clamp(min=eps) x2 = (1 - x).clamp(min=eps)
return torch.log(x1 / x2) return torch.log(x1 / x2)
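# Sanity check (illustrative, not part of the original file): inverse_sigmoid
# is the clamped logit, so sigmoid recovers the input away from 0 and 1:
#   x = torch.tensor([0.1, 0.5, 0.9])
#   assert torch.allclose(inverse_sigmoid(x).sigmoid(), x, atol=1e-4)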
@TRANSFORMER_LAYER_SEQUENCE.register_module()
class DeformableDetrTransformerDecoder_(TransformerLayerSequence):
    """Implements the decoder in DETR transformer.

    Args:
        return_intermediate (bool): Whether to return intermediate outputs.
        coder_norm_cfg (dict): Config of the last normalization layer.
            Default: `LN`.
    """

    def __init__(self, *args,
                 return_intermediate=False, coord_dim=2, kp_coord_dim=2, **kwargs):
        super(DeformableDetrTransformerDecoder_, self).__init__(*args, **kwargs)
        self.return_intermediate = return_intermediate
        self.coord_dim = coord_dim
        self.kp_coord_dim = kp_coord_dim

    def forward(self,
                query,
                *args,
                reference_points=None,
                valid_ratios=None,
                reg_branches=None,
                **kwargs):
        """Forward function for `TransformerDecoder`.

        Args:
            query (Tensor): Input query with shape
                `(num_query, bs, embed_dims)`.
            reference_points (Tensor): The reference points for the offsets,
                with shape (bs, num_query, 4) when as_two_stage is True,
                otherwise (bs, num_query, 2).
            valid_ratios (Tensor): The ratios of valid points on the
                feature map, with shape (bs, num_levels, 2).
            reg_branches (obj:`nn.ModuleList`): Used for refining the
                regression results. Only passed when with_box_refine is
                True, otherwise `None`.

        Returns:
            Tensor: Results with shape [1, num_query, bs, embed_dims] when
                return_intermediate is `False`, otherwise it has shape
                [num_layers, num_query, bs, embed_dims].
        """
        output = query
        intermediate = []
        intermediate_reference_points = []
        for lid, layer in enumerate(self.layers):
            reference_points_input = \
                reference_points[:, :, None, :self.kp_coord_dim] * \
                valid_ratios[:, None, :, :self.kp_coord_dim]
            output = layer(
                output,
                *args,
                reference_points=reference_points_input[..., :self.kp_coord_dim],
                **kwargs)
            output = output.permute(1, 0, 2)
            if reg_branches is not None:
                tmp = reg_branches[lid](output)
                new_reference_points = tmp
                # Refine only the first kp_coord_dim coordinates; slicing the
                # reference points here keeps the shapes consistent when they
                # carry an extra (e.g. height) dimension.
                new_reference_points[..., :self.kp_coord_dim] = tmp[
                    ..., :self.kp_coord_dim] + inverse_sigmoid(
                        reference_points[..., :self.kp_coord_dim])
                new_reference_points = new_reference_points.sigmoid()
                if reference_points.shape[-1] == 3 and self.kp_coord_dim == 2:
                    reference_points[..., -1] = tmp[..., -1].sigmoid().detach()
                reference_points[..., :self.coord_dim] = new_reference_points.detach()
            output = output.permute(1, 0, 2)
            if self.return_intermediate:
                intermediate.append(output)
                intermediate_reference_points.append(reference_points)
        if self.return_intermediate:
            return torch.stack(intermediate), torch.stack(
                intermediate_reference_points)
        return output, reference_points
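# Each decoder layer above performs one step of iterative refinement: the
# layer's regression branch predicts a delta in logit space, which is added
# to inverse_sigmoid(reference_points) and squashed back through sigmoid, so
# the reference points stay in [0, 1] while moving a little per layer. The
# .detach() calls stop gradients from flowing across layers through the
# reference points, matching the Deformable DETR recipe.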
@TRANSFORMER.register_module()
class DeformableDetrTransformer_(Transformer):
    """Implements the DeformableDETR transformer.

    Args:
        as_two_stage (bool): Generate query from encoder features.
            Default: False.
        num_feature_levels (int): Number of feature maps from FPN.
            Default: 4.
        two_stage_num_proposals (int): Number of proposals when
            `as_two_stage` is True. Default: 300.
    """

    def __init__(self,
                 as_two_stage=False,
                 num_feature_levels=1,
                 two_stage_num_proposals=300,
                 coord_dim=2,
                 **kwargs):
        super(DeformableDetrTransformer_, self).__init__(**kwargs)
        self.as_two_stage = as_two_stage
        self.num_feature_levels = num_feature_levels
        self.two_stage_num_proposals = two_stage_num_proposals
        self.embed_dims = self.encoder.embed_dims
        self.coord_dim = coord_dim
        self.init_layers()

    def init_layers(self):
        """Initialize layers of the DeformableDetrTransformer."""
        self.level_embeds = nn.Parameter(
            torch.Tensor(self.num_feature_levels, self.embed_dims))
        if self.as_two_stage:
            self.enc_output = nn.Linear(self.embed_dims, self.embed_dims)
            self.enc_output_norm = nn.LayerNorm(self.embed_dims)
            self.pos_trans = nn.Linear(self.embed_dims * 2,
                                       self.embed_dims * 2)
            self.pos_trans_norm = nn.LayerNorm(self.embed_dims * 2)
        else:
            self.reference_points_embed = nn.Linear(self.embed_dims, self.coord_dim)

    def init_weights(self):
        """Initialize the transformer weights."""
        for p in self.parameters():
            if p.dim() > 1:
                nn.init.xavier_uniform_(p)
        for m in self.modules():
            if isinstance(m, MultiScaleDeformableAttention):
                m.init_weights()
            elif isinstance(m, MultiScaleDeformableAttentionFp16):
                m.init_weights()
        if not self.as_two_stage:
            xavier_init(self.reference_points_embed, distribution='uniform', bias=0.)
        normal_(self.level_embeds)

    @staticmethod
    def get_reference_points(spatial_shapes, valid_ratios, device):
        """Get the reference points used in the decoder.

        Args:
            spatial_shapes (Tensor): The shape of all feature maps,
                has shape (num_level, 2).
            valid_ratios (Tensor): The ratios of valid points on the
                feature map, has shape (bs, num_levels, 2).
            device (obj:`device`): The device where reference_points
                should be.

        Returns:
            Tensor: reference points used in decoder, has \
                shape (bs, num_keys, num_levels, 2).
        """
        reference_points_list = []
        for lvl, (H, W) in enumerate(spatial_shapes):
            # TODO check this 0.5
            ref_y, ref_x = torch.meshgrid(
                torch.linspace(
                    0.5, H - 0.5, H, dtype=torch.float32, device=device),
                torch.linspace(
                    0.5, W - 0.5, W, dtype=torch.float32, device=device))
            ref_y = ref_y.reshape(-1)[None] / (
                valid_ratios[:, None, lvl, 1] * H)
            ref_x = ref_x.reshape(-1)[None] / (
                valid_ratios[:, None, lvl, 0] * W)
            ref = torch.stack((ref_x, ref_y), -1)
            reference_points_list.append(ref)
        reference_points = torch.cat(reference_points_list, 1)
        reference_points = reference_points[:, :, None] * valid_ratios[:, None]
        return reference_points
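    # get_reference_points builds one normalized (x, y) point per pixel
    # center of every feature level (hence the 0.5 offsets), divided by the
    # valid ratios so padded regions map outside [0, 1]. As an illustration,
    # for a single 2x3 level with full valid ratios the x coordinates are
    # (0.5/3, 1.5/3, 2.5/3), repeated for each of the two rows.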
    def get_valid_ratio(self, mask):
        """Get the valid ratios of feature maps of all levels."""
        _, H, W = mask.shape
        valid_H = torch.sum(~mask[:, :, 0], 1)
        valid_W = torch.sum(~mask[:, 0, :], 1)
        valid_ratio_h = valid_H.float() / H
        valid_ratio_w = valid_W.float() / W
        valid_ratio = torch.stack([valid_ratio_w, valid_ratio_h], -1)
        return valid_ratio

    def get_proposal_pos_embed(self,
                               proposals,
                               num_pos_feats=128,
                               temperature=10000):
        """Get the position embedding of proposals."""
        scale = 2 * math.pi
        dim_t = torch.arange(
            num_pos_feats, dtype=torch.float32, device=proposals.device)
        dim_t = temperature**(2 * (dim_t // 2) / num_pos_feats)
        # N, L, 4
        proposals = proposals.sigmoid() * scale
        # N, L, 4, 128
        pos = proposals[:, :, :, None] / dim_t
        # N, L, 4, 64, 2
        pos = torch.stack((pos[:, :, :, 0::2].sin(), pos[:, :, :, 1::2].cos()),
                          dim=4).flatten(2)
        return pos
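    # This is the standard sinusoidal encoding applied per proposal
    # coordinate: each of the 4 box parameters is squashed to (0, 2*pi) and
    # expanded into num_pos_feats interleaved sin/cos channels, so with the
    # default num_pos_feats=128 the result has shape (N, L, 4 * 128).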
    def forward(self,
                mlvl_feats,
                mlvl_masks,
                query_embed,
                mlvl_pos_embeds,
                reg_branches=None,
                cls_branches=None,
                **kwargs):
        """Forward function for `Transformer`.

        Args:
            mlvl_feats (list(Tensor)): Input queries from different levels.
                Each element has shape [bs, embed_dims, h, w].
            mlvl_masks (list(Tensor)): The key_padding_mask from different
                levels used for encoder and decoder, each element has shape
                [bs, h, w].
            query_embed (Tensor): The query embedding for decoder,
                with shape [num_query, c].
            mlvl_pos_embeds (list(Tensor)): The positional encoding of feats
                from different levels, each with shape [bs, embed_dims, h, w].
            reg_branches (obj:`nn.ModuleList`): Regression heads for feature
                maps from each decoder layer. Only passed when
                `with_box_refine` is True. Default to None.
            cls_branches (obj:`nn.ModuleList`): Classification heads for
                feature maps from each decoder layer. Only passed when
                `as_two_stage` is True. Default to None.

        Returns:
            tuple[Tensor]: results of decoder containing the following tensors.

                - inter_states: Outputs from decoder. If
                    return_intermediate_dec is True output has shape \
                    (num_dec_layers, bs, num_query, embed_dims), else has \
                    shape (1, bs, num_query, embed_dims).
                - init_reference_out: The initial value of reference \
                    points, has shape (bs, num_queries, 4).
                - inter_references_out: The internal value of reference \
                    points in decoder, has shape \
                    (num_dec_layers, bs, num_query, embed_dims)
                - enc_outputs_class: The classification score of proposals \
                    generated from encoder's feature maps, has shape \
                    (batch, h*w, num_classes). Only returned when \
                    `as_two_stage` is True, otherwise None.
                - enc_outputs_coord_unact: The regression results generated \
                    from encoder's feature maps, has shape (batch, h*w, 4). \
                    Only returned when `as_two_stage` is True, \
                    otherwise None.
        """
        assert self.as_two_stage or query_embed is not None
        feat_flatten = []
        mask_flatten = []
        lvl_pos_embed_flatten = []
        spatial_shapes = []
        for lvl, (feat, mask, pos_embed) in enumerate(
                zip(mlvl_feats, mlvl_masks, mlvl_pos_embeds)):
            bs, c, h, w = feat.shape
            spatial_shape = (h, w)
            spatial_shapes.append(spatial_shape)
            feat = feat.flatten(2).transpose(1, 2)
            mask = mask.flatten(1)
            pos_embed = pos_embed.flatten(2).transpose(1, 2)
            lvl_pos_embed = pos_embed + self.level_embeds[lvl].view(1, 1, -1)
            lvl_pos_embed_flatten.append(lvl_pos_embed)
            feat_flatten.append(feat)
            mask_flatten.append(mask)
        feat_flatten = torch.cat(feat_flatten, 1)
        mask_flatten = torch.cat(mask_flatten, 1)
        lvl_pos_embed_flatten = torch.cat(lvl_pos_embed_flatten, 1)
        spatial_shapes = torch.as_tensor(
            spatial_shapes, dtype=torch.long, device=feat_flatten.device)
        level_start_index = torch.cat((spatial_shapes.new_zeros(
            (1, )), spatial_shapes.prod(1).cumsum(0)[:-1]))
        valid_ratios = torch.stack(
            [self.get_valid_ratio(m) for m in mlvl_masks], 1)
        # The encoder stage is bypassed: the flattened backbone features
        # (bs, H*W, embed_dims) are used directly as the decoder memory.
        memory = feat_flatten
        bs, _, c = memory.shape
        query_pos, query = torch.split(query_embed, c, dim=-1)
        reference_points = self.reference_points_embed(query_pos).sigmoid()
        init_reference_out = reference_points
        # decoder
        query = query.permute(1, 0, 2)
        memory = memory.permute(1, 0, 2)
        query_pos = query_pos.permute(1, 0, 2)
        inter_states, inter_references = self.decoder(
            query=query,
            key=None,
            value=memory,
            query_pos=query_pos,
            key_padding_mask=mask_flatten,
            reference_points=reference_points,
            spatial_shapes=spatial_shapes,
            level_start_index=level_start_index,
            valid_ratios=valid_ratios,
            reg_branches=reg_branches,
            **kwargs)
        inter_references_out = inter_references
        return inter_states, init_reference_out, inter_references_out
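# Shape walkthrough (illustrative sizes, not from the repo): with a single
# 32x88 feature level and embed_dims=256, feat_flatten is (bs, 2816, 256);
# query_embed of width 512 splits into query_pos and query of width 256
# each; and the decoder returns inter_states with shape
# (num_layers, num_query, bs, 256) when return_intermediate is enabled.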
import math
import warnings

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd.function import Function, once_differentiable
from torch.cuda.amp import custom_bwd, custom_fwd

from mmcv import deprecated_api_warning
from mmcv.cnn import constant_init, xavier_init
from mmcv.cnn.bricks.registry import ATTENTION
from mmcv.cnn.bricks.transformer import build_attention
from mmcv.runner import force_fp32, auto_fp16
from mmcv.runner.base_module import BaseModule, ModuleList, Sequential
from mmcv.utils import ext_loader

try:
    from mmcv.ops.multi_scale_deform_attn import MultiScaleDeformableAttention
except ImportError:
    warnings.warn(
        '`MultiScaleDeformableAttention` in MMCV has been moved to '
        '`mmcv.ops.multi_scale_deform_attn`, please update your MMCV')
    from mmcv.cnn.bricks.transformer import MultiScaleDeformableAttention

ext_module = ext_loader.load_ext(
    '_ext', ['ms_deform_attn_backward', 'ms_deform_attn_forward'])
@ATTENTION.register_module()
class MultiScaleDeformableAttentionFp16(BaseModule):
    def __init__(self, attn_cfg=None, init_cfg=None, **kwarg):
        super(MultiScaleDeformableAttentionFp16, self).__init__(init_cfg)
        self.deformable_attention = build_attention(attn_cfg)
        self.deformable_attention.init_weights()
        self.fp16_enabled = False

    @force_fp32(apply_to=('query', 'key', 'value', 'query_pos',
                          'reference_points', 'identity'))
    def forward(self, query,
                key=None,
                value=None,
                identity=None,
                query_pos=None,
                key_padding_mask=None,
                reference_points=None,
                spatial_shapes=None,
                level_start_index=None,
                **kwargs):
        return self.deformable_attention(query,
                                         key=key,
                                         value=value,
                                         identity=identity,
                                         query_pos=query_pos,
                                         key_padding_mask=key_padding_mask,
                                         reference_points=reference_points,
                                         spatial_shapes=spatial_shapes,
                                         level_start_index=level_start_index,
                                         **kwargs)
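# MultiScaleDeformableAttentionFp16 is a thin wrapper that makes deformable
# attention safe under mixed precision: @force_fp32 casts the listed tensor
# arguments back to float32 before delegating to the wrapped attention, since
# the deformable-attention CUDA kernel is numerically fragile in half
# precision.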
class MultiScaleDeformableAttnFunctionFp32(Function):

    @staticmethod
    @custom_fwd(cast_inputs=torch.float32)
    def forward(ctx, value, value_spatial_shapes, value_level_start_index,
                sampling_locations, attention_weights, im2col_step):
        """GPU version of multi-scale deformable attention.

        Args:
            value (Tensor): The value has shape
                (bs, num_keys, num_heads, embed_dims//num_heads)
            value_spatial_shapes (Tensor): Spatial shape of
                each feature map, has shape (num_levels, 2),
                last dimension 2 represents (h, w)
            sampling_locations (Tensor): The location of sampling points,
                has shape
                (bs, num_queries, num_heads, num_levels, num_points, 2),
                the last dimension 2 represents (x, y).
            attention_weights (Tensor): The weight of sampling points used
                when calculating the attention, has shape
                (bs, num_queries, num_heads, num_levels, num_points).
            im2col_step (Tensor): The step used in image to column.

        Returns:
            Tensor: has shape (bs, num_queries, embed_dims)
        """
        ctx.im2col_step = im2col_step
        output = ext_module.ms_deform_attn_forward(
            value,
            value_spatial_shapes,
            value_level_start_index,
            sampling_locations,
            attention_weights,
            im2col_step=ctx.im2col_step)
        ctx.save_for_backward(value, value_spatial_shapes,
                              value_level_start_index, sampling_locations,
                              attention_weights)
        return output

    @staticmethod
    @once_differentiable
    @custom_bwd
    def backward(ctx, grad_output):
        """GPU version of the backward function.

        Args:
            grad_output (Tensor): Gradient of the output tensor of forward.

        Returns:
            Tuple[Tensor]: Gradients of the input tensors in forward.
        """
        value, value_spatial_shapes, value_level_start_index,\
            sampling_locations, attention_weights = ctx.saved_tensors
        grad_value = torch.zeros_like(value)
        grad_sampling_loc = torch.zeros_like(sampling_locations)
        grad_attn_weight = torch.zeros_like(attention_weights)
        ext_module.ms_deform_attn_backward(
            value,
            value_spatial_shapes,
            value_level_start_index,
            sampling_locations,
            attention_weights,
            grad_output.contiguous(),
            grad_value,
            grad_sampling_loc,
            grad_attn_weight,
            im2col_step=ctx.im2col_step)
        return grad_value, None, None, \
            grad_sampling_loc, grad_attn_weight, None
def multi_scale_deformable_attn_pytorch(value, value_spatial_shapes,
                                        sampling_locations, attention_weights):
    """CPU version of multi-scale deformable attention.

    Args:
        value (Tensor): The value has shape
            (bs, num_keys, num_heads, embed_dims//num_heads)
        value_spatial_shapes (Tensor): Spatial shape of
            each feature map, has shape (num_levels, 2),
            last dimension 2 represents (h, w)
        sampling_locations (Tensor): The location of sampling points,
            has shape
            (bs, num_queries, num_heads, num_levels, num_points, 2),
            the last dimension 2 represents (x, y).
        attention_weights (Tensor): The weight of sampling points used
            when calculating the attention, has shape
            (bs, num_queries, num_heads, num_levels, num_points).

    Returns:
        Tensor: has shape (bs, num_queries, embed_dims)
    """
    bs, _, num_heads, embed_dims = value.shape
    _, num_queries, num_heads, num_levels, num_points, _ =\
        sampling_locations.shape
    value_list = value.split([H_ * W_ for H_, W_ in value_spatial_shapes],
                             dim=1)
    sampling_grids = 2 * sampling_locations - 1
    sampling_value_list = []
    for level, (H_, W_) in enumerate(value_spatial_shapes):
        # bs, H_*W_, num_heads, embed_dims ->
        # bs, H_*W_, num_heads*embed_dims ->
        # bs, num_heads*embed_dims, H_*W_ ->
        # bs*num_heads, embed_dims, H_, W_
        value_l_ = value_list[level].flatten(2).transpose(1, 2).reshape(
            bs * num_heads, embed_dims, H_, W_)
        # bs, num_queries, num_heads, num_points, 2 ->
        # bs, num_heads, num_queries, num_points, 2 ->
        # bs*num_heads, num_queries, num_points, 2
        sampling_grid_l_ = sampling_grids[:, :, :,
                                          level].transpose(1, 2).flatten(0, 1)
        # bs*num_heads, embed_dims, num_queries, num_points
        sampling_value_l_ = F.grid_sample(
            value_l_,
            sampling_grid_l_,
            mode='bilinear',
            padding_mode='zeros',
            align_corners=False)
        sampling_value_list.append(sampling_value_l_)
    # (bs, num_queries, num_heads, num_levels, num_points) ->
    # (bs, num_heads, num_queries, num_levels, num_points) ->
    # (bs, num_heads, 1, num_queries, num_levels*num_points)
    attention_weights = attention_weights.transpose(1, 2).reshape(
        bs * num_heads, 1, num_queries, num_levels * num_points)
    output = (torch.stack(sampling_value_list, dim=-2).flatten(-2) *
              attention_weights).sum(-1).view(bs, num_heads * embed_dims,
                                              num_queries)
    return output.transpose(1, 2).contiguous()
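# The PyTorch fallback above reproduces the CUDA kernel with F.grid_sample:
# sampling locations in [0, 1] are mapped to grid_sample's [-1, 1] range via
# 2 * loc - 1, each level is sampled bilinearly, and the per-point samples
# are combined with the softmaxed attention weights. It is slower than the
# kernel but useful for CPU-only debugging and for checking the kernel's
# output numerically.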
@ATTENTION.register_module()
class MultiScaleDeformableAttentionFP32(BaseModule):
    """An attention module used in Deformable-DETR. `Deformable DETR:
    Deformable Transformers for End-to-End Object Detection.
    <https://arxiv.org/pdf/2010.04159.pdf>`_.

    Args:
        embed_dims (int): The embedding dimension of Attention.
            Default: 256.
        num_heads (int): Parallel attention heads. Default: 8.
        num_levels (int): The number of feature maps used in
            Attention. Default: 4.
        num_points (int): The number of sampling points for
            each query in each head. Default: 4.
        im2col_step (int): The step used in image_to_column.
            Default: 64.
        dropout (float): A Dropout layer on `inp_identity`.
            Default: 0.1.
        batch_first (bool): Key, Query and Value have shape
            (batch, n, embed_dim) or (n, batch, embed_dim).
            Default to False.
        norm_cfg (dict): Config dict for the normalization layer.
            Default: None.
        init_cfg (obj:`mmcv.ConfigDict`): The Config for initialization.
            Default: None.
    """

    def __init__(self,
                 embed_dims=256,
                 num_heads=8,
                 num_levels=4,
                 num_points=4,
                 im2col_step=64,
                 dropout=0.1,
                 batch_first=False,
                 norm_cfg=None,
                 init_cfg=None):
        super().__init__(init_cfg)
        if embed_dims % num_heads != 0:
            raise ValueError(f'embed_dims must be divisible by num_heads, '
                             f'but got {embed_dims} and {num_heads}')
        dim_per_head = embed_dims // num_heads
        self.norm_cfg = norm_cfg
        self.dropout = nn.Dropout(dropout)
        self.batch_first = batch_first

        # you'd better set dim_per_head to a power of 2
        # which is more efficient in the CUDA implementation
        def _is_power_of_2(n):
            if (not isinstance(n, int)) or (n < 0):
                raise ValueError(
                    'invalid input for _is_power_of_2: {} (type: {})'.format(
                        n, type(n)))
            return (n & (n - 1) == 0) and n != 0

        if not _is_power_of_2(dim_per_head):
            warnings.warn(
                "You'd better set embed_dims in "
                'MultiScaleDeformAttention to make '
                'the dimension of each attention head a power of 2, '
                'which is more efficient in our CUDA implementation.')
        self.im2col_step = im2col_step
        self.embed_dims = embed_dims
        self.num_levels = num_levels
        self.num_heads = num_heads
        self.num_points = num_points
        self.sampling_offsets = nn.Linear(
            embed_dims, num_heads * num_levels * num_points * 2)
        self.attention_weights = nn.Linear(embed_dims,
                                           num_heads * num_levels * num_points)
        self.value_proj = nn.Linear(embed_dims, embed_dims)
        self.output_proj = nn.Linear(embed_dims, embed_dims)
        self.init_weights()

    def init_weights(self):
        """Default initialization for Parameters of Module."""
        constant_init(self.sampling_offsets, 0.)
        thetas = torch.arange(
            self.num_heads,
            dtype=torch.float32) * (2.0 * math.pi / self.num_heads)
        grid_init = torch.stack([thetas.cos(), thetas.sin()], -1)
        grid_init = (grid_init /
                     grid_init.abs().max(-1, keepdim=True)[0]).view(
                         self.num_heads, 1, 1,
                         2).repeat(1, self.num_levels, self.num_points, 1)
        for i in range(self.num_points):
            grid_init[:, :, i, :] *= i + 1
        self.sampling_offsets.bias.data = grid_init.view(-1)
        constant_init(self.attention_weights, val=0., bias=0.)
        xavier_init(self.value_proj, distribution='uniform', bias=0.)
        xavier_init(self.output_proj, distribution='uniform', bias=0.)
        self._is_init = True
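    # init_weights spreads the initial sampling offsets around the reference
    # point: the offset projection's weights are zeroed and its bias is set so
    # head h starts by looking in direction 2*pi*h/num_heads (normalized to
    # the unit square), with point i pushed (i + 1) steps outward. The very
    # first forward pass therefore samples a fixed star pattern that training
    # then reshapes.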
    @deprecated_api_warning({'residual': 'identity'},
                            cls_name='MultiScaleDeformableAttention')
    def forward(self,
                query,
                key=None,
                value=None,
                identity=None,
                query_pos=None,
                key_padding_mask=None,
                reference_points=None,
                spatial_shapes=None,
                level_start_index=None,
                **kwargs):
        """Forward Function of MultiScaleDeformAttention.

        Args:
            query (Tensor): Query of Transformer with shape
                (num_query, bs, embed_dims).
            key (Tensor): The key tensor with shape
                `(num_key, bs, embed_dims)`.
            value (Tensor): The value tensor with shape
                `(num_key, bs, embed_dims)`.
            identity (Tensor): The tensor used for addition, with the
                same shape as `query`. Default None. If None,
                `query` will be used.
            query_pos (Tensor): The positional encoding for `query`.
                Default: None.
            key_pos (Tensor): The positional encoding for `key`. Default
                None.
            reference_points (Tensor): The normalized reference points,
                with shape (bs, num_query, num_levels, 2), all elements
                in the range [0, 1], top-left (0, 0), bottom-right (1, 1),
                including the padding area; or
                (N, Length_{query}, num_levels, 4), where the additional
                two dimensions (w, h) form reference boxes.
            key_padding_mask (Tensor): ByteTensor for `query`, with
                shape [bs, num_key].
            spatial_shapes (Tensor): Spatial shape of features in
                different levels. With shape (num_levels, 2),
                last dimension represents (h, w).
            level_start_index (Tensor): The start index of each level.
                A tensor has shape ``(num_levels, )`` and can be represented
                as [0, h_0*w_0, h_0*w_0+h_1*w_1, ...].

        Returns:
            Tensor: forwarded results with shape [num_query, bs, embed_dims].
        """
        if value is None:
            value = query
        if identity is None:
            identity = query
        if query_pos is not None:
            query = query + query_pos
        if not self.batch_first:
            # change to (bs, num_query, embed_dims)
            query = query.permute(1, 0, 2)
            value = value.permute(1, 0, 2)
        bs, num_query, _ = query.shape
        bs, num_value, _ = value.shape
        assert (spatial_shapes[:, 0] * spatial_shapes[:, 1]).sum() == num_value
        value = self.value_proj(value)
        if key_padding_mask is not None:
            value = value.masked_fill(key_padding_mask[..., None], 0.0)
        value = value.view(bs, num_value, self.num_heads, -1)
        sampling_offsets = self.sampling_offsets(query).view(
            bs, num_query, self.num_heads, self.num_levels, self.num_points, 2)
        attention_weights = self.attention_weights(query).view(
            bs, num_query, self.num_heads, self.num_levels * self.num_points)
        attention_weights = attention_weights.softmax(-1)
        attention_weights = attention_weights.view(bs, num_query,
                                                   self.num_heads,
                                                   self.num_levels,
                                                   self.num_points)
        if reference_points.shape[-1] == 2:
            offset_normalizer = torch.stack(
                [spatial_shapes[..., 1], spatial_shapes[..., 0]], -1)
            sampling_locations = reference_points[:, :, None, :, None, :] \
                + sampling_offsets \
                / offset_normalizer[None, None, None, :, None, :]
        elif reference_points.shape[-1] == 4:
            sampling_locations = reference_points[:, :, None, :, None, :2] \
                + sampling_offsets / self.num_points \
                * reference_points[:, :, None, :, None, 2:] \
                * 0.5
        else:
            raise ValueError(
                f'Last dim of reference_points must be'
                f' 2 or 4, but get {reference_points.shape[-1]} instead.')
        if torch.cuda.is_available():
            output = MultiScaleDeformableAttnFunctionFp32.apply(
                value, spatial_shapes, level_start_index, sampling_locations,
                attention_weights, self.im2col_step)
        else:
            # The pure-PyTorch fallback takes neither level_start_index nor
            # im2col_step; see multi_scale_deformable_attn_pytorch above.
            output = multi_scale_deformable_attn_pytorch(
                value, spatial_shapes, sampling_locations, attention_weights)
        output = self.output_proj(output)
        if not self.batch_first:
            # (num_query, bs, embed_dims)
            output = output.permute(1, 0, 2)
        return self.dropout(output) + identity
#!/usr/bin/env bash
CONFIG=$1
CHECKPOINT=$2
GPUS=$3
PORT=${PORT:-29500}
PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \
    $(dirname "$0")/test.py $CONFIG $CHECKPOINT --launcher pytorch ${@:4}
#!/usr/bin/env bash
CONFIG=$1
GPUS=$2
PORT=${PORT:-29500}
PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \
    $(dirname "$0")/train.py $CONFIG --launcher pytorch ${@:3}
import sys
import os
sys.path.append(os.path.abspath('.'))
from src.datasets.evaluation.vector_eval import VectorEvaluate
import argparse


def parse_args():
    parser = argparse.ArgumentParser(
        description='Evaluate a submission file')
    parser.add_argument('submission',
        help='submission file in pickle or json format to be evaluated')
    parser.add_argument('gt',
        help='gt annotation file')
    args = parser.parse_args()
    return args


def main(args):
    evaluator = VectorEvaluate(args.gt, n_workers=0)
    results = evaluator.evaluate(args.submission)
    print(results)


if __name__ == '__main__':
    args = parse_args()
    main(args)
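# Example (hypothetical file names): run from the repo root to score an
# exported submission against the ground-truth annotations:
#   python tools/evaluate.py work_dirs/submission.pkl data/gt_annotations.json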
import os.path as osp
import pickle
import shutil
import tempfile
import time

import mmcv
import torch
import torch.distributed as dist
from mmcv.image import tensor2imgs
from mmcv.runner import get_dist_info
from mmdet.core import encode_mask_results


def single_gpu_test(model,
                    data_loader,
                    show=False,
                    out_dir=None,
                    show_score_thr=0.3):
    model.eval()
    results = []
    dataset = data_loader.dataset
    prog_bar = mmcv.ProgressBar(len(dataset))
    for i, data in enumerate(data_loader):
        with torch.no_grad():
            result = model(return_loss=False, rescale=True, **data)
        batch_size = len(result)
        if show or out_dir:
            if batch_size == 1 and isinstance(data['img'][0], torch.Tensor):
                img_tensor = data['img'][0]
            else:
                img_tensor = data['img'][0].data[0]
            img_metas = data['img_metas'][0].data[0]
            imgs = tensor2imgs(img_tensor, **img_metas[0]['img_norm_cfg'])
            assert len(imgs) == len(img_metas)
            for i, (img, img_meta) in enumerate(zip(imgs, img_metas)):
                h, w, _ = img_meta['img_shape']
                img_show = img[:h, :w, :]
                ori_h, ori_w = img_meta['ori_shape'][:-1]
                img_show = mmcv.imresize(img_show, (ori_w, ori_h))
                if out_dir:
                    out_file = osp.join(out_dir, img_meta['ori_filename'])
                else:
                    out_file = None
                model.module.show_result(
                    img_show,
                    result[i],
                    show=show,
                    out_file=out_file,
                    score_thr=show_score_thr)
        # encode mask results
        if isinstance(result[0], tuple):
            result = [(bbox_results, encode_mask_results(mask_results))
                      for bbox_results, mask_results in result]
        results.extend(result)
        for _ in range(batch_size):
            prog_bar.update()
    return results
def multi_gpu_test(model, data_loader, tmpdir=None, gpu_collect=False):
    """Test a model with multiple gpus.

    This method tests a model with multiple gpus and collects the results
    under two different modes: gpu and cpu. By setting 'gpu_collect=True' it
    encodes results to gpu tensors and uses gpu communication for result
    collection. In cpu mode it saves the results on different gpus to
    'tmpdir' and collects them on the rank 0 worker.

    Args:
        model (nn.Module): Model to be tested.
        data_loader (nn.Dataloader): Pytorch data loader.
        tmpdir (str): Path of directory to save the temporary results from
            different gpus under cpu mode.
        gpu_collect (bool): Option to use either gpu or cpu to collect
            results.

    Returns:
        list: The prediction results.
    """
    model.eval()
    results = []
    dataset = data_loader.dataset
    rank, world_size = get_dist_info()
    if rank == 0:
        prog_bar = mmcv.ProgressBar(len(dataset))
    time.sleep(2)  # This line can prevent deadlock problem in some cases.
    for i, data in enumerate(data_loader):
        with torch.no_grad():
            result = model(return_loss=False, rescale=True, **data)
        results.extend(result)
        if rank == 0:
            batch_size = len(result)
            for _ in range(batch_size * world_size):
                prog_bar.update()
    # collect results from all ranks
    if gpu_collect:
        results = collect_results_gpu(results, len(dataset))
    else:
        results = collect_results_cpu(results, len(dataset), tmpdir)
    return results

def collect_results_cpu(result_part, size, tmpdir=None):
    rank, world_size = get_dist_info()
    # create a tmp dir if it is not specified
    if tmpdir is None:
        MAX_LEN = 512
        # 32 is whitespace
        dir_tensor = torch.full((MAX_LEN, ),
                                32,
                                dtype=torch.uint8,
                                device='cuda')
        if rank == 0:
            mmcv.mkdir_or_exist('.dist_test')
            tmpdir = tempfile.mkdtemp(dir='.dist_test')
            tmpdir = torch.tensor(
                bytearray(tmpdir.encode()), dtype=torch.uint8, device='cuda')
            dir_tensor[:len(tmpdir)] = tmpdir
        dist.broadcast(dir_tensor, 0)
        tmpdir = dir_tensor.cpu().numpy().tobytes().decode().rstrip()
    else:
        mmcv.mkdir_or_exist(tmpdir)
    # dump the part result to the dir
    mmcv.dump(result_part, osp.join(tmpdir, f'part_{rank}.pkl'))
    dist.barrier()
    # collect all parts
    if rank != 0:
        return None
    else:
        # load results of all parts from tmp dir
        part_list = []
        for i in range(world_size):
            part_file = osp.join(tmpdir, f'part_{i}.pkl')
            part_list.append(mmcv.load(part_file))
        # sort the results
        ordered_results = []
        for res in zip(*part_list):
            ordered_results.extend(list(res))
        # the dataloader may pad some samples
        ordered_results = ordered_results[:size]
        # remove tmp dir
        shutil.rmtree(tmpdir)
        return ordered_results

def collect_results_gpu(result_part, size):
    rank, world_size = get_dist_info()
    # dump result part to tensor with pickle
    part_tensor = torch.tensor(
        bytearray(pickle.dumps(result_part)), dtype=torch.uint8, device='cuda')
    # gather all result part tensor shape
    shape_tensor = torch.tensor(part_tensor.shape, device='cuda')
    shape_list = [shape_tensor.clone() for _ in range(world_size)]
    dist.all_gather(shape_list, shape_tensor)
    # padding result part tensor to max length
    shape_max = torch.tensor(shape_list).max()
    part_send = torch.zeros(shape_max, dtype=torch.uint8, device='cuda')
    part_send[:shape_tensor[0]] = part_tensor
    part_recv_list = [
        part_tensor.new_zeros(shape_max) for _ in range(world_size)
    ]
    # gather all result part
    dist.all_gather(part_recv_list, part_send)
    if rank == 0:
        part_list = []
        for recv, shape in zip(part_recv_list, shape_list):
            part_list.append(
                pickle.loads(recv[:shape[0]].cpu().numpy().tobytes()))
        # sort the results
        ordered_results = []
        for res in zip(*part_list):
            ordered_results.extend(list(res))
        # the dataloader may pad some samples
        ordered_results = ordered_results[:size]
        return ordered_results
import random
import warnings

import numpy as np
import torch
from mmcv.parallel import MMDataParallel, MMDistributedDataParallel
from mmcv.runner import (HOOKS, DistSamplerSeedHook, EpochBasedRunner,
                         Fp16OptimizerHook, OptimizerHook, build_optimizer,
                         build_runner)
from mmcv.utils import build_from_cfg

from mmdet.core import DistEvalHook, EvalHook
from mmdet.datasets import (build_dataloader, build_dataset,
                            replace_ImageToTensor)
from mmdet.utils import get_root_logger

def set_random_seed(seed, deterministic=False):
    """Set random seed.

    Args:
        seed (int): Seed to be used.
        deterministic (bool): Whether to set the deterministic option for
            CUDNN backend, i.e., set `torch.backends.cudnn.deterministic`
            to True and `torch.backends.cudnn.benchmark` to False.
            Default: False.
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    if deterministic:
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False
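
# A quick sanity check of the helper (a sketch; on GPU, full run-to-run
# determinism additionally needs deterministic=True): seeding twice should
# reproduce the same sampling stream.
#
#     set_random_seed(42)
#     a = torch.rand(3)
#     set_random_seed(42)
#     b = torch.rand(3)
#     assert torch.equal(a, b)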

def train_detector(model,
                   dataset,
                   cfg,
                   distributed=False,
                   validate=False,
                   timestamp=None,
                   meta=None):
    logger = get_root_logger(cfg.log_level)

    # prepare data loaders
    dataset = dataset if isinstance(dataset, (list, tuple)) else [dataset]
    if 'imgs_per_gpu' in cfg.data:
        logger.warning('"imgs_per_gpu" is deprecated in MMDet V2.0. '
                       'Please use "samples_per_gpu" instead')
        if 'samples_per_gpu' in cfg.data:
            logger.warning(
                f'Got "imgs_per_gpu"={cfg.data.imgs_per_gpu} and '
                f'"samples_per_gpu"={cfg.data.samples_per_gpu}, "imgs_per_gpu"'
                f'={cfg.data.imgs_per_gpu} is used in this experiment')
        else:
            logger.warning(
                'Automatically set "samples_per_gpu"="imgs_per_gpu"='
                f'{cfg.data.imgs_per_gpu} in this experiment')
        cfg.data.samples_per_gpu = cfg.data.imgs_per_gpu

    data_loaders = [
        build_dataloader(
            ds,
            cfg.data.samples_per_gpu,
            cfg.data.workers_per_gpu,
            # cfg.gpus will be ignored if distributed
            len(cfg.gpu_ids),
            dist=distributed,
            seed=cfg.seed) for ds in dataset
    ]

    # put model on gpus
    if distributed:
        find_unused_parameters = cfg.get('find_unused_parameters', False)
        # Sets the `find_unused_parameters` parameter in
        # torch.nn.parallel.DistributedDataParallel
        model = MMDistributedDataParallel(
            model.cuda(),
            device_ids=[torch.cuda.current_device()],
            broadcast_buffers=False,
            find_unused_parameters=find_unused_parameters)
    else:
        model = MMDataParallel(
            model.cuda(cfg.gpu_ids[0]), device_ids=cfg.gpu_ids)

    # build runner
    optimizer = build_optimizer(model, cfg.optimizer)
    if 'runner' not in cfg:
        cfg.runner = {
            'type': 'EpochBasedRunner',
            'max_epochs': cfg.total_epochs
        }
        warnings.warn(
            'config is now expected to have a `runner` section, '
            'please set `runner` in your config.', UserWarning)
    else:
        if 'total_epochs' in cfg:
            assert cfg.total_epochs == cfg.runner.max_epochs
    runner = build_runner(
        cfg.runner,
        default_args=dict(
            model=model,
            optimizer=optimizer,
            work_dir=cfg.work_dir,
            logger=logger,
            meta=meta))

    # an ugly workaround to make .log and .log.json filenames the same
    runner.timestamp = timestamp

    # fp16 setting
    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is not None:
        optimizer_config = Fp16OptimizerHook(
            **cfg.optimizer_config, **fp16_cfg, distributed=distributed)
    elif distributed and 'type' not in cfg.optimizer_config:
        optimizer_config = OptimizerHook(**cfg.optimizer_config)
    else:
        optimizer_config = cfg.optimizer_config

    # register hooks
    runner.register_training_hooks(cfg.lr_config, optimizer_config,
                                   cfg.checkpoint_config, cfg.log_config,
                                   cfg.get('momentum_config', None))
    if distributed:
        if isinstance(runner, EpochBasedRunner):
            runner.register_hook(DistSamplerSeedHook())

    # register eval hooks
    if validate:
        # Support batch_size > 1 in validation
        val_samples_per_gpu = cfg.data.val.pop('samples_per_gpu', 1)
        if val_samples_per_gpu > 1:
            # Replace 'ImageToTensor' with 'DefaultFormatBundle'
            cfg.data.val.pipeline = replace_ImageToTensor(
                cfg.data.val.pipeline)
        val_dataset = build_dataset(cfg.data.val, dict(test_mode=True))
        val_dataloader = build_dataloader(
            val_dataset,
            samples_per_gpu=val_samples_per_gpu,
            workers_per_gpu=cfg.data.workers_per_gpu,
            dist=distributed,
            shuffle=False)
        eval_cfg = cfg.get('evaluation', {})
        eval_cfg['by_epoch'] = cfg.runner['type'] != 'IterBasedRunner'
        eval_hook = DistEvalHook if distributed else EvalHook
        runner.register_hook(eval_hook(val_dataloader, **eval_cfg))

    # user-defined hooks
    if cfg.get('custom_hooks', None):
        custom_hooks = cfg.custom_hooks
        assert isinstance(custom_hooks, list), \
            f'custom_hooks expect list type, but got {type(custom_hooks)}'
        for hook_cfg in cfg.custom_hooks:
            assert isinstance(hook_cfg, dict), \
                'Each item in custom_hooks expects dict type, but got ' \
                f'{type(hook_cfg)}'
            hook_cfg = hook_cfg.copy()
            priority = hook_cfg.pop('priority', 'NORMAL')
            hook = build_from_cfg(hook_cfg, HOOKS)
            runner.register_hook(hook, priority=priority)

    if cfg.resume_from:
        runner.resume(cfg.resume_from)
    elif cfg.load_from:
        runner.load_checkpoint(cfg.load_from)
    runner.run(data_loaders, cfg.workflow)
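
# For reference, a minimal driver for train_detector; the config path and its
# contents are hypothetical placeholders for a standard MMDet-style config
# with model/data/optimizer/lr_config/checkpoint_config/log_config sections.
#
#     from mmcv import Config
#     from mmdet.datasets import build_dataset
#     from mmdet.models import build_detector
#
#     cfg = Config.fromfile('configs/example.py')  # hypothetical config
#     cfg.gpu_ids = range(1)
#     cfg.seed = 0
#     model = build_detector(cfg.model, train_cfg=cfg.get('train_cfg'),
#                            test_cfg=cfg.get('test_cfg'))
#     datasets = [build_dataset(cfg.data.train)]
#     train_detector(model, datasets, cfg, distributed=False, validate=False)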
import argparse
import os
import os.path as osp
import warnings

import mmcv
import torch
from mmcv import Config, DictAction
from mmcv.cnn import fuse_conv_bn
from mmcv.parallel import MMDataParallel, MMDistributedDataParallel
from mmcv.runner import (get_dist_info, init_dist, load_checkpoint,
                         wrap_fp16_model)

from mmdet3d.apis import single_gpu_test
from mmdet3d.datasets import build_dataloader, build_dataset
from mmdet3d.models import build_model
from mmdet_test import multi_gpu_test
from mmdet_train import set_random_seed
from mmdet.datasets import replace_ImageToTensor

def parse_args():
    parser = argparse.ArgumentParser(
        description='MMDet test (and eval) a model')
    parser.add_argument('config', help='test config file path')
    parser.add_argument('checkpoint', type=str, help='checkpoint file')
    parser.add_argument('--split', type=str, required=True,
                        help='which split to test on')
    parser.add_argument('--work-dir', help='the dir to save logs and models')
    parser.add_argument(
        '--fuse-conv-bn',
        action='store_true',
        help='Whether to fuse conv and bn; this will slightly increase '
        'the inference speed')
    parser.add_argument(
        '--format-only',
        action='store_true',
        help='Format the output results without performing evaluation. It is '
        'useful when you want to format the result to a specific format and '
        'submit it to the test server')
    parser.add_argument(
        '--eval',
        action='store_true',
        help='whether to run evaluation.')
    parser.add_argument(
        '--gpu-collect',
        action='store_true',
        help='whether to use gpu to collect results.')
    parser.add_argument(
        '--tmpdir',
        help='tmp directory used for collecting results from multiple '
        'workers, available when gpu-collect is not specified')
    parser.add_argument('--seed', type=int, default=0, help='random seed')
    parser.add_argument(
        '--deterministic',
        action='store_true',
        help='whether to set deterministic options for CUDNN backend.')
    parser.add_argument(
        '--launcher',
        choices=['none', 'pytorch', 'slurm', 'mpi'],
        default='none',
        help='job launcher')
    parser.add_argument('--local_rank', type=int, default=0)
    args = parser.parse_args()
    if 'LOCAL_RANK' not in os.environ:
        os.environ['LOCAL_RANK'] = str(args.local_rank)
    return args

def main():
    args = parse_args()
    if args.split not in ['val', 'test']:
        raise ValueError('Please choose "val" or "test" split for testing')
    if (args.eval and args.format_only) or \
            (not args.eval and not args.format_only):
        raise ValueError('Please specify exactly one operation (eval/format) '
                         'with the argument "--eval" or "--format-only"')
    if args.eval and args.split == 'test':
        raise ValueError('Cannot evaluate on test set')

    cfg = Config.fromfile(args.config)
    # import modules from string list.
    if cfg.get('custom_imports', None):
        from mmcv.utils import import_modules_from_strings
        import_modules_from_strings(**cfg['custom_imports'])
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True

    # import modules from plugin/xx, registry will be updated
    import sys
    sys.path.append(os.path.abspath('.'))
    if hasattr(cfg, 'plugin'):
        if cfg.plugin:
            import importlib
            if hasattr(cfg, 'plugin_dir'):
                def import_path(plugin_dir):
                    _module_dir = os.path.dirname(plugin_dir)
                    _module_dir = _module_dir.split('/')
                    _module_path = _module_dir[0]
                    for m in _module_dir[1:]:
                        _module_path = _module_path + '.' + m
                    print(f'importing {_module_path}/')
                    plg_lib = importlib.import_module(_module_path)

                plugin_dirs = cfg.plugin_dir
                if not isinstance(plugin_dirs, list):
                    plugin_dirs = [plugin_dirs, ]
                for plugin_dir in plugin_dirs:
                    import_path(plugin_dir)
            else:
                # import dir is the dirpath of the config file
                _module_dir = os.path.dirname(args.config)
                _module_dir = _module_dir.split('/')
                _module_path = _module_dir[0]
                for m in _module_dir[1:]:
                    _module_path = _module_path + '.' + m
                print(f'importing {_module_path}/')
                plg_lib = importlib.import_module(_module_path)

    cfg_data_dict = cfg.data.get(args.split)
    cfg.model.pretrained = None
    # in case the test dataset is concatenated
    cfg_data_dict.test_mode = True
    samples_per_gpu = cfg_data_dict.pop('samples_per_gpu', 1)
    if samples_per_gpu > 1:
        # Replace 'ImageToTensor' with 'DefaultFormatBundle'
        cfg_data_dict.pipeline = replace_ImageToTensor(
            cfg_data_dict.pipeline)

    # init distributed env first, since logger depends on the dist info.
    if args.launcher == 'none':
        distributed = False
    else:
        distributed = True
        init_dist(args.launcher, **cfg.dist_params)

    # set random seeds
    if args.seed is not None:
        set_random_seed(args.seed, deterministic=args.deterministic)

    if args.work_dir is not None:
        # update configs according to CLI args if args.work_dir is not None
        cfg.work_dir = args.work_dir
    elif cfg.get('work_dir', None) is None:
        # use config filename as default work_dir if cfg.work_dir is None
        cfg.work_dir = osp.join('./work_dirs',
                                osp.splitext(osp.basename(args.config))[0])
    cfg_data_dict.work_dir = cfg.work_dir
    print('work_dir: ', cfg.work_dir)

    # build the dataloader
    dataset = build_dataset(cfg_data_dict)
    data_loader = build_dataloader(
        dataset,
        samples_per_gpu=samples_per_gpu,
        workers_per_gpu=cfg.data.workers_per_gpu,
        dist=distributed,
        shuffle=False)

    # build the model and load checkpoint
    cfg.model.train_cfg = None
    model = build_model(cfg.model, test_cfg=cfg.get('test_cfg'))
    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is not None:
        wrap_fp16_model(model)
    checkpoint = load_checkpoint(model, args.checkpoint, map_location='cpu')
    if args.fuse_conv_bn:
        model = fuse_conv_bn(model)

    if not distributed:
        model = MMDataParallel(model, device_ids=[0])
        outputs = single_gpu_test(model, data_loader)
    else:
        model = MMDistributedDataParallel(
            model.cuda(),
            device_ids=[torch.cuda.current_device()],
            broadcast_buffers=False)
        outputs = multi_gpu_test(model, data_loader, args.tmpdir,
                                 args.gpu_collect)

    rank, _ = get_dist_info()
    if rank == 0:
        if args.format_only:
            dataset.format_results(outputs, prefix=cfg.work_dir)
        elif args.eval:
            print('start evaluation!')
            print(dataset.evaluate(outputs))


if __name__ == '__main__':
    main()
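
# Example invocations of this test script (paths are placeholders); exactly
# one of --eval / --format-only must be given, and --eval is rejected for the
# test split:
#
#     python test.py configs/example.py work_dirs/example/latest.pth \
#         --split val --eval
#     python test.py configs/example.py work_dirs/example/latest.pth \
#         --split test --format-only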
from __future__ import division

import argparse
import copy
import os
import time
import warnings
from os import path as osp

import mmcv
import torch
from mmcv import Config, DictAction
from mmcv.runner import get_dist_info, init_dist

from mmdet import __version__ as mmdet_version
from mmdet3d import __version__ as mmdet3d_version
from mmdet3d.apis import train_model
from mmdet3d.datasets import build_dataset
from mmdet3d.utils import collect_env, get_root_logger
from mmseg import __version__ as mmseg_version

# wrapper
from mmdet_train import set_random_seed
# from builder import build_model
from mmdet3d.models import build_model

def parse_args():
    parser = argparse.ArgumentParser(description='Train a detector')
    parser.add_argument('config', help='train config file path')
    parser.add_argument('--work-dir', help='the dir to save logs and models')
    parser.add_argument(
        '--resume-from', help='the checkpoint file to resume from')
    parser.add_argument(
        '--no-validate',
        action='store_true',
        help='whether not to evaluate the checkpoint during training')
    group_gpus = parser.add_mutually_exclusive_group()
    group_gpus.add_argument(
        '--gpus',
        type=int,
        help='number of gpus to use '
        '(only applicable to non-distributed training)')
    group_gpus.add_argument(
        '--gpu-ids',
        type=int,
        nargs='+',
        help='ids of gpus to use '
        '(only applicable to non-distributed training)')
    parser.add_argument('--seed', type=int, default=0, help='random seed')
    parser.add_argument(
        '--deterministic',
        action='store_true',
        help='whether to set deterministic options for CUDNN backend.')
    parser.add_argument(
        '--options',
        nargs='+',
        action=DictAction,
        help='override some settings in the used config, the key-value pair '
        'in xxx=yyy format will be merged into config file (deprecated), '
        'change to --cfg-options instead.')
    parser.add_argument(
        '--cfg-options',
        nargs='+',
        action=DictAction,
        help='override some settings in the used config, the key-value pair '
        'in xxx=yyy format will be merged into config file. If the value to '
        'be overwritten is a list, it should be like key="[a,b]" or key=a,b '
        'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" '
        'Note that the quotation marks are necessary and that no white space '
        'is allowed.')
    parser.add_argument(
        '--launcher',
        choices=['none', 'pytorch', 'slurm', 'mpi'],
        default='none',
        help='job launcher')
    parser.add_argument('--local_rank', type=int, default=0)
    parser.add_argument(
        '--autoscale-lr',
        action='store_true',
        help='automatically scale lr with the number of gpus')
    args = parser.parse_args()
    if 'LOCAL_RANK' not in os.environ:
        os.environ['LOCAL_RANK'] = str(args.local_rank)

    if args.options and args.cfg_options:
        raise ValueError(
            '--options and --cfg-options cannot be both specified, '
            '--options is deprecated in favor of --cfg-options')
    if args.options:
        warnings.warn('--options is deprecated in favor of --cfg-options')
        args.cfg_options = args.options

    return args

def main():
    args = parse_args()

    cfg = Config.fromfile(args.config)
    if args.cfg_options is not None:
        cfg.merge_from_dict(args.cfg_options)
    # import modules from string list.
    if cfg.get('custom_imports', None):
        from mmcv.utils import import_modules_from_strings
        import_modules_from_strings(**cfg['custom_imports'])
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True

    # import modules, registry will be updated
    import sys
    sys.path.append(os.path.abspath('.'))
    if hasattr(cfg, 'plugin'):
        if cfg.plugin:
            import importlib
            if hasattr(cfg, 'plugin_dir'):
                def import_path(plugin_dir):
                    _module_dir = os.path.dirname(plugin_dir)
                    _module_dir = _module_dir.split('/')
                    _module_path = _module_dir[0]
                    for m in _module_dir[1:]:
                        _module_path = _module_path + '.' + m
                    print(f'importing {_module_path}/')
                    plg_lib = importlib.import_module(_module_path)

                plugin_dirs = cfg.plugin_dir
                if not isinstance(plugin_dirs, list):
                    plugin_dirs = [plugin_dirs, ]
                for plugin_dir in plugin_dirs:
                    import_path(plugin_dir)
            else:
                # import dir is the dirpath of the config file
                _module_dir = os.path.dirname(args.config)
                _module_dir = _module_dir.split('/')
                _module_path = _module_dir[0]
                for m in _module_dir[1:]:
                    _module_path = _module_path + '.' + m
                print(f'importing {_module_path}/')
                plg_lib = importlib.import_module(_module_path)

    # work_dir is determined in this priority: CLI > segment in file > filename
    if args.work_dir is not None:
        # update configs according to CLI args if args.work_dir is not None
        cfg.work_dir = args.work_dir
    elif cfg.get('work_dir', None) is None:
        # use config filename as default work_dir if cfg.work_dir is None
        cfg.work_dir = osp.join('./work_dirs',
                                osp.splitext(osp.basename(args.config))[0])
    if args.resume_from is not None:
        cfg.resume_from = args.resume_from
    if args.gpu_ids is not None:
        cfg.gpu_ids = args.gpu_ids
    else:
        cfg.gpu_ids = range(1) if args.gpus is None else range(args.gpus)
    if args.autoscale_lr:
        # apply the linear scaling rule (https://arxiv.org/abs/1706.02677)
        cfg.optimizer['lr'] = cfg.optimizer['lr'] * len(cfg.gpu_ids) / 8

    # init distributed env first, since logger depends on the dist info.
    if args.launcher == 'none':
        distributed = False
    else:
        distributed = True
        init_dist(args.launcher, **cfg.dist_params)
        # re-set gpu_ids with distributed training mode
        _, world_size = get_dist_info()
        cfg.gpu_ids = range(world_size)

    # create work_dir
    mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))
    # dump config
    cfg.dump(osp.join(cfg.work_dir, osp.basename(args.config)))
    # init the logger before other steps
    timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    log_file = osp.join(cfg.work_dir, f'{timestamp}.log')
    # specify logger name; if we still use 'mmdet', the output info will be
    # filtered and won't be saved in the log_file
    # TODO: ugly workaround to judge whether we are training a det or seg model
    if cfg.model.type in ['EncoderDecoder3D']:
        logger_name = 'mmseg'
    else:
        logger_name = 'mmdet'
    logger = get_root_logger(
        log_file=log_file, log_level=cfg.log_level, name=logger_name)

    # init the meta dict to record some important information such as
    # environment info and seed, which will be logged
    meta = dict()
    # log env info
    env_info_dict = collect_env()
    env_info = '\n'.join([(f'{k}: {v}') for k, v in env_info_dict.items()])
    dash_line = '-' * 60 + '\n'
    logger.info('Environment info:\n' + dash_line + env_info + '\n' +
                dash_line)
    meta['env_info'] = env_info
    meta['config'] = cfg.pretty_text

    # log some basic info
    logger.info(f'Distributed training: {distributed}')
    logger.info(f'Config:\n{cfg.pretty_text}')

    # set random seeds
    if args.seed is not None:
        logger.info(f'Set random seed to {args.seed}, '
                    f'deterministic: {args.deterministic}')
        set_random_seed(args.seed, deterministic=args.deterministic)
    cfg.seed = args.seed
    meta['seed'] = args.seed
    meta['exp_name'] = osp.basename(args.config)

    model = build_model(
        cfg.model,
        train_cfg=cfg.get('train_cfg'),
        test_cfg=cfg.get('test_cfg'))
    model.init_weights()
    logger.info(f'Model:\n{model}')

    cfg.data.train.work_dir = cfg.work_dir
    cfg.data.val.work_dir = cfg.work_dir
    datasets = [build_dataset(cfg.data.train)]
    if len(cfg.workflow) == 2:
        val_dataset = copy.deepcopy(cfg.data.val)
        # in case we use a dataset wrapper
        if 'dataset' in cfg.data.train:
            val_dataset.pipeline = cfg.data.train.dataset.pipeline
        else:
            val_dataset.pipeline = cfg.data.train.pipeline
        # set test_mode=False here in the deep-copied config, which does not
        # affect the AP/AR calculation later
        # refer to https://mmdetection3d.readthedocs.io/en/latest/tutorials/customize_runtime.html#customize-workflow  # noqa
        val_dataset.test_mode = False
        datasets.append(build_dataset(val_dataset))
    if cfg.checkpoint_config is not None:
        # save mmdet version, config file content and class names in
        # checkpoints as meta data
        cfg.checkpoint_config.meta = dict(
            mmdet_version=mmdet_version,
            mmseg_version=mmseg_version,
            mmdet3d_version=mmdet3d_version,
            config=cfg.pretty_text,
            CLASSES=None,
            PALETTE=datasets[0].PALETTE  # for segmentors
            if hasattr(datasets[0], 'PALETTE') else None)
    # add an attribute for visualization convenience
    # model.CLASSES = datasets[0].CLASSES
    train_model(
        model,
        datasets,
        cfg,
        distributed=distributed,
        validate=(not args.no_validate),
        timestamp=timestamp,
        meta=meta)


if __name__ == '__main__':
    main()
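
# Example invocations of this training script (paths are placeholders):
#
#     python train.py configs/example.py --work-dir work_dirs/example
#
#     # distributed training with the pytorch launcher
#     python -m torch.distributed.launch --nproc_per_node=8 train.py \
#         configs/example.py --launcher pytorch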
import copy
import os
import os.path as osp

import cv2
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
from shapely.geometry import LineString

def remove_nan_values(uv):
    is_u_valid = np.logical_not(np.isnan(uv[:, 0]))
    is_v_valid = np.logical_not(np.isnan(uv[:, 1]))
    is_uv_valid = np.logical_and(is_u_valid, is_v_valid)

    uv_valid = uv[is_uv_valid]
    return uv_valid


def points_ego2img(pts_ego, extrinsics, intrinsics):
    pts_ego_4d = np.concatenate([pts_ego, np.ones([len(pts_ego), 1])], axis=-1)
    pts_cam_4d = extrinsics @ pts_ego_4d.T
    uv = (intrinsics @ pts_cam_4d[:3, :]).T
    uv = remove_nan_values(uv)
    depth = uv[:, 2]
    uv = uv[:, :2] / uv[:, 2].reshape(-1, 1)
    return uv, depth
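
# The projection convention assumed here: `extrinsics` maps ego to camera
# coordinates (4x4) and `intrinsics` is a 3x3 pinhole matrix; points behind
# the camera come back with non-positive depth. A small sketch with
# hypothetical camera parameters:
#
#     K = np.array([[1000., 0., 800.],
#                   [0., 1000., 450.],
#                   [0., 0., 1.]])  # hypothetical pinhole intrinsics
#     ego2cam = np.eye(4)  # hypothetical: camera frame coincides with ego
#     pts = np.array([[0., 0., 5.], [1., 1., 10.]])
#     uv, depth = points_ego2img(pts, ego2cam, K)
#     # uv[0] == (800., 450.), the optical center; depth == (5., 10.)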

def interp_fixed_dist(line, sample_dist):
    '''Interpolate a line at a fixed interval.

    Args:
        line (LineString): line
        sample_dist (float): sample interval

    Returns:
        points (array): interpolated points, shape (N, 2)
    '''
    distances = list(np.arange(sample_dist, line.length, sample_dist))
    # make sure to sample at least two points when sample_dist > line.length
    distances = [0, ] + distances + [line.length, ]
    sampled_points = np.array([list(line.interpolate(distance).coords)
                               for distance in distances]).squeeze()
    return sampled_points
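
# For example, resampling a straight 10 m segment every 2 m keeps both
# endpoints and yields six points:
#
#     line = LineString([(0, 0), (10, 0)])
#     pts = interp_fixed_dist(line, sample_dist=2.0)
#     # pts.shape == (6, 2): x = 0, 2, 4, 6, 8, 10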

def draw_polyline_ego_on_img(polyline_ego, img_bgr, extrinsics, intrinsics,
                             color_bgr, thickness):
    # if 2-dimensional, assume z=0
    if polyline_ego.shape[1] == 2:
        zeros = np.zeros((polyline_ego.shape[0], 1))
        polyline_ego = np.concatenate([polyline_ego, zeros], axis=1)

    polyline_ego = interp_fixed_dist(line=LineString(polyline_ego),
                                     sample_dist=0.2)
    uv, depth = points_ego2img(polyline_ego, extrinsics, intrinsics)

    h, w, c = img_bgr.shape
    is_valid_x = np.logical_and(0 <= uv[:, 0], uv[:, 0] < w - 1)
    is_valid_y = np.logical_and(0 <= uv[:, 1], uv[:, 1] < h - 1)
    is_valid_z = depth > 0
    is_valid_points = np.logical_and.reduce(
        [is_valid_x, is_valid_y, is_valid_z])
    if is_valid_points.sum() == 0:
        return

    # draw each maximal run of consecutive visible points as its own polyline
    tmp_list = []
    for i, valid in enumerate(is_valid_points):
        if valid:
            tmp_list.append(uv[i])
        else:
            if len(tmp_list) >= 2:
                tmp_vector = np.stack(tmp_list)
                tmp_vector = np.round(tmp_vector).astype(np.int32)
                draw_visible_polyline_cv2(
                    copy.deepcopy(tmp_vector),
                    valid_pts_bool=np.ones((len(tmp_vector), 1), dtype=bool),
                    image=img_bgr,
                    color=color_bgr,
                    thickness_px=thickness,
                )
            tmp_list = []
    if len(tmp_list) >= 2:
        tmp_vector = np.stack(tmp_list)
        tmp_vector = np.round(tmp_vector).astype(np.int32)
        draw_visible_polyline_cv2(
            copy.deepcopy(tmp_vector),
            valid_pts_bool=np.ones((len(tmp_vector), 1), dtype=bool),
            image=img_bgr,
            color=color_bgr,
            thickness_px=thickness,
        )
    # uv = np.round(uv[is_valid_points]).astype(np.int32)
    # draw_visible_polyline_cv2(
    #     copy.deepcopy(uv),
    #     valid_pts_bool=np.ones((len(uv), 1), dtype=bool),
    #     image=img_bgr,
    #     color=color_bgr,
    #     thickness_px=thickness,
    # )

def draw_visible_polyline_cv2(line, valid_pts_bool, image, color, thickness_px):
    """Draw a polyline onto an image using given line segments.

    Args:
        line: Array of shape (K, 2) representing the coordinates of line.
        valid_pts_bool: Array of shape (K,) representing which polyline
            coordinates are valid for rendering. For example, if the
            coordinate is occluded, a user might specify that it is invalid.
            Line segments touching an invalid vertex will not be rendered.
        image: Array of shape (H, W, 3), representing a 3-channel BGR image
        color: Tuple of shape (3,) with a BGR format color
        thickness_px: thickness (in pixels) to use when rendering the polyline.
    """
    line = np.round(line).astype(int)  # type: ignore
    for i in range(len(line) - 1):
        if (not valid_pts_bool[i]) or (not valid_pts_bool[i + 1]):
            continue
        x1 = line[i][0]
        y1 = line[i][1]
        x2 = line[i + 1][0]
        y2 = line[i + 1][1]
        # Use anti-aliasing (AA) for curves
        image = cv2.line(image, pt1=(x1, y1), pt2=(x2, y2), color=color,
                         thickness=thickness_px, lineType=cv2.LINE_AA)
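
# A small sketch on a blank canvas: with the last vertex marked invalid, only
# the first two segments are rendered.
#
#     canvas = np.zeros((100, 200, 3), dtype=np.uint8)
#     poly = np.array([[10, 10], [100, 50], [190, 10], [195, 90]])
#     valid = np.array([True, True, True, False])
#     draw_visible_polyline_cv2(poly, valid, canvas,
#                               color=(0, 0, 255), thickness_px=2)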

COLOR_MAPS_BGR = {
    # bgr colors
    'divider': (0, 0, 255),
    'boundary': (0, 255, 0),
    'ped_crossing': (255, 0, 0),
    'centerline': (51, 183, 255),
    'drivable_area': (171, 255, 255)
}

COLOR_MAPS_PLT = {
    'divider': 'r',
    'boundary': 'g',
    'ped_crossing': 'b',
    'centerline': 'orange',
    'drivable_area': 'y',
}

CAM_NAMES_AV2 = ['ring_front_center', 'ring_front_right', 'ring_front_left',
                 'ring_rear_right', 'ring_rear_left', 'ring_side_right',
                 'ring_side_left',
                 ]

class Renderer(object):
    """Render map elements on image views.

    Args:
        roi_size (tuple): bev range
    """

    def __init__(self, roi_size):
        self.roi_size = roi_size

    def render_bev_from_vectors(self, vectors, out_dir):
        '''Plot vectorized map elements on BEV.

        Args:
            vectors (dict): dict of vectorized map elements.
            out_dir (str): output directory
        '''
        car_img = Image.open('resources/images/car.png')
        map_path = os.path.join(out_dir, 'map.jpg')
        plt.figure(figsize=(self.roi_size[0], self.roi_size[1]))
        plt.xlim(-self.roi_size[0] / 2 - 1, self.roi_size[0] / 2 + 1)
        plt.ylim(-self.roi_size[1] / 2 - 1, self.roi_size[1] / 2 + 1)
        plt.axis('off')
        plt.imshow(car_img, extent=[-1.5, 1.5, -1.2, 1.2])
        for cat, vector_list in vectors.items():
            color = COLOR_MAPS_PLT[cat]
            for vector in vector_list:
                pts = np.array(vector)[:, :2]
                x = np.array([pt[0] for pt in pts])
                y = np.array([pt[1] for pt in pts])
                # plt.quiver(x[:-1], y[:-1], x[1:] - x[:-1], y[1:] - y[:-1],
                #            angles='xy', color=color, scale_units='xy',
                #            scale=1)
                plt.plot(x, y, color=color, linewidth=5, marker='o',
                         linestyle='-', markersize=20)
        plt.savefig(map_path, bbox_inches='tight', dpi=40)
        plt.close()

    def render_camera_views_from_vectors(self, vectors, imgs, extrinsics,
                                         intrinsics, thickness, out_dir):
        '''Project vectorized map elements to camera views.

        Args:
            vectors (dict): dict of vectorized map elements.
            imgs (tensor): images in bgr color.
            extrinsics (array): ego2img extrinsics, shape (4, 4)
            intrinsics (array): intrinsics, shape (3, 3)
            thickness (int): thickness of lines to draw on images.
            out_dir (str): output directory
        '''
        for i in range(len(imgs)):
            img = imgs[i]
            extrinsic = extrinsics[i]
            intrinsic = intrinsics[i]
            img_bgr = copy.deepcopy(img)
            for cat, vector_list in vectors.items():
                color = COLOR_MAPS_BGR[cat]
                for vector in vector_list:
                    img_bgr = np.ascontiguousarray(img_bgr)
                    vector_array = np.array(vector)
                    if vector_array.shape[1] > 3:
                        vector_array = vector_array[:, :3]
                    draw_polyline_ego_on_img(vector_array, img_bgr, extrinsic,
                                             intrinsic, color, thickness)
            out_path = osp.join(out_dir, CAM_NAMES_AV2[i]) + '.jpg'
            cv2.imwrite(out_path, img_bgr)
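
# Hypothetical usage of the BEV renderer, assuming the
# resources/images/car.png asset is present and the output directory exists:
#
#     renderer = Renderer(roi_size=(60, 30))
#     vectors = {'divider': [[(-5.0, 2.0), (5.0, 2.0)]]}
#     os.makedirs('vis_out', exist_ok=True)
#     renderer.render_bev_from_vectors(vectors, out_dir='vis_out')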