Commit f3b13cad authored by yeshenglong1's avatar yeshenglong1
Browse files

Update README.md

parent 0797920d
import mmcv import mmcv
import numpy as np import numpy as np
from mmdet.datasets.builder import PIPELINES from mmdet.datasets.builder import PIPELINES
@PIPELINES.register_module(force=True)
class LoadMultiViewImagesFromFiles(object):
    """Load multi channel images from a list of separate channel files.

    Expects ``results['img_filenames']`` to be a list of filenames.
    (The original docstring said ``img_filename``; the code reads
    ``img_filenames``.)

    Args:
        to_float32 (bool): Whether to convert the img to float32.
            Defaults to False.
        color_type (str): Color type of the file. Defaults to 'unchanged'.
    """

    def __init__(self, to_float32=False, color_type='unchanged'):
        self.to_float32 = to_float32
        self.color_type = color_type

    def __call__(self, results):
        """Call function to load multi-view image from files.

        Args:
            results (dict): Result dict containing multi-view image
                filenames under the key 'img_filenames'.

        Returns:
            dict: The result dict containing the multi-view image data.
                Added keys and values are described below.

                - img (list[np.ndarray]): Multi-view image arrays.
                - img_shape (list[tuple[int]]): Shapes of image arrays.
                - ori_shape (list[tuple[int]]): Shapes of original arrays.
                - pad_shape (list[tuple[int]]): Shapes of padded arrays.
                - img_norm_cfg (dict): Normalization configuration (identity
                  mean/std; actual normalization happens later in the
                  pipeline).
        """
        filename = results['img_filenames']
        img = [mmcv.imread(name, self.color_type) for name in filename]
        if self.to_float32:
            img = [i.astype(np.float32) for i in img]
        results['img'] = img
        results['img_shape'] = [i.shape for i in img]
        results['ori_shape'] = [i.shape for i in img]
        # Set initial values for default meta_keys
        results['pad_shape'] = [i.shape for i in img]
        # results['scale_factor'] = 1.0
        num_channels = 1 if len(img[0].shape) < 3 else img[0].shape[2]
        results['img_norm_cfg'] = dict(
            mean=np.zeros(num_channels, dtype=np.float32),
            std=np.ones(num_channels, dtype=np.float32),
            to_rgb=False)
        results['img_fields'] = ['img']
        return results

    def __repr__(self):
        """str: Return a string that describes the module."""
        return f'{self.__class__.__name__} (to_float32={self.to_float32}, '\
            f"color_type='{self.color_type}')"
import numpy as np import numpy as np
from mmdet.datasets.builder import PIPELINES from mmdet.datasets.builder import PIPELINES
from shapely.geometry import LineString from shapely.geometry import LineString
@PIPELINES.register_module(force=True)
class PolygonizeLocalMapBbox(object):
    """Pre-Processing used by vectormapnet model.

    Converts vectorized map elements into quantized token sequences
    (polylines) plus keypoint bounding boxes, stored under 'polys'.

    Args:
        canvas_size (tuple or list): bev feature size
        coord_dim (int): dimension of point's coordinate
        num_class (int): number of classes
        threshold (float): threshold for minimum bounding box size
    """

    def __init__(self,
                 canvas_size=(200, 100),
                 coord_dim=2,
                 num_class=3,
                 threshold=6/200,
                 ):
        self.canvas_size = np.array(canvas_size)
        self.num_class = num_class
        # for keypoints
        self.threshold = threshold
        self.coord_dim = coord_dim
        # token 0 is reserved as the stop (EOS) symbol
        self.map_stop_idx = 0
        # coordinate tokens are shifted up by 1 to leave room for EOS
        self.coord_dim_start_idx = 1

    def format_polyline_map(self, vectors):
        """Quantize, flatten and EOS-pad each polyline; also build
        per-token masks and loss weights.

        Args:
            vectors (dict): label -> list of polylines, each (npts, 2).

        Returns:
            tuple: (polylines, masks, weights) — parallel lists of 1D
            arrays of length npts * coord_dim + 1.
        """
        polylines, polyline_masks, polyline_weights = [], [], []
        # quantilize each label's lines individually.
        for label, _lines in vectors.items():
            for polyline in _lines:
                if label == 2:
                    # boundary-like class: weight tokens by local segment
                    # length (evaluate_line already appends the EOS weight
                    # and normalizes)
                    polyline_weight = evaluate_line(polyline).reshape(-1)
                else:
                    polyline_weight = np.ones_like(polyline).reshape(-1)
                    polyline_weight = np.pad(
                        polyline_weight, ((0, 1),), constant_values=1.)
                    polyline_weight = polyline_weight/polyline_weight.sum()
                # flatten and quantilized
                fpolyline = quantize_verts(
                    polyline, self.canvas_size, self.coord_dim)
                fpolyline = fpolyline.reshape(-1)
                # reindex starting from 1, and add a zero stopping token(EOS)
                fpolyline = \
                    np.pad(fpolyline + self.coord_dim_start_idx, ((0, 1),),
                           constant_values=0)
                # BUGFIX: np.bool was removed in NumPy 1.24; use builtin bool
                fpolyline_msk = np.ones(fpolyline.shape, dtype=bool)
                polyline_masks.append(fpolyline_msk)
                polyline_weights.append(polyline_weight)
                polylines.append(fpolyline)
        polyline_map = polylines
        polyline_map_mask = polyline_masks
        polyline_map_weights = polyline_weights
        return polyline_map, polyline_map_mask, polyline_map_weights

    def format_keypoint(self, vectors):
        """Build per-element bounding-box keypoints in both float (canvas
        scale) and quantized token form.

        Args:
            vectors (dict): label -> list of polylines, each (npts, 2).

        Returns:
            tuple: (kps, kp_labels, qkps, qkp_msks) where kps is
            (nbox, 4) float32, kp_labels (nbox,), qkps/qkp_msks are the
            quantized tokens and their masks.
        """
        kps, kp_labels = [], []
        qkps, qkp_masks = [], []
        # quantilize each label's lines individually.
        for label, _lines in vectors.items():
            for polyline in _lines:
                kp = get_bbox(polyline, self.threshold)
                kps.append(kp)
                kp_labels.append(label)
                gkp = kp
                # flatten and quantilized
                fkp = quantize_verts(gkp, self.canvas_size, self.coord_dim)
                fkp = fkp.reshape(-1)
                # BUGFIX: np.bool was removed in NumPy 1.24; use builtin bool
                fkps_msk = np.ones(fkp.shape, dtype=bool)
                qkp_masks.append(fkps_msk)
                qkps.append(fkp)
        qkps = np.stack(qkps)
        qkp_msks = np.stack(qkp_masks)
        # format det
        kps = np.stack(kps, axis=0).astype(np.float32)*self.canvas_size
        kp_labels = np.array(kp_labels)
        # restrict the boundary (keep strictly inside the canvas)
        kps[..., 0] = np.clip(kps[..., 0], 0.1, self.canvas_size[0]-0.1)
        kps[..., 1] = np.clip(kps[..., 1], 0.1, self.canvas_size[1]-0.1)
        # nbox, boxsize(4)*coord_dim(2)
        kps = kps.reshape(kps.shape[0], -1)
        return kps, kp_labels, qkps, qkp_msks

    def Polygonization(self, input_dict):
        """Process vertices: build quantized polylines and keypoint boxes
        and store them under input_dict['polys'].
        """
        vectors = input_dict['vectors']
        n_lines = 0
        for label, lines in vectors.items():
            n_lines += len(lines)
        # empty scene: nothing to polygonize
        if not n_lines:
            input_dict['polys'] = []
            return input_dict
        polyline_map, polyline_map_mask, polyline_map_weight = \
            self.format_polyline_map(vectors)
        keypoint, keypoint_label, qkeypoint, qkeypoint_mask = \
            self.format_keypoint(vectors)
        # gather
        polys = {
            # for det
            'keypoint': keypoint,
            'det_label': keypoint_label,
            # for gen
            'gen_label': keypoint_label,
            'qkeypoint': qkeypoint,
            'qkeypoint_mask': qkeypoint_mask,
            'polylines': polyline_map,  # List[array]
            'polyline_masks': polyline_map_mask,  # List[array]
            'polyline_weights': polyline_map_weight
        }
        # Format outputs
        input_dict['polys'] = polys
        return input_dict

    def __call__(self, input_dict):
        input_dict = self.Polygonization(input_dict)
        return input_dict
def evaluate_line(polyline):
    """Compute per-token weights for a polyline.

    Each point is weighted by the length of its adjacent segments
    (endpoints use their single neighboring segment), normalized to sum
    to 1, then split across the point's two coordinate tokens, with one
    extra weight appended for the stop (EOS) token.

    Args:
        polyline (array): point coordinates, shape (npts, 2)

    Returns:
        array: token weights, shape (npts * 2 + 1,)
    """
    seg_len = np.linalg.norm(np.diff(polyline, axis=0), axis=-1)
    endpoint_w = seg_len[[0, -1]].copy()
    interior_w = 0.5 * (seg_len[:-1] + seg_len[1:])
    weights = np.concatenate((endpoint_w[:1], interior_w, endpoint_w[-1:]))
    total = weights.sum()
    if total == 0:
        # degenerate polyline: avoid division by zero
        total = 1
    weights = weights / total
    # each point emits two coordinate tokens; split its weight between them
    weights = np.repeat(weights, 2) / 2
    # append the weight for the stop (EOS) token
    weights = np.pad(weights, ((0, 1)),
                     constant_values=1 / (len(polyline) * 2))
    return weights
def quantize_verts(verts, canvas_size, coord_dim):
    """Quantize vertices from the normalized range [0, 1] to integer
    pixel coordinates in [0, canvas_size - 1].

    (The original docstring claimed an input range of [-1, 1] and output
    range [0, n_bits**2 - 1]; the code maps [0, 1] onto the canvas.)

    Args:
        verts (array): vertices coordinates, shape (seqlen, coords_dim)
        canvas_size (tuple): bev feature size
        coord_dim (int): dimension of point coordinates

    Returns:
        array: quantized vertices (int32), shape (seqlen, coord_dim)
    """
    min_range = 0
    max_range = 1
    # e.g. canvas of 200 bins -> integer values 0..199
    range_quantize = np.array(canvas_size) - 1
    verts_ratio = (verts[:, :coord_dim] - min_range) / (
        max_range - min_range)
    verts_quantize = verts_ratio * range_quantize[:coord_dim]
    return verts_quantize.astype('int32')
def get_bbox(polyline, threshold):
    """Compute an axis-aligned bounding box for a polyline, inflating
    boxes thinner than `threshold` along either axis.

    (The original docstring was wrongly copy-pasted from
    ``quantize_verts``.)

    Args:
        polyline (array): point coordinates, shape (seqlen, 2);
            assumed normalized to [0, 1] — the result is clipped to
            that range.
        threshold (float): minimum bbox extent along each axis

    Returns:
        array: bounding box corners [[minx, miny], [maxx, maxy]],
            clipped to [0, 1], shape (2, 2)
    """
    eps = 1e-4
    polyline = LineString(polyline)
    bbox = polyline.bounds
    minx, miny, maxx, maxy = bbox
    W, H = maxx-minx, maxy-miny
    if W < threshold or H < threshold:
        # inflate the geometry so the envelope has at least `threshold`
        # extent along the thin axis (eps guards a zero-size buffer)
        remain = max((threshold - min(W, H))/2, eps)
        bbox = polyline.buffer(remain).envelope.bounds
        minx, miny, maxx, maxy = bbox
    bbox_np = np.array([[minx, miny], [maxx, maxy]])
    bbox_np = np.clip(bbox_np, 0., 1.)
    return bbox_np
\ No newline at end of file
import numpy as np import numpy as np
import mmcv import mmcv
from mmdet.datasets.builder import PIPELINES from mmdet.datasets.builder import PIPELINES
@PIPELINES.register_module(force=True)
class Normalize3D(object):
    """Normalize each image in every 'img_fields' entry.

    Added key is "img_norm_cfg".

    Args:
        mean (sequence): Mean values of 3 channels.
        std (sequence): Std values of 3 channels.
        to_rgb (bool): Whether to convert the image from BGR to RGB,
            default is true.
    """

    def __init__(self, mean, std, to_rgb=True):
        self.mean = np.array(mean, dtype=np.float32)
        self.std = np.array(std, dtype=np.float32)
        self.to_rgb = to_rgb

    def __call__(self, results):
        """Normalize every image list referenced by 'img_fields'.

        Args:
            results (dict): Result dict from loading pipeline.

        Returns:
            dict: Normalized results; 'img_norm_cfg' key is added into
                the result dict.
        """
        for field in results.get('img_fields', ['img']):
            normalized = []
            for image in results[field]:
                normalized.append(
                    mmcv.imnormalize(image, self.mean, self.std, self.to_rgb))
            results[field] = normalized
        results['img_norm_cfg'] = dict(
            mean=self.mean, std=self.std, to_rgb=self.to_rgb)
        return results

    def __repr__(self):
        return (f'{self.__class__.__name__}'
                f'(mean={self.mean}, std={self.std}, to_rgb={self.to_rgb})')
@PIPELINES.register_module(force=True)
class PadMultiViewImages(object):
    """Pad multi-view images and change intrinsics.

    There are two padding modes: (1) pad to a fixed size and (2) pad to
    the minimum size that is divisible by some number.
    Added keys are "img_shape", "img_fixed_size", "img_size_divisor".
    If set `change_intrinsics=True`, keys 'cam_intrinsics' and 'ego2img'
    will be changed.

    Args:
        size (tuple, optional): Fixed padding size, (h, w).
        size_divisor (int, optional): The divisor of padded size.
        pad_val (float, optional): Padding value, 0 by default.
        change_intrinsics (bool): whether to update intrinsics.
    """

    def __init__(self, size=None, size_divisor=None, pad_val=0,
                 change_intrinsics=False):
        self.size = size
        self.size_divisor = size_divisor
        self.pad_val = pad_val
        # only one of size and size_divisor should be valid
        assert size is not None or size_divisor is not None
        assert size is None or size_divisor is None
        self.change_intrinsics = change_intrinsics

    def _pad_img(self, results):
        """Pad images according to ``self.size`` or ``self.size_divisor``."""
        original_shape = [img.shape for img in results['img']]
        for key in results.get('img_fields', ['img']):
            if self.size is not None:
                padded_img = [mmcv.impad(
                    img, shape=self.size, pad_val=self.pad_val)
                    for img in results[key]]
            elif self.size_divisor is not None:
                padded_img = [mmcv.impad_to_multiple(
                    img, self.size_divisor, pad_val=self.pad_val)
                    for img in results[key]]
            results[key] = padded_img
        if self.change_intrinsics:
            post_intrinsics, post_ego2imgs = [], []
            for img, oshape, cam_intrinsic, ego2img in zip(
                    results['img'], original_shape,
                    results['cam_intrinsics'], results['ego2img']):
                # NOTE(review): padding does not rescale pixels, yet the
                # intrinsics are scaled by the shape ratio — looks like a
                # resize-style update; confirm this is intended for padding.
                scaleW = img.shape[1] / oshape[1]
                scaleH = img.shape[0] / oshape[0]
                rot_resize_matrix = np.array([
                    [scaleW, 0, 0, 0],
                    [0, scaleH, 0, 0],
                    [0, 0, 1, 0],
                    [0, 0, 0, 1]])
                post_intrinsic = rot_resize_matrix[:3, :3] @ cam_intrinsic
                post_ego2img = rot_resize_matrix @ ego2img
                post_intrinsics.append(post_intrinsic)
                post_ego2imgs.append(post_ego2img)
            results.update({
                'cam_intrinsics': post_intrinsics,
                'ego2img': post_ego2imgs,
            })
        results['img_shape'] = [img.shape for img in padded_img]
        results['img_fixed_size'] = self.size
        results['img_size_divisor'] = self.size_divisor

    def __call__(self, results):
        """Call function to pad images.

        Args:
            results (dict): Result dict from loading pipeline.

        Returns:
            dict: Updated result dict.
        """
        self._pad_img(results)
        return results

    def __repr__(self):
        repr_str = self.__class__.__name__
        repr_str += f'(size={self.size}, '
        repr_str += f'size_divisor={self.size_divisor}, '
        # BUGFIX: the original closed the paren after pad_val and omitted
        # the separator, yielding "pad_val=0)change_intrinsics=False)"
        repr_str += f'pad_val={self.pad_val}, '
        repr_str += f'change_intrinsics={self.change_intrinsics})'
        return repr_str
@PIPELINES.register_module(force=True)
class ResizeMultiViewImages(object):
    """Resize multi-view images and optionally update camera intrinsics.

    If set `change_intrinsics=True`, keys 'cam_intrinsics' and 'ego2img'
    will be changed to match the resized images.

    Args:
        size (tuple, optional): resize target size, (h, w).
        change_intrinsics (bool): whether to update intrinsics.
    """

    def __init__(self, size, change_intrinsics=True):
        self.size = size
        self.change_intrinsics = change_intrinsics

    def __call__(self, results: dict):
        resized_imgs, new_intrinsics, new_ego2imgs = [], [], []
        views = zip(results['img'],
                    results['cam_intrinsics'],
                    results['ego2img'])
        for image, cam_intrinsic, ego2img in views:
            # NOTE: mmcv.imresize expect (w, h) shape
            resized, w_scale, h_scale = mmcv.imresize(
                image, (self.size[1], self.size[0]), return_scale=True)
            resized_imgs.append(resized)
            # homogeneous scaling matrix for the (x, y) image axes
            scale_mat = np.array([
                [w_scale, 0, 0, 0],
                [0, h_scale, 0, 0],
                [0, 0, 1, 0],
                [0, 0, 0, 1]])
            new_intrinsics.append(scale_mat[:3, :3] @ cam_intrinsic)
            new_ego2imgs.append(scale_mat @ ego2img)
        results['img'] = resized_imgs
        results['img_shape'] = [im.shape for im in resized_imgs]
        if self.change_intrinsics:
            results.update({
                'cam_intrinsics': new_intrinsics,
                'ego2img': new_ego2imgs,
            })
        return results

    def __repr__(self):
        return (f'{self.__class__.__name__}(size={self.size}, '
                f'change_intrinsics={self.change_intrinsics})')
\ No newline at end of file
import numpy as np import numpy as np
from mmdet.datasets.builder import PIPELINES from mmdet.datasets.builder import PIPELINES
from shapely.geometry import LineString from shapely.geometry import LineString
from numpy.typing import NDArray from numpy.typing import NDArray
from typing import List, Tuple, Union, Dict from typing import List, Tuple, Union, Dict
@PIPELINES.register_module(force=True) @PIPELINES.register_module(force=True)
class VectorizeMap(object): class VectorizeMap(object):
"""Generate vectoized map and put into `semantic_mask` key. """Generate vectoized map and put into `semantic_mask` key.
Concretely, shapely geometry objects are converted into sample points (ndarray). Concretely, shapely geometry objects are converted into sample points (ndarray).
We use args `sample_num`, `sample_dist`, `simplify` to specify sampling method. We use args `sample_num`, `sample_dist`, `simplify` to specify sampling method.
Args: Args:
roi_size (tuple or list): bev range . roi_size (tuple or list): bev range .
normalize (bool): whether to normalize points to range (0, 1). normalize (bool): whether to normalize points to range (0, 1).
coords_dim (int): dimension of point coordinates. coords_dim (int): dimension of point coordinates.
simplify (bool): whether to use simpily function. If true, `sample_num` \ simplify (bool): whether to use simpily function. If true, `sample_num` \
and `sample_dist` will be ignored. and `sample_dist` will be ignored.
sample_num (int): number of points to interpolate from a polyline. Set to -1 to ignore. sample_num (int): number of points to interpolate from a polyline. Set to -1 to ignore.
sample_dist (float): interpolate distance. Set to -1 to ignore. sample_dist (float): interpolate distance. Set to -1 to ignore.
""" """
def __init__(self, def __init__(self,
roi_size: Union[Tuple, List], roi_size: Union[Tuple, List],
normalize: bool, normalize: bool,
coords_dim: int, coords_dim: int,
simplify: bool=False, simplify: bool=False,
sample_num: int=-1, sample_num: int=-1,
sample_dist: float=-1, sample_dist: float=-1,
): ):
self.coords_dim = coords_dim self.coords_dim = coords_dim
self.sample_num = sample_num self.sample_num = sample_num
self.sample_dist = sample_dist self.sample_dist = sample_dist
self.roi_size = np.array(roi_size) self.roi_size = np.array(roi_size)
self.normalize = normalize self.normalize = normalize
self.simplify = simplify self.simplify = simplify
self.sample_fn = None self.sample_fn = None
if sample_dist > 0: if sample_dist > 0:
assert sample_num < 0 and not simplify assert sample_num < 0 and not simplify
self.sample_fn = self.interp_fixed_dist self.sample_fn = self.interp_fixed_dist
if sample_num > 0: if sample_num > 0:
assert sample_dist < 0 and not simplify assert sample_dist < 0 and not simplify
self.sample_fn = self.interp_fixed_num self.sample_fn = self.interp_fixed_num
def interp_fixed_num(self, line: LineString) -> NDArray: def interp_fixed_num(self, line: LineString) -> NDArray:
''' Interpolate a line to fixed number of points. ''' Interpolate a line to fixed number of points.
Args: Args:
line (LineString): line line (LineString): line
Returns: Returns:
points (array): interpolated points, shape (N, 2) points (array): interpolated points, shape (N, 2)
''' '''
distances = np.linspace(0, line.length, self.sample_num) distances = np.linspace(0, line.length, self.sample_num)
sampled_points = np.array([list(line.interpolate(distance).coords) sampled_points = np.array([list(line.interpolate(distance).coords)
for distance in distances]).squeeze() for distance in distances]).squeeze()
return sampled_points return sampled_points
def interp_fixed_dist(self, line: LineString) -> NDArray: def interp_fixed_dist(self, line: LineString) -> NDArray:
''' Interpolate a line at fixed interval. ''' Interpolate a line at fixed interval.
Args: Args:
line (LineString): line line (LineString): line
Returns: Returns:
points (array): interpolated points, shape (N, 2) points (array): interpolated points, shape (N, 2)
''' '''
distances = list(np.arange(self.sample_dist, line.length, self.sample_dist)) distances = list(np.arange(self.sample_dist, line.length, self.sample_dist))
# make sure to sample at least two points when sample_dist > line.length # make sure to sample at least two points when sample_dist > line.length
distances = [0,] + distances + [line.length,] distances = [0,] + distances + [line.length,]
sampled_points = np.array([list(line.interpolate(distance).coords) sampled_points = np.array([list(line.interpolate(distance).coords)
for distance in distances]).squeeze() for distance in distances]).squeeze()
return sampled_points return sampled_points
def get_vectorized_lines(self, map_geoms: Dict) -> Dict: def get_vectorized_lines(self, map_geoms: Dict) -> Dict:
''' Vectorize map elements. Iterate over the input dict and apply the ''' Vectorize map elements. Iterate over the input dict and apply the
specified sample funcion. specified sample funcion.
Args: Args:
line (LineString): line line (LineString): line
Returns: Returns:
vectors (array): dict of vectorized map elements. vectors (array): dict of vectorized map elements.
''' '''
vectors = {} vectors = {}
for label, geom_list in map_geoms.items(): for label, geom_list in map_geoms.items():
vectors[label] = [] vectors[label] = []
for geom in geom_list: for geom in geom_list:
if geom.geom_type == 'LineString': if geom.geom_type == 'LineString':
geom = LineString(np.array(geom.coords)[:, :self.coords_dim]) geom = LineString(np.array(geom.coords)[:, :self.coords_dim])
if self.simplify: if self.simplify:
line = geom.simplify(0.2, preserve_topology=True) line = geom.simplify(0.2, preserve_topology=True)
line = np.array(line.coords) line = np.array(line.coords)
elif self.sample_fn: elif self.sample_fn:
line = self.sample_fn(geom) line = self.sample_fn(geom)
else: else:
line = np.array(line.coords) line = np.array(line.coords)
if self.normalize: if self.normalize:
line = self.normalize_line(line) line = self.normalize_line(line)
vectors[label].append(line) vectors[label].append(line)
elif geom.geom_type == 'Polygon': elif geom.geom_type == 'Polygon':
# polygon objects will not be vectorized # polygon objects will not be vectorized
continue continue
else: else:
raise ValueError('map geoms must be either LineString or Polygon!') raise ValueError('map geoms must be either LineString or Polygon!')
return vectors return vectors
def normalize_line(self, line: NDArray) -> NDArray:
    '''Map the xy coordinates of a polyline into the open interval (0, 1).

    Operates in place on the first two columns and returns the array.

    Args:
        line (NDArray): (N, coords_dim) array of points; only the xy
            columns are normalized.

    Returns:
        NDArray: the same array with xy mapped into (0, 1).
    '''
    half_w = self.roi_size[0] / 2
    half_h = self.roi_size[1] / 2
    # Shift so the ROI's lower-left corner becomes the origin.
    line[:, :2] = line[:, :2] + np.array([half_w, half_h])
    # Pad the divisor so results land strictly inside (0, 1).
    padding = 2
    line[:, :2] = line[:, :2] / (self.roi_size + padding)
    return line
def __call__(self, input_dict):
    '''Vectorize ``input_dict['map_geoms']`` and store it under ``vectors``.'''
    geoms = input_dict['map_geoms']
    input_dict['vectors'] = self.get_vectorized_lines(geoms)
    return input_dict
def __repr__(self):
    '''Return a human-readable summary of the vectorizer configuration.'''
    # BUGFIX: the original interleaved stray ')' characters and missing
    # separators, producing e.g. "...sample_num=20), ...roi_size=(60, 30))
    # normalize=True)coords_dim=2)".
    repr_str = self.__class__.__name__
    repr_str += f'(simplify={self.simplify}, '
    repr_str += f'sample_num={self.sample_num}, '
    repr_str += f'sample_dist={self.sample_dist}, '
    repr_str += f'roi_size={self.roi_size}, '
    repr_str += f'normalize={self.normalize}, '
    repr_str += f'coords_dim={self.coords_dim})'
    return repr_str
\ No newline at end of file
from .backbones import * from .backbones import *
from .heads import * from .heads import *
from .losses import * from .losses import *
from .mapers import * from .mapers import *
from .transformer_utils import * from .transformer_utils import *
from .assigner import * from .assigner import *
from .assigner import HungarianLinesAssigner from .assigner import HungarianLinesAssigner
from .match_cost import MapQueriesCost, BBoxLogitsCost, DynamicLinesCost, IoUCostC, BBoxCostC, LinesCost, LinesFixNumChamferCost, ClsSigmoidCost from .match_cost import MapQueriesCost, BBoxLogitsCost, DynamicLinesCost, IoUCostC, BBoxCostC, LinesCost, LinesFixNumChamferCost, ClsSigmoidCost
import torch import torch
from mmdet.core.bbox.builder import BBOX_ASSIGNERS from mmdet.core.bbox.builder import BBOX_ASSIGNERS
from mmdet.core.bbox.assigners import AssignResult from mmdet.core.bbox.assigners import AssignResult
from mmdet.core.bbox.assigners import BaseAssigner from mmdet.core.bbox.assigners import BaseAssigner
from mmdet.core.bbox.match_costs import build_match_cost from mmdet.core.bbox.match_costs import build_match_cost
try: try:
from scipy.optimize import linear_sum_assignment from scipy.optimize import linear_sum_assignment
except ImportError: except ImportError:
linear_sum_assignment = None linear_sum_assignment = None
@BBOX_ASSIGNERS.register_module()
class HungarianLinesAssigner(BaseAssigner):
    """Computes one-to-one matching between line predictions and ground truth.

    This class computes an assignment between the targets and the
    predictions based on the costs (a weighted sum of a classification
    cost and a line regression cost, built via ``build_match_cost``).
    The targets don't include the no_object class, so generally there are
    more predictions than targets. After the one-to-one matching, the
    un-matched predictions are treated as backgrounds. Thus each query
    prediction will be assigned with `0` or a positive integer indicating
    the ground truth index:

    - 0: negative sample, no assigned gt
    - positive integer: positive sample, index (1-based) of assigned gt

    Args:
        cost (dict): config of the combined matching cost.
        pc_range (list | None, optional): point-cloud range; stored as-is.
    """

    def __init__(self,
                 cost=dict(
                     type='MapQueriesCost',
                     cls_cost=dict(type='ClassificationCost', weight=1.),
                     reg_cost=dict(type='LinesCost', weight=1.0),
                 ),
                 pc_range=None,
                 **kwargs):
        self.pc_range = pc_range
        self.cost = build_match_cost(cost)

    def assign(self,
               preds: dict,
               gts: dict,
               gt_bboxes_ignore=None,
               eps=1e-7):
        """Compute one-to-one matching based on the weighted costs.

        This method assigns each query prediction to a ground truth or
        background. The `assigned_gt_inds` with -1 means don't care,
        0 means negative sample, and a positive number is the index
        (1-based) of the assigned gt.

        The assignment is done in the following steps, the order matters:

        1. assign every prediction to -1
        2. compute the weighted costs
        3. do Hungarian matching on CPU based on the costs
        4. assign all to 0 (background) first, then for each matched pair
           between predictions and gts, treat this prediction as
           foreground and assign the corresponding gt index (plus 1).

        Args:
            preds (dict): holds 'lines' [num_query, num_points, 2] and
                'scores' [num_query, num_class] (optionally 'masks').
            gts (dict): holds 'lines' [num_gt, num_points, 2] and
                'labels' of shape (num_gt,) (optionally 'masks').
            gt_bboxes_ignore (Tensor, optional): must be None; ignoring
                gt boxes is not supported.
            eps (int | float, optional): numerical-stability knob kept
                for interface compatibility (unused here). Default 1e-7.

        Returns:
            :obj:`AssignResult`: The assigned result.

        Raises:
            ImportError: if scipy is not installed.
            ValueError: if ``linear_sum_assignment`` rejects the cost
                matrix (e.g. it contains NaN or inf).
        """
        assert gt_bboxes_ignore is None, \
            'Only case when gt_bboxes_ignore is None is supported.'
        num_gts, num_lines = gts['lines'].size(0), preds['lines'].size(0)

        # 1. assign -1 by default
        assigned_gt_inds = \
            preds['lines'].new_full((num_lines,), -1, dtype=torch.long)
        assigned_labels = \
            preds['lines'].new_full((num_lines,), -1, dtype=torch.long)
        if num_gts == 0 or num_lines == 0:
            # No ground truth or predictions, return empty assignment.
            if num_gts == 0:
                # No ground truth, assign all to background.
                assigned_gt_inds[:] = 0
            return AssignResult(
                num_gts, assigned_gt_inds, None, labels=assigned_labels)

        # 2. compute the weighted costs
        cost = self.cost(preds, gts)

        # 3. do Hungarian matching on CPU using linear_sum_assignment
        cost = cost.detach().cpu().numpy()
        if linear_sum_assignment is None:
            raise ImportError('Please run "pip install scipy" '
                              'to install scipy first.')
        try:
            matched_row_inds, matched_col_inds = linear_sum_assignment(cost)
        except ValueError as err:
            # BUGFIX: was a bare `except:` that printed the cost range and
            # dropped into an interactive ipdb debugger (left-over
            # debugging code, hangs non-interactive training). Surface an
            # informative error instead.
            raise ValueError(
                'linear_sum_assignment failed on cost matrix with '
                'max {} and min {}'.format(cost.max(), cost.min())
            ) from err
        matched_row_inds = torch.from_numpy(matched_row_inds).to(
            preds['lines'].device)
        matched_col_inds = torch.from_numpy(matched_col_inds).to(
            preds['lines'].device)

        # 4. assign backgrounds and foregrounds
        # assign all indices to backgrounds first
        assigned_gt_inds[:] = 0
        # assign foregrounds based on matching results
        assigned_gt_inds[matched_row_inds] = matched_col_inds + 1
        assigned_labels[matched_row_inds] = gts['labels'][matched_col_inds]
        return AssignResult(
            num_gts, assigned_gt_inds, None, labels=assigned_labels)
\ No newline at end of file
import torch import torch
from mmdet.core.bbox.match_costs.builder import MATCH_COST from mmdet.core.bbox.match_costs.builder import MATCH_COST
from mmdet.core.bbox.match_costs import build_match_cost from mmdet.core.bbox.match_costs import build_match_cost
from mmdet.core.bbox.iou_calculators import bbox_overlaps from mmdet.core.bbox.iou_calculators import bbox_overlaps
from mmdet.core.bbox.transforms import bbox_cxcywh_to_xyxy from mmdet.core.bbox.transforms import bbox_cxcywh_to_xyxy
def chamfer_distance(pred, gt):
    '''Truncated bidirectional Chamfer distance between two point sets.

    Args:
        pred: Tensor of shape [num_points, 2].
        gt: Tensor of shape [num_gt, 2].

    Returns:
        Scalar tensor: mean nearest-neighbor distance pred->gt plus
        gt->pred, each per-point term clamped at 2.0.
    '''
    # All pairwise L2 distances: [num_points, num_gt].
    pairwise = torch.cdist(pred, gt, p=2)
    # pred -> gt direction.
    forward = pairwise.min(dim=-1).values.clamp(max=2.0).mean()
    # gt -> pred direction.
    backward = pairwise.min(dim=0).values.clamp(max=2.0).mean()
    return forward + backward
@MATCH_COST.register_module()
class ClsSigmoidCost:
    """Classification matching cost based on sigmoid scores.

    Args:
        weight (int | float, optional): loss_weight. Defaults to 1.
    """

    def __init__(self, weight=1.):
        self.weight = weight

    def __call__(self, cls_pred, gt_labels):
        """Compute the negated matched score for every (query, gt) pair.

        Args:
            cls_pred (Tensor): Predicted classification logits, shape
                [num_query, num_class].
            gt_labels (Tensor): Label of `gt_bboxes`, shape (num_gt,).

        Returns:
            torch.Tensor: cls_cost value with weight,
                shape [num_query, num_gt].
        """
        # Following the official DETR repo, the NLL is approximated by
        # 1 - score[gt_label]; the constant 1 does not change the
        # matching, so it is omitted.
        scores = cls_pred.sigmoid()
        matched_scores = scores[:, gt_labels]
        return -matched_scores * self.weight
@MATCH_COST.register_module()
class LinesFixNumChamferCost(object):
    """Pairwise truncated Chamfer-distance cost between point sequences.

    Args:
        weight (int | float, optional): loss_weight. Defaults to 1.
    """

    def __init__(self, weight=1.):
        self.weight = weight

    def __call__(self, lines_pred, gt_lines):
        """
        Args:
            lines_pred (Tensor): predicted normalized lines:
                [num_query, num_points, 2]
            gt_lines (Tensor): Ground truth lines
                [num_gt, num_points, 2]

        Returns:
            torch.Tensor: reg_cost value with weight,
                shape [num_pred, num_gt]
        """
        num_preds, num_gts = lines_pred.size(0), gt_lines.size(0)
        if num_preds == 0 or num_gts == 0:
            return lines_pred.new_zeros((num_preds, num_gts))
        # PERF: the original ran a Python double loop calling
        # chamfer_distance() once per (pred, gt) pair. The same truncated
        # bidirectional Chamfer distance is computed here with a single
        # batched cdist over all pairs.
        n_pts_pred = lines_pred.size(1)
        n_pts_gt = gt_lines.size(1)
        pred = lines_pred.unsqueeze(1).expand(
            num_preds, num_gts, n_pts_pred, -1).reshape(
                num_preds * num_gts, n_pts_pred, -1)
        gt = gt_lines.unsqueeze(0).expand(
            num_preds, num_gts, n_pts_gt, -1).reshape(
                num_preds * num_gts, n_pts_gt, -1)
        # [P*G, n_pts_pred, n_pts_gt]
        dist = torch.cdist(pred, gt, p=2)
        # Per-point nearest-neighbor distance, clamped at 2.0, averaged
        # in each direction (matches chamfer_distance()).
        dist_pred = dist.min(-1).values.clamp(max=2.0).mean(-1)
        dist_gt = dist.min(-2).values.clamp(max=2.0).mean(-1)
        dist_mat = (dist_pred + dist_gt).view(num_preds, num_gts)
        return dist_mat * self.weight
@MATCH_COST.register_module()
class LinesCost(object):
    """Mean per-coordinate L1 cost between fixed-size point sequences.

    Args:
        weight (int | float, optional): loss_weight. Defaults to 1.
    """

    def __init__(self, weight=1.):
        self.weight = weight

    def __call__(self, lines_pred, gt_lines, **kwargs):
        """
        Args:
            lines_pred (Tensor): predicted normalized lines:
                [num_query, num_points, 2]
            gt_lines (Tensor): Ground truth lines
                [num_gt, num_points, 2]

        Returns:
            torch.Tensor: reg_cost value with weight,
                shape [num_pred, num_gt]
        """
        # NOTE: the original also built a reversed copy of gt_lines
        # (torch.flip) that was never used; that dead code is removed.
        pred_flat = lines_pred.flatten(1, 2)
        gt_flat = gt_lines.flatten(1, 2)
        # Average the L1 distance over the flattened coordinate dim.
        num_coords = pred_flat.size(-1)
        dist_mat = torch.cdist(pred_flat, gt_flat, p=1) / num_coords
        return dist_mat * self.weight
@MATCH_COST.register_module()
class BBoxCostC:
    """BBoxL1Cost.

    Args:
        weight (int | float, optional): loss_weight
        box_format (str, optional): 'xyxy' for DETR, 'xywh' for Sparse_RCNN.
            NOTE(review): currently validated but not applied — both
            formats are scored with plain L1 on the raw coordinates
            (the conversion code was commented out upstream).

    Examples:
        >>> import torch
        >>> self = BBoxCostC()
        >>> bbox_pred = torch.rand(1, 4)
        >>> gt_bboxes = torch.FloatTensor([[0, 0, 2, 4], [1, 2, 3, 4]])
        >>> cost = self(bbox_pred, gt_bboxes)
        >>> cost.shape
        torch.Size([1, 2])
    """

    def __init__(self, weight=1., box_format='xyxy'):
        self.weight = weight
        assert box_format in ['xyxy', 'xywh']
        self.box_format = box_format

    def __call__(self, bbox_pred, gt_bboxes):
        """
        Args:
            bbox_pred (Tensor): Predicted boxes with normalized coordinates
                (cx, cy, w, h), which are all in range [0, 1]. Shape
                [num_query, 4].
            gt_bboxes (Tensor): Ground truth boxes with normalized
                coordinates (x1, y1, x2, y2). Shape [num_gt, 4].

        Returns:
            torch.Tensor: bbox_cost value with weight,
                shape [num_query, num_gt]
        """
        # box_format conversion is intentionally not applied (it was
        # commented out in the original); the cost is a plain L1 distance
        # on whatever coordinate encoding the caller provides.
        bbox_cost = torch.cdist(bbox_pred, gt_bboxes, p=1)
        return bbox_cost * self.weight
@MATCH_COST.register_module()
class IoUCostC:
    """IoU-based matching cost.

    Args:
        iou_mode (str, optional): iou mode such as 'iou' | 'giou'.
            Defaults to 'giou'.
        weight (int | float, optional): loss weight. Defaults to 1.
        box_format (str, optional): 'xyxy' or 'xywh' input encoding.
            Defaults to 'xywh'.

    Examples:
        >>> import torch
        >>> self = IoUCostC(box_format='xyxy')
        >>> bboxes = torch.FloatTensor([[1,1, 2, 2], [2, 2, 3, 4]])
        >>> gt_bboxes = torch.FloatTensor([[0, 0, 2, 4], [1, 2, 3, 4]])
        >>> self(bboxes, gt_bboxes)
        tensor([[-0.1250, 0.1667],
                [ 0.1667, -0.5000]])
    """

    def __init__(self, iou_mode='giou', weight=1., box_format='xywh'):
        self.weight = weight
        self.iou_mode = iou_mode
        assert box_format in ['xyxy', 'xywh']
        self.box_format = box_format

    def __call__(self, bboxes, gt_bboxes):
        """
        Args:
            bboxes (Tensor): Predicted boxes, shape [num_query, 4].
            gt_bboxes (Tensor): Ground truth boxes, shape [num_gt, 4].

        Returns:
            torch.Tensor: iou_cost value with weight,
                shape [num_query, num_gt]
        """
        if self.box_format == 'xywh':
            # Convert (cx, cy, w, h) to corner format before the overlap.
            bboxes = bbox_cxcywh_to_xyxy(bboxes)
            gt_bboxes = bbox_cxcywh_to_xyxy(gt_bboxes)
        # overlaps: [num_bboxes, num_gt]
        overlaps = bbox_overlaps(
            bboxes, gt_bboxes, mode=self.iou_mode, is_aligned=False)
        # Higher overlap -> lower cost; the constant 1 that would make
        # this a proper (1 - IoU) does not change the matching, so it
        # is omitted.
        return -overlaps * self.weight
@MATCH_COST.register_module()
class DynamicLinesCost(object):
    """LinesL1Cost.

    Point-wise L2 cost between polylines, reduced over points using the
    prediction/GT point-validity masks.

    Args:
        weight (int | float, optional): loss_weight
    """
    def __init__(self, weight=1.):
        # Multiplier applied to the final cost matrix.
        self.weight = weight
    def __call__(self, lines_pred, lines_gt, masks_pred, masks_gt):
        """
        Args:
            lines_pred (Tensor): predicted normalized lines:
                [nP, num_points, 2]
            lines_gt (Tensor): Ground truth lines
                [nG, num_points, 2]
            masks_pred: [nP, num_points] point-validity logits for preds
                (thresholded via sigmoid > 0.5 below)
            masks_gt: [nG, num_points] point-validity mask for gts
        Returns:
            dist_mat: reg_cost value with weight
                shape [nP, nG]
        """
        dist_mat = self.cal_dist(lines_pred, lines_gt)
        dist_mat = self.get_dynamic_line(dist_mat, masks_pred, masks_gt)
        dist_mat = dist_mat * self.weight
        return dist_mat
    def cal_dist(self, x1, x2):
        '''Pairwise L2 distance between corresponding point indices.
        Args:
            x1: B1,N,2
            x2: B2,N,2
        Return:
            dist_mat: B1,B2,N  (distance between x1[i,n] and x2[j,n])
        '''
        # Move the point dim to the batch position so cdist compares
        # point n of every x1 line against point n of every x2 line.
        x1 = x1.permute(1, 0, 2)
        x2 = x2.permute(1, 0, 2)
        dist_mat = torch.cdist(x1, x2, p=2)
        dist_mat = dist_mat.permute(1, 2, 0)
        return dist_mat
    def get_dynamic_line(self, mat, m1, m2):
        '''
        get dynamic line with difference approach
        mat: N1xN2xnpts
        m1: N1xnpts  (logits, thresholded at sigmoid > 0.5)
        m2: N2xnpts  (presumably already a 0/1 mask -- TODO confirm)
        '''
        # nPxnGxnum_points
        m1 = m1.unsqueeze(1).sigmoid() > 0.5
        m2 = m2.unsqueeze(0)
        # Assuming m2 is 0/1: 0 where both invalid, 0.5 where exactly one
        # is valid, 1 where both are valid.
        valid_points_mask = (m1 + m2)/2.
        average_factor_mask = valid_points_mask.sum(-1) > 0
        # NOTE(review): `masked_fill` on this *boolean* mask fills the
        # False positions with 1 and keeps True (== 1) elsewhere, so
        # `average_factor` is effectively all ones and the division below
        # is a no-op — `mat` becomes a masked *sum*, not the average the
        # comment claims. Presumably this was meant to be
        # `valid_points_mask.sum(-1).masked_fill(~average_factor_mask, 1)`.
        # Confirm before changing: trained models may depend on the
        # current scaling.
        average_factor = average_factor_mask.masked_fill(
            ~average_factor_mask, 1)
        # takes the average
        mat = mat * valid_points_mask
        mat = mat.sum(-1) / average_factor
        return mat
@MATCH_COST.register_module()
class BBoxLogitsCost(object):
    """BBoxLogits.

    Likelihood-style cost between per-token bbox logits and discrete
    ground-truth bbox tokens.

    Args:
        weight (int | float, optional): loss_weight
    """
    def __init__(self, weight=1.):
        self.weight = weight
    def calNLL(self, logits, value):
        '''
        Gather each gt token's logit for every (pred, gt) pair.
        Args:
            logits: B1, 8, cls_dim -- presumably already log-probs, since
                no log_softmax is applied here; TODO confirm with caller.
            value: B2, 8,
        Return:
            log_likelihood: B1,B2,8
        '''
        # Broadcast preds against gts: [B1,1,8,cls_dim] vs [1,B2,8,1].
        logits = logits[:, None]
        value = value[None]
        value = value.long().unsqueeze(-1)
        # Align both to a common [B1,B2,8,cls_dim] shape; only the first
        # channel of the broadcast value is kept as the gather index.
        value, log_pmf = torch.broadcast_tensors(value, logits)
        value = value[..., :1]
        return log_pmf.gather(-1, value).squeeze(-1)
    def __call__(self, bbox_pred, bbox_gt, **kwargs):
        """
        Args:
            bbox_pred: nproposal, 4*2, pos_dim
            bbox_gt: ngt, 4*2
        Returns:
            cost: nproposal, ngt
        """
        # NOTE(review): the gathered values are averaged without negation;
        # if `logits` are log-probs a true NLL cost would be the negative
        # of this — confirm the sign convention with the consumer.
        cost = self.calNLL(bbox_pred, bbox_gt).mean(-1)
        return cost * self.weight
@MATCH_COST.register_module()
class MapQueriesCost(object):
    """Combined matching cost for map queries: cls + reg (+ optional IoU).

    Args:
        cls_cost (dict): config for the classification cost.
        reg_cost (dict): config for the line regression cost.
        iou_cost (dict | None, optional): config for an optional IoU cost.
    """

    def __init__(self, cls_cost, reg_cost, iou_cost=None):
        self.cls_cost = build_match_cost(cls_cost)
        self.reg_cost = build_match_cost(reg_cost)
        if iou_cost is not None:
            self.iou_cost = build_match_cost(iou_cost)
        else:
            self.iou_cost = None

    def __call__(self, preds: dict, gts: dict):
        """Return the summed cost matrix for (preds, gts)."""
        # classification and bboxcost.
        total = self.cls_cost(preds['scores'], gts['labels'])

        # regression cost; point masks are only meaningful for the
        # dynamic-length cost.
        reg_kwargs = {}
        if 'masks' in preds and 'masks' in gts:
            assert isinstance(self.reg_cost, DynamicLinesCost), ' Issues!!'
            reg_kwargs = dict(masks_pred=preds['masks'],
                              masks_gt=gts['masks'])
        total = total + self.reg_cost(preds['lines'], gts['lines'],
                                      **reg_kwargs)

        # optional IoU cost
        if self.iou_cost is not None:
            total = total + self.iou_cost(preds['lines'], gts['lines'])
        return total
import torch import torch
import torch.nn as nn import torch.nn as nn
import torch.nn.functional as F import torch.nn.functional as F
class NoiseSythesis(nn.Module): class NoiseSythesis(nn.Module):
def __init__(self, def __init__(self,
p, scale=0.01, shift_scale=(8,5), p, scale=0.01, shift_scale=(8,5),
scaling_size=(0.1,0.1), canvas_size=(200, 100), scaling_size=(0.1,0.1), canvas_size=(200, 100),
bbox_type='sce', bbox_type='sce',
poly_coord_dim=2, poly_coord_dim=2,
bbox_coord_dim=2, bbox_coord_dim=2,
quantify=True): quantify=True):
super(NoiseSythesis, self).__init__() super(NoiseSythesis, self).__init__()
self.p = p self.p = p
self.scale = scale self.scale = scale
self.bbox_type = bbox_type self.bbox_type = bbox_type
self.quantify = quantify self.quantify = quantify
self.poly_coord_dim = poly_coord_dim self.poly_coord_dim = poly_coord_dim
self.bbox_coord_dim = bbox_coord_dim self.bbox_coord_dim = bbox_coord_dim
self.transforms = [self.random_shifting, self.random_scaling] self.transforms = [self.random_shifting, self.random_scaling]
# self.transforms = [self.random_scaling] # self.transforms = [self.random_scaling]
self.register_buffer('canvas_size', torch.tensor(canvas_size)) self.register_buffer('canvas_size', torch.tensor(canvas_size))
self.register_buffer('shift_scale', torch.tensor(shift_scale).float()) self.register_buffer('shift_scale', torch.tensor(shift_scale).float())
self.register_buffer('scaling_size', torch.tensor(scaling_size)) self.register_buffer('scaling_size', torch.tensor(scaling_size))
def random_scaling(self, bbox): def random_scaling(self, bbox):
''' '''
bbox: B, paramter_num, 2 bbox: B, paramter_num, 2
''' '''
device = bbox.device device = bbox.device
dtype = bbox.dtype dtype = bbox.dtype
B = bbox.shape[0] B = bbox.shape[0]
noise = (torch.rand(B, device=device)*2-1)[:,None,None] # [-1,1] noise = (torch.rand(B, device=device)*2-1)[:,None,None] # [-1,1]
scale = self.scaling_size.to(device) scale = self.scaling_size.to(device)
scale = (noise * scale) + 1 scale = (noise * scale) + 1
scaled_bbox = bbox * scale scaled_bbox = bbox * scale
# recenterization # recenterization
coffset = scaled_bbox.mean(-2) - bbox.float().mean(-2) coffset = scaled_bbox.mean(-2) - bbox.float().mean(-2)
scaled_bbox = scaled_bbox - coffset[:,None] scaled_bbox = scaled_bbox - coffset[:,None]
return scaled_bbox.round().type(dtype) return scaled_bbox.round().type(dtype)
def random_shifting(self, bbox): def random_shifting(self, bbox):
''' '''
bbox: B, paramter_num, 2 bbox: B, paramter_num, 2
''' '''
device = bbox.device device = bbox.device
batch_size = bbox.shape[0] batch_size = bbox.shape[0]
shift_scale = self.shift_scale shift_scale = self.shift_scale
scale = (bbox.max(1)[0] - bbox.min(1)[0]) * 0.1 scale = (bbox.max(1)[0] - bbox.min(1)[0]) * 0.1
scale = torch.where(scale < shift_scale, scale, shift_scale) scale = torch.where(scale < shift_scale, scale, shift_scale)
noise = (torch.rand(batch_size, 2, device=device)*2-1) # [-1,1] noise = (torch.rand(batch_size, 2, device=device)*2-1) # [-1,1]
offset = (noise * scale).round().type(bbox.dtype) offset = (noise * scale).round().type(bbox.dtype)
shifted_bbox = bbox + offset[:, None] shifted_bbox = bbox + offset[:, None]
return shifted_bbox return shifted_bbox
def gaussian_noise_bbox(self, bbox): def gaussian_noise_bbox(self, bbox):
dtype = bbox.dtype dtype = bbox.dtype
batch_size = bbox.shape[0] batch_size = bbox.shape[0]
scale = (self.canvas_size * self.scale)[:self.bbox_coord_dim] scale = (self.canvas_size * self.scale)[:self.bbox_coord_dim]
noisy_bbox = torch.normal(bbox.type(torch.float), scale) noisy_bbox = torch.normal(bbox.type(torch.float), scale)
if self.quantify: if self.quantify:
noisy_bbox = noisy_bbox.round().type(dtype) noisy_bbox = noisy_bbox.round().type(dtype)
# prevent out of bound case # prevent out of bound case
for i in range(self.bbox_coord_dim): for i in range(self.bbox_coord_dim):
noisy_bbox[...,i] =\ noisy_bbox[...,i] =\
torch.clamp(noisy_bbox[...,0],1,self.canvas_size[i]) torch.clamp(noisy_bbox[...,0],1,self.canvas_size[i])
else: else:
noisy_bbox = noisy_bbox.type(torch.float) noisy_bbox = noisy_bbox.type(torch.float)
return noisy_bbox return noisy_bbox
def gaussian_noise_poly(self, polyline, polyline_mask): def gaussian_noise_poly(self, polyline, polyline_mask):
device = polyline.device device = polyline.device
batchsize = polyline.shape[0] batchsize = polyline.shape[0]
scale = self.canvas_size * self.scale scale = self.canvas_size * self.scale
polyline = F.pad(polyline,(0,self.poly_coord_dim-1)) polyline = F.pad(polyline,(0,self.poly_coord_dim-1))
polyline = polyline.view(batchsize,-1, self.poly_coord_dim) polyline = polyline.view(batchsize,-1, self.poly_coord_dim)
mask = F.pad(polyline_mask[:,1:],(0,self.poly_coord_dim)) mask = F.pad(polyline_mask[:,1:],(0,self.poly_coord_dim))
noisy_polyline = torch.normal(polyline.type(torch.float), scale) noisy_polyline = torch.normal(polyline.type(torch.float), scale)
if self.quantify: if self.quantify:
noisy_polyline = noisy_polyline.round().type(polyline.dtype) noisy_polyline = noisy_polyline.round().type(polyline.dtype)
# prevent out of bound case # prevent out of bound case
for i in range(self.poly_coord_dim): for i in range(self.poly_coord_dim):
noisy_polyline[...,i] =\ noisy_polyline[...,i] =\
torch.clamp(noisy_polyline[...,i],0,self.canvas_size[i]) torch.clamp(noisy_polyline[...,i],0,self.canvas_size[i])
else: else:
noisy_polyline = noisy_polyline.type(torch.float) noisy_polyline = noisy_polyline.type(torch.float)
noisy_polyline = noisy_polyline.view(batchsize,-1) * mask noisy_polyline = noisy_polyline.view(batchsize,-1) * mask
noisy_polyline = noisy_polyline[:,:-(self.poly_coord_dim-1)] noisy_polyline = noisy_polyline[:,:-(self.poly_coord_dim-1)]
return noisy_polyline return noisy_polyline
def random_apply(self, bbox): def random_apply(self, bbox):
for t in self.transforms: for t in self.transforms:
if self.p < torch.rand(1): if self.p < torch.rand(1):
continue continue
bbox = t(bbox) bbox = t(bbox)
# prevent out of bound case # prevent out of bound case
bbox[...,0] =\ bbox[...,0] =\
torch.clamp(bbox[...,0],0,self.canvas_size[0]) torch.clamp(bbox[...,0],0,self.canvas_size[0])
bbox[...,1] =\ bbox[...,1] =\
torch.clamp(bbox[...,1],0,self.canvas_size[1]) torch.clamp(bbox[...,1],0,self.canvas_size[1])
return bbox return bbox
def simple_aug(self, batch): def simple_aug(self, batch):
# augment bbox # augment bbox
if self.bbox_type in ['sce', 'xyxy']: if self.bbox_type in ['sce', 'xyxy']:
fbbox = batch['bbox_flat'] fbbox = batch['bbox_flat']
seq_len = fbbox.shape[0] seq_len = fbbox.shape[0]
bbox = fbbox.view(seq_len, -1, 2) bbox = fbbox.view(seq_len, -1, 2)
bbox = self.gaussian_noise_bbox(bbox) bbox = self.gaussian_noise_bbox(bbox)
fbbox_aug = bbox.view(seq_len, -1) fbbox_aug = bbox.view(seq_len, -1)
aug_mask = torch.rand(fbbox.shape,device=fbbox.device) aug_mask = torch.rand(fbbox.shape,device=fbbox.device)
fbbox = torch.where(aug_mask<self.p, fbbox_aug, fbbox) fbbox = torch.where(aug_mask<self.p, fbbox_aug, fbbox)
elif self.bbox_type == 'rxyxy': elif self.bbox_type == 'rxyxy':
fbbox = self.rbbox_aug(batch) fbbox = self.rbbox_aug(batch)
elif self.bbox_type == 'convex_hull': elif self.bbox_type == 'convex_hull':
fbbox = self.convex_hull_aug(batch) fbbox = self.convex_hull_aug(batch)
# augment # augment
polyline = batch['polylines'] polyline = batch['polylines']
polyline_mask = batch['polyline_masks'] polyline_mask = batch['polyline_masks']
polyline_aug = self.gaussian_noise_poly(polyline, polyline_mask) polyline_aug = self.gaussian_noise_poly(polyline, polyline_mask)
aug_mask = torch.rand(polyline.shape,device=polyline.device) aug_mask = torch.rand(polyline.shape,device=polyline.device)
polyline = torch.where(aug_mask<self.p, polyline_aug, polyline) polyline = torch.where(aug_mask<self.p, polyline_aug, polyline)
return polyline, fbbox return polyline, fbbox
def rbbox_aug(self, batch): def rbbox_aug(self, batch):
return None return None
def convex_hull_aug(self,batch): def convex_hull_aug(self,batch):
return None return None
def __call__(self, batch, simple_aug=False): def __call__(self, batch, simple_aug=False):
if simple_aug: if simple_aug:
return self.simple_aug(batch) return self.simple_aug(batch)
else: else:
fbbox = batch['bbox_flat'] fbbox = batch['bbox_flat']
seq_len = fbbox.shape[0] seq_len = fbbox.shape[0]
bbox = fbbox.view(seq_len, -1, self.bbox_coord_dim) bbox = fbbox.view(seq_len, -1, self.bbox_coord_dim)
aug_bbox = self.random_apply(bbox) aug_bbox = self.random_apply(bbox)
aug_bbox_flat = aug_bbox.view(seq_len, -1) aug_bbox_flat = aug_bbox.view(seq_len, -1)
return aug_bbox_flat return aug_bbox_flat
import copy import copy
import math import math
import numpy as np import numpy as np
import torch import torch
import torch.nn as nn import torch.nn as nn
import torch.nn.functional as F import torch.nn.functional as F
from mmdet3d.models.builder import BACKBONES from mmdet3d.models.builder import BACKBONES
from mmdet.models import build_backbone, build_neck from mmdet.models import build_backbone, build_neck
class UpsampleBlock(nn.Module):
    """3x3 conv + GroupNorm + ReLU followed by a 2x bilinear upsample."""

    def __init__(self, ins, outs):
        super(UpsampleBlock, self).__init__()
        self.gn = nn.GroupNorm(32, outs)
        # 'same' padding: the convolution preserves spatial size.
        self.conv = nn.Conv2d(ins, outs, kernel_size=3, stride=1, padding=1)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        """Return the activated feature map at twice the input resolution."""
        out = self.relu(self.gn(self.conv(x)))
        return self.upsample2x(out)

    def upsample2x(self, x):
        """Bilinearly resize ``x`` to exactly double its height and width."""
        height, width = x.shape[-2:]
        return F.interpolate(x, size=(height * 2, width * 2),
                             mode='bilinear', align_corners=True)
class Upsample(nn.Module):
    """Bring multi-scale feature maps to one resolution and sum them.

    For each entry in ``zoom_size`` a tower of ``log2(scale)`` upsampling
    blocks is built (identity when the scale factor is already 1);
    ``forward`` applies each tower to its feature map and adds the results.
    """

    def __init__(self,
                 zoom_size=(2, 4, 8),
                 in_channels=128,
                 out_channels=128,
                 ):
        super(Upsample, self).__init__()
        self.out_channels = out_channels
        # Templates: the first block adapts channels, later ones keep them.
        head_block = UpsampleBlock(in_channels, out_channels)
        tail_block = UpsampleBlock(out_channels, out_channels)
        towers = []
        for factor in zoom_size:
            depth = int(math.log2(factor))
            if depth < 1:
                # Already at the target resolution; pass through unchanged.
                towers.append(nn.Identity())
                continue
            blocks = [copy.deepcopy(head_block)]
            blocks.extend(copy.deepcopy(tail_block)
                          for _ in range(depth - 1))
            towers.append(nn.Sequential(*blocks))
        self.fscale = nn.ModuleList(towers)

    def init_weights(self):
        """Kaiming-init every conv weight; zero every conv bias."""
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                nn.init.kaiming_uniform_(module.weight, a=1)
                nn.init.constant_(module.bias, 0)

    def forward(self, imgs):
        """Upsample each feature map with its tower and sum the results."""
        return sum(tower(fmap) for tower, fmap in zip(self.fscale, imgs))
@BACKBONES.register_module()
class IPMEncoder(nn.Module):
    '''
    Encode multi-camera image features into a BEV feature map via inverse
    perspective mapping (IPM), optionally fused with a PointPillars lidar
    branch. The BEV grid is sampled at several fixed heights and the
    per-height features are concatenated before the output convolution.
    '''
    def __init__(self,
                 img_backbone,
                 img_neck,
                 upsample,
                 xbound=[-30.0, 30.0, 0.5],
                 ybound=[-15.0, 15.0, 0.5],
                 zbound=[-10.0, 10.0, 20.0],
                 heights=[-1.1, 0, 0.5, 1.1],
                 pretrained=None,
                 out_channels=128,
                 num_cam=6,
                 use_lidar=False,
                 use_image=True,
                 lidar_dim=128,
                 ):
        """Build backbones, BEV plane buffers and output convolutions.

        Args:
            img_backbone (dict): Config for the image backbone.
            img_neck (dict): Config for the image neck.
            upsample (dict): Kwargs for the ``Upsample`` fusion module.
            xbound/ybound/zbound (list): (min, max, step) BEV extents;
                zbound is only forwarded to the lidar pillar encoder.
            heights (list): z values of the sampled BEV planes (one level
                per height).
            pretrained (str, optional): Passed to ``init_weights``.
            out_channels (int): Channels of the returned BEV feature.
            num_cam (int): Number of cameras.
            use_lidar/use_image (bool): Which branches to enable.
            lidar_dim (int): Channels of the pillar encoder output.
        """
        super(IPMEncoder, self).__init__()
        self.x_bound = xbound
        self.y_bound = ybound
        self.heights = heights
        self.num_cam = num_cam
        # NOTE(review): num_x/num_y are computed but never used below.
        num_x = int((xbound[1] - xbound[0]) / xbound[2])
        num_y = int((ybound[1] - ybound[0]) / ybound[2])
        self.img_backbone = build_backbone(img_backbone)
        self.img_neck = build_neck(img_neck)
        self.upsample = Upsample(**upsample)
        self.use_image = use_image
        self.use_lidar = use_lidar
        if self.use_lidar:
            # PointPillarEncoder is defined elsewhere in the project.
            self.pp = PointPillarEncoder(lidar_dim, xbound, ybound, zbound)
            # Image-branch head; "+3" accounts for the xyz grid that ipm()
            # concatenates onto each level's features.
            # NOTE(review): built even when use_image is False (unused then).
            self.outconvs =\
                nn.Conv2d((self.upsample.out_channels+3)*len(heights), out_channels//2,
                          kernel_size=3, stride=1, padding=1)  # same
            # When both branches are on, each contributes half the channels.
            if self.use_image:
                _out_channels = out_channels//2
            else:
                _out_channels = out_channels
            self.outconvs_lidar =\
                nn.Conv2d(lidar_dim, _out_channels,
                          kernel_size=3, stride=1, padding=1)  # same
        else:
            self.outconvs =\
                nn.Conv2d((self.upsample.out_channels+3)*len(heights), out_channels,
                          kernel_size=3, stride=1, padding=1)  # same
        self.init_weights(pretrained=pretrained)
        # One BEV plane of 3D points per sampling height.
        bev_planes = [construct_plane_grid(
            xbound, ybound, h) for h in self.heights]
        # Registered as a buffer so it follows the module's device/dtype.
        self.register_buffer('bev_planes', torch.stack(
            bev_planes),)  # nlvl, bH, bW, 3 (x, y, z)
        # NOTE(review): masked_embeds appears unused within this class.
        self.masked_embeds = nn.Embedding(len(heights), out_channels)
    def init_weights(self, pretrained=None):
        """Initialize model weights."""
        self.img_backbone.init_weights()
        self.img_neck.init_weights()
        self.upsample.init_weights()
        # Xavier-init the output convolutions (weights only; dim>1 filters
        # out the 1-D bias parameters).
        for p in self.outconvs.parameters():
            if p.dim() > 1:
                nn.init.xavier_uniform_(p)
        if self.use_lidar:
            for p in self.outconvs_lidar.parameters():
                if p.dim() > 1:
                    nn.init.xavier_uniform_(p)
            for p in self.pp.parameters():
                if p.dim() > 1:
                    nn.init.xavier_uniform_(p)
    def extract_img_feat(self, imgs):
        '''
        Extract image features and fuse the multi-scale maps into one.
        Args:
            imgs: B, n_cam, C, iH, iW
        Returns:
            img_feat: B * n_cam, C, H, W
        '''
        B, n_cam, C, iH, iW = imgs.shape
        # Fold cameras into the batch dimension for the 2D backbone.
        imgs = imgs.view(B * n_cam, C, iH, iW)
        img_feats = self.img_backbone(imgs)
        # reduce the channel dim
        img_feats = self.img_neck(img_feats)
        # fuse the multi-scale feature maps into a single one
        img_feat = self.upsample(img_feats)
        return img_feat
    def forward(self, imgs, img_metas, *args, points=None, **kwargs):
        '''
        Args:
            imgs: torch.Tensor of shape [B, N, 3, H, W]
                N: number of cams
            img_metas: list of dicts, one per sample; 'ego2img' holds the
                per-camera ego -> image projection matrices.
                # N=6, ['CAM_FRONT', 'CAM_FRONT_RIGHT', 'CAM_FRONT_LEFT', 'CAM_BACK', 'CAM_BACK_LEFT', 'CAM_BACK_RIGHT']
                ego2cam: [B, N, 4, 4]
                cam_intrinsics: [B, N, 3, 3]
                cam2ego_rotations: [B, N, 3, 3]
                cam2ego_translations: [B, N, 3]
                ...
            points: (ptensor, pmask) lidar input; only used when use_lidar.
        Outs:
            bev_feature: torch.Tensor of shape [B, C*nlvl, bH, bW]
        '''
        # NOTE(review): if both use_image and use_lidar are False, bev_feat
        # is never assigned and this raises NameError.
        if self.use_image:
            self.B = imgs.shape[0]
            # Get transform matrix
            ego2cam = []
            for img_meta in img_metas:
                ego2cam.append(img_meta['ego2img'])
            img_shape = imgs.shape[-2:]
            ego2cam = np.asarray(ego2cam)
            # Image backbone
            img_feats = self.extract_img_feat(imgs)
            # IPM: lift per-camera features onto the BEV planes
            bev_feat, bev_feat_mask = self.ipm(img_feats, ego2cam, img_shape)
            # merge the (C, nlvl) dims so all levels feed one convolution
            bev_feat = bev_feat.flatten(1, 2)
            bev_feat = self.outconvs(bev_feat)
        if self.use_lidar:
            lidar_feat = self.get_lidar_feature(points)
            if self.use_image:
                # Channel-concat: each branch supplies out_channels//2.
                bev_feat = torch.cat([bev_feat,lidar_feat],dim=1)
            else:
                bev_feat = lidar_feat
        return bev_feat
    def ipm(self, cam_feat, ego2cam, img_shape):
        '''
        Inverse-project camera features onto the stacked BEV planes.
        Args:
            cam_feat: B*ncam, C, cH, cW
            ego2cam: (B, ncam, 4, 4) ego -> image matrices (numpy).
            img_shape: tuple(H, W)
        Returns:
            project_feat: B, C+3, nlvl, bH, bW (xyz grid concatenated)
            bev_feat_mask: B, 1, nlvl, bH, bW — True where at least one
                camera observed the BEV cell.
        '''
        C = cam_feat.shape[1]
        bev_grid = self.bev_planes.unsqueeze(0).repeat(self.B, 1, 1, 1, 1)
        nlvl, bH, bW = bev_grid.shape[1:4]
        bev_grid = bev_grid.flatten(1, 3)  # B, nlvl*W*H, 3
        # Find points in cam coords
        # bev_grid_pos: B*ncam, nlvl*bH*bW, 2
        bev_grid_pos, bev_cam_mask = get_campos(bev_grid, ego2cam, img_shape)
        # B*cam, nlvl*bH, bW, 2 — grid_sample wants a 2D sampling grid
        bev_grid_pos = bev_grid_pos.unflatten(-2, (nlvl*bH, bW))
        # project feat from 2D to bev plane
        projected_feature = F.grid_sample(
            cam_feat, bev_grid_pos, align_corners=False).view(self.B, -1, C, nlvl, bH, bW)  # B,cam,C,nlvl,bH,bW
        # B,cam,nlvl,bH,bW
        bev_feat_mask = bev_cam_mask.unflatten(-1, (nlvl, bH, bW))
        # eliminate the ncam dim:
        # the bev feature is the mean over the cameras that see each cell
        bev_feat_mask = bev_feat_mask.unsqueeze(2)
        projected_feature = (projected_feature*bev_feat_mask).sum(1)
        num_feat = bev_feat_mask.sum(1)
        # masked_fill avoids division by zero for unobserved cells
        projected_feature = projected_feature / \
            num_feat.masked_fill(num_feat == 0, 1)
        # concatenate a position information
        # projected_feature: B, C+3, nlvl, bH, bW
        bev_grid = bev_grid.view(self.B, nlvl, bH, bW,
                                 3).permute(0, 4, 1, 2, 3)
        projected_feature = torch.cat(
            (projected_feature, bev_grid), dim=1)
        return projected_feature, bev_feat_mask.sum(1) > 0
    def get_lidar_feature(self, points):
        """Run the PointPillars branch and project to output channels."""
        ptensor, pmask = points
        lidar_feature = self.pp(ptensor, pmask)
        # bev_grid = self.bev_planes[...,:-1].unsqueeze(0).repeat(self.B, 1, 1, 1, 1)
        # bev_grid = bev_grid[:,0]
        # bev_grid = bev_grid.permute(0, 3, 1, 2)
        # lidar_feature = torch.cat(
        #     (lidar_feature, bev_grid), dim=1)
        lidar_feature = self.outconvs_lidar(lidar_feature)
        return lidar_feature
def construct_plane_grid(xbound, ybound, height: float, dtype=torch.float32):
    '''Build a horizontal BEV grid of 3D points at a fixed height.

    Args:
        xbound: (min, max, step) extent along x.
        ybound: (min, max, step) extent along y.
        height: z value shared by every grid point.
        dtype: dtype of the returned tensor.

    Returns:
        plane: tensor of shape (H, W, 3) holding (x, y, z) per cell.
    '''
    xmin, xmax, xstep = xbound
    ymin, ymax, ystep = ybound
    n_cols = int((xmax - xmin) / xstep)
    n_rows = int((ymax - ymin) / ystep)
    xs = torch.linspace(xmin, xmax, n_cols, dtype=dtype)
    ys = torch.linspace(ymin, ymax, n_rows, dtype=dtype)
    # grid_y/grid_x: [num_y, num_x]
    grid_y, grid_x = torch.meshgrid(ys, xs)
    grid_z = torch.full_like(grid_x, height)
    # [num_y, num_x, 3]
    return torch.stack([grid_x, grid_y, grid_z], dim=-1)
def get_campos(reference_points, ego2cam, img_shape):
    '''
    Find each reference point's corresponding pixel in each camera.

    Args:
        reference_points: [B, num_query, 3] points in ego coordinates.
        ego2cam: (B, num_cam, 4, 4) ego -> image projection matrices
            (array-like; converted onto reference_points' device/dtype).
        img_shape: (H, W) of the input images, used to normalize pixels.

    Returns:
        reference_points_cam: (B*num_cam, num_query, 2) sampling positions
            normalized to [-1, 1] for ``F.grid_sample``.
        mask: (B, num_cam, num_query) bool, True where the point is in
            front of the camera and inside the image. num_query == W*H.
    '''
    ego2cam = reference_points.new_tensor(ego2cam)  # (B, N, 4, 4)
    reference_points = reference_points.clone()
    B, num_query = reference_points.shape[:2]
    num_cam = ego2cam.shape[1]
    # Homogeneous coordinates: (B, num_query, 4)
    reference_points = torch.cat(
        (reference_points, torch.ones_like(reference_points[..., :1])), -1)
    # Broadcast points and matrices over the camera dimension.
    reference_points = reference_points.view(
        B, 1, num_query, 4).repeat(1, num_cam, 1, 1).unsqueeze(-1)
    ego2cam = ego2cam.view(
        B, num_cam, 1, 4, 4).repeat(1, 1, num_query, 1, 1)
    # Projected points: (B, num_cam, num_query, 4)
    reference_points_cam = (ego2cam @ reference_points).squeeze(-1)
    eps = 1e-9
    # Keep only points strictly in front of the camera.
    mask = (reference_points_cam[..., 2:3] > eps)
    # BUGFIX: the original wrote ``x / z + eps``, adding eps to the result
    # of the division instead of guarding the denominator — z == 0 still
    # produced NaN/Inf that could leak through grid_sample. Clamp the depth
    # instead; affected points are masked out below anyway.
    reference_points_cam = reference_points_cam[..., 0:2] / \
        torch.clamp(reference_points_cam[..., 2:3], min=eps)
    # Normalize pixel coordinates to [0, 1].
    reference_points_cam[..., 0] /= img_shape[1]
    reference_points_cam[..., 1] /= img_shape[0]
    # from 0~1 to -1~1 (grid_sample convention)
    reference_points_cam = (reference_points_cam - 0.5) * 2
    # Also require the projection to land inside the image.
    mask = (mask & (reference_points_cam[..., 0:1] > -1.0)
            & (reference_points_cam[..., 0:1] < 1.0)
            & (reference_points_cam[..., 1:2] > -1.0)
            & (reference_points_cam[..., 1:2] < 1.0))
    # (B, num_cam, num_query)
    mask = mask.view(B, num_cam, num_query)
    reference_points_cam = reference_points_cam.view(B*num_cam, num_query, 2)
    return reference_points_cam, mask
def _test():
    """Placeholder for a module smoke test; intentionally a no-op."""
    return None


if __name__ == '__main__':
    _test()
from .base_map_head import BaseMapHead from .base_map_head import BaseMapHead
from .dg_head import DGHead from .dg_head import DGHead
from .map_element_detector import MapElementDetector from .map_element_detector import MapElementDetector
from .polyline_generator import PolylineGenerator from .polyline_generator import PolylineGenerator
\ No newline at end of file
from abc import ABCMeta, abstractmethod from abc import ABCMeta, abstractmethod
import torch.nn as nn import torch.nn as nn
from mmcv.runner import auto_fp16 from mmcv.runner import auto_fp16
from mmcv.utils import print_log from mmcv.utils import print_log
from mmdet.utils import get_root_logger from mmdet.utils import get_root_logger
class BaseMapHead(nn.Module, metaclass=ABCMeta):
    """Abstract base class for map heads.

    Subclasses must implement :meth:`loss` and :meth:`post_process`;
    ``forward`` here is an fp16-aware placeholder.
    """

    def __init__(self):
        super(BaseMapHead, self).__init__()
        # Flag read by mmcv's fp16 utilities.
        self.fp16_enabled = False

    def init_weights(self, pretrained=None):
        """Initialize the weights in detector.

        Args:
            pretrained (str, optional): Path to pre-trained weights.
                Defaults to None.
        """
        if pretrained is None:
            return
        log = get_root_logger()
        print_log(f'load model from: {pretrained}', logger=log)

    @auto_fp16(apply_to=('img', ))
    def forward(self, *args, **kwargs):
        """Placeholder forward; concrete heads override this."""
        pass

    @abstractmethod
    def loss(self, pred, gt):
        """Compute the training loss.

        Returns:
            dict with keys ``loss`` (torch.Tensor), ``log_vars``
            (dict mapping str to float) and ``num_samples`` (int).
        """
        return

    @abstractmethod
    def post_process(self, pred):
        """Convert raw predictions to vectorized map outputs.

        The output format must stay consistent with the evaluation code.
        """
        return
# The causal decoder below is adapted from
# https://github.com/alexmt-scale/causal-transformer-decoder
# with modifications so that it fits the PolyGen-style decoding used here.
import torch import torch
import torch.nn as nn import torch.nn as nn
from typing import Optional from typing import Optional
from torch import Tensor from torch import Tensor
from mmcv.cnn.bricks.registry import ATTENTION from mmcv.cnn.bricks.registry import ATTENTION
from mmcv.utils import build_from_cfg from mmcv.utils import build_from_cfg
def build_attention(cfg, default_args=None):
    """Construct an attention module from the mmcv ATTENTION registry.

    Args:
        cfg (dict): Config dict for the attention module.
        default_args (dict, optional): Default kwargs merged into ``cfg``.

    Returns:
        The instantiated attention module.
    """
    attn = build_from_cfg(cfg, ATTENTION, default_args)
    return attn
class CausalTransformerDecoder(nn.TransformerDecoder):
    """Implementation of a transformer decoder based on torch implementation but
    more efficient. The difference is that it doesn't need to recompute the
    embeddings of all the past decoded tokens but instead uses a cache to
    store them. This makes use of the fact that the attention of a decoder is
    causal, so new predicted tokens don't affect the old tokens' embedding bc
    the corresponding attention cells are masked.
    The complexity goes from seq_len^3 to seq_len^2.
    This only happens in eval mode.
    In training mode, teacher forcing makes these optimizations unnecessary. Hence the
    Decoder acts like a regular nn.TransformerDecoder (except that the attention tgt
    masks are handled for you).
    """
    def forward(
        self,
        tgt: Tensor,
        memory: Optional[Tensor] = None,
        cache: Optional[Tensor] = None,
        memory_mask: Optional[Tensor] = None,
        tgt_key_padding_mask: Optional[Tensor] = None,
        memory_key_padding_mask: Optional[Tensor] = None,
        causal_mask: Optional[Tensor] = None,
    ) -> Tensor:
        """
        Args:
            tgt (Tensor): current_len_output x bsz x hidden_dim
            memory (Tensor): len_encoded_seq x bsz x hidden_dim
            cache (Optional[Tensor]):
                n_layers x (current_len_output - 1) x bsz x hidden_dim
                If current_len_output == 1, nothing is cached yet, so cache
                should be None. Same if the module is in training mode.
            others (Optional[Tensor]): see official documentations
        Returns:
            output (Tensor): current_len_output x bsz x hidden_dim
            cache (Optional[Tensor]): n_layers x current_len_output x bsz x hidden_dim
                Only returns it when module is in eval mode (no caching in training)
        """
        output = tgt
        if self.training:
            # Training: teacher forcing — run the full sequence through
            # every layer; no incremental cache is needed or allowed.
            if cache is not None:
                raise ValueError(
                    "cache parameter should be None in training mode")
            for mod in self.layers:
                output = mod(
                    output,
                    memory,
                    memory_mask=memory_mask,
                    tgt_key_padding_mask=tgt_key_padding_mask,
                    memory_key_padding_mask=memory_key_padding_mask,
                    causal_mask=causal_mask,
                    only_last=False,
                )
            # cache is always None here; returned for a uniform signature.
            return output, cache
        else:
            # Eval: incremental decoding. Each layer computes only the
            # newest token's embedding (only_last) when a cache exists.
            new_token_cache = []
            for i, mod in enumerate(self.layers):
                output = mod(output, memory,
                             memory_mask=memory_mask,
                             tgt_key_padding_mask=tgt_key_padding_mask,
                             memory_key_padding_mask=memory_key_padding_mask,
                             causal_mask=causal_mask,
                             only_last=True if cache is not None else False)
                new_token_cache.append(output)
                # use the pre_calculated intermediate parameters: prepend
                # the cached prefix so the next layer attends over the
                # full sequence.
                if cache is not None:
                    output = torch.cat([cache[i], output], dim=0)
            # Grow the cache along the sequence dimension (dim=1), or
            # create it on the first step.
            if cache is not None:
                new_cache = torch.cat(
                    [cache, torch.stack(new_token_cache, dim=0)], dim=1)
            else:
                new_cache = torch.stack(new_token_cache, dim=0)
            return output, new_cache
class CausalTransformerDecoderLayer(nn.TransformerDecoderLayer): class CausalTransformerDecoderLayer(nn.TransformerDecoderLayer):
    def __init__(self, *args, re_zero=True, norm_first=True, map_attn_cfg=None, **kwargs):
        '''Decoder layer with ReZero-style residual scaling.

        Args:
            re_zero: If True, each residual branch is scaled by a learnable
                alpha initialized at zero (ReZero); otherwise the scales are
                the constant 1.0.
            norm_first: Must be True (pre-norm layout); enforced in forward.
            map_attn_cfg: Optional mmcv attention config; when given, an
                extra map-attention module is built.
            *args, **kwargs: Forwarded to ``nn.TransformerDecoderLayer``.
        '''
        super(CausalTransformerDecoderLayer, self).__init__(*args, **kwargs)
        if re_zero:
            # One learnable zero-initialized scale per residual branch
            # (self-attn, cross-attn, feed-forward).
            self.res_weight1 = nn.Parameter(torch.FloatTensor([0, ]))
            self.res_weight2 = nn.Parameter(torch.FloatTensor([0, ]))
            self.res_weight3 = nn.Parameter(torch.FloatTensor([0, ]))
        else:
            self.res_weight1 = 1.
            self.res_weight2 = 1.
            self.res_weight3 = 1.
        self.norm_first = norm_first
        self.map_attn = None
        if map_attn_cfg is not None:
            self.map_attn = build_attention(map_attn_cfg)
def forward( def forward(
self, self,
tgt: Tensor, tgt: Tensor,
memory: Optional[Tensor] = None, memory: Optional[Tensor] = None,
memory_mask: Optional[Tensor] = None, memory_mask: Optional[Tensor] = None,
tgt_key_padding_mask: Optional[Tensor] = None, tgt_key_padding_mask: Optional[Tensor] = None,
memory_key_padding_mask: Optional[Tensor] = None, memory_key_padding_mask: Optional[Tensor] = None,
causal_mask: Optional[Tensor] = None, causal_mask: Optional[Tensor] = None,
query: Optional[Tensor] = None, query: Optional[Tensor] = None,
only_last=False) -> Tensor: only_last=False) -> Tensor:
""" """
Args: Args:
see CausalTransformerDecoder see CausalTransformerDecoder
query is not None model will perform query stream query is not None model will perform query stream
Returns: Returns:
Tensor: Tensor:
If training: embedding of the whole layer: seq_len x bsz x hidden_dim If training: embedding of the whole layer: seq_len x bsz x hidden_dim
If eval mode: embedding of last token: 1 x bsz x hidden_dim If eval mode: embedding of last token: 1 x bsz x hidden_dim
""" """
if not self.norm_first: if not self.norm_first:
raise ValueError( raise ValueError(
"norm_first parameter should be True!") "norm_first parameter should be True!")
if self.training: if self.training:
# the official Pytorch implementation # the official Pytorch implementation
x = tgt x = tgt
if query is not None: if query is not None:
x = query x = query
x = x + self.res_weight1 * \ x = x + self.res_weight1 * \
self._sa_block(self.norm1(x), self.norm1(tgt), causal_mask, self._sa_block(self.norm1(x), self.norm1(tgt), causal_mask,
tgt_key_padding_mask) tgt_key_padding_mask)
if memory is not None: if memory is not None:
x = x + self.res_weight2 * \ x = x + self.res_weight2 * \
self._mha_block(self.norm2(x), memory, self._mha_block(self.norm2(x), memory,
memory_mask, memory_key_padding_mask) memory_mask, memory_key_padding_mask)
x = x + self.res_weight3*self._ff_block(self.norm3(x)) x = x + self.res_weight3*self._ff_block(self.norm3(x))
return x return x
# This part is adapted from the official Pytorch implementation # This part is adapted from the official Pytorch implementation
# So that only the last token gets modified and returned. # So that only the last token gets modified and returned.
# we follow the pre-LN trans in https://arxiv.org/pdf/2002.04745v1.pdf . # we follow the pre-LN trans in https://arxiv.org/pdf/2002.04745v1.pdf .
x = tgt x = tgt
if query is not None: if query is not None:
x = query x = query
if only_last: if only_last:
x = x[-1:] x = x[-1:]
if causal_mask is not None: if causal_mask is not None:
attn_mask = causal_mask attn_mask = causal_mask
if only_last: if only_last:
attn_mask = attn_mask[-1:] # XXX attn_mask = attn_mask[-1:] # XXX
else: else:
attn_mask = None attn_mask = None
# efficient self attention # efficient self attention
x = x + self.res_weight1 * \ x = x + self.res_weight1 * \
self._sa_block(self.norm1(x), self.norm1(tgt), attn_mask, self._sa_block(self.norm1(x), self.norm1(tgt), attn_mask,
tgt_key_padding_mask) tgt_key_padding_mask)
# encoder-decoder attention # encoder-decoder attention
if memory is not None: if memory is not None:
x = x + self.res_weight2 * \ x = x + self.res_weight2 * \
self._mha_block(self.norm2(x), memory, self._mha_block(self.norm2(x), memory,
memory_mask, memory_key_padding_mask) memory_mask, memory_key_padding_mask)
# final feed-forward network # final feed-forward network
x = x + self.res_weight3*self._ff_block(self.norm3(x)) x = x + self.res_weight3*self._ff_block(self.norm3(x))
return x return x
# self-attention block # self-attention block
def _sa_block(self, x: Tensor, mem: Tensor, def _sa_block(self, x: Tensor, mem: Tensor,
attn_mask: Optional[Tensor], key_padding_mask: Optional[Tensor]) -> Tensor: attn_mask: Optional[Tensor], key_padding_mask: Optional[Tensor]) -> Tensor:
x = self.self_attn(x, mem, mem, x = self.self_attn(x, mem, mem,
attn_mask=attn_mask, attn_mask=attn_mask,
key_padding_mask=key_padding_mask, key_padding_mask=key_padding_mask,
need_weights=False)[0] need_weights=False)[0]
return self.dropout1(x) return self.dropout1(x)
# multihead attention block # multihead attention block
def _mha_block(self, x: Tensor, mem: Tensor, def _mha_block(self, x: Tensor, mem: Tensor,
attn_mask: Optional[Tensor], key_padding_mask: Optional[Tensor]) -> Tensor: attn_mask: Optional[Tensor], key_padding_mask: Optional[Tensor]) -> Tensor:
x = self.multihead_attn(x, mem, mem, x = self.multihead_attn(x, mem, mem,
attn_mask=attn_mask, attn_mask=attn_mask,
key_padding_mask=key_padding_mask, key_padding_mask=key_padding_mask,
need_weights=False)[0] need_weights=False)[0]
return self.dropout2(x) return self.dropout2(x)
# feed forward block # feed forward block
def _ff_block(self, x: Tensor) -> Tensor: def _ff_block(self, x: Tensor) -> Tensor:
x = self.linear2(self.dropout(self.activation(self.linear1(x)))) x = self.linear2(self.dropout(self.activation(self.linear1(x))))
return self.dropout3(x) return self.dropout3(x)
class PolygenTransformerEncoderLayer(nn.TransformerEncoderLayer): class PolygenTransformerEncoderLayer(nn.TransformerEncoderLayer):
def __init__(self, *args, re_zero=True, norm_first=True, **kwargs): def __init__(self, *args, re_zero=True, norm_first=True, **kwargs):
''' '''
Args: Args:
re_zero: If True, alpha scale residuals with zero init. re_zero: If True, alpha scale residuals with zero init.
''' '''
super(PolygenTransformerEncoderLayer, self).__init__(*args, **kwargs) super(PolygenTransformerEncoderLayer, self).__init__(*args, **kwargs)
if re_zero: if re_zero:
self.res_weight1 = nn.Parameter(torch.FloatTensor([0, ])) self.res_weight1 = nn.Parameter(torch.FloatTensor([0, ]))
self.res_weight2 = nn.Parameter(torch.FloatTensor([0, ])) self.res_weight2 = nn.Parameter(torch.FloatTensor([0, ]))
else: else:
self.res_weight1 = 1. self.res_weight1 = 1.
self.res_weight2 = 1. self.res_weight2 = 1.
self.norm_first = norm_first self.norm_first = norm_first
def forward(self, src: Tensor, src_mask: Optional[Tensor] = None, src_key_padding_mask: Optional[Tensor] = None) -> Tensor: def forward(self, src: Tensor, src_mask: Optional[Tensor] = None, src_key_padding_mask: Optional[Tensor] = None) -> Tensor:
r"""Pass the input through the encoder layer. r"""Pass the input through the encoder layer.
Args: Args:
src: the sequence to the encoder layer (required). src: the sequence to the encoder layer (required).
src_mask: the mask for the src sequence (optional). src_mask: the mask for the src sequence (optional).
src_key_padding_mask: the mask for the src keys per batch (optional). src_key_padding_mask: the mask for the src keys per batch (optional).
Shape: Shape:
see the docs in Transformer class. see the docs in Transformer class.
""" """
# see Fig. 1 of https://arxiv.org/pdf/2002.04745v1.pdf # see Fig. 1 of https://arxiv.org/pdf/2002.04745v1.pdf
x = src x = src
if self.norm_first: if self.norm_first:
x = x + self.res_weight1*self._sa_block(self.norm1(x), src_mask, x = x + self.res_weight1*self._sa_block(self.norm1(x), src_mask,
src_key_padding_mask) src_key_padding_mask)
x = x + self.res_weight2*self._ff_block(self.norm2(x)) x = x + self.res_weight2*self._ff_block(self.norm2(x))
else: else:
x = self.norm1( x = self.norm1(
x + self.res_weight1*self._sa_block(x, src_mask, src_key_padding_mask)) x + self.res_weight1*self._sa_block(x, src_mask, src_key_padding_mask))
x = self.norm2(x + self.res_weight2*self._ff_block(x)) x = self.norm2(x + self.res_weight2*self._ff_block(x))
return x return x
# self-attention block # self-attention block
def _sa_block(self, x: Tensor, def _sa_block(self, x: Tensor,
attn_mask: Optional[Tensor], key_padding_mask: Optional[Tensor]) -> Tensor: attn_mask: Optional[Tensor], key_padding_mask: Optional[Tensor]) -> Tensor:
x = self.self_attn(x, x, x, x = self.self_attn(x, x, x,
attn_mask=attn_mask, attn_mask=attn_mask,
key_padding_mask=key_padding_mask, key_padding_mask=key_padding_mask,
need_weights=False)[0] need_weights=False)[0]
return self.dropout1(x) return self.dropout1(x)
# feed forward block # feed forward block
def _ff_block(self, x: Tensor) -> Tensor: def _ff_block(self, x: Tensor) -> Tensor:
x = self.linear2(self.dropout(self.activation(self.linear1(x)))) x = self.linear2(self.dropout(self.activation(self.linear1(x))))
return self.dropout2(x) return self.dropout2(x)
def generate_square_subsequent_mask(sz: int, device: str = "cpu") -> torch.Tensor:
    """Build an additive causal attention mask of shape (sz, sz).

    Position i may attend to positions j <= i (value ``0.0``); strictly
    future positions hold ``-inf`` so softmax assigns them zero weight.
    """
    allowed = torch.tril(torch.ones(sz, sz)).bool()
    mask = torch.zeros(sz, sz).masked_fill(~allowed, float("-inf"))
    return mask.to(device=device)
\ No newline at end of file
import torch import torch
import torch.nn.functional as F import torch.nn.functional as F
from torch import Tensor from torch import Tensor
def generate_square_subsequent_mask(sz: int, condition_len: int = 1, bool_out=False, device: str = "cpu") -> torch.Tensor:
    """Build a causal attention mask whose conditioning prefix attends freely.

    The first ``condition_len`` tokens may attend to each other without the
    causal restriction (used when the sequence starts with a condition);
    ``condition_len <= 1`` leaves the mask purely causal.

    Args:
        sz: sequence length; the mask is (sz, sz).
        condition_len: length of the bidirectional prefix.
        bool_out: if True, return the boolean keep-mask (True = attend);
            otherwise return the additive float mask (0.0 keep / -inf drop).
    """
    keep = torch.tril(torch.ones(sz, sz)).bool()
    if condition_len > 1:
        # The condition block is fully visible to itself.
        keep[:condition_len, :condition_len] = 1
    if bool_out:
        return keep.to(device=device)
    additive = torch.zeros(sz, sz).masked_fill(~keep, float("-inf"))
    return additive.to(device=device)
def dequantize_verts(verts, canvas_size: Tensor, add_noise=False):
    """Map quantized (integer) vertices back to continuous values in [-1, 1].

    Inverse of :func:`quantize_verts`. (The original docstring said
    "Quantizes" — this function dequantizes.)

    Args:
        verts: integer vertex tensor (grid coordinates).
        canvas_size: number of quantization steps per coordinate.
        add_noise: if True, add uniform noise of up to one quantization bin
            so dequantized values spread over the bin instead of sitting on
            its lower edge.

    Returns:
        Float32 tensor of continuous vertex coordinates.
    """
    min_range = -1
    max_range = 1
    # NOTE(review): quantize_verts uses canvas_size - 1 as the step count;
    # confirm the off-by-one between the two directions is intended.
    range_quantize = canvas_size
    verts = verts.type(torch.float32)
    verts = verts * (max_range - min_range) / range_quantize + min_range
    if add_noise:
        # BUG FIX: the noise magnitude must be one quantization bin,
        # (max_range - min_range) / range_quantize. The previous code
        # multiplied by range_quantize itself, producing noise on the order
        # of the whole canvas and destroying the dequantized coordinates.
        verts += torch.rand_like(verts) * \
            (max_range - min_range) / range_quantize
    return verts
def quantize_verts(
        verts,
        canvas_size: Tensor):
    """Map continuous vertices from [-1, 1] onto the integer canvas grid.

    Args:
        verts: float tensor, e.g. shape (seqlen, 2), with values in [-1, 1].
        canvas_size: number of discrete positions per axis; outputs lie in
            [0, canvas_size - 1].

    Returns:
        int32 tensor of grid coordinates (fractional part truncated).
    """
    lo, hi = -1, 1
    n_steps = canvas_size - 1
    scaled = (verts - lo) / (hi - lo) * n_steps
    return scaled.type(torch.int32)
def top_k_logits(logits, k):
    """Push logits outside the top-k down to -1e9 (``k == 0`` disables).

    NOTE(review): the cutoff is the minimum over *all* top-k values in the
    tensor (one global threshold), not a per-row k-th largest value; this
    matches the reference PolyGen implementation.
    """
    if k == 0:
        return logits
    topk_vals = torch.topk(logits, k=k)[0]
    cutoff = topk_vals.min()
    fill = torch.ones_like(logits) * -1e9
    return torch.where(logits < cutoff, fill, logits)
def top_p_logits(logits, p):
    """Masks logits using nucleus (top-p) sampling.

    Candidates are sorted by descending probability; everything outside the
    smallest prefix whose cumulative probability exceeds ``p`` has 1e9
    subtracted so it is effectively never sampled.

    Args:
        logits: tensor of shape [batch, seq, dim].
        p: nucleus probability in (0, 1]; ``p == 1`` disables masking.

    Returns:
        Tensor reshaped to [-1, seq, dim] with out-of-nucleus logits pushed
        to approximately -1e9.
    """
    if p == 1:
        return logits
    else:
        seq, dim = logits.shape[1:]
        logits = logits.view(-1, dim)
        sort_indices = torch.argsort(logits, dim=-1, descending=True)
        probs = F.softmax(logits, dim=-1).gather(-1, sort_indices)
        # Exclusive cumulative sum: the top-1 candidate always has
        # cumulative probability 0 here, so at least one index survives.
        cumprobs = torch.cumsum(probs, dim=-1) - probs
        # The top 1 candidate always will not be masked.
        # This way ensures at least 1 indices will be selected.
        sort_mask = (cumprobs > p).type(logits.dtype)
        # NOTE(review): batch_indices is computed but never used — dead code.
        batch_indices = torch.repeat_interleave(
            torch.arange(logits.shape[0]).unsqueeze(-1), dim, dim=-1)
        # Scatter the sorted-order drop mask back to vocabulary order.
        top_p_mask = torch.zeros_like(logits)
        top_p_mask = top_p_mask.scatter_add(-1, sort_indices, sort_mask)
        # NOTE(review): in-place subtraction on a view of the input — this
        # also mutates the caller's tensor; confirm that is intended.
        logits -= top_p_mask * 1e9
        return logits.view(-1, seq, dim)
import copy import copy
import torch import torch
import torch.nn as nn import torch.nn as nn
import torch.nn.functional as F import torch.nn.functional as F
from mmcv.cnn import Conv2d, Linear from mmcv.cnn import Conv2d, Linear
from mmcv.runner import force_fp32 from mmcv.runner import force_fp32
from torch.distributions.categorical import Categorical from torch.distributions.categorical import Categorical
from mmdet.core import multi_apply, reduce_mean from mmdet.core import multi_apply, reduce_mean
from mmdet.models import HEADS from mmdet.models import HEADS
from .detr_head import DETRMapFixedNumHead from .detr_head import DETRMapFixedNumHead
@HEADS.register_module(force=True) @HEADS.register_module(force=True)
class DETRBboxHead(DETRMapFixedNumHead): class DETRBboxHead(DETRMapFixedNumHead):
    def __init__(self, *args, canvas_size=(400, 200), discrete_output=True, separate_detect=True,
                 mode='xyxy', bbox_size=None, coord_dim=2, kp_coord_dim=2,
                 **kwargs):
        """DETR-style bounding-box head.

        Args:
            canvas_size (tuple[int]): extent of the discrete output canvas.
            discrete_output (bool): if True, regress categorical logits over
                canvas positions; otherwise continuous sigmoid coordinates.
            separate_detect (bool): if True, run with per-instance class-label
                conditioning and a reduced (binary/single-logit) classifier.
            mode (str): box parameterisation; 'sce' uses 3 values per box,
                any other mode 2 (overridden by ``bbox_size`` when given).
            bbox_size (int, optional): explicit number of values per box.
            kp_coord_dim (int): keypoint coordinate dimensionality.
            coord_dim (int): coordinate dimensionality per box value.
        """
        # Plain attribute first: the parent __init__ calls _init_branch,
        # which reads self.canvas_size before the buffer can be registered.
        self.canvas_size = canvas_size  # hard code
        self.separate_detect = separate_detect
        self.discrete_output = discrete_output
        self.bbox_size = 3 if mode=='sce' else 2
        if bbox_size is not None:
            self.bbox_size = bbox_size
        self.coord_dim = coord_dim  # for xyz
        self.kp_coord_dim = kp_coord_dim
        super(DETRBboxHead, self).__init__(*args, **kwargs)
        # Re-register canvas_size as a buffer so it follows the module's
        # device moves and is saved in the state dict.
        del self.canvas_size
        self.register_buffer('canvas_size', torch.tensor(canvas_size))
        self._init_embedding()
def _init_embedding(self): def _init_embedding(self):
# for bbox parameter xstart, ystart, xend, yend # for bbox parameter xstart, ystart, xend, yend
self.bbox_embedding = nn.Embedding(4, self.embed_dims) self.bbox_embedding = nn.Embedding(4, self.embed_dims)
self.label_embed = nn.Embedding( self.label_embed = nn.Embedding(
self.num_classes, self.embed_dims) self.num_classes, self.embed_dims)
self.img_coord_embed = nn.Linear(2, self.embed_dims) self.img_coord_embed = nn.Linear(2, self.embed_dims)
def _init_branch(self,): def _init_branch(self,):
"""Initialize classification branch and regression branch of head.""" """Initialize classification branch and regression branch of head."""
# add sigmoid or not # add sigmoid or not
if self.separate_detect: if self.separate_detect:
if self.cls_out_channels == self.num_classes+1: if self.cls_out_channels == self.num_classes+1:
self.cls_out_channels = 2 self.cls_out_channels = 2
else: else:
self.cls_out_channels = 1 self.cls_out_channels = 1
fc_cls = Linear(self.embed_dims, self.cls_out_channels) fc_cls = Linear(self.embed_dims, self.cls_out_channels)
reg_branch = [] reg_branch = []
for _ in range(self.num_reg_fcs): for _ in range(self.num_reg_fcs):
reg_branch.append(Linear(self.embed_dims, self.embed_dims)) reg_branch.append(Linear(self.embed_dims, self.embed_dims))
reg_branch.append(nn.LayerNorm(self.embed_dims)) reg_branch.append(nn.LayerNorm(self.embed_dims))
reg_branch.append(nn.ReLU()) reg_branch.append(nn.ReLU())
if self.discrete_output: if self.discrete_output:
reg_branch.append(nn.Linear( reg_branch.append(nn.Linear(
self.embed_dims, max(self.canvas_size), bias=True,)) self.embed_dims, max(self.canvas_size), bias=True,))
else: else:
reg_branch.append(nn.Linear( reg_branch.append(nn.Linear(
self.embed_dims, self.bbox_size*self.coord_dim, bias=True,)) self.embed_dims, self.bbox_size*self.coord_dim, bias=True,))
reg_branch = nn.Sequential(*reg_branch) reg_branch = nn.Sequential(*reg_branch)
def _get_clones(module, N): def _get_clones(module, N):
return nn.ModuleList([copy.deepcopy(module) for i in range(N)]) return nn.ModuleList([copy.deepcopy(module) for i in range(N)])
num_pred = self.transformer.decoder.num_layers num_pred = self.transformer.decoder.num_layers
if self.iterative: if self.iterative:
fc_cls = _get_clones(fc_cls, num_pred) fc_cls = _get_clones(fc_cls, num_pred)
reg_branch = _get_clones(reg_branch, num_pred) reg_branch = _get_clones(reg_branch, num_pred)
self.pre_branches = nn.ModuleDict([ self.pre_branches = nn.ModuleDict([
('cls', fc_cls), ('cls', fc_cls),
('reg', reg_branch), ]) ('reg', reg_branch), ])
    def _prepare_context(self, batch, context):
        """Prepare class label and vertex context.

        Builds (a) an optional per-instance class-label embedding and
        (b) the projected BEV feature map augmented with a dense 2-D
        coordinate embedding.

        Returns:
            tuple: (global_context_embedding or None,
                    sequential_context_embeddings of shape [B, C, H, W]).
        """
        global_context_embedding = None
        if self.separate_detect:
            global_context_embedding = self.label_embed(batch['class_label'])
        # Image context: per-instance BEV assignment when separate_detect,
        # otherwise the whole batch feature map.
        if self.separate_detect:
            # assign_bev is defined elsewhere in this module.
            image_embeddings = assign_bev(
                context['bev_embeddings'], batch['batch_idx'])
        else:
            image_embeddings = context['bev_embeddings']
        image_embeddings = self.input_proj(
            image_embeddings)  # only change feature size
        device = image_embeddings.device
        # Add a 2D coordinate grid embedding spanning [-1, 1] on both axes.
        B, C, H, W = image_embeddings.shape
        Ws = torch.linspace(-1., 1., W)
        Hs = torch.linspace(-1., 1., H)
        image_coords = torch.stack(
            torch.meshgrid(Hs, Ws), dim=-1).to(device)
        image_coord_embeddings = self.img_coord_embed(image_coords)
        # In-place add; (1, C, H, W) broadcasts over the batch.
        image_embeddings += image_coord_embeddings[None].permute(0, 3, 1, 2)
        # NOTE(review): this reshape keeps (B, C, H, W) unchanged; the
        # flatten-to-sequence presumably happens downstream — confirm.
        sequential_context_embeddings = image_embeddings.reshape(
            B, C, H, W)
        return (global_context_embedding, sequential_context_embeddings)
def forward(self, batch, context, img_metas=None): def forward(self, batch, context, img_metas=None):
''' '''
Args: Args:
bev_feature (List[Tensor]): shape [B, C, H, W] bev_feature (List[Tensor]): shape [B, C, H, W]
feature in bev view feature in bev view
img_metas img_metas
Outs: Outs:
preds_dict (Dict): preds_dict (Dict):
all_cls_scores (Tensor): Classification score of all all_cls_scores (Tensor): Classification score of all
decoder layers, has shape decoder layers, has shape
[nb_dec, bs, num_query, cls_out_channels]. [nb_dec, bs, num_query, cls_out_channels].
all_lines_preds (Tensor): all_lines_preds (Tensor):
[nb_dec, bs, num_query, num_points, 2]. [nb_dec, bs, num_query, num_points, 2].
''' '''
(global_context_embedding, sequential_context_embeddings) =\ (global_context_embedding, sequential_context_embeddings) =\
self._prepare_context(batch, context) self._prepare_context(batch, context)
if self.separate_detect: if self.separate_detect:
query_embedding = self.query_embedding.weight[None] + \ query_embedding = self.query_embedding.weight[None] + \
global_context_embedding[:, None] global_context_embedding[:, None]
else: else:
B = sequential_context_embeddings.shape[0] B = sequential_context_embeddings.shape[0]
query_embedding = self.query_embedding.weight[None].repeat(B, 1, 1) query_embedding = self.query_embedding.weight[None].repeat(B, 1, 1)
x = sequential_context_embeddings x = sequential_context_embeddings
B, C, H, W = x.shape B, C, H, W = x.shape
masks = x.new_zeros((B, H, W)) masks = x.new_zeros((B, H, W))
pos_embed = self.positional_encoding(masks) pos_embed = self.positional_encoding(masks)
# outs_dec: [nb_dec, bs, num_query, embed_dim] # outs_dec: [nb_dec, bs, num_query, embed_dim]
outs_dec, _ = self.transformer(x, masks.type(torch.bool), query_embedding, outs_dec, _ = self.transformer(x, masks.type(torch.bool), query_embedding,
pos_embed) pos_embed)
outputs = [] outputs = []
for i, query_feat in enumerate(outs_dec): for i, query_feat in enumerate(outs_dec):
outputs.append(self.get_prediction(query_feat)) outputs.append(self.get_prediction(query_feat))
return outputs return outputs
def get_prediction(self, query_feat): def get_prediction(self, query_feat):
ocls = self.pre_branches['cls'](query_feat) ocls = self.pre_branches['cls'](query_feat)
if self.discrete_output: if self.discrete_output:
pos = [] pos = []
for i in range(4): for i in range(4):
pos_embeds = self.bbox_embedding.weight[i] pos_embeds = self.bbox_embedding.weight[i]
_pos = self.pre_branches['reg'](query_feat+pos_embeds) _pos = self.pre_branches['reg'](query_feat+pos_embeds)
pos.append(_pos) pos.append(_pos)
# # y mask # # y mask
# _vert_mask = torch.arange(logits.shape[-1], device=logits.device) # _vert_mask = torch.arange(logits.shape[-1], device=logits.device)
# vertices_mask_y = (_vert_mask < self.canvas_size[1]+1) # vertices_mask_y = (_vert_mask < self.canvas_size[1]+1)
# logits[:,1::2] = logits[:,1::2]*vertices_mask_y - ~vertices_mask_y*1e9 # logits[:,1::2] = logits[:,1::2]*vertices_mask_y - ~vertices_mask_y*1e9
logits = torch.stack(pos, dim=-2)/1. logits = torch.stack(pos, dim=-2)/1.
lines = Categorical(logits=logits) lines = Categorical(logits=logits)
else: else:
lines = self.pre_branches['reg'](query_feat).sigmoid() lines = self.pre_branches['reg'](query_feat).sigmoid()
lines = lines.unflatten(-1, (self.bbox_size, self.coord_dim))*self.canvas_size lines = lines.unflatten(-1, (self.bbox_size, self.coord_dim))*self.canvas_size
lines = lines.flatten(-2) lines = lines.flatten(-2)
return dict( return dict(
lines=lines, # [bs, num_query, 4, num_canvas_size] lines=lines, # [bs, num_query, 4, num_canvas_size]
scores=ocls, # [bs, num_query, num_class] scores=ocls, # [bs, num_query, num_class]
) )
    @force_fp32(apply_to=('score_pred', 'lines_pred', 'gt_lines'))
    def _get_target_single(self,
                           score_pred,
                           lines_pred,
                           gt_labels,
                           gt_lines,
                           gt_bboxes_ignore=None):
        """
        Compute regression and classification targets for one image.
        Outputs from a single decoder layer of a single feature level are used.
        Args:
            score_pred (Tensor): Box score logits from a single decoder layer
                for one image. Shape [num_query, cls_out_channels].
            lines_pred (Tensor):
                shape [num_query, num_points, 2] (or with a trailing
                vocabulary dim when ``discrete_output``).
            gt_lines (Tensor):
                shape [num_gt, num_points, 2].
            gt_labels (torch.LongTensor)
                shape [num_gt, ]
        Returns:
            tuple[Tensor]: a tuple containing the following for one image.
                - labels (LongTensor): Labels of each image.
                    shape [num_query, 1]
                - label_weights (Tensor]): Label weights of each image.
                    shape [num_query, 1]
                - lines_target (Tensor): Lines targets of each image.
                    shape [num_query, num_points, 2]
                - lines_weights (Tensor): Lines weights of each image.
                    shape [num_query, num_points, 2]
                - pos_inds (Tensor): Sampled positive indices for each image.
                - neg_inds (Tensor): Sampled negative indices for each image.
                - pos_gt_inds (Tensor): GT index matched to each positive.
        """
        num_pred_lines = len(lines_pred)
        # Hungarian-style assignment between predictions and ground truth,
        # then sampling of positive/negative query indices.
        assign_result = self.assigner.assign(preds=dict(lines=lines_pred, scores=score_pred,),
                                             gts=dict(lines=gt_lines,
                                                      labels=gt_labels, ),
                                             gt_bboxes_ignore=gt_bboxes_ignore)
        sampling_result = self.sampler.sample(
            assign_result, lines_pred, gt_lines)
        pos_inds = sampling_result.pos_inds
        neg_inds = sampling_result.neg_inds
        pos_gt_inds = sampling_result.pos_assigned_gt_inds
        # label targets 0: foreground, 1: background (separate_detect mode);
        # otherwise the background class index is self.num_classes.
        if self.separate_detect:
            labels = gt_lines.new_full((num_pred_lines, ), 1, dtype=torch.long)
        else:
            labels = gt_lines.new_full(
                (num_pred_lines, ), self.num_classes, dtype=torch.long)
        labels[pos_inds] = gt_labels[sampling_result.pos_assigned_gt_inds]
        label_weights = gt_lines.new_ones(num_pred_lines)
        # bbox targets since lines_pred's last dimension is the vocabulary
        # and ground truth dose not have this dimension: drop it so the
        # target shape matches the ground truth (integer class targets).
        if self.discrete_output:
            lines_target = torch.zeros_like(lines_pred[..., 0]).long()
            lines_weights = torch.zeros_like(lines_pred[..., 0])
        else:
            lines_target = torch.zeros_like(lines_pred)
            lines_weights = torch.zeros_like(lines_pred)
        lines_target[pos_inds] = sampling_result.pos_gt_bboxes.type(
            lines_target.dtype)
        lines_weights[pos_inds] = 1.0
        # Normalise weights per query so each matched line contributes
        # equally regardless of its number of elements (guard n == 0).
        n = lines_weights.sum(-1, keepdim=True)
        lines_weights = lines_weights / n.masked_fill(n == 0, 1)
        return (labels, label_weights, lines_target, lines_weights,
                pos_inds, neg_inds, pos_gt_inds)
# @force_fp32(apply_to=('preds', 'gts')) # @force_fp32(apply_to=('preds', 'gts'))
    def get_targets(self, preds, gts, gt_bboxes_ignore_list=None):
        """
        Compute regression and classification targets for a batch of images.
        Outputs from a single decoder layer of a single feature level are used.
        Args:
            preds (dict): 'scores' per image and 'lines' (a Categorical when
                ``discrete_output``, otherwise a Tensor).
            gts (dict): ground-truth 'bbox' / 'class_label' (and 'bbox_mask'
                in separate_detect mode).
            gt_bboxes_ignore_list (list[Tensor], optional): Bounding
                boxes which can be ignored for each image. Must be None.
        Returns:
            tuple:
                - new_gts (dict): batched 'labels', 'label_weights',
                  'bboxs', 'bboxs_weights' lists (one entry per image).
                - num_total_pos (int): Number of positive samples in all
                  images.
                - num_total_neg (int): Number of negative samples in all
                  images.
                - pos_inds_list (list[Tensor]): positive query indices.
                - pos_gt_inds_list (list[Tensor]): matched gt indices.
        """
        assert gt_bboxes_ignore_list is None, \
            'Only supports for gt_bboxes_ignore setting to None.'
        # format the inputs
        if self.separate_detect:
            # Keep only valid (unmasked) boxes; in this mode every instance
            # gets foreground class 0.
            bbox = [b[m] for b, m in zip(gts['bbox'], gts['bbox_mask'])]
            class_label = torch.zeros_like(gts['bbox_mask']).long()
            class_label = [b[m] for b, m in zip(class_label, gts['bbox_mask'])]
        else:
            class_label = gts['class_label']
            bbox = gts['bbox']
        if self.discrete_output:
            # Use the raw categorical logits for assignment.
            lines_pred = preds['lines'].logits
        else:
            lines_pred = preds['lines']
        bbox = [b.float() for b in bbox]
        # Per-image target computation.
        (labels_list, label_weights_list,
         lines_targets_list, lines_weights_list,
         pos_inds_list, neg_inds_list,pos_gt_inds_list) = multi_apply(
            self._get_target_single,
            preds['scores'], lines_pred,
            class_label, bbox,
            gt_bboxes_ignore=gt_bboxes_ignore_list)
        num_total_pos = sum((inds.numel() for inds in pos_inds_list))
        num_total_neg = sum((inds.numel() for inds in neg_inds_list))
        new_gts = dict(
            labels=labels_list,
            label_weights=label_weights_list,
            bboxs=lines_targets_list,
            bboxs_weights=lines_weights_list,
        )
        return new_gts, num_total_pos, num_total_neg, pos_inds_list, pos_gt_inds_list
    # @force_fp32(apply_to=('preds', 'gts'))
    def loss_single(self,
                    preds: dict,
                    gts: dict,
                    gt_bboxes_ignore_list=None,
                    reduction='none'):
        """
        Compute classification and regression losses for the outputs of a
        single decoder layer.

        Args:
            preds (dict): predictions for the whole batch.
                - 'scores' (Tensor): classification logits,
                  shape [bs, num_query, cls_out_channels].
                - 'lines': line predictions. When ``self.discrete_output``
                  is True this is a distribution object exposing ``.logits``
                  and ``.log_prob``; otherwise a plain Tensor.
            gts (dict): ground truth consumed by ``self.get_targets``. When
                ``self.separate_detect`` is True, 'bbox' and 'bbox_mask' are
                used and all class labels are set to 0 (binary objectness);
                otherwise 'class_label' and 'bbox' are used directly.
            gt_bboxes_ignore_list (list[Tensor], optional): must be None.
            reduction (str): NOTE(review) unused in this method — confirm
                whether it was meant to be forwarded to ``self.reg_loss``.

        Returns:
            tuple: (loss_dict with keys 'cls' and 'reg', pos_inds_list,
            pos_gt_inds_list).
        """
        # Get target for each sample
        new_gts, num_total_pos, num_total_neg, pos_inds_list, pos_gt_inds_list =\
            self.get_targets(preds, gts, gt_bboxes_ignore_list)
        # Batched all data: stack per-image targets into [bs, ...] tensors.
        for k, v in new_gts.items():
            new_gts[k] = torch.stack(v, dim=0)
        # construct weighted avg_factor to match with the official DETR repo
        cls_avg_factor = num_total_pos * 1.0 + \
            num_total_neg * self.bg_cls_weight
        if self.sync_cls_avg_factor:
            # Average the normalizer across GPUs so every rank scales alike.
            cls_avg_factor = reduce_mean(
                preds['scores'].new_tensor([cls_avg_factor]))
        cls_avg_factor = max(cls_avg_factor, 1)
        # Classification loss
        if self.separate_detect:
            # Binary objectness: label 0 marks positives (see get_targets).
            loss_cls = self.bce_loss(
                preds['scores'], new_gts['labels'], new_gts['label_weights'], cls_avg_factor)
        else:
            # since the inputs needs the second dim is the class dim, we permute the prediction.
            cls_scores = preds['scores'].reshape(-1, self.cls_out_channels)
            cls_labels = new_gts['labels'].reshape(-1)
            cls_weights = new_gts['label_weights'].reshape(-1)
            loss_cls = self.loss_cls(
                cls_scores, cls_labels, cls_weights, avg_factor=cls_avg_factor)
        # Compute the average number of gt boxes accross all gpus, for
        # normalization purposes
        num_total_pos = loss_cls.new_tensor([num_total_pos])
        num_total_pos = torch.clamp(reduce_mean(num_total_pos), min=1).item()
        # position NLL loss
        if self.discrete_output:
            # Negative log-likelihood of target coordinates under the
            # predicted discrete distribution, masked by the target weights.
            loss_reg = -(preds['lines'].log_prob(new_gts['bboxs']) *
                         new_gts['bboxs_weights']).sum()/(num_total_pos)
        else:
            loss_reg = self.reg_loss(
                preds['lines'], new_gts['bboxs'], new_gts['bboxs_weights'], avg_factor=num_total_pos)
        loss_dict = dict(
            cls=loss_cls,
            reg=loss_reg,
        )
        return loss_dict, pos_inds_list, pos_gt_inds_list
def bce_loss(self, logits, label, weights, cls_avg_factor): def bce_loss(self, logits, label, weights, cls_avg_factor):
''' binary ce plog(p) + (1-p)log(1-p) ''' binary ce plog(p) + (1-p)log(1-p)
logits: B,n,1 logits: B,n,1
label: label:
''' '''
p = logits.squeeze(-1).sigmoid() p = logits.squeeze(-1).sigmoid()
pos_msk = label == 0 pos_msk = label == 0
neg_msk = ~pos_msk neg_msk = ~pos_msk
loss_cls = -(p.log()*pos_msk + (1-p).log()*neg_msk) loss_cls = -(p.log()*pos_msk + (1-p).log()*neg_msk)
loss_cls = (loss_cls * weights).sum()/cls_avg_factor loss_cls = (loss_cls * weights).sum()/cls_avg_factor
return loss_cls return loss_cls
    def post_process(self, preds_dicts: list, **kwargs):
        '''
        Select the top-scoring lines from the last decoder layer.

        Args:
            preds_dicts (list[dict]): per-decoder-layer predictions; only
                the last entry is used. Each dict holds:
                scores (Tensor): classification logits,
                    shape [bs, num_query, cls_out_channels].
                lines (Tensor): line predictions,
                    shape [bs, num_query, ...].
        Outs:
            result_dict (dict):
                'bbox' / 'scores' / 'labels': per-sample lists of kept
                    predictions (top ``self.max_lines`` each).
                'bbox_flat': all kept lines concatenated over the batch and
                    rounded to int32 (assumes coordinates are already in a
                    discrete/pixel space — TODO confirm).
                'lines_cls': concatenated class ids (long).
                'lines_bs_idx': batch index of each kept line (long).
        '''
        preds = preds_dicts[-1]
        batched_cls_scores = preds['scores']
        batched_lines_preds = preds['lines']
        batch_size = batched_cls_scores.size(0)
        device = batched_cls_scores.device
        result_dict = {
            'bbox': [],
            'scores': [],
            'labels': [],
            'bbox_flat': [],
            'lines_cls': [],
            'lines_bs_idx': [],
        }
        for i in range(batch_size):
            cls_scores = batched_cls_scores[i]
            det_preds = batched_lines_preds[i]
            max_num = self.max_lines
            if self.loss_cls.use_sigmoid:
                # Sigmoid head: top-k over the flattened (query, class)
                # scores, then recover class and query indices.
                cls_scores = cls_scores.sigmoid()
                scores, valid_idx = cls_scores.view(-1).topk(max_num)
                det_labels = valid_idx % self.num_classes
                valid_idx = valid_idx // self.num_classes
                det_preds = det_preds[valid_idx]
            else:
                # Softmax head: drop the background column ([..., :-1]),
                # take the best class per query, then keep top-k queries.
                scores, det_labels = F.softmax(cls_scores, dim=-1)[..., :-1].max(-1)
                scores, valid_idx = scores.topk(max_num)
                det_preds = det_preds[valid_idx]
                det_labels = det_labels[valid_idx]
            nline = len(valid_idx)
            result_dict['bbox'].append(det_preds)
            result_dict['scores'].append(scores)
            result_dict['labels'].append(det_labels)
            result_dict['lines_bs_idx'].extend([i]*nline)
        # for down stream polyline
        _bboxs = torch.cat(result_dict['bbox'], dim=0)
        # quantize the data
        result_dict['bbox_flat'] = torch.round(_bboxs).type(torch.int32)
        result_dict['lines_cls'] = torch.cat(
            result_dict['labels'], dim=0).long()
        result_dict['lines_bs_idx'] = torch.tensor(
            result_dict['lines_bs_idx'], device=device).long()
        return result_dict
def assign_bev(feat, idx):
    """Select entries of a BEV feature container by index."""
    selected = feat[idx]
    return selected
\ No newline at end of file
import torch import torch
import torch.nn as nn import torch.nn as nn
import torch.nn.functional as F import torch.nn.functional as F
import copy import copy
from mmdet.models import HEADS from mmdet.models import HEADS
from mmcv.cnn import Conv2d from mmcv.cnn import Conv2d
from mmcv.cnn import Linear, build_activation_layer, bias_init_with_prob from mmcv.cnn import Linear, build_activation_layer, bias_init_with_prob
from mmcv.cnn.bricks.transformer import build_positional_encoding from mmcv.cnn.bricks.transformer import build_positional_encoding
from mmdet.models.utils import build_transformer from mmdet.models.utils import build_transformer
from mmcv.runner import force_fp32 from mmcv.runner import force_fp32
from mmdet.core import (multi_apply, build_assigner, build_sampler, from mmdet.core import (multi_apply, build_assigner, build_sampler,
reduce_mean) reduce_mean)
from mmdet.models.utils.transformer import inverse_sigmoid from mmdet.models.utils.transformer import inverse_sigmoid
from mmdet.models import build_loss from mmdet.models import build_loss
from .base_map_head import BaseMapHead from .base_map_head import BaseMapHead
@HEADS.register_module() @HEADS.register_module()
class DETRMapFixedNumHead(BaseMapHead): class DETRMapFixedNumHead(BaseMapHead):
    def __init__(self,
                 num_classes=3,
                 in_channels=128,
                 num_query=100,
                 max_lines=50,
                 score_thre=0.2,
                 num_reg_fcs=2,
                 num_points=100,
                 iterative=False,
                 patch_size=None,
                 sync_cls_avg_factor=True,
                 transformer: dict = None,
                 positional_encoding: dict = None,
                 loss_cls: dict = None,
                 loss_reg: dict = None,
                 train_cfg: dict = None,
                 init_cfg=None,
                 **kwargs):
        """DETR-style map head predicting a fixed number of polylines.

        Args:
            num_classes (int): number of foreground line classes.
            in_channels (int): channels of the input BEV feature map.
            num_query (int): number of object queries.
            max_lines (int): top-k lines kept in post-processing.
            score_thre (float): score threshold (stored; not used in the
                methods visible in this file section).
            num_reg_fcs (int): hidden Linear layers in the regression branch.
            num_points (int): points predicted per line.
            iterative (bool): if True, every decoder layer gets its own
                cls/reg head clone instead of sharing one.
            patch_size (tuple, optional): BEV patch extent; stored swapped
                as (patch_size[1], patch_size[0]) — presumably converting an
                (h, w) input to (x, y), TODO confirm.
            sync_cls_avg_factor (bool): average the cls-loss normalizer
                across GPUs.
            transformer / positional_encoding / loss_cls / loss_reg /
                train_cfg (dict): mmdet-style config dicts.
            init_cfg: accepted for config compatibility; unused here.
        """
        super().__init__()
        assigner = train_cfg['assigner']
        self.assigner = build_assigner(assigner)
        # DETR sampling=False, so use PseudoSampler
        sampler_cfg = dict(type='PseudoSampler')
        self.sampler = build_sampler(sampler_cfg, context=self)
        self.train_cfg = train_cfg
        self.max_lines = max_lines
        self.score_thre = score_thre
        self.num_query = num_query
        self.in_channels = in_channels
        self.num_classes = num_classes
        self.num_points = num_points
        # branch
        # if loss_cls.use_sigmoid:
        if loss_cls['use_sigmoid']:
            # Sigmoid head: one logit per foreground class.
            self.cls_out_channels = num_classes
        else:
            # Softmax head needs an extra background column.
            self.cls_out_channels = num_classes+1
        self.iterative = iterative
        self.num_reg_fcs = num_reg_fcs
        if patch_size is not None:
            self.register_buffer('patch_size', torch.tensor(
                (patch_size[1], patch_size[0])),)
        # Must run before _init_layers/_init_branch: it sets self.embed_dims.
        self._build_transformer(transformer, positional_encoding)
        # loss params
        self.loss_cls = build_loss(loss_cls)
        self.bg_cls_weight = 0.1
        if self.loss_cls.use_sigmoid:
            # No explicit background class with sigmoid, so no bg weighting.
            self.bg_cls_weight = 0.0
        self.sync_cls_avg_factor = sync_cls_avg_factor
        self.reg_loss = build_loss(loss_reg)
        # add reg, cls head for each decoder layer
        self._init_layers()
        self._init_branch()
        self.init_weights()
def _init_layers(self): def _init_layers(self):
"""Initialize some layer.""" """Initialize some layer."""
self.input_proj = Conv2d( self.input_proj = Conv2d(
self.in_channels, self.embed_dims, kernel_size=1) self.in_channels, self.embed_dims, kernel_size=1)
# query_pos_embed & query_embed # query_pos_embed & query_embed
self.query_embedding = nn.Embedding(self.num_query, self.query_embedding = nn.Embedding(self.num_query,
self.embed_dims) self.embed_dims)
def _build_transformer(self, transformer, positional_encoding): def _build_transformer(self, transformer, positional_encoding):
# transformer # transformer
self.act_cfg = transformer.get('act_cfg', self.act_cfg = transformer.get('act_cfg',
dict(type='ReLU', inplace=True)) dict(type='ReLU', inplace=True))
self.activate = build_activation_layer(self.act_cfg) self.activate = build_activation_layer(self.act_cfg)
self.positional_encoding = build_positional_encoding( self.positional_encoding = build_positional_encoding(
positional_encoding) positional_encoding)
self.transformer = build_transformer(transformer) self.transformer = build_transformer(transformer)
self.embed_dims = self.transformer.embed_dims self.embed_dims = self.transformer.embed_dims
def _init_branch(self,): def _init_branch(self,):
"""Initialize classification branch and regression branch of head.""" """Initialize classification branch and regression branch of head."""
fc_cls = Linear(self.embed_dims, self.cls_out_channels) fc_cls = Linear(self.embed_dims, self.cls_out_channels)
reg_branch = [] reg_branch = []
for _ in range(self.num_reg_fcs): for _ in range(self.num_reg_fcs):
reg_branch.append(Linear(self.embed_dims, self.embed_dims)) reg_branch.append(Linear(self.embed_dims, self.embed_dims))
reg_branch.append(nn.LayerNorm(self.embed_dims)) reg_branch.append(nn.LayerNorm(self.embed_dims))
reg_branch.append(nn.ReLU()) reg_branch.append(nn.ReLU())
reg_branch.append(Linear(self.embed_dims, self.num_points*2)) reg_branch.append(Linear(self.embed_dims, self.num_points*2))
reg_branch = nn.Sequential(*reg_branch) reg_branch = nn.Sequential(*reg_branch)
# add sigmoid or not # add sigmoid or not
def _get_clones(module, N): def _get_clones(module, N):
return nn.ModuleList([copy.deepcopy(module) for i in range(N)]) return nn.ModuleList([copy.deepcopy(module) for i in range(N)])
num_pred = self.transformer.decoder.num_layers num_pred = self.transformer.decoder.num_layers
if self.iterative: if self.iterative:
fc_cls = _get_clones(fc_cls, num_pred) fc_cls = _get_clones(fc_cls, num_pred)
reg_branch = _get_clones(reg_branch, num_pred) reg_branch = _get_clones(reg_branch, num_pred)
self.pre_branches = nn.ModuleDict([ self.pre_branches = nn.ModuleDict([
('cls', fc_cls), ('cls', fc_cls),
('reg', reg_branch), ]) ('reg', reg_branch), ])
def init_weights(self): def init_weights(self):
"""Initialize weights of the DeformDETR head.""" """Initialize weights of the DeformDETR head."""
for p in self.input_proj.parameters(): for p in self.input_proj.parameters():
if p.dim() > 1: if p.dim() > 1:
nn.init.xavier_uniform_(p) nn.init.xavier_uniform_(p)
self.transformer.init_weights() self.transformer.init_weights()
# init prediction branch # init prediction branch
for k, v in self.pre_branches.items(): for k, v in self.pre_branches.items():
for param in v.parameters(): for param in v.parameters():
if param.dim() > 1: if param.dim() > 1:
nn.init.xavier_uniform_(param) nn.init.xavier_uniform_(param)
# focal loss init # focal loss init
if self.loss_cls.use_sigmoid: if self.loss_cls.use_sigmoid:
bias_init = bias_init_with_prob(0.01) bias_init = bias_init_with_prob(0.01)
# for last layer # for last layer
if isinstance(self.pre_branches['cls'], nn.ModuleList): if isinstance(self.pre_branches['cls'], nn.ModuleList):
for m in self.pre_branches['cls']: for m in self.pre_branches['cls']:
nn.init.constant_(m.bias, bias_init) nn.init.constant_(m.bias, bias_init)
else: else:
m = self.pre_branches['cls'] m = self.pre_branches['cls']
nn.init.constant_(m.bias, bias_init) nn.init.constant_(m.bias, bias_init)
    def forward(self, bev_feature, img_metas=None):
        '''
        Args:
            bev_feature (List[Tensor]): shape [B, C, H, W]
                feature in bev view; only the first level is used.
            img_metas: unused here; kept for interface compatibility.
        Outs:
            outputs (List[Dict]): one dict per decoder layer, each with
                lines (Tensor): [bs, num_query, num_points, 2],
                    coordinates squashed into (0, 1) by sigmoid.
                scores (Tensor): [bs, num_query, cls_out_channels] logits.
        '''
        x = bev_feature[0]
        x = self.input_proj(x)  # only change feature size
        B, C, H, W = x.shape
        # All-zero mask: every BEV location is treated as valid (no padding).
        masks = x.new_zeros((B, H, W))
        pos_embed = self.positional_encoding(masks)
        # outs_dec: [nb_dec, bs, num_query, embed_dim]
        outs_dec, _ = self.transformer(x, masks.type(torch.bool), self.query_embedding.weight,
                                       pos_embed)
        outputs = []
        for i, query_feat in enumerate(outs_dec):
            ocls = self.pre_branches['cls'](query_feat)
            oreg = self.pre_branches['reg'](query_feat)
            # [bs, num_query, num_points*2] -> [bs, num_query, num_points, 2]
            oreg = oreg.unflatten(dim=2, sizes=(self.num_points, 2))
            # Last dim is exactly 2, so this sigmoids the whole tensor.
            oreg[..., 0:2] = oreg[..., 0:2].sigmoid()  # normalized xyz
            outputs.append(
                dict(
                    lines=oreg,  # [bs, num_query, num_points, 2]
                    scores=ocls,  # [bs, num_query, num_class]
                )
            )
        return outputs
    @force_fp32(apply_to=('score_pred', 'lines_pred', 'gt_lines'))
    def _get_target_single(self,
                           score_pred,
                           lines_pred,
                           gt_lines,
                           gt_labels,
                           gt_bboxes_ignore=None):
        """
        Compute regression and classification targets for one image.
        Outputs from a single decoder layer of a single feature level are used.
        Args:
            score_pred (Tensor): Box score logits from a single decoder layer
                for one image. Shape [num_query, cls_out_channels].
            lines_pred (Tensor):
                shape [num_query, num_points, 2].
            gt_lines (Tensor):
                shape [num_gt, num_points, 2].
            gt_labels (torch.LongTensor)
                shape [num_gt, ]
        Returns:
            tuple[Tensor]: a tuple containing the following for one image.
                - labels (LongTensor): class target per query; unmatched
                    queries receive the background index ``self.num_classes``.
                    shape [num_query, ]
                - label_weights (Tensor): all-ones label weights.
                    shape [num_query, ]
                - lines_target (Tensor): matched gt lines (zeros elsewhere).
                    shape [num_query, num_points, 2]
                - lines_weights (Tensor): 1.0 at matched queries, else 0.
                    shape [num_query, num_points, 2]
                - pos_inds (Tensor): Sampled positive indices for each image.
                - neg_inds (Tensor): Sampled negative indices for each image.
        """
        num_pred_lines = lines_pred.size(0)
        # assigner and sampler: match predictions to ground truth (the exact
        # matching rule is defined by the configured self.assigner).
        assign_result = self.assigner.assign(preds=dict(lines=lines_pred, scores=score_pred,),
                                             gts=dict(lines=gt_lines,
                                                      labels=gt_labels, ),
                                             gt_bboxes_ignore=gt_bboxes_ignore)
        sampling_result = self.sampler.sample(
            assign_result, lines_pred, gt_lines)
        pos_inds = sampling_result.pos_inds
        neg_inds = sampling_result.neg_inds
        # label targets: default everything to background, then overwrite
        # the matched queries with their assigned gt classes.
        labels = gt_lines.new_full((num_pred_lines, ),
                                   self.num_classes,
                                   dtype=torch.long)
        labels[pos_inds] = gt_labels[sampling_result.pos_assigned_gt_inds]
        label_weights = gt_lines.new_ones(num_pred_lines)
        # bbox targets: only matched queries contribute to regression loss.
        lines_target = torch.zeros_like(lines_pred)
        lines_target[pos_inds] = sampling_result.pos_gt_bboxes
        lines_weights = torch.zeros_like(lines_pred)
        lines_weights[pos_inds] = 1.0
        return (labels, label_weights, lines_target, lines_weights,
                pos_inds, neg_inds)
    @force_fp32(apply_to=('preds', 'gts'))
    def get_targets(self, preds, gts, gt_bboxes_ignore_list=None):
        """
        Compute regression and classification targets for a batch of images.
        Outputs from a single decoder layer of a single feature level are used.
        Args:
            preds (dict): per-image predictions.
                - 'scores': logits, [num_query, cls_out_channels] per image.
                - 'lines': [num_query, num_points, 2] per image.
            gts (dict): per-image ground truth.
                - 'lines': (num_gts, num_points, 2) per image.
                - 'labels': (num_gts, ) per image.
            gt_bboxes_ignore_list (list[Tensor], optional): Bounding
                boxes which can be ignored for each image. Must be None.
        Returns:
            tuple: a tuple containing the following targets.
                - new_gts (dict): per-image target lists under keys
                    'labels', 'label_weights', 'lines_targets',
                    'lines_weights'.
                - num_total_pos (int): Number of positive samples in all
                    images.
                - num_total_neg (int): Number of negative samples in all
                    images.
                - pos_inds_list (list[Tensor]): positive query indices per
                    image.
        """
        assert gt_bboxes_ignore_list is None, \
            'Only supports for gt_bboxes_ignore setting to None.'
        # Run assignment independently per image, then aggregate counts.
        (labels_list, label_weights_list,
         lines_targets_list, lines_weights_list,
         pos_inds_list, neg_inds_list) = multi_apply(
            self._get_target_single,
            preds['scores'], preds['lines'],
            gts['lines'], gts['labels'],
            gt_bboxes_ignore=gt_bboxes_ignore_list)
        num_total_pos = sum((inds.numel() for inds in pos_inds_list))
        num_total_neg = sum((inds.numel() for inds in neg_inds_list))
        new_gts = dict(
            labels=labels_list,
            label_weights=label_weights_list,
            lines_targets=lines_targets_list,
            lines_weights=lines_weights_list,
        )
        return new_gts, num_total_pos, num_total_neg, pos_inds_list
    @force_fp32(apply_to=('preds', 'gts'))
    def loss_single(self,
                    preds: dict,
                    gts: dict,
                    gt_bboxes_ignore_list=None,
                    reduction='none'):
        """
        Loss function for outputs from a single decoder layer of a single
        feature level.
        Args:
            preds (dict): 'scores' [bs, num_query, cls_out_channels] and
                'lines' line predictions reshaped to
                [-1, num_points, 2] below.
            gts (dict): 'lines' (list[Tensor], (num_gts, num_points, 2) per
                image) and 'labels' (list[Tensor], (num_gts,) per image).
            gt_bboxes_ignore_list (list[Tensor], optional): must be None.
            reduction (str): reduction override for the regression loss;
                'none' keeps per-element losses (for performance analysis).
        Returns:
            tuple: (loss_dict with keys 'cls' and 'reg', pos_inds_list).
                NOTE(review): ``loss`` unpacks three values per call of
                ``loss_single`` via multi_apply — confirm whether a
                pos_gt_inds list is missing from this return.
        """
        # get target for each sample
        new_gts, num_total_pos, num_total_neg, pos_inds_list =\
            self.get_targets(preds, gts, gt_bboxes_ignore_list)
        # batched all data: concatenate per-image targets across the batch.
        for k, v in new_gts.items():
            new_gts[k] = torch.cat(v, 0)
        # construct weighted avg_factor to match with the official DETR repo
        cls_avg_factor = num_total_pos * 1.0 + \
            num_total_neg * self.bg_cls_weight
        if self.sync_cls_avg_factor:
            # Average the normalizer across GPUs so every rank scales alike.
            cls_avg_factor = reduce_mean(
                preds['scores'].new_tensor([cls_avg_factor]))
        cls_avg_factor = max(cls_avg_factor, 1)
        # classification loss
        cls_scores = preds['scores'].reshape(-1, self.cls_out_channels)
        loss_cls = self.loss_cls(
            cls_scores, new_gts['labels'], new_gts['label_weights'], avg_factor=cls_avg_factor)
        # Compute the average number of gt boxes accross all gpus, for
        # normalization purposes
        num_total_pos = loss_cls.new_tensor([num_total_pos])
        num_total_pos = torch.clamp(reduce_mean(num_total_pos), min=1).item()
        # regression L1 loss
        lines_preds = preds['lines'].reshape(-1, self.num_points, 2)
        if reduction == 'none':  # For performance analysis
            loss_reg = self.reg_loss(
                lines_preds, new_gts['lines_targets'], new_gts['lines_weights'], reduction_override=reduction, avg_factor=num_total_pos)
        else:
            loss_reg = self.reg_loss(
                lines_preds, new_gts['lines_targets'], new_gts['lines_weights'], avg_factor=num_total_pos)
        loss_dict = dict(
            cls=loss_cls,
            reg=loss_reg,
        )
        return (loss_dict, pos_inds_list)
@force_fp32(apply_to=('gt_lines_list', 'preds_dicts'))
def loss(self,
         gts: dict,
         preds_dicts: dict,
         gt_bboxes_ignore=None,
         reduction='mean'):
    """Compute the loss over every decoder layer and aggregate the results.

    Args:
        gts (dict): Batched ground-truth targets forwarded unchanged to
            ``self.loss_single`` (keys presumably include 'lines'/'labels'
            per layer — confirm against ``loss_single``'s signature).
        preds_dicts (dict): Per-decoder-layer prediction dicts; each entry
            is passed to ``self.loss_single`` as one element of the
            ``multi_apply`` sweep.
        gt_bboxes_ignore (list[Tensor], optional): Must be None; ignoring
            boxes is not supported by this head.
        reduction (str): Loss reduction mode forwarded to ``loss_single``.
            Defaults to 'mean'.

    Returns:
        tuple: ``(loss_dict, pos_inds_lists, pos_gt_inds_lists)`` where
        ``loss_dict`` holds the last decoder layer's losses under their
        plain names and earlier layers' losses under ``d{i}.{name}``.

    NOTE(review): the ``apply_to`` names on the decorator do not match
    this signature (there is no ``gt_lines_list`` argument), and the
    visible ``return`` of ``loss_single`` yields a 2-tuple while three
    sequences are unpacked from ``multi_apply`` here — both look
    inconsistent; confirm against the full definition of ``loss_single``.
    """
    assert gt_bboxes_ignore is None, \
        f'{self.__class__.__name__} only supports ' \
        f'for gt_bboxes_ignore setting to None.'
    # One loss_single call per decoder layer.
    losses, pos_inds_lists, pos_gt_inds_lists = multi_apply(
        self.loss_single,
        preds_dicts,
        gts=gts,
        gt_bboxes_ignore_list=gt_bboxes_ignore,
        reduction=reduction)
    # Last layer's losses keep their bare names.
    loss_dict = dict(losses[-1])
    # Auxiliary layers are prefixed with their decoder index.
    for layer_idx, layer_losses in enumerate(losses[:-1]):
        for key, value in layer_losses.items():
            loss_dict[f'd{layer_idx}.{key}'] = value
    return loss_dict, pos_inds_lists, pos_gt_inds_lists
def post_process(self, preds_dict, tokens, gts):
    """Convert the last decoder layer's raw outputs into per-sample results.

    Fixes over the previous revision: removed the unused ``batch_size``
    and ``max_num`` locals and the commented-out ipdb debug hook; iterate
    ``tokens`` with ``enumerate`` instead of ``range(len(...))``.

    Args:
        preds_dict (list[dict]): Per-decoder-layer predictions; only the
            last layer is used. Each dict holds:
            - 'scores' (Tensor): [bs, num_query, cls_out_channels].
            - 'lines' (Tensor): [bs, num_query, num_points, 2].
        tokens (list): Sample identifiers, one per batch element.
        gts (dict | None): Optional ground truth with 'lines' and 'labels'
            indexable per sample; when given, a 'groundTruth' entry is
            attached to each result.

    Returns:
        list[dict]: One dict per sample with keys:
            'token': the sample token.
            'lines' (np.ndarray): [num_pred, num_points, 2], mapped from
                [0, 1] to [-1, 1] via ``x * 2 - 1``.
            'scores' (np.ndarray): [num_pred], post-sigmoid/softmax.
            'labels' (np.ndarray): [num_pred], integer class ids.
            'nline' (int): number of predicted lines.
    """
    preds = preds_dict[-1]
    batched_cls_scores = preds['scores']
    batched_lines_preds = preds['lines']
    ret_list = []
    for i, token in enumerate(tokens):
        cls_scores = batched_cls_scores[i]
        lines_preds = batched_lines_preds[i]
        if cls_scores.shape[-1] > self.num_classes:
            # Softmax head: the extra last channel is background — drop it,
            # then take the best foreground class per query.
            scores, labels = F.softmax(cls_scores, dim=-1)[..., :-1].max(-1)
            final_scores, bbox_index = scores.topk(self.max_lines)
            final_lines = lines_preds[bbox_index]
            final_labels = labels[bbox_index]
        else:
            # Sigmoid head: rank all (query, class) pairs jointly and
            # recover query/class indices from the flat top-k positions.
            cls_scores = cls_scores.sigmoid()
            final_scores, indexes = cls_scores.view(-1).topk(self.max_lines)
            final_labels = indexes % self.num_classes
            bbox_index = indexes // self.num_classes
            final_lines = lines_preds[bbox_index]
        ret_dict_single = {
            'token': token,
            # Map normalized coordinates from [0, 1] to [-1, 1].
            'lines': final_lines.detach().cpu().numpy() * 2 - 1,
            'scores': final_scores.detach().cpu().numpy(),
            'labels': final_labels.detach().cpu().numpy(),
            'nline': len(final_lines),
        }
        if gts is not None:
            lines_gt = gts['lines'][i].detach().cpu().numpy()
            labels_gt = gts['labels'][i].detach().cpu().numpy()
            ret_dict_single['groundTruth'] = {
                'token': token,
                'nline': lines_gt.shape[0],
                'labels': labels_gt,
                'lines': lines_gt * 2 - 1,
            }
        ret_list.append(ret_dict_single)
    return ret_list
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment