"...git@developer.sourcefind.cn:kecinstone/2024-pra-vllm.git" did not exist on "621980bdc0d5a41e224febf962a6e0474e2b14ef"
Commit f3b13cad authored by yeshenglong1's avatar yeshenglong1
Browse files

UpDate README.md

parent 0797920d
import mmcv
import numpy as np
from mmdet.datasets.builder import PIPELINES
@PIPELINES.register_module(force=True)
class LoadMultiViewImagesFromFiles(object):
"""Load multi channel images from a list of separate channel files.
Expects results['img_filename'] to be a list of filenames.
Args:
to_float32 (bool): Whether to convert the img to float32.
Defaults to False.
color_type (str): Color type of the file. Defaults to 'unchanged'.
"""
def __init__(self, to_float32=False, color_type='unchanged'):
self.to_float32 = to_float32
self.color_type = color_type
def __call__(self, results):
"""Call function to load multi-view image from files.
Args:
results (dict): Result dict containing multi-view image filenames.
Returns:
            dict: The result dict containing the multi-view image data.
                Added keys and values are described below.
                - img (list[np.ndarray]): Multi-view image arrays.
                - img_shape (list[tuple]): Shapes of the loaded image arrays.
                - ori_shape (list[tuple]): Shapes of the original image arrays.
                - pad_shape (list[tuple]): Shapes of the padded image arrays.
                - img_norm_cfg (dict): Normalization configuration of images.
                - img_fields (list[str]): Keys that hold image data.
"""
filename = results['img_filenames']
img = [mmcv.imread(name, self.color_type) for name in filename]
if self.to_float32:
img = [i.astype(np.float32) for i in img]
results['img'] = img
results['img_shape'] = [i.shape for i in img]
results['ori_shape'] = [i.shape for i in img]
# Set initial values for default meta_keys
results['pad_shape'] = [i.shape for i in img]
# results['scale_factor'] = 1.0
num_channels = 1 if len(img[0].shape) < 3 else img[0].shape[2]
results['img_norm_cfg'] = dict(
mean=np.zeros(num_channels, dtype=np.float32),
std=np.ones(num_channels, dtype=np.float32),
to_rgb=False)
results['img_fields'] = ['img']
return results
def __repr__(self):
"""str: Return a string that describes the module."""
return f'{self.__class__.__name__} (to_float32={self.to_float32}, '\
f"color_type='{self.color_type}')"
import numpy as np
from mmdet.datasets.builder import PIPELINES
from shapely.geometry import LineString
@PIPELINES.register_module(force=True)
class PolygonizeLocalMapBbox(object):
"""Pre-Processing used by vectormapnet model.
Args:
canvas_size (tuple or list): bev feature size
coord_dim (int): dimension of point's coordinate
num_class (int): number of classes
threshold (float): threshold for minimum bounding box size
"""
def __init__(self,
canvas_size=(200, 100),
coord_dim=2,
num_class=3,
threshold=6/200,
):
self.canvas_size = np.array(canvas_size)
self.num_class = num_class
# for keypoints
self.threshold = threshold
self.coord_dim = coord_dim
self.map_stop_idx = 0
self.coord_dim_start_idx = 1
    def format_polyline_map(self, vectors):
        polylines, polyline_masks, polyline_weights = [], [], []
        # quantize each label's lines individually
        for label, _lines in vectors.items():
            for polyline in _lines:
                # compute per-vertex weights and pad each polyline
                if label == 2:
                    polyline_weight = evaluate_line(polyline).reshape(-1)
                else:
                    polyline_weight = np.ones_like(polyline).reshape(-1)
                    polyline_weight = np.pad(
                        polyline_weight, ((0, 1),), constant_values=1.)
                    polyline_weight = polyline_weight / polyline_weight.sum()
                # flatten and quantize
                fpolyline = quantize_verts(
                    polyline, self.canvas_size, self.coord_dim)
                fpolyline = fpolyline.reshape(-1)
                # reindex starting from 1, and append a zero stopping token (EOS)
                fpolyline = \
                    np.pad(fpolyline + self.coord_dim_start_idx, ((0, 1),),
                           constant_values=0)
                fpolyline_msk = np.ones(fpolyline.shape, dtype=bool)  # np.bool was removed in NumPy >= 1.24
                polyline_masks.append(fpolyline_msk)
                polyline_weights.append(polyline_weight)
                polylines.append(fpolyline)
polyline_map = polylines
polyline_map_mask = polyline_masks
polyline_map_weights = polyline_weights
return polyline_map, polyline_map_mask, polyline_map_weights
    def format_keypoint(self, vectors):
        kps, kp_labels = [], []
        qkps, qkp_masks = [], []
        # quantize each label's lines individually
        for label, _lines in vectors.items():
            for polyline in _lines:
                kp = get_bbox(polyline, self.threshold)
                kps.append(kp)
                kp_labels.append(label)
                gkp = kp
                # flatten and quantize
                fkp = quantize_verts(gkp, self.canvas_size, self.coord_dim)
                fkp = fkp.reshape(-1)
                fkps_msk = np.ones(fkp.shape, dtype=bool)  # np.bool was removed in NumPy >= 1.24
                qkp_masks.append(fkps_msk)
                qkps.append(fkp)
qkps = np.stack(qkps)
qkp_msks = np.stack(qkp_masks)
# format det
kps = np.stack(kps, axis=0).astype(np.float32)*self.canvas_size
kp_labels = np.array(kp_labels)
# restrict the boundary
kps[..., 0] = np.clip(kps[..., 0], 0.1, self.canvas_size[0]-0.1)
kps[..., 1] = np.clip(kps[..., 1], 0.1, self.canvas_size[1]-0.1)
# nbox, boxsize(4)*coord_dim(2)
kps = kps.reshape(kps.shape[0], -1)
# unflatten_seq(qkps)
return kps, kp_labels, qkps, qkp_msks,
def Polygonization(self, input_dict):
'''
Process vertices.
'''
vectors = input_dict['vectors']
n_lines = 0
for label, lines in vectors.items():
n_lines += len(lines)
if not n_lines:
input_dict['polys'] = []
return input_dict
polyline_map, polyline_map_mask, polyline_map_weight = \
self.format_polyline_map(vectors)
keypoint, keypoint_label, qkeypoint, qkeypoint_mask = \
self.format_keypoint(vectors)
# gather
polys = {
# for det
'keypoint': keypoint,
'det_label': keypoint_label,
# for gen
'gen_label': keypoint_label,
'qkeypoint': qkeypoint,
'qkeypoint_mask': qkeypoint_mask,
'polylines': polyline_map, # List[array]
'polyline_masks': polyline_map_mask, # List[array]
'polyline_weights': polyline_map_weight
}
# Format outputs
input_dict['polys'] = polys
return input_dict
def __call__(self, input_dict):
input_dict = self.Polygonization(input_dict)
return input_dict
def evaluate_line(polyline):
edge = np.linalg.norm(polyline[1:] - polyline[:-1], axis=-1)
start_end_weight = edge[(0, -1), ].copy()
mid_weight = (edge[:-1] + edge[1:]) * .5
pts_weight = np.concatenate(
(start_end_weight[:1], mid_weight, start_end_weight[-1:]))
denominator = pts_weight.sum()
denominator = 1 if denominator == 0 else denominator
pts_weight /= denominator
# add weights for stop index
pts_weight = np.repeat(pts_weight, 2)/2
pts_weight = np.pad(pts_weight, ((0, 1)),
constant_values=1/(len(polyline)*2))
return pts_weight
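# NOTE (added commentary): evaluate_line weights each vertex by the length of
# its incident edges (endpoints count their single edge), normalizes the
# weights to sum to 1, duplicates each weight for the two coordinates of a
# flattened vertex, and appends one slot for the EOS token so the weight
# vector matches the flattened token layout built in format_polyline_map.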
def quantize_verts(verts, canvas_size, coord_dim):
    """Convert vertices from their original range [0, 1] to discrete values in [0, canvas_size - 1].
    Args:
        verts (array): vertices coordinates, shape (seqlen, coords_dim)
        canvas_size (tuple): bev feature size
        coord_dim (int): dimension of point coordinates
    Returns:
        quantized_verts (array): quantized vertices, shape (seqlen, coords_dim)
    """
    min_range = 0
    max_range = 1
    range_quantize = np.array(canvas_size) - 1  # e.g. canvas size 200 -> discrete values 0..199
    verts_ratio = (verts[:, :coord_dim] - min_range) / (
        max_range - min_range)
    verts_quantize = verts_ratio * range_quantize[:coord_dim]
    return verts_quantize.astype('int32')
def get_bbox(polyline, threshold):
"""Convert vertices from its original range ([-1,1]) to discrete values in [0, n_bits**2 - 1].
Args:
polyline (array): point coordinates, shape (seqlen, 2)
threshold (float): threshold for minimum bbox size
Returns:
bbox (array): bounding box in xyxy format, shape (2, 2)
"""
eps = 1e-4
polyline = LineString(polyline)
bbox = polyline.bounds
minx, miny, maxx, maxy = bbox
W, H = maxx-minx, maxy-miny
if W < threshold or H < threshold:
remain = max((threshold - min(W, H))/2, eps)
bbox = polyline.buffer(remain).envelope.bounds
minx, miny, maxx, maxy = bbox
bbox_np = np.array([[minx, miny], [maxx, maxy]])
    bbox_np = np.clip(bbox_np, 0., 1.)
    return bbox_np
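

if __name__ == '__main__':
    # Quick numerical check (added sketch, not part of the original file):
    # a diagonal polyline with coordinates already normalized to (0, 1),
    # quantized onto a 200x100 canvas, and its (padded) bounding box.
    line = np.array([[0.1, 0.1], [0.5, 0.5], [0.9, 0.9]])
    print(quantize_verts(line, (200, 100), 2))  # integer grid coordinates
    print(get_bbox(line, threshold=6 / 200))    # [[minx, miny], [maxx, maxy]]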
import numpy as np
import mmcv
from mmdet.datasets.builder import PIPELINES
@PIPELINES.register_module(force=True)
class Normalize3D(object):
"""Normalize the image.
Added key is "img_norm_cfg".
Args:
mean (sequence): Mean values of 3 channels.
std (sequence): Std values of 3 channels.
to_rgb (bool): Whether to convert the image from BGR to RGB,
default is true.
"""
def __init__(self, mean, std, to_rgb=True):
self.mean = np.array(mean, dtype=np.float32)
self.std = np.array(std, dtype=np.float32)
self.to_rgb = to_rgb
def __call__(self, results):
"""Call function to normalize images.
Args:
results (dict): Result dict from loading pipeline.
Returns:
dict: Normalized results, 'img_norm_cfg' key is added into
result dict.
"""
for key in results.get('img_fields', ['img']):
results[key] = [mmcv.imnormalize(
img, self.mean, self.std, self.to_rgb) for img in results[key]]
results['img_norm_cfg'] = dict(
mean=self.mean, std=self.std, to_rgb=self.to_rgb)
return results
def __repr__(self):
repr_str = self.__class__.__name__
repr_str += f'(mean={self.mean}, std={self.std}, to_rgb={self.to_rgb})'
return repr_str
@PIPELINES.register_module(force=True)
class PadMultiViewImages(object):
"""Pad multi-view images and change intrinsics
There are two padding modes: (1) pad to a fixed size and (2) pad to the
minimum size that is divisible by some number.
Added keys are "pad_shape", "pad_fixed_size", "pad_size_divisor",
If set `change_intrinsics=True`, key 'cam_intrinsics' and 'ego2img' will be changed.
Args:
size (tuple, optional): Fixed padding size, (h, w).
size_divisor (int, optional): The divisor of padded size.
pad_val (float, optional): Padding value, 0 by default.
change_intrinsics (bool): whether to update intrinsics.
"""
def __init__(self, size=None, size_divisor=None, pad_val=0, change_intrinsics=False):
self.size = size
self.size_divisor = size_divisor
self.pad_val = pad_val
# only one of size and size_divisor should be valid
assert size is not None or size_divisor is not None
assert size is None or size_divisor is None
self.change_intrinsics = change_intrinsics
def _pad_img(self, results):
"""Pad images according to ``self.size``."""
original_shape = [img.shape for img in results['img']]
for key in results.get('img_fields', ['img']):
if self.size is not None:
padded_img = [mmcv.impad(
img, shape=self.size, pad_val=self.pad_val) for img in results[key]]
elif self.size_divisor is not None:
padded_img = [mmcv.impad_to_multiple(
img, self.size_divisor, pad_val=self.pad_val) for img in results[key]]
results[key] = padded_img
if self.change_intrinsics:
post_intrinsics, post_ego2imgs = [], []
for img, oshape, cam_intrinsic, ego2img in zip(results['img'], \
original_shape, results['cam_intrinsics'], results['ego2img']):
scaleW = img.shape[1] / oshape[1]
scaleH = img.shape[0] / oshape[0]
rot_resize_matrix = np.array([
[scaleW, 0, 0, 0],
[0, scaleH, 0, 0],
[0, 0, 1, 0],
[0, 0, 0, 1]])
post_intrinsic = rot_resize_matrix[:3, :3] @ cam_intrinsic
post_ego2img = rot_resize_matrix @ ego2img
post_intrinsics.append(post_intrinsic)
post_ego2imgs.append(post_ego2img)
results.update({
'cam_intrinsics': post_intrinsics,
'ego2img': post_ego2imgs,
})
results['img_shape'] = [img.shape for img in padded_img]
results['img_fixed_size'] = self.size
results['img_size_divisor'] = self.size_divisor
def __call__(self, results):
"""Call function to pad images, masks, semantic segmentation maps.
Args:
results (dict): Result dict from loading pipeline.
Returns:
dict: Updated result dict.
"""
self._pad_img(results)
return results
    def __repr__(self):
        repr_str = self.__class__.__name__
        repr_str += f'(size={self.size}, '
        repr_str += f'size_divisor={self.size_divisor}, '
        repr_str += f'pad_val={self.pad_val}, '
        repr_str += f'change_intrinsics={self.change_intrinsics})'
        return repr_str
@PIPELINES.register_module(force=True)
class ResizeMultiViewImages(object):
"""Resize mulit-view images and change intrinsics
If set `change_intrinsics=True`, key 'cam_intrinsics' and 'ego2img' will be changed
Args:
size (tuple, optional): resize target size, (h, w).
change_intrinsics (bool): whether to update intrinsics.
"""
def __init__(self, size, change_intrinsics=True):
self.size = size
self.change_intrinsics = change_intrinsics
def __call__(self, results:dict):
new_imgs, post_intrinsics, post_ego2imgs = [], [], []
for img, cam_intrinsic, ego2img in zip(results['img'], \
results['cam_intrinsics'], results['ego2img']):
tmp, scaleW, scaleH = mmcv.imresize(img,
# NOTE: mmcv.imresize expect (w, h) shape
(self.size[1], self.size[0]),
return_scale=True)
new_imgs.append(tmp)
rot_resize_matrix = np.array([
[scaleW, 0, 0, 0],
[0, scaleH, 0, 0],
[0, 0, 1, 0],
[0, 0, 0, 1]])
post_intrinsic = rot_resize_matrix[:3, :3] @ cam_intrinsic
post_ego2img = rot_resize_matrix @ ego2img
post_intrinsics.append(post_intrinsic)
post_ego2imgs.append(post_ego2img)
results['img'] = new_imgs
results['img_shape'] = [img.shape for img in new_imgs]
if self.change_intrinsics:
results.update({
'cam_intrinsics': post_intrinsics,
'ego2img': post_ego2imgs,
})
return results
    def __repr__(self):
        repr_str = self.__class__.__name__
        repr_str += f'(size={self.size}, '
        repr_str += f'change_intrinsics={self.change_intrinsics})'
        return repr_str
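

if __name__ == '__main__':
    # Sanity check (added sketch, not part of the original file): resizing a
    # view should scale the camera intrinsics by the same factors as the image.
    img = np.zeros((100, 200, 3), dtype=np.uint8)
    cam_intrinsic = np.array([[100., 0., 100.],
                              [0., 100., 50.],
                              [0., 0., 1.]])
    ego2img = np.eye(4)
    resize = ResizeMultiViewImages(size=(50, 100), change_intrinsics=True)
    out = resize(dict(img=[img], cam_intrinsics=[cam_intrinsic],
                      ego2img=[ego2img]))
    assert out['img'][0].shape[:2] == (50, 100)
    assert np.allclose(out['cam_intrinsics'][0][0, 0], 50.)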
import numpy as np
from mmdet.datasets.builder import PIPELINES
from shapely.geometry import LineString
from numpy.typing import NDArray
from typing import List, Tuple, Union, Dict
@PIPELINES.register_module(force=True)
class VectorizeMap(object):
"""Generate vectoized map and put into `semantic_mask` key.
Concretely, shapely geometry objects are converted into sample points (ndarray).
We use args `sample_num`, `sample_dist`, `simplify` to specify sampling method.
Args:
roi_size (tuple or list): bev range .
normalize (bool): whether to normalize points to range (0, 1).
coords_dim (int): dimension of point coordinates.
simplify (bool): whether to use simpily function. If true, `sample_num` \
and `sample_dist` will be ignored.
sample_num (int): number of points to interpolate from a polyline. Set to -1 to ignore.
sample_dist (float): interpolate distance. Set to -1 to ignore.
"""
def __init__(self,
roi_size: Union[Tuple, List],
normalize: bool,
coords_dim: int,
simplify: bool=False,
sample_num: int=-1,
sample_dist: float=-1,
):
self.coords_dim = coords_dim
self.sample_num = sample_num
self.sample_dist = sample_dist
self.roi_size = np.array(roi_size)
self.normalize = normalize
self.simplify = simplify
self.sample_fn = None
if sample_dist > 0:
assert sample_num < 0 and not simplify
self.sample_fn = self.interp_fixed_dist
if sample_num > 0:
assert sample_dist < 0 and not simplify
self.sample_fn = self.interp_fixed_num
def interp_fixed_num(self, line: LineString) -> NDArray:
''' Interpolate a line to fixed number of points.
Args:
line (LineString): line
Returns:
points (array): interpolated points, shape (N, 2)
'''
distances = np.linspace(0, line.length, self.sample_num)
sampled_points = np.array([list(line.interpolate(distance).coords)
for distance in distances]).squeeze()
return sampled_points
def interp_fixed_dist(self, line: LineString) -> NDArray:
''' Interpolate a line at fixed interval.
Args:
line (LineString): line
Returns:
points (array): interpolated points, shape (N, 2)
'''
distances = list(np.arange(self.sample_dist, line.length, self.sample_dist))
# make sure to sample at least two points when sample_dist > line.length
distances = [0,] + distances + [line.length,]
sampled_points = np.array([list(line.interpolate(distance).coords)
for distance in distances]).squeeze()
return sampled_points
    def get_vectorized_lines(self, map_geoms: Dict) -> Dict:
        ''' Vectorize map elements. Iterate over the input dict and apply the
        specified sample function.
        Args:
            map_geoms (Dict): dict mapping labels to lists of shapely geometries
        Returns:
            vectors (Dict): dict of vectorized map elements
        '''
vectors = {}
for label, geom_list in map_geoms.items():
vectors[label] = []
for geom in geom_list:
if geom.geom_type == 'LineString':
geom = LineString(np.array(geom.coords)[:, :self.coords_dim])
if self.simplify:
line = geom.simplify(0.2, preserve_topology=True)
line = np.array(line.coords)
elif self.sample_fn:
line = self.sample_fn(geom)
                    else:
                        line = np.array(geom.coords)
if self.normalize:
line = self.normalize_line(line)
vectors[label].append(line)
elif geom.geom_type == 'Polygon':
# polygon objects will not be vectorized
continue
else:
raise ValueError('map geoms must be either LineString or Polygon!')
return vectors
    def normalize_line(self, line: NDArray) -> NDArray:
        ''' Convert points to the range (0, 1).
        Args:
            line (array): points, shape (N, coords_dim)
        Returns:
            normalized (array): normalized points
        '''
        origin = -np.array([self.roi_size[0]/2, self.roi_size[1]/2])
        line[:, :2] = line[:, :2] - origin
        # map from [0, roi_size] into an open interval inside (0, 1)
        eps = 2
        line[:, :2] = line[:, :2] / (self.roi_size + eps)
        return line
def __call__(self, input_dict):
map_geoms = input_dict['map_geoms']
input_dict['vectors'] = self.get_vectorized_lines(map_geoms)
return input_dict
    def __repr__(self):
        repr_str = self.__class__.__name__
        repr_str += f'(simplify={self.simplify}, '
        repr_str += f'sample_num={self.sample_num}, '
        repr_str += f'sample_dist={self.sample_dist}, '
        repr_str += f'roi_size={self.roi_size}, '
        repr_str += f'normalize={self.normalize}, '
        repr_str += f'coords_dim={self.coords_dim})'
        return repr_str
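

if __name__ == '__main__':
    # Usage sketch (added, not part of the original file): vectorize a single
    # lane line with 5 fixed sample points on a 60m x 30m BEV region.
    geoms = {0: [LineString([(-10., -5.), (10., 5.)])]}
    vectorizer = VectorizeMap(roi_size=(60, 30), normalize=True,
                              coords_dim=2, sample_num=5)
    out = vectorizer(dict(map_geoms=geoms))
    print(out['vectors'][0][0].shape)  # (5, 2), coordinates in (0, 1)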
from .backbones import *
from .heads import *
from .losses import *
from .mapers import *
from .transformer_utils import *
from .assigner import *
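# NOTE (added commentary): these wildcard imports are kept for their side
# effect of registering the submodules' classes with the mmdet/mmcv
# registries, so config files can refer to them by type name.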
from .assigner import HungarianLinesAssigner
from .match_cost import MapQueriesCost, BBoxLogitsCost, DynamicLinesCost, IoUCostC, BBoxCostC, LinesCost, LinesFixNumChamferCost, ClsSigmoidCost
import torch
from mmdet.core.bbox.builder import BBOX_ASSIGNERS
from mmdet.core.bbox.assigners import AssignResult
from mmdet.core.bbox.assigners import BaseAssigner
from mmdet.core.bbox.match_costs import build_match_cost
try:
from scipy.optimize import linear_sum_assignment
except ImportError:
linear_sum_assignment = None
@BBOX_ASSIGNERS.register_module()
class HungarianLinesAssigner(BaseAssigner):
"""
Computes one-to-one matching between predictions and ground truth.
This class computes an assignment between the targets and the predictions
    based on the costs. The costs are a weighted sum of two components:
    the classification cost and the regression L1 cost. The
targets don't include the no_object, so generally there are more
predictions than targets. After the one-to-one matching, the un-matched
are treated as backgrounds. Thus each query prediction will be assigned
with `0` or a positive integer indicating the ground truth index:
- 0: negative sample, no assigned gt
- positive integer: positive sample, index (1-based) of assigned gt
    Args:
        cost (dict): Config of the match cost, built via
            `build_match_cost`. Defaults to a `MapQueriesCost` combining a
            classification cost and a lines regression cost.
        pc_range (list, optional): Point cloud range. Default None.
    """
def __init__(self,
cost=dict(
type='MapQueriesCost',
cls_cost=dict(type='ClassificationCost', weight=1.),
reg_cost=dict(type='LinesCost', weight=1.0),
),
pc_range=None,
**kwargs):
self.pc_range = pc_range
self.cost = build_match_cost(cost)
def assign(self,
preds: dict,
gts: dict,
gt_bboxes_ignore=None,
eps=1e-7):
"""
Computes one-to-one matching based on the weighted costs.
This method assign each query prediction to a ground truth or
background. The `assigned_gt_inds` with -1 means don't care,
0 means negative sample, and positive number is the index (1-based)
of assigned gt.
The assignment is done in the following steps, the order matters.
1. assign every prediction to -1
2. compute the weighted costs
3. do Hungarian matching on CPU based on the costs
4. assign all to 0 (background) first, then for each matched pair
between predictions and gts, treat this prediction as foreground
and assign the corresponding gt index (plus 1) to it.
        Args:
            preds (dict): predictions, with keys:
                - lines (Tensor): predicted normalized lines,
                  shape [num_query, num_points, 2]
                - scores (Tensor): predicted classification logits,
                  shape [num_query, num_class]
            gts (dict): ground truth, with keys:
                - lines (Tensor): ground-truth lines,
                  shape [num_gt, num_points, 2]
                - labels (Tensor): labels of the ground-truth lines,
                  shape (num_gt,)
            gt_bboxes_ignore (Tensor, optional): Ground truth bboxes that are
                labelled as `ignored`. Default None.
eps (int | float, optional): A value added to the denominator for
numerical stability. Default 1e-7.
Returns:
:obj:`AssignResult`: The assigned result.
"""
assert gt_bboxes_ignore is None, \
'Only case when gt_bboxes_ignore is None is supported.'
num_gts, num_lines = gts['lines'].size(0), preds['lines'].size(0)
# 1. assign -1 by default
assigned_gt_inds = \
preds['lines'].new_full((num_lines,), -1, dtype=torch.long)
assigned_labels = \
preds['lines'].new_full((num_lines,), -1, dtype=torch.long)
if num_gts == 0 or num_lines == 0:
# No ground truth or boxes, return empty assignment
if num_gts == 0:
# No ground truth, assign all to background
assigned_gt_inds[:] = 0
return AssignResult(
num_gts, assigned_gt_inds, None, labels=assigned_labels)
# 2. compute the weighted costs
cost = self.cost(preds, gts)
# 3. do Hungarian matching on CPU using linear_sum_assignment
cost = cost.detach().cpu().numpy()
if linear_sum_assignment is None:
raise ImportError('Please run "pip install scipy" '
'to install scipy first.')
        try:
            matched_row_inds, matched_col_inds = linear_sum_assignment(cost)
        except ValueError:
            # linear_sum_assignment raises on NaN/inf costs; report the range
            print('cost max {}, min {}'.format(cost.max(), cost.min()))
            raise
matched_row_inds = torch.from_numpy(matched_row_inds).to(
preds['lines'].device)
matched_col_inds = torch.from_numpy(matched_col_inds).to(
preds['lines'].device)
# 4. assign backgrounds and foregrounds
# assign all indices to backgrounds first
assigned_gt_inds[:] = 0
# assign foregrounds based on matching results
assigned_gt_inds[matched_row_inds] = matched_col_inds + 1
assigned_labels[matched_row_inds] = gts['labels'][matched_col_inds]
        return AssignResult(
            num_gts, assigned_gt_inds, None, labels=assigned_labels)
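

if __name__ == '__main__':
    # Toy assignment (added sketch, not part of the original file): 3 query
    # lines vs. 2 GT lines. Assumes the match-cost module of this package
    # (MapQueriesCost, ClsSigmoidCost, LinesCost) has been imported so the
    # cost types are registered.
    preds = dict(lines=torch.rand(3, 10, 2), scores=torch.rand(3, 4))
    gts = dict(lines=torch.rand(2, 10, 2), labels=torch.tensor([1, 3]))
    assigner = HungarianLinesAssigner(
        cost=dict(type='MapQueriesCost',
                  cls_cost=dict(type='ClsSigmoidCost', weight=1.0),
                  reg_cost=dict(type='LinesCost', weight=1.0)))
    result = assigner.assign(preds, gts)
    print(result.gt_inds)  # a 1-based GT index or 0 (background) per query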
import torch
from mmdet.core.bbox.match_costs.builder import MATCH_COST
from mmdet.core.bbox.match_costs import build_match_cost
from mmdet.core.bbox.iou_calculators import bbox_overlaps
from mmdet.core.bbox.transforms import bbox_cxcywh_to_xyxy
def chamfer_distance(pred, gt):
    '''
    Bi-directional chamfer distance between two point sets.
    Args:
        pred: [num_points_pred, 2]
        gt: [num_points_gt, 2]
    Out: scalar torch.FloatTensor
    '''
# [num_points, num_gt]
dist_mat = torch.cdist(pred, gt, p=2)
# [num_points]
dist_pred, _ = torch.min(dist_mat, dim=-1)
dist_pred = torch.clamp(dist_pred, max=2.0)
dist_pred = dist_pred.mean()
dist_gt, _ = torch.min(dist_mat, dim=0)
dist_gt = torch.clamp(dist_gt, max=2.0)
dist_gt = dist_gt.mean()
dist = dist_pred + dist_gt
return dist
@MATCH_COST.register_module()
class ClsSigmoidCost:
"""ClsSoftmaxCost.
Args:
weight (int | float, optional): loss_weight
"""
def __init__(self, weight=1.):
self.weight = weight
def __call__(self, cls_pred, gt_labels):
"""
Args:
cls_pred (Tensor): Predicted classification logits, shape
[num_query, num_class].
gt_labels (Tensor): Label of `gt_bboxes`, shape (num_gt,).
Returns:
torch.Tensor: cls_cost value with weight
"""
        # Following the official DETR repo: instead of the NLL used in the
        # loss, the matching cost is approximated by 1 - cls_score[gt_label].
        # The constant 1 does not change the matching, so it is omitted.
cls_score = cls_pred.sigmoid()
cls_cost = -cls_score[:, gt_labels]
return cls_cost * self.weight
@MATCH_COST.register_module()
class LinesFixNumChamferCost(object):
"""BBox3DL1Cost.
Args:
weight (int | float, optional): loss_weight
"""
def __init__(self, weight=1.):
self.weight = weight
def __call__(self, lines_pred, gt_lines):
"""
Args:
lines_pred (Tensor): predicted normalized lines:
[num_query, num_points, 2]
gt_lines (Tensor): Ground truth lines
[num_gt, num_points, 2]
Returns:
torch.Tensor: reg_cost value with weight
shape [num_pred, num_gt]
"""
num_gts, num_bboxes = gt_lines.size(0), lines_pred.size(0)
dist_mat = lines_pred.new_full((num_bboxes, num_gts),
1.0,)
for i in range(num_bboxes):
for j in range(num_gts):
dist_mat[i, j] = chamfer_distance(
lines_pred[i], gt_lines[j])
return dist_mat * self.weight
@MATCH_COST.register_module()
class LinesCost(object):
"""LinesL1Cost.
Args:
weight (int | float, optional): loss_weight
"""
def __init__(self, weight=1.):
self.weight = weight
def __call__(self, lines_pred, gt_lines, **kwargs):
"""
Args:
lines_pred (Tensor): predicted normalized lines:
[num_query, num_points, 2]
gt_lines (Tensor): Ground truth lines
[num_gt, num_points, 2]
Returns:
torch.Tensor: reg_cost value with weight
shape [num_pred, num_gt]
"""
        # NOTE: the reversed GT (useful for direction-invariant matching) is
        # computed here but not used by this implementation.
        gt_revser = torch.flip(gt_lines, dims=[-2])
        gt_revser_flat = gt_revser.flatten(1, 2)
pred_flat = lines_pred.flatten(1, 2)
gt_flat = gt_lines.flatten(1, 2)
div_ = pred_flat.size(-1)
dist_mat = torch.cdist(pred_flat, gt_flat, p=1) / div_
return dist_mat * self.weight
@MATCH_COST.register_module()
class BBoxCostC:
"""BBoxL1Cost.
Args:
weight (int | float, optional): loss_weight
box_format (str, optional): 'xyxy' for DETR, 'xywh' for Sparse_RCNN
Examples:
>>> from mmdet.core.bbox.match_costs.match_cost import BBoxL1Cost
>>> import torch
>>> self = BBoxL1Cost()
>>> bbox_pred = torch.rand(1, 4)
>>> gt_bboxes= torch.FloatTensor([[0, 0, 2, 4], [1, 2, 3, 4]])
>>> factor = torch.tensor([10, 8, 10, 8])
>>> self(bbox_pred, gt_bboxes, factor)
tensor([[1.6172, 1.6422]])
"""
def __init__(self, weight=1., box_format='xyxy'):
self.weight = weight
assert box_format in ['xyxy', 'xywh']
self.box_format = box_format
def __call__(self, bbox_pred, gt_bboxes):
"""
Args:
bbox_pred (Tensor): Predicted boxes with normalized coordinates
(cx, cy, w, h), which are all in range [0, 1]. Shape
[num_query, 4].
gt_bboxes (Tensor): Ground truth boxes with normalized
coordinates (x1, y1, x2, y2). Shape [num_gt, 4].
Returns:
torch.Tensor: bbox_cost value with weight
"""
# if self.box_format == 'xywh':
# gt_bboxes = bbox_xyxy_to_cxcywh(gt_bboxes)
# elif self.box_format == 'xyxy':
# bbox_pred = bbox_cxcywh_to_xyxy(bbox_pred)
bbox_cost = torch.cdist(bbox_pred, gt_bboxes, p=1)
return bbox_cost * self.weight
@MATCH_COST.register_module()
class IoUCostC:
"""IoUCost.
Args:
iou_mode (str, optional): iou mode such as 'iou' | 'giou'
weight (int | float, optional): loss weight
Examples:
>>> from mmdet.core.bbox.match_costs.match_cost import IoUCost
>>> import torch
>>> self = IoUCost()
>>> bboxes = torch.FloatTensor([[1,1, 2, 2], [2, 2, 3, 4]])
>>> gt_bboxes = torch.FloatTensor([[0, 0, 2, 4], [1, 2, 3, 4]])
>>> self(bboxes, gt_bboxes)
tensor([[-0.1250, 0.1667],
[ 0.1667, -0.5000]])
"""
def __init__(self, iou_mode='giou', weight=1., box_format='xywh'):
self.weight = weight
self.iou_mode = iou_mode
assert box_format in ['xyxy', 'xywh']
self.box_format = box_format
def __call__(self, bboxes, gt_bboxes):
"""
Args:
bboxes (Tensor): Predicted boxes with unnormalized coordinates
(x1, y1, x2, y2). Shape [num_query, 4].
gt_bboxes (Tensor): Ground truth boxes with unnormalized
coordinates (x1, y1, x2, y2). Shape [num_gt, 4].
Returns:
torch.Tensor: iou_cost value with weight
"""
if self.box_format == 'xywh':
bboxes = bbox_cxcywh_to_xyxy(bboxes)
gt_bboxes = bbox_cxcywh_to_xyxy(gt_bboxes)
# overlaps: [num_bboxes, num_gt]
overlaps = bbox_overlaps(
bboxes, gt_bboxes, mode=self.iou_mode, is_aligned=False)
# The 1 is a constant that doesn't change the matching, so omitted.
iou_cost = -overlaps
return iou_cost * self.weight
@MATCH_COST.register_module()
class DynamicLinesCost(object):
"""LinesL1Cost.
Args:
weight (int | float, optional): loss_weight
"""
def __init__(self, weight=1.):
self.weight = weight
def __call__(self, lines_pred, lines_gt, masks_pred, masks_gt):
"""
Args:
lines_pred (Tensor): predicted normalized lines:
[nP, num_points, 2]
lines_gt (Tensor): Ground truth lines
[nG, num_points, 2]
masks_pred: [nP, num_points]
masks_gt: [nG, num_points]
Returns:
dist_mat: reg_cost value with weight
shape [nP, nG]
"""
dist_mat = self.cal_dist(lines_pred, lines_gt)
dist_mat = self.get_dynamic_line(dist_mat, masks_pred, masks_gt)
dist_mat = dist_mat * self.weight
return dist_mat
def cal_dist(self, x1, x2):
'''
Args:
x1: B1,N,2
x2: B2,N,2
Return:
dist_mat: B1,B2,N
'''
x1 = x1.permute(1, 0, 2)
x2 = x2.permute(1, 0, 2)
dist_mat = torch.cdist(x1, x2, p=2)
dist_mat = dist_mat.permute(1, 2, 0)
return dist_mat
def get_dynamic_line(self, mat, m1, m2):
        '''
        Compute the mask-aware average of per-point distances.
        mat: N1 x N2 x npts
        m1: N1 x npts
        m2: N2 x npts
        '''
# nPxnGxnum_points
m1 = m1.unsqueeze(1).sigmoid() > 0.5
m2 = m2.unsqueeze(0)
valid_points_mask = (m1 + m2)/2.
average_factor_mask = valid_points_mask.sum(-1) > 0
average_factor = average_factor_mask.masked_fill(
~average_factor_mask, 1)
# takes the average
mat = mat * valid_points_mask
mat = mat.sum(-1) / average_factor
return mat
@MATCH_COST.register_module()
class BBoxLogitsCost(object):
"""BBoxLogits.
Args:
weight (int | float, optional): loss_weight
"""
def __init__(self, weight=1.):
self.weight = weight
def calNLL(self, logits, value):
'''
Args:
logits: B1, 8, cls_dim
value: B2, 8,
Return:
log_likelihood: B1,B2,8
'''
logits = logits[:, None]
value = value[None]
value = value.long().unsqueeze(-1)
value, log_pmf = torch.broadcast_tensors(value, logits)
value = value[..., :1]
return log_pmf.gather(-1, value).squeeze(-1)
def __call__(self, bbox_pred, bbox_gt, **kwargs):
"""
Args:
bbox_pred: nproposal, 4*2, pos_dim
bbox_gt: ngt, 4*2
Returns:
cost: nproposal, ngt
"""
cost = self.calNLL(bbox_pred, bbox_gt).mean(-1)
return cost * self.weight
@MATCH_COST.register_module()
class MapQueriesCost(object):
def __init__(self, cls_cost, reg_cost, iou_cost=None):
self.cls_cost = build_match_cost(cls_cost)
self.reg_cost = build_match_cost(reg_cost)
self.iou_cost = None
if iou_cost is not None:
self.iou_cost = build_match_cost(iou_cost)
def __call__(self, preds: dict, gts: dict):
        # classification cost
cls_cost = self.cls_cost(preds['scores'], gts['labels'])
# regression cost
regkwargs = {}
if 'masks' in preds and 'masks' in gts:
            assert isinstance(self.reg_cost, DynamicLinesCost), \
                'masks are only supported by DynamicLinesCost'
regkwargs = {
'masks_pred': preds['masks'],
'masks_gt': gts['masks'],
}
reg_cost = self.reg_cost(preds['lines'], gts['lines'], **regkwargs)
        # weighted sum of the costs above (IoU cost is added when configured)
        cost = cls_cost + reg_cost
        # IoU cost
        if self.iou_cost is not None:
            iou_cost = self.iou_cost(preds['lines'], gts['lines'])
            cost += iou_cost
return cost
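

if __name__ == '__main__':
    # Numerical check (added sketch, not part of the original file): the
    # chamfer distance of a polyline with itself is zero, and it grows when
    # one copy is shifted.
    a = torch.tensor([[0., 0.], [1., 0.], [2., 0.]])
    print(chamfer_distance(a, a).item())        # 0.0
    print(chamfer_distance(a, a + 0.5).item())  # > 0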
import torch
from mmdet.core.bbox.match_costs.builder import MATCH_COST
from mmdet.core.bbox.match_costs import build_match_cost
from mmdet.core.bbox.iou_calculators import bbox_overlaps
from mmdet.core.bbox.transforms import bbox_cxcywh_to_xyxy
def chamfer_distance(pred, gt):
'''
Args:
pred: [num_points, 2]
gt: [num_gt, 2]
Out: torch.FloatTensor of shape (1, )
'''
# [num_points, num_gt]
dist_mat = torch.cdist(pred, gt, p=2)
# [num_points]
dist_pred, _ = torch.min(dist_mat, dim=-1)
dist_pred = torch.clamp(dist_pred, max=2.0)
dist_pred = dist_pred.mean()
dist_gt, _ = torch.min(dist_mat, dim=0)
dist_gt = torch.clamp(dist_gt, max=2.0)
dist_gt = dist_gt.mean()
dist = dist_pred + dist_gt
return dist
@MATCH_COST.register_module()
class ClsSigmoidCost:
"""ClsSoftmaxCost.
Args:
weight (int | float, optional): loss_weight
"""
def __init__(self, weight=1.):
self.weight = weight
def __call__(self, cls_pred, gt_labels):
"""
Args:
cls_pred (Tensor): Predicted classification logits, shape
[num_query, num_class].
gt_labels (Tensor): Label of `gt_bboxes`, shape (num_gt,).
Returns:
torch.Tensor: cls_cost value with weight
"""
# Following the official DETR repo, contrary to the loss that
# NLL is used, we approximate it in 1 - cls_score[gt_label].
# The 1 is a constant that doesn't change the matching,
# so it can be omitted.
cls_score = cls_pred.sigmoid()
cls_cost = -cls_score[:, gt_labels]
return cls_cost * self.weight
@MATCH_COST.register_module()
class LinesFixNumChamferCost(object):
"""BBox3DL1Cost.
Args:
weight (int | float, optional): loss_weight
"""
def __init__(self, weight=1.):
self.weight = weight
def __call__(self, lines_pred, gt_lines):
"""
Args:
lines_pred (Tensor): predicted normalized lines:
[num_query, num_points, 2]
gt_lines (Tensor): Ground truth lines
[num_gt, num_points, 2]
Returns:
torch.Tensor: reg_cost value with weight
shape [num_pred, num_gt]
"""
num_gts, num_bboxes = gt_lines.size(0), lines_pred.size(0)
dist_mat = lines_pred.new_full((num_bboxes, num_gts),
1.0,)
for i in range(num_bboxes):
for j in range(num_gts):
dist_mat[i, j] = chamfer_distance(
lines_pred[i], gt_lines[j])
return dist_mat * self.weight
@MATCH_COST.register_module()
class LinesCost(object):
"""LinesL1Cost.
Args:
weight (int | float, optional): loss_weight
"""
def __init__(self, weight=1.):
self.weight = weight
def __call__(self, lines_pred, gt_lines, **kwargs):
"""
Args:
lines_pred (Tensor): predicted normalized lines:
[num_query, num_points, 2]
gt_lines (Tensor): Ground truth lines
[num_gt, num_points, 2]
Returns:
torch.Tensor: reg_cost value with weight
shape [num_pred, num_gt]
"""
gt_revser = torch.flip(gt_lines, dims=[-2])
gt_revser_flat = gt_revser.flatten(1, 2)
pred_flat = lines_pred.flatten(1, 2)
gt_flat = gt_lines.flatten(1, 2)
div_ = pred_flat.size(-1)
dist_mat = torch.cdist(pred_flat, gt_flat, p=1) / div_
return dist_mat * self.weight
@MATCH_COST.register_module()
class BBoxCostC:
"""BBoxL1Cost.
Args:
weight (int | float, optional): loss_weight
box_format (str, optional): 'xyxy' for DETR, 'xywh' for Sparse_RCNN
Examples:
>>> from mmdet.core.bbox.match_costs.match_cost import BBoxL1Cost
>>> import torch
>>> self = BBoxL1Cost()
>>> bbox_pred = torch.rand(1, 4)
>>> gt_bboxes= torch.FloatTensor([[0, 0, 2, 4], [1, 2, 3, 4]])
>>> factor = torch.tensor([10, 8, 10, 8])
>>> self(bbox_pred, gt_bboxes, factor)
tensor([[1.6172, 1.6422]])
"""
def __init__(self, weight=1., box_format='xyxy'):
self.weight = weight
assert box_format in ['xyxy', 'xywh']
self.box_format = box_format
def __call__(self, bbox_pred, gt_bboxes):
"""
Args:
bbox_pred (Tensor): Predicted boxes with normalized coordinates
(cx, cy, w, h), which are all in range [0, 1]. Shape
[num_query, 4].
gt_bboxes (Tensor): Ground truth boxes with normalized
coordinates (x1, y1, x2, y2). Shape [num_gt, 4].
Returns:
torch.Tensor: bbox_cost value with weight
"""
# if self.box_format == 'xywh':
# gt_bboxes = bbox_xyxy_to_cxcywh(gt_bboxes)
# elif self.box_format == 'xyxy':
# bbox_pred = bbox_cxcywh_to_xyxy(bbox_pred)
bbox_cost = torch.cdist(bbox_pred, gt_bboxes, p=1)
return bbox_cost * self.weight
@MATCH_COST.register_module()
class IoUCostC:
"""IoUCost.
Args:
iou_mode (str, optional): iou mode such as 'iou' | 'giou'
weight (int | float, optional): loss weight
Examples:
>>> from mmdet.core.bbox.match_costs.match_cost import IoUCost
>>> import torch
>>> self = IoUCost()
>>> bboxes = torch.FloatTensor([[1,1, 2, 2], [2, 2, 3, 4]])
>>> gt_bboxes = torch.FloatTensor([[0, 0, 2, 4], [1, 2, 3, 4]])
>>> self(bboxes, gt_bboxes)
tensor([[-0.1250, 0.1667],
[ 0.1667, -0.5000]])
"""
def __init__(self, iou_mode='giou', weight=1., box_format='xywh'):
self.weight = weight
self.iou_mode = iou_mode
assert box_format in ['xyxy', 'xywh']
self.box_format = box_format
def __call__(self, bboxes, gt_bboxes):
"""
Args:
bboxes (Tensor): Predicted boxes with unnormalized coordinates
(x1, y1, x2, y2). Shape [num_query, 4].
gt_bboxes (Tensor): Ground truth boxes with unnormalized
coordinates (x1, y1, x2, y2). Shape [num_gt, 4].
Returns:
torch.Tensor: iou_cost value with weight
"""
if self.box_format == 'xywh':
bboxes = bbox_cxcywh_to_xyxy(bboxes)
gt_bboxes = bbox_cxcywh_to_xyxy(gt_bboxes)
# overlaps: [num_bboxes, num_gt]
overlaps = bbox_overlaps(
bboxes, gt_bboxes, mode=self.iou_mode, is_aligned=False)
# The 1 is a constant that doesn't change the matching, so omitted.
iou_cost = -overlaps
return iou_cost * self.weight
@MATCH_COST.register_module()
class DynamicLinesCost(object):
"""LinesL1Cost.
Args:
weight (int | float, optional): loss_weight
"""
def __init__(self, weight=1.):
self.weight = weight
def __call__(self, lines_pred, lines_gt, masks_pred, masks_gt):
"""
Args:
lines_pred (Tensor): predicted normalized lines:
[nP, num_points, 2]
lines_gt (Tensor): Ground truth lines
[nG, num_points, 2]
masks_pred: [nP, num_points]
masks_gt: [nG, num_points]
Returns:
dist_mat: reg_cost value with weight
shape [nP, nG]
"""
dist_mat = self.cal_dist(lines_pred, lines_gt)
dist_mat = self.get_dynamic_line(dist_mat, masks_pred, masks_gt)
dist_mat = dist_mat * self.weight
return dist_mat
def cal_dist(self, x1, x2):
'''
Args:
x1: B1,N,2
x2: B2,N,2
Return:
dist_mat: B1,B2,N
'''
x1 = x1.permute(1, 0, 2)
x2 = x2.permute(1, 0, 2)
dist_mat = torch.cdist(x1, x2, p=2)
dist_mat = dist_mat.permute(1, 2, 0)
return dist_mat
def get_dynamic_line(self, mat, m1, m2):
'''
get dynamic line with difference approach
mat: N1xN2xnpts
m1: N1xnpts
m2: N2xnpts
'''
# nPxnGxnum_points
m1 = m1.unsqueeze(1).sigmoid() > 0.5
m2 = m2.unsqueeze(0)
valid_points_mask = (m1 + m2)/2.
        num_valid = valid_points_mask.sum(-1)
        # guard against division by zero when no point is valid
        average_factor = num_valid.masked_fill(num_valid == 0, 1)
# takes the average
mat = mat * valid_points_mask
mat = mat.sum(-1) / average_factor
return mat
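# Hypothetical usage sketch (not part of the original file): the dynamic cost
# averages per-point L2 distances only over points marked valid by the
# sigmoid-thresholded predicted mask and the ground-truth mask.
def _demo_dynamic_lines_cost():
    lines_pred = torch.rand(4, 10, 2)
    lines_gt = torch.rand(2, 10, 2)
    masks_pred = torch.randn(4, 10)  # logits; sigmoid(x) > 0.5 marks a point valid
    masks_gt = torch.ones(2, 10)     # every gt point valid
    cost = DynamicLinesCost(weight=2.)(lines_pred, lines_gt, masks_pred, masks_gt)
    assert cost.shape == (4, 2)      # [nP, nG]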
@MATCH_COST.register_module()
class BBoxLogitsCost(object):
"""BBoxLogits.
Args:
weight (int | float, optional): loss_weight
"""
def __init__(self, weight=1.):
self.weight = weight
def calNLL(self, logits, value):
'''
Args:
logits: B1, 8, cls_dim
value: B2, 8,
Return:
log_likelihood: B1,B2,8
'''
logits = logits[:, None]
value = value[None]
value = value.long().unsqueeze(-1)
value, log_pmf = torch.broadcast_tensors(value, logits)
value = value[..., :1]
return log_pmf.gather(-1, value).squeeze(-1)
def __call__(self, bbox_pred, bbox_gt, **kwargs):
"""
Args:
bbox_pred: nproposal, 4*2, pos_dim
bbox_gt: ngt, 4*2
Returns:
cost: nproposal, ngt
"""
cost = self.calNLL(bbox_pred, bbox_gt).mean(-1)
return cost * self.weight
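# Hypothetical usage sketch (not part of the original file). Note that calNLL
# gathers the raw logit of each ground-truth token; no log-softmax or negation
# is applied here.
def _demo_bbox_logits_cost():
    bbox_pred = torch.randn(6, 8, 100)               # 6 proposals, 8 tokens, 100-way logits
    bbox_gt = torch.randint(0, 100, (2, 8)).float()  # 2 gt boxes as token ids
    cost = BBoxLogitsCost(weight=1.)(bbox_pred, bbox_gt)
    assert cost.shape == (6, 2)                      # [nproposal, ngt]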
@MATCH_COST.register_module()
class MapQueriesCost(object):
def __init__(self, cls_cost, reg_cost, iou_cost=None):
self.cls_cost = build_match_cost(cls_cost)
self.reg_cost = build_match_cost(reg_cost)
self.iou_cost = None
if iou_cost is not None:
self.iou_cost = build_match_cost(iou_cost)
def __call__(self, preds: dict, gts: dict):
# classification and bboxcost.
cls_cost = self.cls_cost(preds['scores'], gts['labels'])
# regression cost
regkwargs = {}
if 'masks' in preds and 'masks' in gts:
            assert isinstance(self.reg_cost, DynamicLinesCost), \
                'masks are provided, so reg_cost must be a DynamicLinesCost'
regkwargs = {
'masks_pred': preds['masks'],
'masks_gt': gts['masks'],
}
reg_cost = self.reg_cost(preds['lines'], gts['lines'], **regkwargs)
        # weighted sum of the costs
        cost = cls_cost + reg_cost
        # optional IoU cost
        if self.iou_cost is not None:
            iou_cost = self.iou_cost(preds['lines'], gts['lines'])
            cost += iou_cost
return cost
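# Hypothetical usage sketch (not part of the original file): MapQueriesCost sums
# a classification cost and a regression cost, each built from a MATCH_COST
# config dict (FocalLossCost comes from mmdet; LinesCost is registered above).
def _demo_map_queries_cost():
    cost_fn = MapQueriesCost(
        cls_cost=dict(type='FocalLossCost', weight=2.0),
        reg_cost=dict(type='LinesCost', weight=1.0))
    preds = dict(scores=torch.rand(5, 3), lines=torch.rand(5, 20, 2))
    gts = dict(labels=torch.randint(0, 3, (2,)), lines=torch.rand(2, 20, 2))
    cost = cost_fn(preds, gts)
    assert cost.shape == (5, 2)  # [num_query, num_gt]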
import torch
import torch.nn as nn
import torch.nn.functional as F
class NoiseSythesis(nn.Module):
def __init__(self,
p, scale=0.01, shift_scale=(8,5),
scaling_size=(0.1,0.1), canvas_size=(200, 100),
bbox_type='sce',
poly_coord_dim=2,
bbox_coord_dim=2,
quantify=True):
super(NoiseSythesis, self).__init__()
self.p = p
self.scale = scale
self.bbox_type = bbox_type
self.quantify = quantify
self.poly_coord_dim = poly_coord_dim
self.bbox_coord_dim = bbox_coord_dim
self.transforms = [self.random_shifting, self.random_scaling]
# self.transforms = [self.random_scaling]
self.register_buffer('canvas_size', torch.tensor(canvas_size))
self.register_buffer('shift_scale', torch.tensor(shift_scale).float())
self.register_buffer('scaling_size', torch.tensor(scaling_size))
def random_scaling(self, bbox):
'''
        bbox: B, parameter_num, 2
'''
device = bbox.device
dtype = bbox.dtype
B = bbox.shape[0]
noise = (torch.rand(B, device=device)*2-1)[:,None,None] # [-1,1]
scale = self.scaling_size.to(device)
scale = (noise * scale) + 1
scaled_bbox = bbox * scale
# recenterization
coffset = scaled_bbox.mean(-2) - bbox.float().mean(-2)
scaled_bbox = scaled_bbox - coffset[:,None]
return scaled_bbox.round().type(dtype)
def random_shifting(self, bbox):
'''
        bbox: B, parameter_num, 2
'''
device = bbox.device
batch_size = bbox.shape[0]
shift_scale = self.shift_scale
scale = (bbox.max(1)[0] - bbox.min(1)[0]) * 0.1
scale = torch.where(scale < shift_scale, scale, shift_scale)
noise = (torch.rand(batch_size, 2, device=device)*2-1) # [-1,1]
offset = (noise * scale).round().type(bbox.dtype)
shifted_bbox = bbox + offset[:, None]
return shifted_bbox
def gaussian_noise_bbox(self, bbox):
dtype = bbox.dtype
batch_size = bbox.shape[0]
scale = (self.canvas_size * self.scale)[:self.bbox_coord_dim]
noisy_bbox = torch.normal(bbox.type(torch.float), scale)
if self.quantify:
noisy_bbox = noisy_bbox.round().type(dtype)
            # prevent out-of-bound coordinates (clamp each axis to its canvas extent)
            for i in range(self.bbox_coord_dim):
                noisy_bbox[...,i] =\
                    torch.clamp(noisy_bbox[...,i],1,self.canvas_size[i])
else:
noisy_bbox = noisy_bbox.type(torch.float)
return noisy_bbox
def gaussian_noise_poly(self, polyline, polyline_mask):
device = polyline.device
batchsize = polyline.shape[0]
scale = self.canvas_size * self.scale
polyline = F.pad(polyline,(0,self.poly_coord_dim-1))
polyline = polyline.view(batchsize,-1, self.poly_coord_dim)
mask = F.pad(polyline_mask[:,1:],(0,self.poly_coord_dim))
noisy_polyline = torch.normal(polyline.type(torch.float), scale)
if self.quantify:
noisy_polyline = noisy_polyline.round().type(polyline.dtype)
# prevent out of bound case
for i in range(self.poly_coord_dim):
noisy_polyline[...,i] =\
torch.clamp(noisy_polyline[...,i],0,self.canvas_size[i])
else:
noisy_polyline = noisy_polyline.type(torch.float)
noisy_polyline = noisy_polyline.view(batchsize,-1) * mask
noisy_polyline = noisy_polyline[:,:-(self.poly_coord_dim-1)]
return noisy_polyline
def random_apply(self, bbox):
for t in self.transforms:
if self.p < torch.rand(1):
continue
bbox = t(bbox)
# prevent out of bound case
bbox[...,0] =\
torch.clamp(bbox[...,0],0,self.canvas_size[0])
bbox[...,1] =\
torch.clamp(bbox[...,1],0,self.canvas_size[1])
return bbox
def simple_aug(self, batch):
# augment bbox
if self.bbox_type in ['sce', 'xyxy']:
fbbox = batch['bbox_flat']
seq_len = fbbox.shape[0]
bbox = fbbox.view(seq_len, -1, 2)
bbox = self.gaussian_noise_bbox(bbox)
fbbox_aug = bbox.view(seq_len, -1)
aug_mask = torch.rand(fbbox.shape,device=fbbox.device)
fbbox = torch.where(aug_mask<self.p, fbbox_aug, fbbox)
elif self.bbox_type == 'rxyxy':
fbbox = self.rbbox_aug(batch)
elif self.bbox_type == 'convex_hull':
fbbox = self.convex_hull_aug(batch)
# augment
polyline = batch['polylines']
polyline_mask = batch['polyline_masks']
polyline_aug = self.gaussian_noise_poly(polyline, polyline_mask)
aug_mask = torch.rand(polyline.shape,device=polyline.device)
polyline = torch.where(aug_mask<self.p, polyline_aug, polyline)
return polyline, fbbox
def rbbox_aug(self, batch):
return None
def convex_hull_aug(self,batch):
return None
def __call__(self, batch, simple_aug=False):
if simple_aug:
return self.simple_aug(batch)
else:
fbbox = batch['bbox_flat']
seq_len = fbbox.shape[0]
bbox = fbbox.view(seq_len, -1, self.bbox_coord_dim)
aug_bbox = self.random_apply(bbox)
aug_bbox_flat = aug_bbox.view(seq_len, -1)
return aug_bbox_flat
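# Hypothetical usage sketch (not part of the original file): jitter a batch of
# quantized xyxy boxes with the random shift/scale transforms, clamped to the
# canvas extents.
def _demo_noise_synthesis():
    noise = NoiseSythesis(p=0.5, canvas_size=(200, 100))
    batch = {'bbox_flat': torch.randint(0, 100, (8, 4))}  # 8 sequences of xyxy
    aug = noise(batch)
    assert aug.shape == (8, 4)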
import copy
import math
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from mmdet3d.models.builder import BACKBONES
from mmdet.models import build_backbone, build_neck
class UpsampleBlock(nn.Module):
def __init__(self, ins, outs):
super(UpsampleBlock, self).__init__()
self.gn = nn.GroupNorm(32, outs)
self.conv = nn.Conv2d(ins, outs, kernel_size=3,
stride=1, padding=1) # same
self.relu = nn.ReLU(inplace=True)
def forward(self, x):
x = self.conv(x)
x = self.relu(self.gn(x))
x = self.upsample2x(x)
return x
def upsample2x(self, x):
_, _, h, w = x.shape
x = F.interpolate(x, size=(h*2, w*2),
mode='bilinear', align_corners=True)
return x
class Upsample(nn.Module):
def __init__(self,
zoom_size=(2, 4, 8),
in_channels=128,
out_channels=128,
):
super(Upsample, self).__init__()
self.out_channels = out_channels
input_conv = UpsampleBlock(in_channels, out_channels)
inter_conv = UpsampleBlock(out_channels, out_channels)
fscale = []
for scale_factor in zoom_size:
layer_num = int(math.log2(scale_factor))
if layer_num < 1:
fscale.append(nn.Identity())
continue
tmp = [copy.deepcopy(input_conv), ]
tmp += [copy.deepcopy(inter_conv) for i in range(layer_num-1)]
fscale.append(nn.Sequential(*tmp))
self.fscale = nn.ModuleList(fscale)
def init_weights(self):
for m in self.modules():
if isinstance(m, nn.Conv2d):
nn.init.kaiming_uniform_(m.weight, a=1)
nn.init.constant_(m.bias, 0)
def forward(self, imgs):
rescale_i = []
for f, img in zip(self.fscale, imgs):
rescale_i.append(f(img))
out = sum(rescale_i)
return out
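# Hypothetical usage sketch (not part of the original file): fuse three pyramid
# levels whose resolutions differ by 2x into one common-resolution map.
def _demo_upsample():
    feats = [torch.rand(1, 128, 40, 20),   # upsampled 2x -> 80 x 40
             torch.rand(1, 128, 20, 10),   # upsampled 4x -> 80 x 40
             torch.rand(1, 128, 10, 5)]    # upsampled 8x -> 80 x 40
    out = Upsample(zoom_size=(2, 4, 8))(feats)
    assert out.shape == (1, 128, 80, 40)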
@BACKBONES.register_module()
class IPMEncoder(nn.Module):
'''
encode cam features
'''
def __init__(self,
img_backbone,
img_neck,
upsample,
xbound=[-30.0, 30.0, 0.5],
ybound=[-15.0, 15.0, 0.5],
zbound=[-10.0, 10.0, 20.0],
heights=[-1.1, 0, 0.5, 1.1],
pretrained=None,
out_channels=128,
num_cam=6,
use_lidar=False,
use_image=True,
lidar_dim=128,
):
super(IPMEncoder, self).__init__()
self.x_bound = xbound
self.y_bound = ybound
self.heights = heights
self.num_cam = num_cam
num_x = int((xbound[1] - xbound[0]) / xbound[2])
num_y = int((ybound[1] - ybound[0]) / ybound[2])
self.img_backbone = build_backbone(img_backbone)
self.img_neck = build_neck(img_neck)
self.upsample = Upsample(**upsample)
self.use_image = use_image
self.use_lidar = use_lidar
if self.use_lidar:
self.pp = PointPillarEncoder(lidar_dim, xbound, ybound, zbound)
self.outconvs =\
nn.Conv2d((self.upsample.out_channels+3)*len(heights), out_channels//2,
kernel_size=3, stride=1, padding=1) # same
if self.use_image:
_out_channels = out_channels//2
else:
_out_channels = out_channels
self.outconvs_lidar =\
nn.Conv2d(lidar_dim, _out_channels,
kernel_size=3, stride=1, padding=1) # same
else:
self.outconvs =\
nn.Conv2d((self.upsample.out_channels+3)*len(heights), out_channels,
kernel_size=3, stride=1, padding=1) # same
self.init_weights(pretrained=pretrained)
# bev_plane
bev_planes = [construct_plane_grid(
xbound, ybound, h) for h in self.heights]
        self.register_buffer('bev_planes', torch.stack(
            bev_planes),)  # nlvl,bH,bW,3
self.masked_embeds = nn.Embedding(len(heights), out_channels)
def init_weights(self, pretrained=None):
"""Initialize model weights."""
self.img_backbone.init_weights()
self.img_neck.init_weights()
self.upsample.init_weights()
for p in self.outconvs.parameters():
if p.dim() > 1:
nn.init.xavier_uniform_(p)
if self.use_lidar:
for p in self.outconvs_lidar.parameters():
if p.dim() > 1:
nn.init.xavier_uniform_(p)
for p in self.pp.parameters():
if p.dim() > 1:
nn.init.xavier_uniform_(p)
def extract_img_feat(self, imgs):
        '''
        Extract image features and fuse the multi-scale maps into one.
        Args:
            imgs: B, n_cam, C, iH, iW
        Returns:
            img_feat: B * n_cam, C, H, W
        '''
B, n_cam, C, iH, iW = imgs.shape
imgs = imgs.view(B * n_cam, C, iH, iW)
img_feats = self.img_backbone(imgs)
# reduce the channel dim
img_feats = self.img_neck(img_feats)
# fuse four feature map
img_feat = self.upsample(img_feats)
return img_feat
def forward(self, imgs, img_metas, *args, points=None, **kwargs):
'''
Args:
imgs: torch.Tensor of shape [B, N, 3, H, W]
N: number of cams
img_metas:
# N=6, ['CAM_FRONT', 'CAM_FRONT_RIGHT', 'CAM_FRONT_LEFT', 'CAM_BACK', 'CAM_BACK_LEFT', 'CAM_BACK_RIGHT']
ego2cam: [B, N, 4, 4]
cam_intrinsics: [B, N, 3, 3]
cam2ego_rotations: [B, N, 3, 3]
cam2ego_translations: [B, N, 3]
...
Outs:
bev_feature: torch.Tensor of shape [B, C*nlvl, bH, bW]
'''
if self.use_image:
self.B = imgs.shape[0]
# Get transform matrix
ego2cam = []
for img_meta in img_metas:
ego2cam.append(img_meta['ego2img'])
img_shape = imgs.shape[-2:]
ego2cam = np.asarray(ego2cam)
# Image backbone
img_feats = self.extract_img_feat(imgs)
# IPM
bev_feat, bev_feat_mask = self.ipm(img_feats, ego2cam, img_shape)
            # fuse the multi-height levels into a single feature map
bev_feat = bev_feat.flatten(1, 2)
bev_feat = self.outconvs(bev_feat)
if self.use_lidar:
lidar_feat = self.get_lidar_feature(points)
if self.use_image:
bev_feat = torch.cat([bev_feat,lidar_feat],dim=1)
else:
bev_feat = lidar_feat
return bev_feat
def ipm(self, cam_feat, ego2cam, img_shape):
'''
inverse project
Args:
cam_feat: B*ncam, C, cH, cW
img_shape: tuple(H, W)
Returns:
project_feat: B, C, nlvl, bH, bW
bev_feat_mask: B, 1, nlvl, bH, bW
'''
C = cam_feat.shape[1]
bev_grid = self.bev_planes.unsqueeze(0).repeat(self.B, 1, 1, 1, 1)
nlvl, bH, bW = bev_grid.shape[1:4]
        bev_grid = bev_grid.flatten(1, 3)  # B, nlvl*bH*bW, 3
# Find points in cam coords
# bev_grid_pos: B*ncam, nlvl*bH*bW, 2
bev_grid_pos, bev_cam_mask = get_campos(bev_grid, ego2cam, img_shape)
# B*cam, nlvl*bH, bW, 2
bev_grid_pos = bev_grid_pos.unflatten(-2, (nlvl*bH, bW))
# project feat from 2D to bev plane
projected_feature = F.grid_sample(
cam_feat, bev_grid_pos, align_corners=False).view(self.B, -1, C, nlvl, bH, bW) # B,cam,C,nlvl,bH,bW
# B,cam,nlvl,bH,bW
bev_feat_mask = bev_cam_mask.unflatten(-1, (nlvl, bH, bW))
# eliminate the ncam
# The bev feature is the sum of the 6 cameras
bev_feat_mask = bev_feat_mask.unsqueeze(2)
projected_feature = (projected_feature*bev_feat_mask).sum(1)
num_feat = bev_feat_mask.sum(1)
projected_feature = projected_feature / \
num_feat.masked_fill(num_feat == 0, 1)
        # concatenate position information
        # projected_feature: B, C+3, nlvl, bH, bW
bev_grid = bev_grid.view(self.B, nlvl, bH, bW,
3).permute(0, 4, 1, 2, 3)
projected_feature = torch.cat(
(projected_feature, bev_grid), dim=1)
return projected_feature, bev_feat_mask.sum(1) > 0
def get_lidar_feature(self, points):
ptensor, pmask = points
lidar_feature = self.pp(ptensor, pmask)
# bev_grid = self.bev_planes[...,:-1].unsqueeze(0).repeat(self.B, 1, 1, 1, 1)
# bev_grid = bev_grid[:,0]
# bev_grid = bev_grid.permute(0, 3, 1, 2)
# lidar_feature = torch.cat(
# (lidar_feature, bev_grid), dim=1)
lidar_feature = self.outconvs_lidar(lidar_feature)
return lidar_feature
def construct_plane_grid(xbound, ybound, height: float, dtype=torch.float32):
'''
Returns:
plane: H, W, 3
'''
xmin, xmax = xbound[0], xbound[1]
num_x = int((xbound[1] - xbound[0]) / xbound[2])
ymin, ymax = ybound[0], ybound[1]
num_y = int((ybound[1] - ybound[0]) / ybound[2])
x = torch.linspace(xmin, xmax, num_x, dtype=dtype)
y = torch.linspace(ymin, ymax, num_y, dtype=dtype)
# [num_y, num_x]
y, x = torch.meshgrid(y, x)
z = torch.ones_like(x) * height
# [num_y, num_x, 3]
plane = torch.stack([x, y, z], dim=-1)
return plane
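# Hypothetical shape check (not part of the original file): a 0.5 m-step grid
# over x in [-30, 30] and y in [-15, 15] at a fixed height z.
def _demo_plane_grid():
    plane = construct_plane_grid([-30.0, 30.0, 0.5], [-15.0, 15.0, 0.5], height=0.)
    assert plane.shape == (60, 120, 3)  # (num_y, num_x, xyz)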
def get_campos(reference_points, ego2cam, img_shape):
'''
    Find each reference point's corresponding pixel in each camera
Args:
reference_points: [B, num_query, 3]
ego2cam: (B, num_cam, 4, 4)
Outs:
reference_points_cam: (B*num_cam, num_query, 2)
mask: (B, num_cam, num_query)
num_query == W*H
'''
ego2cam = reference_points.new_tensor(ego2cam) # (B, N, 4, 4)
reference_points = reference_points.clone()
B, num_query = reference_points.shape[:2]
num_cam = ego2cam.shape[1]
# reference_points (B, num_queries, 4)
reference_points = torch.cat(
(reference_points, torch.ones_like(reference_points[..., :1])), -1)
reference_points = reference_points.view(
B, 1, num_query, 4).repeat(1, num_cam, 1, 1).unsqueeze(-1)
ego2cam = ego2cam.view(
B, num_cam, 1, 4, 4).repeat(1, 1, num_query, 1, 1)
# reference_points_cam (B, num_cam, num_queries, 4)
reference_points_cam = (ego2cam @ reference_points).squeeze(-1)
eps = 1e-9
mask = (reference_points_cam[..., 2:3] > eps)
    reference_points_cam =\
        reference_points_cam[..., 0:2] / \
        (reference_points_cam[..., 2:3] + eps)
reference_points_cam[..., 0] /= img_shape[1]
reference_points_cam[..., 1] /= img_shape[0]
# from 0~1 to -1~1
reference_points_cam = (reference_points_cam - 0.5) * 2
mask = (mask & (reference_points_cam[..., 0:1] > -1.0)
& (reference_points_cam[..., 0:1] < 1.0)
& (reference_points_cam[..., 1:2] > -1.0)
& (reference_points_cam[..., 1:2] < 1.0))
# (B, num_cam, num_query)
mask = mask.view(B, num_cam, num_query)
reference_points_cam = reference_points_cam.view(B*num_cam, num_query, 2)
return reference_points_cam, mask
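# Hypothetical usage sketch (not part of the original file): project BEV points
# through an identity ego2cam transform and check the output shapes.
def _demo_get_campos():
    pts = torch.rand(2, 7, 3) + 1.0              # keep z > 0 so points are in front
    ego2cam = np.tile(np.eye(4), (2, 1, 1, 1))   # B=2, num_cam=1
    pos, mask = get_campos(pts, ego2cam, img_shape=(128, 256))
    assert pos.shape == (2, 7, 2) and mask.shape == (2, 1, 7)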
def _test():
pass
if __name__ == '__main__':
_test()
from .base_map_head import BaseMapHead
from .dg_head import DGHead
from .map_element_detector import MapElementDetector
from .polyline_generator import PolylineGenerator
from abc import ABCMeta, abstractmethod
import torch.nn as nn
from mmcv.runner import auto_fp16
from mmcv.utils import print_log
from mmdet.utils import get_root_logger
class BaseMapHead(nn.Module, metaclass=ABCMeta):
"""Base class for mappers."""
def __init__(self):
super(BaseMapHead, self).__init__()
self.fp16_enabled = False
def init_weights(self, pretrained=None):
"""Initialize the weights in detector.
Args:
pretrained (str, optional): Path to pre-trained weights.
Defaults to None.
"""
if pretrained is not None:
logger = get_root_logger()
print_log(f'load model from: {pretrained}', logger=logger)
@auto_fp16(apply_to=('img', ))
def forward(self, *args, **kwargs):
pass
@abstractmethod
def loss(self, pred, gt):
'''
Compute loss
Output:
dict(
loss: torch.Tensor
log_vars: dict(
str: float,
)
num_samples: int
)
'''
return
@abstractmethod
def post_process(self, pred):
'''
convert model predictions to vectorized outputs
the output format should be consistent with the evaluation function
'''
return
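# Hypothetical minimal subclass (not part of the original file): a concrete head
# must implement loss() and post_process() with the documented output formats.
class _DummyMapHead(BaseMapHead):
    def loss(self, pred, gt):
        loss = ((pred - gt) ** 2).mean()
        return dict(loss=loss,
                    log_vars={'loss': float(loss)},
                    num_samples=len(gt))
    def post_process(self, pred):
        # vectorized outputs, in whatever format the evaluator expects
        return pred.detach().cpu().numpy()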
# The causal layer is adapted from https://github.com/alexmt-scale/causal-transformer-decoder
# with some changes to fit the PolyGen-style decoding.
import torch
import torch.nn as nn
from typing import Optional
from torch import Tensor
from mmcv.cnn.bricks.registry import ATTENTION
from mmcv.utils import build_from_cfg
def build_attention(cfg, default_args=None):
"""Builder for attention."""
return build_from_cfg(cfg, ATTENTION, default_args)
class CausalTransformerDecoder(nn.TransformerDecoder):
"""Implementation of a transformer decoder based on torch implementation but
more efficient. The difference is that it doesn't need to recompute the
embeddings of all the past decoded tokens but instead uses a cache to
store them. This makes use of the fact that the attention of a decoder is
    causal, so new predicted tokens don't affect the old tokens' embeddings because
the corresponding attention cells are masked.
The complexity goes from seq_len^3 to seq_len^2.
This only happens in eval mode.
In training mode, teacher forcing makes these optimizations unnecessary. Hence the
Decoder acts like a regular nn.TransformerDecoder (except that the attention tgt
masks are handled for you).
"""
def forward(
self,
tgt: Tensor,
memory: Optional[Tensor] = None,
cache: Optional[Tensor] = None,
memory_mask: Optional[Tensor] = None,
tgt_key_padding_mask: Optional[Tensor] = None,
memory_key_padding_mask: Optional[Tensor] = None,
causal_mask: Optional[Tensor] = None,
) -> Tensor:
"""
Args:
tgt (Tensor): current_len_output x bsz x hidden_dim
memory (Tensor): len_encoded_seq x bsz x hidden_dim
cache (Optional[Tensor]):
n_layers x (current_len_output - 1) x bsz x hidden_dim
If current_len_output == 1, nothing is cached yet, so cache
should be None. Same if the module is in training mode.
others (Optional[Tensor]): see official documentations
Returns:
output (Tensor): current_len_output x bsz x hidden_dim
cache (Optional[Tensor]): n_layers x current_len_output x bsz x hidden_dim
Only returns it when module is in eval mode (no caching in training)
"""
output = tgt
if self.training:
if cache is not None:
raise ValueError(
"cache parameter should be None in training mode")
for mod in self.layers:
output = mod(
output,
memory,
memory_mask=memory_mask,
tgt_key_padding_mask=tgt_key_padding_mask,
memory_key_padding_mask=memory_key_padding_mask,
causal_mask=causal_mask,
only_last=False,
)
return output, cache
else:
new_token_cache = []
for i, mod in enumerate(self.layers):
output = mod(output, memory,
memory_mask=memory_mask,
tgt_key_padding_mask=tgt_key_padding_mask,
memory_key_padding_mask=memory_key_padding_mask,
causal_mask=causal_mask,
only_last=True if cache is not None else False)
new_token_cache.append(output)
                # reuse the cached activations computed at previous decoding steps.
if cache is not None:
output = torch.cat([cache[i], output], dim=0)
if cache is not None:
new_cache = torch.cat(
[cache, torch.stack(new_token_cache, dim=0)], dim=1)
else:
new_cache = torch.stack(new_token_cache, dim=0)
return output, new_cache
class CausalTransformerDecoderLayer(nn.TransformerDecoderLayer):
def __init__(self, *args, re_zero=True, norm_first=True, map_attn_cfg=None, **kwargs):
'''
Args:
re_zero: If True, alpha scale residuals with zero init.
'''
super(CausalTransformerDecoderLayer, self).__init__(*args, **kwargs)
if re_zero:
self.res_weight1 = nn.Parameter(torch.FloatTensor([0, ]))
self.res_weight2 = nn.Parameter(torch.FloatTensor([0, ]))
self.res_weight3 = nn.Parameter(torch.FloatTensor([0, ]))
else:
self.res_weight1 = 1.
self.res_weight2 = 1.
self.res_weight3 = 1.
self.norm_first = norm_first
self.map_attn = None
if map_attn_cfg is not None:
self.map_attn = build_attention(map_attn_cfg)
def forward(
self,
tgt: Tensor,
memory: Optional[Tensor] = None,
memory_mask: Optional[Tensor] = None,
tgt_key_padding_mask: Optional[Tensor] = None,
memory_key_padding_mask: Optional[Tensor] = None,
causal_mask: Optional[Tensor] = None,
query: Optional[Tensor] = None,
only_last=False) -> Tensor:
"""
Args:
see CausalTransformerDecoder
            If query is not None, the layer runs a query stream (the query attends over tgt)
Returns:
Tensor:
If training: embedding of the whole layer: seq_len x bsz x hidden_dim
If eval mode: embedding of last token: 1 x bsz x hidden_dim
"""
if not self.norm_first:
raise ValueError(
"norm_first parameter should be True!")
if self.training:
# the official Pytorch implementation
x = tgt
if query is not None:
x = query
x = x + self.res_weight1 * \
self._sa_block(self.norm1(x), self.norm1(tgt), causal_mask,
tgt_key_padding_mask)
if memory is not None:
x = x + self.res_weight2 * \
self._mha_block(self.norm2(x), memory,
memory_mask, memory_key_padding_mask)
x = x + self.res_weight3*self._ff_block(self.norm3(x))
return x
# This part is adapted from the official Pytorch implementation
# So that only the last token gets modified and returned.
# we follow the pre-LN trans in https://arxiv.org/pdf/2002.04745v1.pdf .
x = tgt
if query is not None:
x = query
if only_last:
x = x[-1:]
if causal_mask is not None:
attn_mask = causal_mask
if only_last:
                attn_mask = attn_mask[-1:]  # keep only the mask row for the last (new) token
else:
attn_mask = None
# efficient self attention
x = x + self.res_weight1 * \
self._sa_block(self.norm1(x), self.norm1(tgt), attn_mask,
tgt_key_padding_mask)
# encoder-decoder attention
if memory is not None:
x = x + self.res_weight2 * \
self._mha_block(self.norm2(x), memory,
memory_mask, memory_key_padding_mask)
# final feed-forward network
x = x + self.res_weight3*self._ff_block(self.norm3(x))
return x
# self-attention block
def _sa_block(self, x: Tensor, mem: Tensor,
attn_mask: Optional[Tensor], key_padding_mask: Optional[Tensor]) -> Tensor:
x = self.self_attn(x, mem, mem,
attn_mask=attn_mask,
key_padding_mask=key_padding_mask,
need_weights=False)[0]
return self.dropout1(x)
# multihead attention block
def _mha_block(self, x: Tensor, mem: Tensor,
attn_mask: Optional[Tensor], key_padding_mask: Optional[Tensor]) -> Tensor:
x = self.multihead_attn(x, mem, mem,
attn_mask=attn_mask,
key_padding_mask=key_padding_mask,
need_weights=False)[0]
return self.dropout2(x)
# feed forward block
def _ff_block(self, x: Tensor) -> Tensor:
x = self.linear2(self.dropout(self.activation(self.linear1(x))))
return self.dropout3(x)
class PolygenTransformerEncoderLayer(nn.TransformerEncoderLayer):
def __init__(self, *args, re_zero=True, norm_first=True, **kwargs):
'''
Args:
re_zero: If True, alpha scale residuals with zero init.
'''
super(PolygenTransformerEncoderLayer, self).__init__(*args, **kwargs)
if re_zero:
self.res_weight1 = nn.Parameter(torch.FloatTensor([0, ]))
self.res_weight2 = nn.Parameter(torch.FloatTensor([0, ]))
else:
self.res_weight1 = 1.
self.res_weight2 = 1.
self.norm_first = norm_first
def forward(self, src: Tensor, src_mask: Optional[Tensor] = None, src_key_padding_mask: Optional[Tensor] = None) -> Tensor:
r"""Pass the input through the encoder layer.
Args:
src: the sequence to the encoder layer (required).
src_mask: the mask for the src sequence (optional).
src_key_padding_mask: the mask for the src keys per batch (optional).
Shape:
see the docs in Transformer class.
"""
# see Fig. 1 of https://arxiv.org/pdf/2002.04745v1.pdf
x = src
if self.norm_first:
x = x + self.res_weight1*self._sa_block(self.norm1(x), src_mask,
src_key_padding_mask)
x = x + self.res_weight2*self._ff_block(self.norm2(x))
else:
x = self.norm1(
x + self.res_weight1*self._sa_block(x, src_mask, src_key_padding_mask))
x = self.norm2(x + self.res_weight2*self._ff_block(x))
return x
# self-attention block
def _sa_block(self, x: Tensor,
attn_mask: Optional[Tensor], key_padding_mask: Optional[Tensor]) -> Tensor:
x = self.self_attn(x, x, x,
attn_mask=attn_mask,
key_padding_mask=key_padding_mask,
need_weights=False)[0]
return self.dropout1(x)
# feed forward block
def _ff_block(self, x: Tensor) -> Tensor:
x = self.linear2(self.dropout(self.activation(self.linear1(x))))
return self.dropout2(x)
def generate_square_subsequent_mask(sz: int, device: str = "cpu") -> torch.Tensor:
    """ Generate the attention mask for causal decoding """
    mask = (torch.triu(torch.ones(sz, sz)) == 1).transpose(0, 1)
    mask = (
        mask.float()
        .masked_fill(mask == 0, float("-inf"))
        .masked_fill(mask == 1, float(0.0))
    ).to(device=device)
    return mask
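# Hypothetical sanity check (not part of the original file): position i may only
# attend to positions <= i; future positions are masked with -inf.
def _demo_causal_mask():
    m = generate_square_subsequent_mask(3)
    assert m[1, 0] == 0. and torch.isinf(m[0, 1])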
import torch
import torch.nn.functional as F
from torch import Tensor
def generate_square_subsequent_mask(sz: int, condition_len: int = 1, bool_out=False, device: str = "cpu") -> torch.Tensor:
""" Generate the attention mask for causal decoding """
mask = (torch.triu(torch.ones(sz, sz)) == 1).transpose(0, 1)
if condition_len > 1:
mask[:condition_len,:condition_len] = 1
if not bool_out:
mask = (
mask.float()
.masked_fill(mask == 0, float("-inf"))
.masked_fill(mask == 1, float(0.0)))
return mask.to(device=device)
def dequantize_verts(verts, canvas_size: Tensor, add_noise=False):
    """Dequantize integer vertices back to floats in the range [-1, 1]."""
    min_range = -1
    max_range = 1
    range_quantize = canvas_size
    verts = verts.type(torch.float32)
    verts = verts * (max_range - min_range) / range_quantize + min_range
    if add_noise:
        # jitter each coordinate within one quantization bin
        verts += torch.rand_like(verts) * (max_range - min_range) / range_quantize
    return verts
def quantize_verts(
verts,
canvas_size: Tensor):
"""Convert vertices from its original range ([-1,1]) to discrete values in [0, n_bits**2 - 1].
Args:
verts: seqlen, 2
"""
min_range = -1
max_range = 1
range_quantize = canvas_size-1
verts_ratio = (verts - min_range) / (
max_range - min_range)
verts_quantize = verts_ratio * range_quantize
return verts_quantize.type(torch.int32)
def top_k_logits(logits, k):
"""Masks logits such that logits not in top-k are small."""
if k == 0:
return logits
else:
values, _ = torch.topk(logits, k=k)
k_largest = torch.min(values)
logits = torch.where(logits < k_largest,
torch.ones_like(logits)*-1e9, logits)
return logits
def top_p_logits(logits, p):
"""Masks logits using nucleus (top-p) sampling."""
if p == 1:
return logits
else:
seq, dim = logits.shape[1:]
logits = logits.view(-1, dim)
sort_indices = torch.argsort(logits, dim=-1, descending=True)
probs = F.softmax(logits, dim=-1).gather(-1, sort_indices)
cumprobs = torch.cumsum(probs, dim=-1) - probs
        # The top-1 candidate is never masked (cumprobs excludes each token's
        # own probability), so at least one index remains selectable.
        sort_mask = (cumprobs > p).type(logits.dtype)
        top_p_mask = torch.zeros_like(logits)
        top_p_mask = top_p_mask.scatter_add(-1, sort_indices, sort_mask)
        logits -= top_p_mask * 1e9
return logits.view(-1, seq, dim)
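# Hypothetical usage sketch (not part of the original file): restrict sampling
# to the top-k / nucleus candidates. Note top_p_logits mutates its input via an
# in-place subtraction, hence the clone().
def _demo_logit_filters():
    logits = torch.randn(2, 5, 10)            # batch, seq, vocab
    topk = top_k_logits(logits.clone(), k=3)  # low logits pushed towards -1e9
    topp = top_p_logits(logits.clone(), p=0.9)
    assert topk.shape == topp.shape == (2, 5, 10)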
import copy
import torch
import torch.nn as nn
import torch.nn.functional as F
from mmcv.cnn import Conv2d, Linear
from mmcv.runner import force_fp32
from torch.distributions.categorical import Categorical
from mmdet.core import multi_apply, reduce_mean
from mmdet.models import HEADS
from .detr_head import DETRMapFixedNumHead
@HEADS.register_module(force=True)
class DETRBboxHead(DETRMapFixedNumHead):
def __init__(self, *args, canvas_size=(400, 200), discrete_output=True, separate_detect=True,
mode='xyxy', bbox_size=None, coord_dim=2, kp_coord_dim=2,
**kwargs):
self.canvas_size = canvas_size # hard code
self.separate_detect = separate_detect
self.discrete_output = discrete_output
self.bbox_size = 3 if mode=='sce' else 2
if bbox_size is not None:
self.bbox_size = bbox_size
self.coord_dim = coord_dim # for xyz
self.kp_coord_dim = kp_coord_dim
super(DETRBboxHead, self).__init__(*args, **kwargs)
        # re-register canvas_size as a buffer so it moves with the module
        del self.canvas_size
        self.register_buffer('canvas_size', torch.tensor(canvas_size))
self._init_embedding()
def _init_embedding(self):
# for bbox parameter xstart, ystart, xend, yend
self.bbox_embedding = nn.Embedding(4, self.embed_dims)
self.label_embed = nn.Embedding(
self.num_classes, self.embed_dims)
self.img_coord_embed = nn.Linear(2, self.embed_dims)
def _init_branch(self,):
"""Initialize classification branch and regression branch of head."""
# add sigmoid or not
if self.separate_detect:
if self.cls_out_channels == self.num_classes+1:
self.cls_out_channels = 2
else:
self.cls_out_channels = 1
fc_cls = Linear(self.embed_dims, self.cls_out_channels)
reg_branch = []
for _ in range(self.num_reg_fcs):
reg_branch.append(Linear(self.embed_dims, self.embed_dims))
reg_branch.append(nn.LayerNorm(self.embed_dims))
reg_branch.append(nn.ReLU())
if self.discrete_output:
reg_branch.append(nn.Linear(
self.embed_dims, max(self.canvas_size), bias=True,))
else:
reg_branch.append(nn.Linear(
self.embed_dims, self.bbox_size*self.coord_dim, bias=True,))
reg_branch = nn.Sequential(*reg_branch)
def _get_clones(module, N):
return nn.ModuleList([copy.deepcopy(module) for i in range(N)])
num_pred = self.transformer.decoder.num_layers
if self.iterative:
fc_cls = _get_clones(fc_cls, num_pred)
reg_branch = _get_clones(reg_branch, num_pred)
self.pre_branches = nn.ModuleDict([
('cls', fc_cls),
('reg', reg_branch), ])
def _prepare_context(self, batch, context):
"""Prepare class label and vertex context."""
global_context_embedding = None
if self.separate_detect:
global_context_embedding = self.label_embed(batch['class_label'])
# Image context
if self.separate_detect:
image_embeddings = assign_bev(
context['bev_embeddings'], batch['batch_idx'])
else:
image_embeddings = context['bev_embeddings']
image_embeddings = self.input_proj(
image_embeddings) # only change feature size
# Pass images through encoder
device = image_embeddings.device
# Add 2D coordinate grid embedding
B, C, H, W = image_embeddings.shape
Ws = torch.linspace(-1., 1., W)
Hs = torch.linspace(-1., 1., H)
image_coords = torch.stack(
torch.meshgrid(Hs, Ws), dim=-1).to(device)
image_coord_embeddings = self.img_coord_embed(image_coords)
image_embeddings += image_coord_embeddings[None].permute(0, 3, 1, 2)
        # keep the (B, C, H, W) layout; flattening into a sequence happens in the transformer
sequential_context_embeddings = image_embeddings.reshape(
B, C, H, W)
return (global_context_embedding, sequential_context_embeddings)
def forward(self, batch, context, img_metas=None):
'''
Args:
bev_feature (List[Tensor]): shape [B, C, H, W]
feature in bev view
img_metas
Outs:
preds_dict (Dict):
all_cls_scores (Tensor): Classification score of all
decoder layers, has shape
[nb_dec, bs, num_query, cls_out_channels].
all_lines_preds (Tensor):
[nb_dec, bs, num_query, num_points, 2].
'''
(global_context_embedding, sequential_context_embeddings) =\
self._prepare_context(batch, context)
if self.separate_detect:
query_embedding = self.query_embedding.weight[None] + \
global_context_embedding[:, None]
else:
B = sequential_context_embeddings.shape[0]
query_embedding = self.query_embedding.weight[None].repeat(B, 1, 1)
x = sequential_context_embeddings
B, C, H, W = x.shape
masks = x.new_zeros((B, H, W))
pos_embed = self.positional_encoding(masks)
# outs_dec: [nb_dec, bs, num_query, embed_dim]
outs_dec, _ = self.transformer(x, masks.type(torch.bool), query_embedding,
pos_embed)
outputs = []
        for query_feat in outs_dec:
            outputs.append(self.get_prediction(query_feat))
return outputs
def get_prediction(self, query_feat):
ocls = self.pre_branches['cls'](query_feat)
if self.discrete_output:
pos = []
for i in range(4):
pos_embeds = self.bbox_embedding.weight[i]
_pos = self.pre_branches['reg'](query_feat+pos_embeds)
pos.append(_pos)
# # y mask
# _vert_mask = torch.arange(logits.shape[-1], device=logits.device)
# vertices_mask_y = (_vert_mask < self.canvas_size[1]+1)
# logits[:,1::2] = logits[:,1::2]*vertices_mask_y - ~vertices_mask_y*1e9
            logits = torch.stack(pos, dim=-2)  # [..., 4, max(canvas_size)]
lines = Categorical(logits=logits)
else:
lines = self.pre_branches['reg'](query_feat).sigmoid()
lines = lines.unflatten(-1, (self.bbox_size, self.coord_dim))*self.canvas_size
lines = lines.flatten(-2)
        return dict(
            lines=lines,  # Categorical over [bs, num_query, 4, max(canvas_size)] logits, or Tensor [bs, num_query, bbox_size*coord_dim]
            scores=ocls,  # [bs, num_query, cls_out_channels]
        )
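    # NOTE: with discrete_output=True, box regression is cast as
    # per-coordinate classification over a vocabulary of max(canvas_size)
    # bins (one Categorical per corner coordinate, in the spirit of
    # Pix2Seq-style coordinate tokenization); with discrete_output=False the
    # head regresses normalized coordinates and scales them by canvas_size.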
@force_fp32(apply_to=('score_pred', 'lines_pred', 'gt_lines'))
def _get_target_single(self,
score_pred,
lines_pred,
gt_labels,
gt_lines,
gt_bboxes_ignore=None):
"""
Compute regression and classification targets for one image.
Outputs from a single decoder layer of a single feature level are used.
Args:
cls_score (Tensor): Box score logits from a single decoder layer
for one image. Shape [num_query, cls_out_channels].
lines_pred (Tensor):
shape [num_query, num_points, 2].
gt_lines (Tensor):
shape [num_gt, num_points, 2].
gt_labels (torch.LongTensor)
shape [num_gt, ]
Returns:
tuple[Tensor]: a tuple containing the following for one image.
- labels (LongTensor): Labels of each image.
shape [num_query, 1]
- label_weights (Tensor]): Label weights of each image.
shape [num_query, 1]
- lines_target (Tensor): Lines targets of each image.
shape [num_query, num_points, 2]
- lines_weights (Tensor): Lines weights of each image.
shape [num_query, num_points, 2]
- pos_inds (Tensor): Sampled positive indices for each image.
- neg_inds (Tensor): Sampled negative indices for each image.
"""
num_pred_lines = len(lines_pred)
# assigner and sampler
assign_result = self.assigner.assign(preds=dict(lines=lines_pred, scores=score_pred,),
gts=dict(lines=gt_lines,
labels=gt_labels, ),
gt_bboxes_ignore=gt_bboxes_ignore)
sampling_result = self.sampler.sample(
assign_result, lines_pred, gt_lines)
pos_inds = sampling_result.pos_inds
neg_inds = sampling_result.neg_inds
pos_gt_inds = sampling_result.pos_assigned_gt_inds
# label targets 0: foreground, 1: background
if self.separate_detect:
labels = gt_lines.new_full((num_pred_lines, ), 1, dtype=torch.long)
else:
labels = gt_lines.new_full(
(num_pred_lines, ), self.num_classes, dtype=torch.long)
labels[pos_inds] = gt_labels[sampling_result.pos_assigned_gt_inds]
label_weights = gt_lines.new_ones(num_pred_lines)
        # bbox targets: for discrete output, lines_pred's last dimension is
        # the coordinate vocabulary, which the ground truth does not have.
if self.discrete_output:
lines_target = torch.zeros_like(lines_pred[..., 0]).long()
lines_weights = torch.zeros_like(lines_pred[..., 0])
else:
lines_target = torch.zeros_like(lines_pred)
lines_weights = torch.zeros_like(lines_pred)
lines_target[pos_inds] = sampling_result.pos_gt_bboxes.type(
lines_target.dtype)
lines_weights[pos_inds] = 1.0
n = lines_weights.sum(-1, keepdim=True)
lines_weights = lines_weights / n.masked_fill(n == 0, 1)
return (labels, label_weights, lines_target, lines_weights,
pos_inds, neg_inds, pos_gt_inds)
# @force_fp32(apply_to=('preds', 'gts'))
def get_targets(self, preds, gts, gt_bboxes_ignore_list=None):
"""
Compute regression and classification targets for a batch image.
Outputs from a single decoder layer of a single feature level are used.
Args:
cls_scores_list (list[Tensor]): Box score logits from a single
decoder layer for each image with shape [num_query,
cls_out_channels].
lines_preds_list (list[Tensor]): [num_query, num_points, 2].
gt_lines_list (list[Tensor]): Ground truth lines for each image
with shape (num_gts, num_points, 2)
gt_labels_list (list[Tensor]): Ground truth class indices for each
image with shape (num_gts, ).
gt_bboxes_ignore_list (list[Tensor], optional): Bounding
boxes which can be ignored for each image. Default None.
Returns:
tuple: a tuple containing the following targets.
- labels_list (list[Tensor]): Labels for all images.
- label_weights_list (list[Tensor]): Label weights for all \
images.
- lines_targets_list (list[Tensor]): Lines targets for all \
images.
- lines_weight_list (list[Tensor]): Lines weights for all \
images.
- num_total_pos (int): Number of positive samples in all \
images.
- num_total_neg (int): Number of negative samples in all \
images.
"""
        assert gt_bboxes_ignore_list is None, \
            'Only supports gt_bboxes_ignore set to None.'
# format the inputs
if self.separate_detect:
bbox = [b[m] for b, m in zip(gts['bbox'], gts['bbox_mask'])]
class_label = torch.zeros_like(gts['bbox_mask']).long()
class_label = [b[m] for b, m in zip(class_label, gts['bbox_mask'])]
else:
class_label = gts['class_label']
bbox = gts['bbox']
if self.discrete_output:
lines_pred = preds['lines'].logits
else:
lines_pred = preds['lines']
bbox = [b.float() for b in bbox]
(labels_list, label_weights_list,
lines_targets_list, lines_weights_list,
         pos_inds_list, neg_inds_list, pos_gt_inds_list) = multi_apply(
self._get_target_single,
preds['scores'], lines_pred,
class_label, bbox,
gt_bboxes_ignore=gt_bboxes_ignore_list)
num_total_pos = sum((inds.numel() for inds in pos_inds_list))
num_total_neg = sum((inds.numel() for inds in neg_inds_list))
new_gts = dict(
labels=labels_list,
label_weights=label_weights_list,
bboxs=lines_targets_list,
bboxs_weights=lines_weights_list,
)
return new_gts, num_total_pos, num_total_neg, pos_inds_list, pos_gt_inds_list
# @force_fp32(apply_to=('preds', 'gts'))
def loss_single(self,
preds: dict,
gts: dict,
gt_bboxes_ignore_list=None,
reduction='none'):
"""
Loss function for outputs from a single decoder layer of a single
feature level.
Args:
cls_scores (Tensor): Box score logits from a single decoder layer
for all images. Shape [bs, num_query, cls_out_channels].
lines_preds (Tensor):
shape [bs, num_query, num_points, 2].
gt_lines_list (list[Tensor]):
with shape (num_gts, num_points, 2)
gt_labels_list (list[Tensor]): Ground truth class indices for each
image with shape (num_gts, ).
gt_bboxes_ignore_list (list[Tensor], optional): Bounding
boxes which can be ignored for each image. Default None.
Returns:
dict[str, Tensor]: A dictionary of loss components for outputs from
a single decoder layer.
"""
# Get target for each sample
new_gts, num_total_pos, num_total_neg, pos_inds_list, pos_gt_inds_list =\
self.get_targets(preds, gts, gt_bboxes_ignore_list)
# Batched all data
for k, v in new_gts.items():
new_gts[k] = torch.stack(v, dim=0)
# construct weighted avg_factor to match with the official DETR repo
cls_avg_factor = num_total_pos * 1.0 + \
num_total_neg * self.bg_cls_weight
if self.sync_cls_avg_factor:
cls_avg_factor = reduce_mean(
preds['scores'].new_tensor([cls_avg_factor]))
cls_avg_factor = max(cls_avg_factor, 1)
# Classification loss
if self.separate_detect:
loss_cls = self.bce_loss(
preds['scores'], new_gts['labels'], new_gts['label_weights'], cls_avg_factor)
else:
            # the classification loss expects flattened class logits, so
            # collapse the batch and query dims first.
cls_scores = preds['scores'].reshape(-1, self.cls_out_channels)
cls_labels = new_gts['labels'].reshape(-1)
cls_weights = new_gts['label_weights'].reshape(-1)
loss_cls = self.loss_cls(
cls_scores, cls_labels, cls_weights, avg_factor=cls_avg_factor)
        # Compute the average number of gt boxes across all GPUs for
        # normalization purposes.
num_total_pos = loss_cls.new_tensor([num_total_pos])
num_total_pos = torch.clamp(reduce_mean(num_total_pos), min=1).item()
# position NLL loss
if self.discrete_output:
loss_reg = -(preds['lines'].log_prob(new_gts['bboxs']) *
new_gts['bboxs_weights']).sum()/(num_total_pos)
else:
loss_reg = self.reg_loss(
preds['lines'], new_gts['bboxs'], new_gts['bboxs_weights'], avg_factor=num_total_pos)
loss_dict = dict(
cls=loss_cls,
reg=loss_reg,
)
return loss_dict, pos_inds_list, pos_gt_inds_list
def bce_loss(self, logits, label, weights, cls_avg_factor):
        ''' Binary cross-entropy: -(y*log(p) + (1-y)*log(1-p)), with y = 1
        for foreground queries (label == 0).
        logits: [B, n, 1]
        label: [B, n]; 0 = foreground, 1 = background
        '''
p = logits.squeeze(-1).sigmoid()
pos_msk = label == 0
neg_msk = ~pos_msk
loss_cls = -(p.log()*pos_msk + (1-p).log()*neg_msk)
loss_cls = (loss_cls * weights).sum()/cls_avg_factor
return loss_cls
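    # Worked example of the arithmetic above (illustrative numbers): a
    # foreground query (label == 0) with sigmoid probability p = 0.8
    # contributes -log(0.8) ~= 0.223, while a background query with the same
    # p contributes -log(1 - 0.8) ~= 1.609; the weighted sum is then divided
    # by cls_avg_factor.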
def post_process(self, preds_dicts: list, **kwargs):
'''
Args:
preds_dicts:
scores (Tensor): Classification score of all
decoder layers, has shape
[nb_dec, bs, num_query, cls_out_channels].
lines (Tensor):
[nb_dec, bs, num_query, bbox parameters(4)].
        Outs:
            result_dict (Dict): batched detections with per-sample lists
                'bbox', 'scores' and 'labels', plus concatenated tensors
                'bbox_flat' (boxes quantized to int32), 'lines_cls' and
                'lines_bs_idx' for the downstream polyline head.
        '''
preds = preds_dicts[-1]
batched_cls_scores = preds['scores']
batched_lines_preds = preds['lines']
batch_size = batched_cls_scores.size(0)
device = batched_cls_scores.device
result_dict = {
'bbox': [],
'scores': [],
'labels': [],
'bbox_flat': [],
'lines_cls': [],
'lines_bs_idx': [],
}
for i in range(batch_size):
cls_scores = batched_cls_scores[i]
det_preds = batched_lines_preds[i]
max_num = self.max_lines
if self.loss_cls.use_sigmoid:
cls_scores = cls_scores.sigmoid()
scores, valid_idx = cls_scores.view(-1).topk(max_num)
det_labels = valid_idx % self.num_classes
valid_idx = valid_idx // self.num_classes
det_preds = det_preds[valid_idx]
else:
scores, det_labels = F.softmax(cls_scores, dim=-1)[..., :-1].max(-1)
scores, valid_idx = scores.topk(max_num)
det_preds = det_preds[valid_idx]
det_labels = det_labels[valid_idx]
nline = len(valid_idx)
result_dict['bbox'].append(det_preds)
result_dict['scores'].append(scores)
result_dict['labels'].append(det_labels)
result_dict['lines_bs_idx'].extend([i]*nline)
        # for the downstream polyline head
_bboxs = torch.cat(result_dict['bbox'], dim=0)
# quantize the data
result_dict['bbox_flat'] = torch.round(_bboxs).type(torch.int32)
result_dict['lines_cls'] = torch.cat(
result_dict['labels'], dim=0).long()
result_dict['lines_bs_idx'] = torch.tensor(
result_dict['lines_bs_idx'], device=device).long()
return result_dict
def assign_bev(feat, idx):
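    """Gather per-instance BEV feature maps: one feature map per entry in idx."""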
return feat[idx]
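

# ---------------------------------------------------------------------------
# Minimal, self-contained sketch (not part of the head above) of how the
# discrete branch is trained: per-coordinate logits over a coordinate
# vocabulary are wrapped in a Categorical, and the regression loss is the
# negative log-probability of the integer ground-truth coordinates. The sizes
# below (3 queries, a vocabulary of 400 bins) are illustrative assumptions,
# not values taken from any config.
if __name__ == '__main__':
    _logits = torch.randn(3, 4, 400)          # [num_query, 4 corners, vocab]
    _dist = Categorical(logits=_logits)
    _targets = torch.randint(0, 400, (3, 4))  # integer corner coordinates
    _weights = torch.zeros(3, 4)
    _weights[0] = 0.25                        # one matched query; weights normalized per query
    _nll = -(_dist.log_prob(_targets) * _weights).sum()
    print('discrete coordinate NLL:', _nll.item())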
import torch
import torch.nn as nn
import torch.nn.functional as F
import copy
from mmdet.models import HEADS
from mmcv.cnn import Conv2d
from mmcv.cnn import Linear, build_activation_layer, bias_init_with_prob
from mmcv.cnn.bricks.transformer import build_positional_encoding
from mmdet.models.utils import build_transformer
from mmcv.runner import force_fp32
from mmdet.core import (multi_apply, build_assigner, build_sampler,
reduce_mean)
from mmdet.models.utils.transformer import inverse_sigmoid
from mmdet.models import build_loss
from .base_map_head import BaseMapHead
@HEADS.register_module()
class DETRMapFixedNumHead(BaseMapHead):
def __init__(self,
num_classes=3,
in_channels=128,
num_query=100,
max_lines=50,
score_thre=0.2,
num_reg_fcs=2,
num_points=100,
iterative=False,
patch_size=None,
sync_cls_avg_factor=True,
transformer: dict = None,
positional_encoding: dict = None,
loss_cls: dict = None,
loss_reg: dict = None,
train_cfg: dict = None,
init_cfg=None,
**kwargs):
super().__init__()
assigner = train_cfg['assigner']
self.assigner = build_assigner(assigner)
# DETR sampling=False, so use PseudoSampler
sampler_cfg = dict(type='PseudoSampler')
self.sampler = build_sampler(sampler_cfg, context=self)
self.train_cfg = train_cfg
self.max_lines = max_lines
self.score_thre = score_thre
self.num_query = num_query
self.in_channels = in_channels
self.num_classes = num_classes
self.num_points = num_points
# branch
# if loss_cls.use_sigmoid:
if loss_cls['use_sigmoid']:
self.cls_out_channels = num_classes
else:
self.cls_out_channels = num_classes+1
self.iterative = iterative
self.num_reg_fcs = num_reg_fcs
if patch_size is not None:
self.register_buffer('patch_size', torch.tensor(
(patch_size[1], patch_size[0])),)
self._build_transformer(transformer, positional_encoding)
# loss params
self.loss_cls = build_loss(loss_cls)
self.bg_cls_weight = 0.1
if self.loss_cls.use_sigmoid:
self.bg_cls_weight = 0.0
self.sync_cls_avg_factor = sync_cls_avg_factor
self.reg_loss = build_loss(loss_reg)
# add reg, cls head for each decoder layer
self._init_layers()
self._init_branch()
self.init_weights()
def _init_layers(self):
"""Initialize some layer."""
self.input_proj = Conv2d(
self.in_channels, self.embed_dims, kernel_size=1)
# query_pos_embed & query_embed
self.query_embedding = nn.Embedding(self.num_query,
self.embed_dims)
def _build_transformer(self, transformer, positional_encoding):
# transformer
self.act_cfg = transformer.get('act_cfg',
dict(type='ReLU', inplace=True))
self.activate = build_activation_layer(self.act_cfg)
self.positional_encoding = build_positional_encoding(
positional_encoding)
self.transformer = build_transformer(transformer)
self.embed_dims = self.transformer.embed_dims
def _init_branch(self,):
"""Initialize classification branch and regression branch of head."""
fc_cls = Linear(self.embed_dims, self.cls_out_channels)
reg_branch = []
for _ in range(self.num_reg_fcs):
reg_branch.append(Linear(self.embed_dims, self.embed_dims))
reg_branch.append(nn.LayerNorm(self.embed_dims))
reg_branch.append(nn.ReLU())
reg_branch.append(Linear(self.embed_dims, self.num_points*2))
reg_branch = nn.Sequential(*reg_branch)
# add sigmoid or not
def _get_clones(module, N):
return nn.ModuleList([copy.deepcopy(module) for i in range(N)])
num_pred = self.transformer.decoder.num_layers
if self.iterative:
fc_cls = _get_clones(fc_cls, num_pred)
reg_branch = _get_clones(reg_branch, num_pred)
self.pre_branches = nn.ModuleDict([
('cls', fc_cls),
('reg', reg_branch), ])
def init_weights(self):
"""Initialize weights of the DeformDETR head."""
for p in self.input_proj.parameters():
if p.dim() > 1:
nn.init.xavier_uniform_(p)
self.transformer.init_weights()
# init prediction branch
for k, v in self.pre_branches.items():
for param in v.parameters():
if param.dim() > 1:
nn.init.xavier_uniform_(param)
# focal loss init
if self.loss_cls.use_sigmoid:
bias_init = bias_init_with_prob(0.01)
# for last layer
if isinstance(self.pre_branches['cls'], nn.ModuleList):
for m in self.pre_branches['cls']:
nn.init.constant_(m.bias, bias_init)
else:
m = self.pre_branches['cls']
nn.init.constant_(m.bias, bias_init)
def forward(self, bev_feature, img_metas=None):
'''
Args:
bev_feature (List[Tensor]): shape [B, C, H, W]
feature in bev view
img_metas
Outs:
preds_dict (Dict):
all_cls_scores (Tensor): Classification score of all
decoder layers, has shape
[nb_dec, bs, num_query, cls_out_channels].
all_lines_preds (Tensor):
[nb_dec, bs, num_query, num_points, 2].
'''
x = bev_feature[0]
x = self.input_proj(x) # only change feature size
B, C, H, W = x.shape
masks = x.new_zeros((B, H, W))
pos_embed = self.positional_encoding(masks)
# outs_dec: [nb_dec, bs, num_query, embed_dim]
outs_dec, _ = self.transformer(x, masks.type(torch.bool), self.query_embedding.weight,
pos_embed)
outputs = []
        for query_feat in outs_dec:
ocls = self.pre_branches['cls'](query_feat)
oreg = self.pre_branches['reg'](query_feat)
oreg = oreg.unflatten(dim=2, sizes=(self.num_points, 2))
            oreg[..., 0:2] = oreg[..., 0:2].sigmoid()  # normalized xy
outputs.append(
dict(
lines=oreg, # [bs, num_query, num_points, 2]
scores=ocls, # [bs, num_query, num_class]
)
)
return outputs
@force_fp32(apply_to=('score_pred', 'lines_pred', 'gt_lines'))
def _get_target_single(self,
score_pred,
lines_pred,
gt_lines,
gt_labels,
gt_bboxes_ignore=None):
"""
Compute regression and classification targets for one image.
Outputs from a single decoder layer of a single feature level are used.
Args:
cls_score (Tensor): Box score logits from a single decoder layer
for one image. Shape [num_query, cls_out_channels].
lines_pred (Tensor):
shape [num_query, num_points, 2].
gt_lines (Tensor):
shape [num_gt, num_points, 2].
gt_labels (torch.LongTensor)
shape [num_gt, ]
Returns:
tuple[Tensor]: a tuple containing the following for one image.
- labels (LongTensor): Labels of each image.
shape [num_query, 1]
- label_weights (Tensor]): Label weights of each image.
shape [num_query, 1]
- lines_target (Tensor): Lines targets of each image.
shape [num_query, num_points, 2]
- lines_weights (Tensor): Lines weights of each image.
shape [num_query, num_points, 2]
- pos_inds (Tensor): Sampled positive indices for each image.
- neg_inds (Tensor): Sampled negative indices for each image.
"""
num_pred_lines = lines_pred.size(0)
# assigner and sampler
assign_result = self.assigner.assign(preds=dict(lines=lines_pred, scores=score_pred,),
gts=dict(lines=gt_lines,
labels=gt_labels, ),
gt_bboxes_ignore=gt_bboxes_ignore)
sampling_result = self.sampler.sample(
assign_result, lines_pred, gt_lines)
pos_inds = sampling_result.pos_inds
        neg_inds = sampling_result.neg_inds
        pos_gt_inds = sampling_result.pos_assigned_gt_inds
# label targets
labels = gt_lines.new_full((num_pred_lines, ),
self.num_classes,
dtype=torch.long)
labels[pos_inds] = gt_labels[sampling_result.pos_assigned_gt_inds]
label_weights = gt_lines.new_ones(num_pred_lines)
# bbox targets
lines_target = torch.zeros_like(lines_pred)
lines_target[pos_inds] = sampling_result.pos_gt_bboxes
lines_weights = torch.zeros_like(lines_pred)
lines_weights[pos_inds] = 1.0
        return (labels, label_weights, lines_target, lines_weights,
                pos_inds, neg_inds, pos_gt_inds)
@force_fp32(apply_to=('preds', 'gts'))
def get_targets(self, preds, gts, gt_bboxes_ignore_list=None):
"""
Compute regression and classification targets for a batch image.
Outputs from a single decoder layer of a single feature level are used.
Args:
cls_scores_list (list[Tensor]): Box score logits from a single
decoder layer for each image with shape [num_query,
cls_out_channels].
lines_preds_list (list[Tensor]): [num_query, num_points, 2].
gt_lines_list (list[Tensor]): Ground truth lines for each image
with shape (num_gts, num_points, 2)
gt_labels_list (list[Tensor]): Ground truth class indices for each
image with shape (num_gts, ).
gt_bboxes_ignore_list (list[Tensor], optional): Bounding
boxes which can be ignored for each image. Default None.
Returns:
tuple: a tuple containing the following targets.
- labels_list (list[Tensor]): Labels for all images.
- label_weights_list (list[Tensor]): Label weights for all \
images.
- lines_targets_list (list[Tensor]): Lines targets for all \
images.
- lines_weight_list (list[Tensor]): Lines weights for all \
images.
- num_total_pos (int): Number of positive samples in all \
images.
- num_total_neg (int): Number of negative samples in all \
images.
"""
        assert gt_bboxes_ignore_list is None, \
            'Only supports gt_bboxes_ignore set to None.'
        (labels_list, label_weights_list,
         lines_targets_list, lines_weights_list,
         pos_inds_list, neg_inds_list, pos_gt_inds_list) = multi_apply(
            self._get_target_single,
            preds['scores'], preds['lines'],
            gts['lines'], gts['labels'],
            gt_bboxes_ignore=gt_bboxes_ignore_list)
num_total_pos = sum((inds.numel() for inds in pos_inds_list))
num_total_neg = sum((inds.numel() for inds in neg_inds_list))
new_gts = dict(
labels=labels_list,
label_weights=label_weights_list,
lines_targets=lines_targets_list,
lines_weights=lines_weights_list,
)
        return new_gts, num_total_pos, num_total_neg, pos_inds_list, pos_gt_inds_list
@force_fp32(apply_to=('preds', 'gts'))
def loss_single(self,
preds: dict,
gts: dict,
gt_bboxes_ignore_list=None,
reduction='none'):
"""
Loss function for outputs from a single decoder layer of a single
feature level.
Args:
cls_scores (Tensor): Box score logits from a single decoder layer
for all images. Shape [bs, num_query, cls_out_channels].
lines_preds (Tensor):
shape [bs, num_query, num_points, 2].
gt_lines_list (list[Tensor]):
with shape (num_gts, num_points, 2)
gt_labels_list (list[Tensor]): Ground truth class indices for each
image with shape (num_gts, ).
gt_bboxes_ignore_list (list[Tensor], optional): Bounding
boxes which can be ignored for each image. Default None.
Returns:
dict[str, Tensor]: A dictionary of loss components for outputs from
a single decoder layer.
"""
# get target for each sample
        new_gts, num_total_pos, num_total_neg, pos_inds_list, pos_gt_inds_list =\
            self.get_targets(preds, gts, gt_bboxes_ignore_list)
# batched all data
for k, v in new_gts.items():
new_gts[k] = torch.cat(v, 0)
# construct weighted avg_factor to match with the official DETR repo
cls_avg_factor = num_total_pos * 1.0 + \
num_total_neg * self.bg_cls_weight
if self.sync_cls_avg_factor:
cls_avg_factor = reduce_mean(
preds['scores'].new_tensor([cls_avg_factor]))
cls_avg_factor = max(cls_avg_factor, 1)
# classification loss
cls_scores = preds['scores'].reshape(-1, self.cls_out_channels)
loss_cls = self.loss_cls(
cls_scores, new_gts['labels'], new_gts['label_weights'], avg_factor=cls_avg_factor)
        # Compute the average number of gt boxes across all GPUs for
        # normalization purposes.
num_total_pos = loss_cls.new_tensor([num_total_pos])
num_total_pos = torch.clamp(reduce_mean(num_total_pos), min=1).item()
# regression L1 loss
lines_preds = preds['lines'].reshape(-1, self.num_points, 2)
if reduction == 'none': # For performance analysis
loss_reg = self.reg_loss(
lines_preds, new_gts['lines_targets'], new_gts['lines_weights'], reduction_override=reduction, avg_factor=num_total_pos)
else:
loss_reg = self.reg_loss(
lines_preds, new_gts['lines_targets'], new_gts['lines_weights'], avg_factor=num_total_pos)
loss_dict = dict(
cls=loss_cls,
reg=loss_reg,
)
        return (loss_dict, pos_inds_list, pos_gt_inds_list)
@force_fp32(apply_to=('gt_lines_list', 'preds_dicts'))
def loss(self,
gts: dict,
preds_dicts: dict,
gt_bboxes_ignore=None,
reduction='mean'):
"""
Loss Function.
Args:
gt_lines_list (list[Tensor]): Ground truth lines for each image
with shape (num_gts, num_points, 2)
gt_labels_list (list[Tensor]): Ground truth class indices for each
image with shape (num_gts, ).
preds_dicts:
all_cls_scores (Tensor): Classification score of all
decoder layers, has shape
[nb_dec, bs, num_query, cls_out_channels].
all_lines_preds (Tensor):
[nb_dec, bs, num_query, num_points, 2].
gt_bboxes_ignore (list[Tensor], optional): Bounding boxes
which can be ignored for each image. Default None.
Returns:
dict[str, Tensor]: A dictionary of loss components.
"""
assert gt_bboxes_ignore is None, \
f'{self.__class__.__name__} only supports ' \
f'for gt_bboxes_ignore setting to None.'
        # There may be multiple decoder layers; compute the loss per layer.
losses, pos_inds_lists, pos_gt_inds_lists = multi_apply(
self.loss_single,
preds_dicts,
gts=gts,
gt_bboxes_ignore_list=gt_bboxes_ignore,
reduction=reduction)
# Format the losses
loss_dict = dict()
# loss from the last decoder layer
for k, v in losses[-1].items():
loss_dict[k] = v
# Loss from other decoder layers
num_dec_layer = 0
for loss in losses[:-1]:
for k, v in loss.items():
loss_dict[f'd{num_dec_layer}.{k}'] = v
num_dec_layer += 1
return loss_dict, pos_inds_lists, pos_gt_inds_lists
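    # The returned loss_dict keys the last decoder layer plainly and prefixes
    # earlier layers, e.g. for three decoder layers:
    # {'cls', 'reg', 'd0.cls', 'd0.reg', 'd1.cls', 'd1.reg'}.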
def post_process(self, preds_dict, tokens, gts):
'''
Args:
preds_dict:
all_cls_scores (Tensor): Classification score of all
decoder layers, has shape
[nb_dec, bs, num_query, cls_out_channels].
all_lines_preds (Tensor):
[nb_dec, bs, num_query, num_points, 2].
Outs:
ret_list (List[Dict]) with length as bs
list of result dict for each sample in the batch
Dict keys:
'lines': numpy.array of shape [num_pred, num_points, 2]
'scores': numpy.array of shape [num_pred, ]
after sigmoid
'labels': numpy.array of shape [num_pred, ]
dtype=long
'''
preds = preds_dict[-1]
batched_cls_scores = preds['scores']
batched_lines_preds = preds['lines']
batch_size = batched_cls_scores.size(0)
ret_list = []
for i in range(len(tokens)):
cls_scores = batched_cls_scores[i]
lines_preds = batched_lines_preds[i]
max_num = self.max_lines
if cls_scores.shape[-1] > self.num_classes:
scores, labels = F.softmax(cls_scores, dim=-1)[..., :-1].max(-1)
                final_scores, bbox_index = scores.topk(max_num)
final_lines = lines_preds[bbox_index]
final_labels = labels[bbox_index]
else:
cls_scores = cls_scores.sigmoid()
                final_scores, indexes = cls_scores.view(-1).topk(max_num)
final_labels = indexes % self.num_classes
bbox_index = indexes // self.num_classes
final_lines = lines_preds[bbox_index]
ret_dict_single = {
'token': tokens[i],
'lines': final_lines.detach().cpu().numpy() * 2 - 1,
'scores': final_scores.detach().cpu().numpy(),
'labels': final_labels.detach().cpu().numpy(),
'nline': len(final_lines),
}
if gts is not None:
lines_gt = gts['lines'][i].detach().cpu().numpy()
labels_gt = gts['labels'][i].detach().cpu().numpy()
ret_dict_single['groundTruth'] = {
'token': tokens[i],
'nline': lines_gt.shape[0],
'labels': labels_gt,
'lines': lines_gt * 2 - 1,
}
ret_list.append(ret_dict_single)
return ret_list
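

# ---------------------------------------------------------------------------
# Minimal, self-contained sketch (not part of the head above) of the sigmoid
# top-k selection used in post_process: queries and classes are ranked
# jointly on the flattened [num_query * num_classes] scores, and each flat
# index is split back into a query index and a class label. Sizes are
# illustrative assumptions.
if __name__ == '__main__':
    num_query, num_classes, max_lines = 10, 3, 4
    cls_scores = torch.rand(num_query, num_classes)  # scores after sigmoid
    scores, flat_idx = cls_scores.view(-1).topk(max_lines)
    labels = flat_idx % num_classes                  # class of each pick
    query_idx = flat_idx // num_classes              # originating query
    print(scores, labels, query_idx)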