## InternImage-based Baseline for CVPR23 Occupancy Prediction Challenge
We improve our baseline with a more powerful image backbone, **InternImage**, which has demonstrated excellent performance across a series of leaderboards and benchmarks such as *COCO* and *nuScenes*.
### 1. Requirements
```bash
python>=3.8
torch==1.12 # recommend
mmcv-full>=1.5.0
mmdet==2.24.0
mmsegmentation==0.24.0
timm
numpy==1.22
mmdet3d==0.18.1 # recommend
```
### 2. Install DCNv3 for InternImage
```bash
cd projects/mmdet3d_plugin/bevformer/backbones/ops_dcnv3
bash make.sh # requires torch>=1.10
```
### 3. Train with InternImage-Small
```bash
./tools/dist_train.sh projects/configs/bevformer/bevformer_intern-s_occ.py 8 # consumes less than 14G memory
```
## Table of Contents
- [Rules for Occupancy Challenge](#rules-for-occupancy-challenge)
- [Evaluation Metrics](#evaluation-metrics)
  - [mIoU](#miou)
  - [F-Score](#f-score)
- [Data](#data)
  - [Basic Information](#basic-information)
  - [Download](#download)
  - [Hierarchy](#hierarchy)
  - [Known Issues](#known-issues)
- [Getting Started](#getting-started)
- [Timeline](#challenge-timeline)
- [Leaderboard](#leaderboard)
- [License](#license)
## Introduction
Understanding the 3D surroundings, including background stuff and foreground objects, is important for autonomous driving. In the traditional 3D object detection task, a foreground object is represented by a 3D bounding box. However, the geometric shape of an object can be complex and cannot be captured by a simple 3D box, and the perception of the background is absent. The goal of this task is to predict the 3D occupancy of the scene. To this end, we provide a large-scale occupancy benchmark based on the nuScenes dataset. The benchmark uses a voxelized representation of 3D space, and the occupancy state and semantics of each voxel are jointly estimated. The complexity of this task lies in the dense prediction of 3D space given only surround-view images.
<p align="right">(<a href="#top">back to top</a>)</p>
## Task Definition
Given images from multiple cameras, the goal is to predict the current occupancy state and semantics of each voxel grid in the scene. The voxel state is predicted to be either free or occupied. If a voxel is occupied, its semantic class needs to be predicted as well. Besides, we provide a binary observed/unobserved mask for each frame: an unobserved voxel is a grid cell that is invisible in the current camera observation, and such voxels are ignored in the evaluation stage.
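To make the prediction target concrete, here is a toy sketch of the expected output format, assuming the voxel grid described in the Data section below (a 200 x 200 x 16 grid with labels 0-17, where 17 means `free`); the array name and values are purely illustrative:

```python
import numpy as np

FREE = 17  # label 17 marks unoccupied (free) space

# Start from an empty scene: every voxel predicted as free.
pred_semantics = np.full((200, 200, 16), FREE, dtype=np.uint8)

# Mark a single voxel as occupied by class 0 (illustrative only).
pred_semantics[100, 100, 2] = 0

# The binary occupancy state follows directly from the semantics.
occupied = pred_semantics != FREE
print("occupied voxels:", int(occupied.sum()))
```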
### Rules for Occupancy Challenge
* We allow using the annotations provided in the nuScenes dataset; during inference, the input modality of the model must be camera only.
* Other public or private datasets are not allowed in the challenge in any form (except ImageNet- or MS-COCO-pretrained image backbones).
* No future frame is allowed during inference.
* To check compliance, we will ask participants to submit a technical report to the challenge committee, and award winners will be asked to give a public talk about their method.
<p align="right">(<a href="#top">back to top</a>)</p>
## Evaluation Metrics
### mIoU
The leaderboard ranking for this challenge is determined by the mean intersection-over-union (mIoU) over all semantic classes.
### F-Score
As a secondary metric, we also measure the F-score as the harmonic mean of the completeness $P_c$ and the accuracy $P_a$, where $P_a$ is the percentage of predicted voxels that are within a distance threshold of the ground-truth voxels, and $P_c$ is the percentage of ground-truth voxels that are within a distance threshold of the predicted voxels.
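For reference, a sketch of the two metrics under the standard definitions (assuming per-class true positives $TP_c$, false positives $FP_c$, and false negatives $FN_c$ over $C$ semantic classes, computed on camera-observed voxels):

$$
\text{mIoU} = \frac{1}{C}\sum_{c=1}^{C}\frac{TP_c}{TP_c + FP_c + FN_c},
\qquad
F\text{-score} = \frac{2\, P_a\, P_c}{P_a + P_c}.
$$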
<p align="right">(<a href="#top">back to top</a>)</p>
## Data
<divid="top"align="center">
<imgsrc="./figs/mask.jpg">
</div>
<divid="top"align="center">
Figure 1. Semantic labels (left), visibility masks in the LiDAR (middle) and the camera (right) view. Grey voxels are unobserved in LiDAR view and white voxels are observed in the accumulative LiDAR view but unobserved in the current camera view.
</div>
### Basic Information
<div align="center">

| Type | Info |
| :----: | :----: |
| mini | 404 |
| train | 28,130 |
| val | 6,019 |
| test | 6,006 |
| cameras | 6 |
| voxel size | 0.4m |
| range | [-40m, -40m, -1m, 40m, 40m, 5.4m]|
| volume size | [200, 200, 16]|
| classes | 18 (labels 0-17) |
</div>
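Note that the volume size follows from the range and the voxel size: (40 - (-40)) / 0.4 = 200 voxels along x and y, and (5.4 - (-1)) / 0.4 = 16 voxels along z.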
- The dataset contains 18 classes. The definitions of classes 0 to 16 are the same as in the [nuScenes-lidarseg](https://github.com/nutonomy/nuscenes-devkit/blob/fcc41628d41060b3c1a86928751e5a571d2fc2fa/python-sdk/nuscenes/eval/lidarseg/README.md) dataset. Label 17 represents voxels that are not occupied by anything and is named `free`. The voxel semantics for each sample frame are given as `[semantics]` in `labels.npz`.
- <strong>How are the labels annotated?</strong> The ground-truth occupancy labels are derived from accumulated LiDAR scans with human annotations.
  - If a voxel reflects a LiDAR point, it is assigned the same semantic label as that LiDAR point;
  - If a LiDAR beam passes through a voxel in the air, the voxel is set to `free`;
  - Otherwise, the voxel is set to unknown, or unobserved. This happens because of the sparsity of the LiDAR scans or because the voxel is occluded, e.g. by a wall. In the dataset, `[mask_lidar]` is a 0-1 binary mask where 0 represents unobserved voxels. As shown in Fig. 1(b), grey voxels are unobserved. Due to the limitation of the visualization tool, we only show unobserved voxels at the same height as the ground.
- <strong>Camera visibility.</strong> Since the LiDAR and the cameras are mounted at different positions, some voxels observed in the LiDAR view are not visible to the cameras. Because we focus on a vision-centric task, we provide a binary voxel mask `[mask_camera]`, indicating whether each voxel is observed in the current camera view. As shown in Fig. 1(c), white voxels are observed in the accumulative LiDAR view but unobserved in the current camera view.
- Both `[mask_lidar]` and `[mask_camera]` are optional for training, and participants do not need to predict the masks. Only `[mask_camera]` is used for evaluation; unobserved voxels are excluded when computing the F-score and mIoU (see the loading sketch below).
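A minimal loading sketch, assuming the `gts/` layout described in the Hierarchy section; the placeholder path and the shapes (taken from the table above) should be adjusted to your local setup:

```python
import numpy as np

# Hypothetical path; substitute a real [scene_name] and [frame_token] from gts/.
labels = np.load("gts/[scene_name]/[frame_token]/labels.npz")

semantics = labels["semantics"]      # (200, 200, 16), classes 0-16 plus 17 = free
mask_lidar = labels["mask_lidar"]    # (200, 200, 16), 0/1, 1 = observed in the LiDAR view
mask_camera = labels["mask_camera"]  # (200, 200, 16), 0/1, 1 = observed in the camera view

# Only camera-observed voxels count towards the mIoU / F-score evaluation.
observed = mask_camera.astype(bool)
print("evaluated voxels:", int(observed.sum()), "of", observed.size)
```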
### Download
The files mentioned below can also be downloaded via <img src="https://user-images.githubusercontent.com/29263416/222076048-21501bac-71df-40fa-8671-2b5f8013d2cd.png" alt="OpenDataLab" width="18"/> [OpenDataLab](https://opendatalab.com/CVPR2023-3D-Occupancy/download). It is recommended to use the provided [command line interface](https://opendatalab.com/CVPR2023-3D-Occupancy/cli) for acceleration.
* The mini and trainval data contain three parts -- `imgs`, `gts` and `annotations`. The `imgs` data follow the same hierarchy as the image samples in the original nuScenes dataset.
### Hierarchy
The hierarchy of folder `Occpancy3D-nuScenes-V1.0/` is described below:
- `imgs/` contains images captured by the various cameras.
- `gts/` contains the ground truth of each sample. `[scene_name]` specifies a sequence of frames, and `[frame_token]` specifies a single frame in a sequence.
- `annotations.json` contains the meta information of the dataset.
- `labels.npz` contains `[semantics]`, `[mask_lidar]`, and `[mask_camera]` for each frame.
```
annotations {
    "train_split": ["scene-0001", ...],         <list> -- training dataset split by scene_name
    "val_split": ["scene-0003", ...],           <list> -- validation dataset split by scene_name
    "scene_infos": {                            <dict> -- meta infos of the scenes
        [scene_name]: {                         <str> -- name of the scene
            [frame_token]: {                    <str> -- samples in a scene, ordered by time
                "timestamp":                    <str> -- timestamp (or token), unique per sample
                "camera_sensor": {              <dict> -- meta infos of the camera sensor
                    "intrinsic":                <float> [3, 3] -- intrinsic camera calibration
                    "extrinsic": {              <dict> -- extrinsic parameters of the camera
                        "translation":          <float> [3] -- coordinate system origin in meters
                        "rotation":             <float> [4] -- coordinate system orientation as quaternion
                    },
                    "ego_pose": {               <dict> -- vehicle pose of the camera
                        "translation":          <float> [3] -- coordinate system origin in meters
                        "rotation":             <float> [4] -- coordinate system orientation as quaternion
                    }
                },
                "ego_pose": {                   <dict> -- vehicle pose
                    "translation":              <float> [3] -- coordinate system origin in meters
                    "rotation":                 <float> [4] -- coordinate system orientation as quaternion
                },
                "gt_path":                      <str> -- corresponding 3D voxel gt path, *.npz
                "next":                         <str> -- frame_token of the next keyframe in the scene
                "prev":                         <str> -- frame_token of the previous keyframe in the scene
            },
            ...
        }
    }
}
```
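A minimal sketch of walking this structure in Python, assuming `annotations.json` follows the layout above (the file path is illustrative):

```python
import json

# Illustrative path; point this at the annotations.json you downloaded.
with open("Occpancy3D-nuScenes-V1.0/trainval/annotations.json") as f:
    annotations = json.load(f)

print(len(annotations["train_split"]), "training scenes,",
      len(annotations["val_split"]), "validation scenes")

for scene_name, frames in annotations["scene_infos"].items():
    for frame_token, frame in frames.items():
        gt_path = frame["gt_path"]        # path to this frame's labels.npz
        ego_pose = frame["ego_pose"]      # {"translation": [...], "rotation": [...]}
        cameras = frame["camera_sensor"]  # camera calibration and pose for this frame
```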
### Known Issues
- nuScenes ([issue #721](https://github.com/nutonomy/nuscenes-devkit/issues/721)) lacks translation along the z-axis, which makes it hard to recover accurate 6-DoF localization and leads to misalignment of point clouds when accumulating them over whole scenes. Ground stratification occurs in several frames.
<p align="right">(<a href="#top">back to top</a>)</p>
## Getting Started
We provide a baseline model based on [BEVFormer](https://github.com/fundamentalvision/BEVFormer).
Please refer to [getting_started](docs/getting_started.md) for details.
<p align="right">(<a href="#top">back to top</a>)</p>
## Benchmark and Leaderboard
We will provide an initial benchmark on the OpenLane-V2 dataset; please stay tuned for the release.
Currently, we maintain leaderboards on the [*val*](https://paperswithcode.com/sota/3d-lane-detection-on-openlane-v2-2) and [*test*](https://eval.ai/web/challenges/challenge-page/1925/leaderboard/4549) splits of `subset_A`.
<p align="right">(<a href="#top">back to top</a>)</p>
## Highlights - what makes us unique?
### The world is three-dimensional - Introducing 3D lane
Previous datasets annotate lanes on images in the perspective view. Such 2D annotations are insufficient for real-world requirements.
Following the [OpenLane](https://github.com/OpenDriveLab/OpenLane) dataset, we annotate **lanes in 3D space** to reflect their properties in the real world.
### Be aware of traffic signals - Recognizing extremely small road elements
Not only preventing collisions but also facilitating efficiency is essential.
Vehicles follow predefined traffic rules to regulate their own behavior and to cooperate with others, ensuring a safe and efficient traffic system.
**Traffic elements** on the roads, such as traffic lights and road signs, provide practical and real-time information.
### Beyond perception - Topology Reasoning between lane and road elements
A traffic element is only valid for its corresponding lanes.
Following the wrong signals would be catastrophic.
Also, lanes have predecessors and successors, which together build up the road map.
Autonomous vehicles are required to **reason** about the **topology relationships** to drive in the right way.
In this dataset, we hope to shed light on the task of **scene structure perception and reasoning**.
### Data scale and diversity matter - Building on top of awesome benchmarks
Experience from a sunny day does not apply to dancing snowflakes.
For machine learning, data is the must-have food.
We provide annotations on data collected in various cities, from Austin to Singapore and from Boston to Miami.
The **diversity** of data enables models to generalize in different atmospheres and landscapes.
<p align="right">(<a href="#top">back to top</a>)</p>
## Task
The primary task of the dataset is **scene structure perception and reasoning**, which requires the model to recognize the dynamic drivable states of lanes in the surrounding environment.
The challenge of this dataset includes not only detecting lane centerlines and traffic elements, but also recognizing the attributes of traffic elements and the topology relationships among the detected objects.
We define the **[OpenLane-V2 Score (OLS)](./docs/metrics.md#openlane-v2-score)**, which is the average of various metrics covering different aspects of the primary task:
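A sketch of the score, following the definition in the linked metrics page (here $f$ denotes a rescaling function, the square root in the official definition; refer to the metrics doc for the authoritative form):

$$
\text{OLS} = \frac{1}{4}\left[\text{DET}_{l} + \text{DET}_{t} + f(\text{TOP}_{ll}) + f(\text{TOP}_{lt})\right].
$$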
The metrics of different subtasks are described below.
### 3D Lane Detection 🛣️
The [OpenLane](https://github.com/OpenDriveLab/OpenLane) dataset, the first real-world and, to date, largest-scale 3D lane dataset, provides lane line annotations in 3D space.
Similarly, we annotate 3D lane centerlines and adopt the F-Score for evaluating predictions of undirected lane centerlines.
Furthermore, we define the subtask of 3D lane detection as detecting directed 3D lane centerlines from the given multi-view images covering the whole horizontal FOV.
The instance-level evaluation metric of average precision $\text{DET}_{l}$ is utilized to measure the detection performance on lane centerlines (l).
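As a sketch (assuming the usual average-precision formulation; the exact matching criterion is specified in the metrics doc), the metric averages AP over a set $T$ of match thresholds:

$$
\text{DET}_{l} = \frac{1}{|T|}\sum_{t \in T} \text{AP}_t .
$$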
<palign="center">
<imgsrc="./imgs/lane.gif"width="696px">
</p>
### Traffic Element Recognition 🚥
Traffic elements and their attributes provide crucial information for autonomous vehicles.
The attribute represents the semantic meaning of a traffic element, such as the red color of a traffic light.
In this subtask, given an image in the front view, the locations of traffic elements (traffic lights and road signs) and their attributes need to be perceived simultaneously.
Compared to typical 2D detection datasets, the challenge is that the size of traffic elements is tiny due to the large scale of outdoor environments.
Similar to a typical 2D detection task, the metric $\text{DET}_{t}$ is utilized to measure the detection performance on traffic elements (t), averaged over the different attributes.
### Topology Recognition 🕸️
We are the first to define the task of recognizing topology relationships in the field of autonomous driving.
Given multi-view images, the model learns to recognize the topology relationships among lane centerlines and between lane centerlines and traffic elements.
The most similar task is link prediction in the field of graph learning, in which the vertices are given and only the edges are predicted by models.
In our case, both vertices and edges are unknown for the model.
Thus, lane centerlines and traffic elements need to be detected first, and then the topology relationships are built.
Adapted from the task of link prediction, $\text{TOP}$ is used to score the topology among lane centerlines (ll) and between lane centerlines and traffic elements (lt).
<palign="center">
<imgsrc="./imgs/topology.gif"width="696px">
</p>
<p align="right">(<a href="#top">back to top</a>)</p>
## Data
The OpenLane-V2 dataset is a large-scale dataset for scene structure perception and reasoning in the field of autonomous driving.
Following [OpenLane](https://github.com/OpenDriveLab/OpenLane), the first 3D lane dataset, we provide lane annotations in 3D space.
The difference is that instead of lane lines, we annotate lane centerlines, which can serve as reference trajectories for autonomous vehicles.
Besides, we provide annotations of traffic elements (traffic lights and road signs) and their attributes, as well as the topology relationships among lane centerlines and between lane centerlines and traffic elements.
The dataset is divided into two subsets.
**The `subset_A` serves as the primary subset and is utilized for the coming challenges and leaderboard, in which no external data, including the other subset, is allowed**.
The `subset_B` can be used to test the generalization ability of the model.
For more details, please refer to the corresponding pages: [use of data](./data/README.md), [notes of annotation](./docs/annotation.md), and [dataset statistics](./docs/statistics.md).
[Download](./data/README.md#download) now to discover our dataset!
<p align="right">(<a href="#top">back to top</a>)</p>
## Devkit
We provide a devkit for easy access to the OpenLane-V2 dataset.
After installing the package, the dataset can be used through the API of `openlanev2`, e.g., for loading images, loading metadata, and evaluating results.
For more details on the API, please refer to [devkit](./docs/devkit.md).
<p align="right">(<a href="#top">back to top</a>)</p>
## Get Started
Please follow the steps below to get familiar with the OpenLane-V2 dataset.
1. Install the environment required for setting up the dataset and devkit (see [devkit](./docs/devkit.md) for details).
2. Download the data as described in [use of data](./data/README.md).
3. Run the [tutorial](./tutorial.ipynb) in a Jupyter notebook to get familiar with the dataset and devkit.
<p align="right">(<a href="#top">back to top</a>)</p>
## Train a Model
Plug-ins for prevailing deep learning frameworks are provided for training models on the OpenLane-V2 dataset.
We appreciate your valuable feedback and contributions to plug-ins on different frameworks.
### mmdet3d
The [plug-in](./plugin/mmdet3d/) to MMDetection3d is built on top of [mmdet3d v1.0.0rc6](https://github.com/open-mmlab/mmdetection3d/tree/v1.0.0rc6) and tested under:
- Python 3.8.15
- PyTorch 1.9.1
- CUDA 11.1
- GCC 5.4.0
- mmcv-full==1.5.2
- mmdet==2.26.0
- mmsegmentation==0.29.1
Please follow the [instructions](https://github.com/open-mmlab/mmdetection3d/blob/v1.0.0rc6/docs/en/getting_started.md) to install mmdet3d.
Assuming OpenLane-V2 is installed under `OpenLane-V2/` and mmdet3d is built under `mmdetection3d/`, create a soft link to the plug-in file:
```
└── mmdetection3d
└── projects
├── example_project
└── openlanev2 -> OpenLane-V2/plugin/mmdet3d
```
Then you can train or evaluate a model using the config `mmdetection3d/projects/openlanev2/configs/baseline.py`, adapting the paths in the config as needed.
Options can be passed to enable supported functions during evaluation, such as `--eval-options dump=True dump_dir=/PATH/TO/DUMP` to save a pickle file for submission, and `--eval-options visualization=True visualization_dir=/PATH/TO/VIS` for visualization.
<p align="right">(<a href="#top">back to top</a>)</p>
## Citation
Please use the following citation when referencing OpenLane-V2:
```bibtex
@misc{openlanev2_dataset,
  author = {{OpenLane-V2 Dataset Contributors}},
  title = {{OpenLane-V2: The World's First Perception and Reasoning Benchmark for Scene Structure in Autonomous Driving}},
  year = {2023},
}
```
<p align="right">(<a href="#top">back to top</a>)</p>
## License
Our dataset is built on top of the [nuScenes](https://www.nuscenes.org/nuscenes) and [Argoverse 2](https://www.argoverse.org/av2.html) datasets.
Before using the OpenLane-V2 dataset, you should register on their websites and agree to the terms of use of [nuScenes](https://www.nuscenes.org/nuscenes) and [Argoverse 2](https://www.argoverse.org/av2.html), respectively.
All code within this repository is under [Apache License 2.0](./LICENSE).
<p align="right">(<a href="#top">back to top</a>)</p>
Then, you could use `best.pth` as usual, e.g., `model.load_state_dict(torch.load('best.pth'))`.
> Due to the lack of computational resources, the DeepSpeed training scripts are currently only verified for the first few epochs. Please file an issue if you have problems reproducing the whole training.
### Extracting Intermediate Features
To extract the features of an intermediate layer, you could use `extract_feature.py`.
For example, you can extract the features of `b.png` from the layers `patch_embed` and `levels.0.downsample` and save them to `b.pth`.