# Copyright (c) OpenMMLab. All rights reserved.
import argparse
from typing import Tuple

import cv2
import mmcv
import numpy as np
import torch
import torch.nn as nn
from mmcv.transforms import Compose
from mmengine.utils import track_iter_progress

from mmdet.apis import init_detector
from mmdet.registry import VISUALIZERS
from mmdet.structures import DetDataSample

try:
    import ffmpegcv
except ImportError:
    raise ImportError(
        'Please install ffmpegcv with:\n\n pip install ffmpegcv')


def parse_args():
    parser = argparse.ArgumentParser(
        description='MMDetection video demo with GPU acceleration')
    parser.add_argument('video', help='Video file')
    parser.add_argument('config', help='Config file')
    parser.add_argument('checkpoint', help='Checkpoint file')
    parser.add_argument(
        '--device', default='cuda:0', help='Device used for inference')
    parser.add_argument(
        '--score-thr', type=float, default=0.3, help='Bbox score threshold')
    parser.add_argument('--out', type=str, help='Output video file')
    parser.add_argument('--show', action='store_true', help='Show video')
    parser.add_argument(
        '--nvdecode', action='store_true', help='Use NVIDIA decoder')
    parser.add_argument(
        '--wait-time',
        type=float,
        default=1,
        help='The interval of show (s), 0 is block')
    args = parser.parse_args()
    return args


def prefetch_batch_input_shape(model: nn.Module,
                               ori_wh: Tuple[int, int]) -> dict:
    cfg = model.cfg
    w, h = ori_wh
    cfg.test_dataloader.dataset.pipeline[0].type = 'LoadImageFromNDArray'
    test_pipeline = Compose(cfg.test_dataloader.dataset.pipeline)
    data = {'img': np.zeros((h, w, 3), dtype=np.uint8), 'img_id': 0}
    data = test_pipeline(data)
    data['inputs'] = [data['inputs']]
    data['data_samples'] = [data['data_samples']]
    data_sample = model.data_preprocessor(data, False)['data_samples']
    batch_input_shape = data_sample[0].batch_input_shape
    return batch_input_shape


def pack_data(frame_resize: np.ndarray, batch_input_shape: Tuple[int, int],
              ori_shape: Tuple[int, int]) -> dict:
    assert frame_resize.shape[:2] == batch_input_shape
    data_sample = DetDataSample()
    data_sample.set_metainfo({
        'img_shape':
        batch_input_shape,
        'ori_shape':
        ori_shape,
        'scale_factor': (batch_input_shape[0] / ori_shape[0],
                         batch_input_shape[1] / ori_shape[1])
    })
    frame_resize = torch.from_numpy(frame_resize).permute((2, 0, 1)).cuda()
    data = {'inputs': [frame_resize], 'data_samples': [data_sample]}
    return data


def main():
    args = parse_args()
    assert args.out or args.show, \
        ('Please specify at least one operation (save/show the '
         'video) with the argument "--out" or "--show"')

    model = init_detector(args.config, args.checkpoint, device=args.device)

    # init visualizer
    visualizer = VISUALIZERS.build(model.cfg.visualizer)
    # the dataset_meta is loaded from the checkpoint and
    # then pass to the model in init_detector
    visualizer.dataset_meta = model.dataset_meta

    if args.nvdecode:
        VideoCapture = ffmpegcv.VideoCaptureNV
    else:
        VideoCapture = ffmpegcv.VideoCapture
    video_origin = VideoCapture(args.video)

    batch_input_shape = prefetch_batch_input_shape(
        model, (video_origin.width, video_origin.height))
    ori_shape = (video_origin.height, video_origin.width)
    resize_wh = batch_input_shape[::-1]
    video_resize = VideoCapture(
        args.video,
        resize=resize_wh,
        resize_keepratio=True,
        resize_keepratioalign='topleft')

    video_writer = None
    if args.out:
        video_writer = ffmpegcv.VideoWriter(args.out, fps=video_origin.fps)

    with torch.no_grad():
        for i, (frame_resize, frame_origin) in enumerate(
                zip(track_iter_progress(video_resize), video_origin)):
            data = pack_data(frame_resize, batch_input_shape, ori_shape)
            result = model.test_step(data)[0]

            visualizer.add_datasample(
                name='video',
                image=frame_origin,
                data_sample=result,
                draw_gt=False,
                show=False,
                pred_score_thr=args.score_thr)

            frame_mask = visualizer.get_image()

            if args.show:
                cv2.namedWindow('video', 0)
                mmcv.imshow(frame_mask, 'video', args.wait_time)
            if args.out:
                video_writer.write(frame_mask)

    if video_writer:
        video_writer.release()
    video_origin.release()
    video_resize.release()

    cv2.destroyAllWindows()


if __name__ == '__main__':
    main()
# Copyright (c) OpenMMLab. All rights reserved.
import argparse

import cv2
import mmcv
import torch

from mmdet.apis import inference_detector, init_detector
from mmdet.registry import VISUALIZERS


def parse_args():
    parser = argparse.ArgumentParser(description='MMDetection webcam demo')
    parser.add_argument('config', help='test config file path')
    parser.add_argument('checkpoint', help='checkpoint file')
    parser.add_argument(
        '--device', type=str, default='cuda:0', help='CPU/CUDA device option')
    parser.add_argument(
        '--camera-id', type=int, default=0, help='camera device id')
    parser.add_argument(
        '--score-thr', type=float, default=0.5, help='bbox score threshold')
    args = parser.parse_args()
    return args


def main():
    args = parse_args()

    # build the model from a config file and a checkpoint file
    device = torch.device(args.device)
    model = init_detector(args.config, args.checkpoint, device=device)

    # init visualizer
    visualizer = VISUALIZERS.build(model.cfg.visualizer)
    # the dataset_meta is loaded from the checkpoint and
    # then pass to the model in init_detector
    visualizer.dataset_meta = model.dataset_meta

    camera = cv2.VideoCapture(args.camera_id)

    print('Press "Esc", "q" or "Q" to exit.')
    while True:
        ret_val, img = camera.read()
        result = inference_detector(model, img)

        img = mmcv.imconvert(img, 'bgr', 'rgb')
        visualizer.add_datasample(
            name='result',
            image=img,
            data_sample=result,
            draw_gt=False,
            pred_score_thr=args.score_thr,
            show=False)

        img = visualizer.get_image()
        img = mmcv.imconvert(img, 'bgr', 'rgb')
        cv2.imshow('result', img)

        ch = cv2.waitKey(1)
        if ch == 27 or ch == ord('q') or ch == ord('Q'):
            break


if __name__ == '__main__':
    main()
ARG PYTORCH="1.9.0"
ARG CUDA="11.1"
ARG CUDNN="8"
FROM pytorch/pytorch:${PYTORCH}-cuda${CUDA}-cudnn${CUDNN}-devel
ENV TORCH_CUDA_ARCH_LIST="6.0 6.1 7.0 7.5 8.0 8.6+PTX" \
TORCH_NVCC_FLAGS="-Xfatbin -compress-all" \
CMAKE_PREFIX_PATH="$(dirname $(which conda))/../" \
FORCE_CUDA="1"
# Avoid Public GPG key error
# https://github.com/NVIDIA/nvidia-docker/issues/1631
RUN rm /etc/apt/sources.list.d/cuda.list \
&& rm /etc/apt/sources.list.d/nvidia-ml.list \
&& apt-key del 7fa2af80 \
&& apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub \
&& apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/7fa2af80.pub
# (Optional, use Mirror to speed up downloads)
# RUN sed -i 's/http:\/\/archive.ubuntu.com\/ubuntu\//http:\/\/mirrors.aliyun.com\/ubuntu\//g' /etc/apt/sources.list && \
# pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple
# Install the required packages
RUN apt-get update \
&& apt-get install -y ffmpeg libsm6 libxext6 git ninja-build libglib2.0-0 libsm6 libxrender-dev libxext6 \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*
# Install MMEngine and MMCV
RUN pip install openmim && \
mim install "mmengine>=0.7.1" "mmcv>=2.0.0rc4"
# Install MMDetection
RUN conda clean --all \
&& git clone https://github.com/open-mmlab/mmdetection.git /mmdetection \
&& cd /mmdetection \
&& pip install --no-cache-dir -e .
WORKDIR /mmdetection
ARG PYTORCH="1.9.0"
ARG CUDA="11.1"
ARG CUDNN="8"
FROM pytorch/pytorch:${PYTORCH}-cuda${CUDA}-cudnn${CUDNN}-devel
ARG MMCV="2.0.0rc4"
ARG MMDET="3.3.0"
ENV PYTHONUNBUFFERED TRUE
# Avoid Public GPG key error
# https://github.com/NVIDIA/nvidia-docker/issues/1631
RUN rm /etc/apt/sources.list.d/cuda.list \
&& rm /etc/apt/sources.list.d/nvidia-ml.list \
&& apt-key del 7fa2af80 \
&& apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub \
&& apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/7fa2af80.pub
# (Optional, use Mirror to speed up downloads)
# RUN sed -i 's/http:\/\/archive.ubuntu.com\/ubuntu\//http:\/\/mirrors.aliyun.com\/ubuntu\//g' /etc/apt/sources.list
# Install the required packages
RUN apt-get update && \
DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \
ca-certificates \
g++ \
openjdk-11-jre-headless \
# MMDet Requirements
ffmpeg libsm6 libxext6 git ninja-build libglib2.0-0 libsm6 libxrender-dev libxext6 \
&& rm -rf /var/lib/apt/lists/*
ENV PATH="/opt/conda/bin:$PATH" \
FORCE_CUDA="1"
# TORCHSERVE
RUN pip install torchserve torch-model-archiver
# MMLAB
ARG PYTORCH
ARG CUDA
RUN pip install mmengine
RUN ["/bin/bash", "-c", "pip install mmcv==${MMCV} -f https://download.openmmlab.com/mmcv/dist/cu${CUDA//./}/torch${PYTORCH}/index.html"]
RUN pip install mmdet==${MMDET}
RUN useradd -m model-server \
&& mkdir -p /home/model-server/tmp
COPY entrypoint.sh /usr/local/bin/entrypoint.sh
RUN chmod +x /usr/local/bin/entrypoint.sh \
&& chown -R model-server /home/model-server
COPY config.properties /home/model-server/config.properties
RUN mkdir /home/model-server/model-store && chown -R model-server /home/model-server/model-store
EXPOSE 8080 8081 8082
USER model-server
WORKDIR /home/model-server
ENV TEMP=/home/model-server/tmp
ENTRYPOINT ["/usr/local/bin/entrypoint.sh"]
CMD ["serve"]
inference_address=http://0.0.0.0:8080
management_address=http://0.0.0.0:8081
metrics_address=http://0.0.0.0:8082
model_store=/home/model-server/model-store
load_models=all
#!/bin/bash
set -e
if [[ "$1" = "serve" ]]; then
shift 1
torchserve --start --ts-config /home/model-server/config.properties
else
eval "$@"
fi
# prevent docker exit
tail -f /dev/null
ARG PYTORCH="1.9.0"
ARG CUDA="11.1"
ARG CUDNN="8"
FROM pytorch/pytorch:${PYTORCH}-cuda${CUDA}-cudnn${CUDNN}-devel
ARG MMCV="2.0.0rc4"
ARG MMDET="3.3.0"
ENV PYTHONUNBUFFERED TRUE
# Avoid Public GPG key error
# - https://github.com/NVIDIA/nvidia-docker/issues/1631
RUN rm /etc/apt/sources.list.d/cuda.list \
&& rm /etc/apt/sources.list.d/nvidia-ml.list \
&& apt-get update \
&& apt-get install -y wget \
&& rm -rf /var/lib/apt/lists/* \
&& apt-key del 7fa2af80 \
&& apt-get update && apt-get install -y --no-install-recommends wget \
&& wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/cuda-keyring_1.0-1_all.deb \
&& dpkg -i cuda-keyring_1.0-1_all.deb
# (Optional, use Mirror to speed up downloads)
# RUN sed -i 's/http:\/\/archive.ubuntu.com\/ubuntu\//http:\/\/mirrors.aliyun.com\/ubuntu\//g' /etc/apt/sources.list
# Install the required packages
RUN apt-get update && \
DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \
ca-certificates \
g++ \
openjdk-11-jre-headless \
# MMDet Requirements
ffmpeg libsm6 libxext6 git ninja-build libglib2.0-0 libsm6 libxrender-dev libxext6 \
&& rm -rf /var/lib/apt/lists/*
ENV PATH="/opt/conda/bin:$PATH" \
FORCE_CUDA="1"
# TORCHSERVE
RUN pip install torchserve torch-model-archiver nvgpu -i https://pypi.mirrors.ustc.edu.cn/simple/
# MMLAB
ARG PYTORCH
ARG CUDA
RUN pip install mmengine -i https://pypi.mirrors.ustc.edu.cn/simple/
RUN ["/bin/bash", "-c", "pip install mmcv==${MMCV} -f https://download.openmmlab.com/mmcv/dist/cu${CUDA//./}/torch${PYTORCH}/index.html"]
RUN pip install mmdet==${MMDET} -i https://pypi.mirrors.ustc.edu.cn/simple/
RUN useradd -m model-server \
&& mkdir -p /home/model-server/tmp
COPY entrypoint.sh /usr/local/bin/entrypoint.sh
RUN chmod +x /usr/local/bin/entrypoint.sh \
&& chown -R model-server /home/model-server
COPY config.properties /home/model-server/config.properties
RUN mkdir /home/model-server/model-store && chown -R model-server /home/model-server/model-store
EXPOSE 8080 8081 8082
USER model-server
WORKDIR /home/model-server
ENV TEMP=/home/model-server/tmp
ENTRYPOINT ["/usr/local/bin/entrypoint.sh"]
CMD ["serve"]
# Minimal makefile for Sphinx documentation
#
# You can set these variables from the command line, and also
# from the environment for the first two.
SPHINXOPTS ?=
SPHINXBUILD ?= sphinx-build
SOURCEDIR = .
BUILDDIR = _build
# Put it first so that "make" without argument is like "make help".
help:
	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
.PHONY: help Makefile
# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
.header-logo {
background-image: url("../image/mmdet-logo.png");
background-size: 156px 40px;
height: 40px;
width: 156px;
}
# Conventions
Please check the following conventions if you would like to modify MMDetection for your own project.
## About the order of image shape
In OpenMMLab 2.0, to be consistent with the input arguments of OpenCV, the arguments about image shape in the data transformation pipeline are always in `(width, height)` order. In contrast, for computational convenience, the shape fields that flow through the data pipeline and the model are in `(height, width)` order. Specifically, in the results processed by each data transform, the fields and their meanings are as below:
- img_shape: (height, width)
- ori_shape: (height, width)
- pad_shape: (height, width)
- batch_input_shape: (height, width)
As an example, the initialization arguments of `Mosaic` are as below:
```python
@TRANSFORMS.register_module()
class Mosaic(BaseTransform):

    def __init__(self,
                 img_scale: Tuple[int, int] = (640, 640),
                 center_ratio_range: Tuple[float, float] = (0.5, 1.5),
                 bbox_clip_border: bool = True,
                 pad_val: float = 114.0,
                 prob: float = 1.0) -> None:
        ...

        # img_scale order should be (width, height)
        self.img_scale = img_scale

    def transform(self, results: dict) -> dict:
        ...

        results['img'] = mosaic_img
        # (height, width)
        results['img_shape'] = mosaic_img.shape[:2]
```
## Loss
In MMDetection, a `dict` containing losses and metrics will be returned by `model(**data)`.
For example, in bbox head,
```python
class BBoxHead(nn.Module):
    ...

    def loss(self, ...):
        losses = dict()
        # classification loss
        losses['loss_cls'] = self.loss_cls(...)
        # classification accuracy
        losses['acc'] = accuracy(...)
        # bbox regression loss
        losses['loss_bbox'] = self.loss_bbox(...)
        return losses
```
`bbox_head.loss()` will be called during the model forward pass.
The returned dict contains `'loss_bbox'`, `'loss_cls'` and `'acc'`.
Only `'loss_bbox'` and `'loss_cls'` are used during back-propagation;
`'acc'` is only used as a metric to monitor the training process.
By default, only the values whose keys contain `'loss'` will be back-propagated.
This behavior can be changed by modifying `BaseDetector.train_step()`.
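To illustrate this convention, here is a minimal sketch (not the actual MMDetection implementation) of how only the `'loss'`-keyed values are combined for back-propagation:

```python
import torch

losses = {
    'loss_cls': torch.tensor(0.8),
    'loss_bbox': torch.tensor(0.4),
    'acc': torch.tensor(92.5),  # metric only, never back-propagated
}

# Only values whose keys contain 'loss' contribute to the total loss
# that is back-propagated; 'acc' is just logged.
total_loss = sum(v for k, v in losses.items() if 'loss' in k)
print(total_loss)  # tensor(1.2000)
```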
## Empty Proposals
In MMDetection, we have added special handling and unit tests for empty proposals in two-stage detectors. We need to handle empty proposals for both the entire batch and individual images at the same time. For example, in CascadeRoIHead,
```python
# simple_test method
...
# There is no proposal in the whole batch
if rois.shape[0] == 0:
    bbox_results = [[
        np.zeros((0, 5), dtype=np.float32)
        for _ in range(self.bbox_head[-1].num_classes)
    ]] * num_imgs

    if self.with_mask:
        mask_classes = self.mask_head[-1].num_classes
        segm_results = [[[] for _ in range(mask_classes)]
                        for _ in range(num_imgs)]
        results = list(zip(bbox_results, segm_results))
    else:
        results = bbox_results
    return results
...

# There is no proposal in the single image
for i in range(self.num_stages):
    ...
    if i < self.num_stages - 1:
        for j in range(num_imgs):
            # Handle empty proposal
            if rois[j].shape[0] > 0:
                bbox_label = cls_score[j][:, :-1].argmax(dim=1)
                refine_roi = self.bbox_head[i].regress_by_class(
                    rois[j], bbox_label, bbox_pred[j], img_metas[j])
                refine_roi_list.append(refine_roi)
```
If you have customized `RoIHead`, you can refer to the above method to deal with empty proposals.
## COCO Panoptic Dataset

In MMDetection, we support the COCO Panoptic dataset. Here we clarify a few conventions about the implementation of `CocoPanopticDataset`.

1. For mmdet\<=2.16.0, the range of foreground and background labels in semantic segmentation is different from the default setting of MMDetection. The label `0` stands for `VOID` and the category labels start from `1`.
   Since mmdet=2.17.0, the category labels of semantic segmentation start from `0` and label `255` stands for `VOID`, for consistency with the labels of bounding boxes.
   To achieve that, the `Pad` pipeline supports setting the padding value for `seg` (see the sketch after this list).
2. In the evaluation, the panoptic result is a map with the same shape as the original image. Each value in the result map has the format of `instance_id * INSTANCE_OFFSET + category_id`.
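For reference, a `Pad` transform that fills the image and the semantic segmentation map separately can be sketched as below; the keys follow `mmcv.transforms.Pad`, and the concrete values are illustrative:

```python
# Pads the image with 114 and the segmentation map with the VOID label 255.
dict(type='Pad', size_divisor=32, pad_val=dict(img=(114, 114, 114), seg=255))
```

Likewise, a value of the panoptic result map can be decoded back into its category and instance. `INSTANCE_OFFSET` is a constant defined in MMDetection (its module path has moved between versions), and the numbers below are illustrative only:

```python
INSTANCE_OFFSET = 1000  # illustrative; use the constant shipped with MMDetection

pan_value = 3 * INSTANCE_OFFSET + 16        # instance_id = 3, category_id = 16
category_id = pan_value % INSTANCE_OFFSET   # -> 16
instance_id = pan_value // INSTANCE_OFFSET  # -> 3
```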
# Customize Datasets
## Support new data format
To support a new data format, you can either convert it to existing formats (COCO format or PASCAL VOC format) or convert it directly to the middle format. You can also choose to do the conversion offline (before training, with a script) or online (implement a new dataset class and convert at training time). In MMDetection, we recommend converting the data into COCO format and doing the conversion offline, so that you only need to modify the config's data annotation paths and classes after the conversion.
### Reorganize new data formats to existing format
The simplest way is to convert your dataset to existing dataset formats (COCO or PASCAL VOC).
The annotation JSON files in COCO format have the following necessary keys:
```python
'images': [
{
'file_name': 'COCO_val2014_000000001268.jpg',
'height': 427,
'width': 640,
'id': 1268
},
...
],
'annotations': [
{
'segmentation': [[192.81,
247.09,
...
219.03,
249.06]], # If you have mask labels, and it is in polygon XY point coordinate format, you need to ensure that at least 3 point coordinates are included. Otherwise, it is an invalid polygon.
'area': 1035.749,
'iscrowd': 0,
'image_id': 1268,
'bbox': [192.81, 224.8, 74.73, 33.43],
'category_id': 16,
'id': 42986
},
...
],
'categories': [
{'id': 0, 'name': 'car'},
]
```
There are three necessary keys in the JSON file:
- `images`: contains a list of images with their information like `file_name`, `height`, `width`, and `id`.
- `annotations`: contains the list of instance annotations.
- `categories`: contains the list of categories names and their ID.
After the data pre-processing, there are two steps for users to train the customized new dataset with existing format (e.g. COCO format):
1. Modify the config file for using the customized dataset.
2. Check the annotations of the customized dataset.
Here we give an example to show the above two steps, which uses a customized dataset of 5 classes with COCO format to train an existing Cascade Mask R-CNN R50-FPN detector.
#### 1. Modify the config file for using the customized dataset
There are two aspects involved in the modification of config file:
1. The dataloader fields. Specifically, you need to explicitly add the `metainfo=dict(classes=classes)` field to `train_dataloader.dataset`, `val_dataloader.dataset` and `test_dataloader.dataset`, and `classes` must be a tuple.
2. The `num_classes` field in the `model` part. Explicitly overwrite all the `num_classes` fields from the default value (e.g. 80 in COCO) to the number of classes in your dataset.
In `configs/my_custom_config.py`:
```python
# the new config inherits the base configs to highlight the necessary modification
_base_ = './cascade_mask_rcnn_r50_fpn_1x_coco.py'
# 1. dataset settings
dataset_type = 'CocoDataset'
classes = ('a', 'b', 'c', 'd', 'e')
data_root='path/to/your/'
train_dataloader = dict(
batch_size=2,
num_workers=2,
dataset=dict(
type=dataset_type,
# explicitly add your class names to the field `metainfo`
metainfo=dict(classes=classes),
data_root=data_root,
ann_file='train/annotation_data',
data_prefix=dict(img='train/image_data')
)
)
val_dataloader = dict(
batch_size=1,
num_workers=2,
dataset=dict(
type=dataset_type,
test_mode=True,
# explicitly add your class names to the field `metainfo`
metainfo=dict(classes=classes),
data_root=data_root,
ann_file='val/annotation_data',
data_prefix=dict(img='val/image_data')
)
)
test_dataloader = dict(
batch_size=1,
num_workers=2,
dataset=dict(
type=dataset_type,
test_mode=True,
# explicitly add your class names to the field `metainfo`
metainfo=dict(classes=classes),
data_root=data_root,
ann_file='test/annotation_data',
data_prefix=dict(img='test/image_data')
)
)
# 2. model settings
# explicitly over-write all the `num_classes` field from default 80 to 5.
model = dict(
roi_head=dict(
bbox_head=[
dict(
type='Shared2FCBBoxHead',
# explicitly over-write all the `num_classes` field from default 80 to 5.
num_classes=5),
dict(
type='Shared2FCBBoxHead',
# explicitly over-write all the `num_classes` field from default 80 to 5.
num_classes=5),
dict(
type='Shared2FCBBoxHead',
# explicitly over-write all the `num_classes` field from default 80 to 5.
num_classes=5)],
# explicitly over-write all the `num_classes` field from default 80 to 5.
mask_head=dict(num_classes=5)))
```
#### 2. Check the annotations of the customized dataset
Assuming your customized dataset is in COCO format, make sure you have the correct annotations:
1. The length of the `categories` field in the annotations should exactly equal the tuple length of the `classes` field in your config: it is the number of classes (e.g. 5 in this example).
2. The `classes` field in your config should have exactly the same elements, in the same order, as the `name` values in `categories` of the annotations. MMDetection automatically maps the discontinuous `id` values in `categories` to continuous label indices, so the string order of `name` in `categories` affects the order of label indices. Meanwhile, the string order of `classes` in the config affects the label text during visualization of predicted bounding boxes.
3. The `category_id` in `annotations` field should be valid, i.e., all values in `category_id` should belong to `id` in `categories`.
Here is a valid example of annotations:
```python
'annotations': [
{
'segmentation': [[192.81,
247.09,
...
219.03,
249.06]], # if you have mask labels
'area': 1035.749,
'iscrowd': 0,
'image_id': 1268,
'bbox': [192.81, 224.8, 74.73, 33.43],
'category_id': 16,
'id': 42986
},
...
],
# MMDetection automatically maps the uncontinuous `id` to the continuous label indices.
'categories': [
{'id': 1, 'name': 'a'}, {'id': 3, 'name': 'b'}, {'id': 4, 'name': 'c'}, {'id': 16, 'name': 'd'}, {'id': 17, 'name': 'e'},
]
```
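To make the mapping concrete, here is a small sketch (not MMDetection code) of how the discontinuous ids above would map to continuous label indices:

```python
categories = [{'id': 1, 'name': 'a'}, {'id': 3, 'name': 'b'},
              {'id': 4, 'name': 'c'}, {'id': 16, 'name': 'd'},
              {'id': 17, 'name': 'e'}]
cat2label = {cat['id']: idx for idx, cat in enumerate(categories)}
# cat2label == {1: 0, 3: 1, 4: 2, 16: 3, 17: 4}
# e.g. an annotation with 'category_id': 16 is trained with label index 3 ('d').
```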
We use this approach to support the CityScapes dataset. The script is in [cityscapes.py](../../../tools/dataset_converters/cityscapes.py) and we also provide the finetuning [configs](../../../configs/cityscapes).
**Note**
1. For instance segmentation datasets, **MMDetection only supports evaluating the mask AP of datasets in COCO format for now**.
2. It is recommended to convert the data offline before training, so that you can still use `CocoDataset` and only need to modify the annotation paths and the training classes.
### Reorganize new data format to middle format
It is also fine if you do not want to convert the annotation format to COCO or PASCAL format.
Actually, we define a simple annotation format in MMEngine's [BaseDataset](https://github.com/open-mmlab/mmengine/blob/main/mmengine/dataset/base_dataset.py#L116), and all existing datasets are
processed to be compatible with it, either online or offline.
The annotation file must be in `json`, `yaml`/`yml`, or `pickle`/`pkl` format, and the dictionary stored in it must contain two fields: `metainfo` and `data_list`. `metainfo` is a dictionary that contains the metadata of the dataset, such as class information; `data_list` is a list whose elements are dictionaries, each defining the raw data of one image, and each raw data entry contains one or several training/testing samples.
Here is an example.
```python
{
'metainfo':
{
'classes': ('person', 'bicycle', 'car', 'motorcycle'),
...
},
'data_list':
[
{
"img_path": "xxx/xxx_1.jpg",
"height": 604,
"width": 640,
"instances":
[
{
"bbox": [0, 0, 10, 20],
"bbox_label": 1,
"ignore_flag": 0
},
{
"bbox": [10, 10, 110, 120],
"bbox_label": 2,
"ignore_flag": 0
}
]
},
{
"img_path": "xxx/xxx_2.jpg",
"height": 320,
"width": 460,
"instances":
[
{
"bbox": [10, 0, 20, 20],
"bbox_label": 3,
"ignore_flag": 1,
}
]
},
...
]
}
```
Some datasets may provide annotations such as crowd/difficult/ignored bboxes; we use `ignore_flag` to cover them.
After obtaining the above standard data annotation format, you can directly use [BaseDetDataset](../../../mmdet/datasets/base_det_dataset.py#L13) of MMDetection in the configuration, without conversion.
### An example of customized dataset
Assume the annotation is in a new format in text files.
The bounding box annotations are stored in the text file `annotation.txt` as follows:
```
#
000001.jpg
1280 720
2
10 20 40 60 1
20 40 50 60 2
#
000002.jpg
1280 720
3
50 20 40 60 2
20 40 30 45 2
30 40 50 60 3
```
We can create a new dataset in `mmdet/datasets/my_dataset.py` to load the data.
```python
import mmengine

from mmdet.datasets.base_det_dataset import BaseDetDataset
from mmdet.registry import DATASETS


@DATASETS.register_module()
class MyDataset(BaseDetDataset):

    METAINFO = {
        'classes': ('person', 'bicycle', 'car', 'motorcycle'),
        'palette': [(220, 20, 60), (119, 11, 32), (0, 0, 142), (0, 0, 230)]
    }

    def load_data_list(self, ann_file):
        ann_list = mmengine.list_from_file(ann_file)

        data_infos = []
        for i, ann_line in enumerate(ann_list):
            if ann_line != '#':
                continue

            img_shape = ann_list[i + 2].split(' ')
            width = int(img_shape[0])
            height = int(img_shape[1])
            bbox_number = int(ann_list[i + 3])

            instances = []
            for anns in ann_list[i + 4:i + 4 + bbox_number]:
                instance = {}
                instance['bbox'] = [float(ann) for ann in anns.split(' ')[:4]]
                instance['bbox_label'] = int(anns.split(' ')[4])
                instances.append(instance)

            data_infos.append(
                dict(
                    img_path=ann_list[i + 1],
                    img_id=i,
                    width=width,
                    height=height,
                    instances=instances))

        return data_infos
```
Then, to use `MyDataset` in the config, you can modify it as follows:
```python
dataset_A_train = dict(
type='MyDataset',
ann_file = 'image_list.txt',
pipeline=train_pipeline
)
```
## Customize datasets by dataset wrappers
MMEngine also supports many dataset wrappers to mix the dataset or modify the dataset distribution for training.
Currently it supports three dataset wrappers, as below:
- `RepeatDataset`: simply repeat the whole dataset.
- `ClassBalancedDataset`: repeat dataset in a class balanced manner.
- `ConcatDataset`: concat datasets.
For detailed usage, see [MMEngine Dataset Wrapper](#TODO).
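For example, repeating a training dataset can be configured roughly as below; this is a sketch, the wrapped dataset fields are illustrative, and `times` controls how often the dataset is repeated per epoch:

```python
train_dataloader = dict(
    dataset=dict(
        type='RepeatDataset',
        times=3,  # repeat the wrapped dataset 3 times
        dataset=dict(
            type='CocoDataset',
            ann_file='train/annotation_data',
            data_prefix=dict(img='train/image_data'),
            pipeline=train_pipeline)))
```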
## Modify Dataset Classes
With existing dataset types, we can modify their metainfo to train on a subset of the annotations.
For example, if you want to train only three classes of the current dataset,
you can modify the classes of dataset.
The dataset will filter out the ground truth boxes of other classes automatically.
```python
classes = ('person', 'bicycle', 'car')
train_dataloader = dict(
dataset=dict(
metainfo=dict(classes=classes))
)
val_dataloader = dict(
dataset=dict(
metainfo=dict(classes=classes))
)
test_dataloader = dict(
dataset=dict(
metainfo=dict(classes=classes))
)
```
**Note**:
- Before MMDetection v2.5.0, the dataset would automatically filter out images with empty GT if the classes were set, and there was no way to disable that through the config. This was undesirable and confusing, because if the classes were not set, the dataset only filtered the empty-GT images when `filter_empty_gt=True` and `test_mode=False`. Since MMDetection v2.5.0, we have decoupled the image filtering from the classes modification, i.e., the dataset will only filter empty-GT images when `filter_cfg=dict(filter_empty_gt=True)` and `test_mode=False`, no matter whether the classes are set. Thus, setting the classes only influences the annotation classes used for training, and users can decide whether to filter empty-GT images themselves.
- When directly using `BaseDataset` in MMEngine or `BaseDetDataset` in MMDetection, users cannot filter images without GT by modifying the configuration, but it can be solved in an offline way.
- Please remember to modify the `num_classes` in the head when specifying `classes` in the dataset (see the sketch after this list). Since v2.9.0 (after PR #4508), we have implemented [NumClassCheckHook](../../../mmdet/engine/hooks/num_class_check_hook.py) to check whether the numbers are consistent.
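As a minimal sketch of keeping the two in sync (the `bbox_head` key below is illustrative and depends on your model architecture):

```python
classes = ('person', 'bicycle', 'car')
# A hypothetical single-stage model config; two-stage models set
# num_classes under roi_head.bbox_head / mask_head instead.
model = dict(bbox_head=dict(num_classes=len(classes)))  # 3
```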
## COCO Panoptic Dataset
Now we support the COCO Panoptic Dataset; the format of panoptic annotations is different from the COCO format.
Both the foreground and the background will exist in the annotation file.
The annotation JSON files in COCO Panoptic format have the following necessary keys:
```python
'images': [
{
'file_name': '000000001268.jpg',
'height': 427,
'width': 640,
'id': 1268
},
...
]
'annotations': [
{
'filename': '000000001268.jpg',
'image_id': 1268,
'segments_info': [
{
'id':8345037, # One-to-one correspondence with the id in the annotation map.
'category_id': 51,
'iscrowd': 0,
'bbox': (x1, y1, w, h), # The bbox of the background is the outer rectangle of its mask.
'area': 24315
},
...
]
},
...
]
'categories': [ # including both foreground categories and background categories
{'id': 0, 'name': 'person'},
...
]
```
Moreover, the `seg` prefix must be set to the path of the panoptic annotation images.
```python
dataset_type = 'CocoPanopticDataset'
data_root='path/to/your/'
train_dataloader = dict(
dataset=dict(
type=dataset_type,
data_root=data_root,
data_prefix=dict(
img='train/image_data/', seg='train/panoptic/image_annotation_data/')
)
)
val_dataloader = dict(
dataset=dict(
type=dataset_type,
data_root=data_root,
data_prefix=dict(
img='val/image_data/', seg='val/panoptic/image_annotation_data/')
)
)
test_dataloader = dict(
dataset=dict(
type=dataset_type,
data_root=data_root,
data_prefix=dict(
img='test/image_data/', seg='test/panoptic/image_annotation_data/')
)
)
```
# Customize Losses
MMDetection provides users with different loss functions. However, the default configuration may not be applicable to different datasets or models, so users may want to modify a specific loss to adapt it to a new situation.
This tutorial first elaborates on the computation pipeline of losses, then gives some instructions on how to modify each step. The modifications can be categorized as tweaking and weighting.
## Computation pipeline of a loss
Given the input prediction and target, as well as the weights, a loss function maps the input tensor to the final loss scalar. The mapping can be divided into five steps:
1. Set the sampling method to sample positive and negative samples.
2. Get the **element-wise** or **sample-wise** loss from the loss kernel function.
3. Weight the loss **element-wise** with a weight tensor.
4. Reduce the loss tensor to a **scalar**.
5. Weight the loss with a **scalar**.
## Set sampling method (step 1)
For some loss functions, sampling strategies are needed to avoid imbalance between positive and negative samples.
For example, when using `CrossEntropyLoss` in RPN head, we need to set `RandomSampler` in `train_cfg`
```python
train_cfg=dict(
    rpn=dict(
        sampler=dict(
            type='RandomSampler',
            num=256,
            pos_fraction=0.5,
            neg_pos_ub=-1,
            add_gt_as_proposals=False)))
```
For other losses which have their own positive and negative sample balancing mechanism, such as Focal Loss, GHMC, and QualityFocalLoss, the sampler is no longer necessary.
## Tweaking loss
Tweaking a loss is more related to steps 2, 4 and 5, and most modifications can be specified in the config.
Here we take [Focal Loss (FL)](../../../mmdet/models/losses/focal_loss.py) as an example.
The following code snippets are the constructor and the config of FL, respectively; they correspond to each other one to one.
```python
@LOSSES.register_module()
class FocalLoss(nn.Module):

    def __init__(self,
                 use_sigmoid=True,
                 gamma=2.0,
                 alpha=0.25,
                 reduction='mean',
                 loss_weight=1.0):
```
```python
loss_cls=dict(
type='FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=1.0)
```
### Tweaking hyper-parameters (step 2)
`gamma` and `alpha` are two hyper-parameters of the Focal Loss. If we want to change the value of `gamma` to 1.5 and `alpha` to 0.5, we can specify them in the config as follows:
```python
loss_cls=dict(
type='FocalLoss',
use_sigmoid=True,
gamma=1.5,
alpha=0.5,
loss_weight=1.0)
```
### Tweaking the way of reduction (step 4)
The default reduction is `mean` for FL. If we want to change the reduction from `mean` to `sum`, we can specify it in the config as follows:
```python
loss_cls=dict(
type='FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=1.0,
reduction='sum')
```
### Tweaking loss weight (step 5)
The loss weight here is a scalar which controls the weight of different losses in multi-task learning, e.g. classification loss and regression loss. If we want to change the loss weight of the classification loss to 0.5, we can specify it in the config as follows:
```python
loss_cls=dict(
type='FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=0.5)
```
## Weighting loss (step 3)
Weighting a loss means re-weighting it element-wise: we multiply the loss tensor by a weight tensor of the same shape, so different entries of the loss can be scaled differently.
The loss weights vary across models and are highly context related, but overall there are two kinds of loss weights: `label_weights` for the classification loss and `bbox_weights` for the bbox regression loss. You can find them in the `get_targets` method of the corresponding head. Here we take [ATSSHead](../../../mmdet/models/dense_heads/atss_head.py#L322) as an example, which inherits [AnchorHead](../../../mmdet/models/dense_heads/anchor_head.py) but overrides its `get_targets` method to yield different `label_weights` and `bbox_weights`.
```python
class ATSSHead(AnchorHead):

    ...

    def get_targets(self,
                    anchor_list,
                    valid_flag_list,
                    gt_bboxes_list,
                    img_metas,
                    gt_bboxes_ignore_list=None,
                    gt_labels_list=None,
                    label_channels=1,
                    unmap_outputs=True):
```
# Customize Models
We basically categorize model components into 5 types.
- backbone: usually an FCN network to extract feature maps, e.g., ResNet, MobileNet.
- neck: the component between backbones and heads, e.g., FPN, PAFPN.
- head: the component for specific tasks, e.g., bbox prediction and mask prediction.
- roi extractor: the part for extracting RoI features from feature maps, e.g., RoI Align.
- loss: the component in head for calculating losses, e.g., FocalLoss, L1Loss, and GHMLoss.
## Develop new components
### Add a new backbone
Here we show how to develop new components with an example of MobileNet.
#### 1. Define a new backbone (e.g. MobileNet)
Create a new file `mmdet/models/backbones/mobilenet.py`.
```python
import torch.nn as nn

from mmdet.registry import MODELS


@MODELS.register_module()
class MobileNet(nn.Module):

    def __init__(self, arg1, arg2):
        pass

    def forward(self, x):  # should return a tuple
        pass
```
#### 2. Import the module
You can either add the following line to `mmdet/models/backbones/__init__.py`
```python
from .mobilenet import MobileNet
```
or alternatively add
```python
custom_imports = dict(
imports=['mmdet.models.backbones.mobilenet'],
allow_failed_imports=False)
```
to the config file to avoid modifying the original code.
#### 3. Use the backbone in your config file
```python
model = dict(
...
backbone=dict(
type='MobileNet',
arg1=xxx,
arg2=xxx),
...
```
### Add new necks
#### 1. Define a neck (e.g. PAFPN)
Create a new file `mmdet/models/necks/pafpn.py`.
```python
import torch.nn as nn

from mmdet.registry import MODELS


@MODELS.register_module()
class PAFPN(nn.Module):

    def __init__(self,
                 in_channels,
                 out_channels,
                 num_outs,
                 start_level=0,
                 end_level=-1,
                 add_extra_convs=False):
        pass

    def forward(self, inputs):
        # implementation is ignored
        pass
```
#### 2. Import the module
You can either add the following line to `mmdet/models/necks/__init__.py`,
```python
from .pafpn import PAFPN
```
or alternatively add
```python
custom_imports = dict(
imports=['mmdet.models.necks.pafpn'],
allow_failed_imports=False)
```
to the config file and avoid modifying the original code.
#### 3. Modify the config file
```python
neck=dict(
type='PAFPN',
in_channels=[256, 512, 1024, 2048],
out_channels=256,
num_outs=5)
```
### Add new heads
Here we show how to develop a new head with the example of [Double Head R-CNN](https://arxiv.org/abs/1904.06493).
First, add a new bbox head in `mmdet/models/roi_heads/bbox_heads/double_bbox_head.py`.
Double Head R-CNN implements a new bbox head for object detection.
To implement a bbox head, we basically need to implement three functions of the new module, as follows.
```python
from typing import Tuple
import torch.nn as nn
from mmcv.cnn import ConvModule
from mmengine.model import BaseModule, ModuleList
from torch import Tensor
from mmdet.models.backbones.resnet import Bottleneck
from mmdet.registry import MODELS
from mmdet.utils import ConfigType, MultiConfig, OptConfigType, OptMultiConfig
from .bbox_head import BBoxHead
@MODELS.register_module()
class DoubleConvFCBBoxHead(BBoxHead):
r"""Bbox head used in Double-Head R-CNN
.. code-block:: none
/-> cls
/-> shared convs ->
\-> reg
roi features
/-> cls
\-> shared fc ->
\-> reg
""" # noqa: W605
def __init__(self,
num_convs: int = 0,
num_fcs: int = 0,
conv_out_channels: int = 1024,
fc_out_channels: int = 1024,
conv_cfg: OptConfigType = None,
norm_cfg: ConfigType = dict(type='BN'),
init_cfg: MultiConfig = dict(
type='Normal',
override=[
dict(type='Normal', name='fc_cls', std=0.01),
dict(type='Normal', name='fc_reg', std=0.001),
dict(
type='Xavier',
name='fc_branch',
distribution='uniform')
]),
**kwargs) -> None:
kwargs.setdefault('with_avg_pool', True)
super().__init__(init_cfg=init_cfg, **kwargs)
def forward(self, x_cls: Tensor, x_reg: Tensor) -> Tuple[Tensor]:
```
Second, implement a new RoI Head if it is necessary. We plan to inherit the new `DoubleHeadRoIHead` from `StandardRoIHead`. We can find that a `StandardRoIHead` already implements the following functions.
```python
from typing import List, Optional, Tuple
import torch
from torch import Tensor
from mmdet.registry import MODELS, TASK_UTILS
from mmdet.structures import DetDataSample
from mmdet.structures.bbox import bbox2roi
from mmdet.utils import ConfigType, InstanceList
from ..task_modules.samplers import SamplingResult
from ..utils import empty_instances, unpack_gt_instances
from .base_roi_head import BaseRoIHead
@MODELS.register_module()
class StandardRoIHead(BaseRoIHead):
"""Simplest base roi head including one bbox head and one mask head."""
def init_assigner_sampler(self) -> None:
def init_bbox_head(self, bbox_roi_extractor: ConfigType,
bbox_head: ConfigType) -> None:
def init_mask_head(self, mask_roi_extractor: ConfigType,
mask_head: ConfigType) -> None:
def forward(self, x: Tuple[Tensor],
rpn_results_list: InstanceList) -> tuple:
def loss(self, x: Tuple[Tensor], rpn_results_list: InstanceList,
batch_data_samples: List[DetDataSample]) -> dict:
def _bbox_forward(self, x: Tuple[Tensor], rois: Tensor) -> dict:
def bbox_loss(self, x: Tuple[Tensor],
sampling_results: List[SamplingResult]) -> dict:
def mask_loss(self, x: Tuple[Tensor],
sampling_results: List[SamplingResult], bbox_feats: Tensor,
batch_gt_instances: InstanceList) -> dict:
def _mask_forward(self,
x: Tuple[Tensor],
rois: Tensor = None,
pos_inds: Optional[Tensor] = None,
bbox_feats: Optional[Tensor] = None) -> dict:
def predict_bbox(self,
x: Tuple[Tensor],
batch_img_metas: List[dict],
rpn_results_list: InstanceList,
rcnn_test_cfg: ConfigType,
rescale: bool = False) -> InstanceList:
def predict_mask(self,
x: Tuple[Tensor],
batch_img_metas: List[dict],
results_list: InstanceList,
rescale: bool = False) -> InstanceList:
```
Double Head's modification is mainly in the `_bbox_forward` logic, and it inherits the other logic from `StandardRoIHead`. In `mmdet/models/roi_heads/double_roi_head.py`, we implement the new RoI Head as follows:
```python
from typing import Tuple

from torch import Tensor

from mmdet.registry import MODELS
from .standard_roi_head import StandardRoIHead


@MODELS.register_module()
class DoubleHeadRoIHead(StandardRoIHead):
    """RoI head for `Double Head RCNN <https://arxiv.org/abs/1904.06493>`_.

    Args:
        reg_roi_scale_factor (float): The scale factor to extend the rois
            used to extract the regression features.
    """

    def __init__(self, reg_roi_scale_factor: float, **kwargs):
        super().__init__(**kwargs)
        self.reg_roi_scale_factor = reg_roi_scale_factor

    def _bbox_forward(self, x: Tuple[Tensor], rois: Tensor) -> dict:
        """Box head forward function used in both training and testing.

        Args:
            x (tuple[Tensor]): List of multi-level img features.
            rois (Tensor): RoIs with the shape (n, 5) where the first
                column indicates batch id of each RoI.

        Returns:
            dict[str, Tensor]: Usually returns a dictionary with keys:

                - `cls_score` (Tensor): Classification scores.
                - `bbox_pred` (Tensor): Box energies / deltas.
                - `bbox_feats` (Tensor): Extract bbox RoI features.
        """
        bbox_cls_feats = self.bbox_roi_extractor(
            x[:self.bbox_roi_extractor.num_inputs], rois)
        bbox_reg_feats = self.bbox_roi_extractor(
            x[:self.bbox_roi_extractor.num_inputs],
            rois,
            roi_scale_factor=self.reg_roi_scale_factor)
        if self.with_shared_head:
            bbox_cls_feats = self.shared_head(bbox_cls_feats)
            bbox_reg_feats = self.shared_head(bbox_reg_feats)
        cls_score, bbox_pred = self.bbox_head(bbox_cls_feats, bbox_reg_feats)

        bbox_results = dict(
            cls_score=cls_score,
            bbox_pred=bbox_pred,
            bbox_feats=bbox_cls_feats)
        return bbox_results
```
Last, the users need to add the modules in
`mmdet/models/roi_heads/bbox_heads/__init__.py` and `mmdet/models/roi_heads/__init__.py` so that the corresponding registry can find and load them.
Alternatively, the users can add
```python
custom_imports=dict(
imports=['mmdet.models.roi_heads.double_roi_head', 'mmdet.models.roi_heads.bbox_heads.double_bbox_head'])
```
to the config file and achieve the same goal.
The config file of Double Head R-CNN is as follows:
```python
_base_ = '../faster_rcnn/faster-rcnn_r50_fpn_1x_coco.py'
model = dict(
roi_head=dict(
type='DoubleHeadRoIHead',
reg_roi_scale_factor=1.3,
bbox_head=dict(
_delete_=True,
type='DoubleConvFCBBoxHead',
num_convs=4,
num_fcs=2,
in_channels=256,
conv_out_channels=1024,
fc_out_channels=1024,
roi_feat_size=7,
num_classes=80,
bbox_coder=dict(
type='DeltaXYWHBBoxCoder',
target_means=[0., 0., 0., 0.],
target_stds=[0.1, 0.1, 0.2, 0.2]),
reg_class_agnostic=False,
loss_cls=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=2.0),
loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=2.0))))
```
Since MMDetection 2.0, the config system has supported inheriting configs, so that the users can focus on the specific modifications.
Double Head R-CNN mainly uses a new `DoubleHeadRoIHead` and a new `DoubleConvFCBBoxHead`; the arguments are set according to the `__init__` function of each module.
### Add new loss
Assume you want to add a new loss called `MyLoss` for bounding box regression.
To add a new loss function, the users need to implement it in `mmdet/models/losses/my_loss.py`.
The decorator `weighted_loss` enables the loss to be weighted for each element.
```python
import torch
import torch.nn as nn

from mmdet.registry import MODELS
from .utils import weighted_loss


@weighted_loss
def my_loss(pred, target):
    assert pred.size() == target.size() and target.numel() > 0
    loss = torch.abs(pred - target)
    return loss


@MODELS.register_module()
class MyLoss(nn.Module):

    def __init__(self, reduction='mean', loss_weight=1.0):
        super(MyLoss, self).__init__()
        self.reduction = reduction
        self.loss_weight = loss_weight

    def forward(self,
                pred,
                target,
                weight=None,
                avg_factor=None,
                reduction_override=None):
        assert reduction_override in (None, 'none', 'mean', 'sum')
        reduction = (
            reduction_override if reduction_override else self.reduction)
        loss_bbox = self.loss_weight * my_loss(
            pred, target, weight, reduction=reduction, avg_factor=avg_factor)
        return loss_bbox
```
Then the users need to add it in the `mmdet/models/losses/__init__.py`.
```python
from .my_loss import MyLoss, my_loss
```
Alternatively, you can add
```python
custom_imports=dict(
imports=['mmdet.models.losses.my_loss'])
```
to the config file and achieve the same goal.
To use it, modify the `loss_xxx` field.
Since MyLoss is for regression, you need to modify the `loss_bbox` field in the head.
```python
loss_bbox=dict(type='MyLoss', loss_weight=1.0)
```
# Customize Runtime Settings
## Customize optimization settings
Optimization-related configuration is now all managed by `optim_wrapper`, which usually has three fields: `optimizer`, `paramwise_cfg` and `clip_grad`; refer to [OptimWrapper](https://mmengine.readthedocs.io/en/latest/tutorials/optim_wrapper.md) for more detail. In the example below, `AdamW` is used as the `optimizer`, the learning rate of the backbone is reduced by a factor of 10, and gradient clipping is added.
```python
optim_wrapper = dict(
type='OptimWrapper',
# optimizer
optimizer=dict(
type='AdamW',
lr=0.0001,
weight_decay=0.05,
eps=1e-8,
betas=(0.9, 0.999)),
# Parameter-level learning rate and weight decay settings
paramwise_cfg=dict(
custom_keys={
'backbone': dict(lr_mult=0.1, decay_mult=1.0),
},
norm_decay_mult=0.0),
# gradient clipping
clip_grad=dict(max_norm=0.01, norm_type=2))
```
### Customize optimizer supported by PyTorch
We already support using all the optimizers implemented by PyTorch, and the only modification needed is to change the `optimizer` field of `optim_wrapper` in the config files. For example, if you want to use `Adam` (note that the performance could drop a lot), the modification could be as follows.
```python
optim_wrapper = dict(
type='OptimWrapper',
optimizer=dict(type='Adam', lr=0.0003, weight_decay=0.0001))
```
To modify the learning rate of the model, the users only need to modify the `lr` in `optimizer`. The users can directly set arguments following the [API doc](https://pytorch.org/docs/stable/optim.html?highlight=optim#module-torch.optim) of PyTorch.
### Customize self-implemented optimizer
#### 1. Define a new optimizer
A customized optimizer could be defined as follows.
Assume you want to add an optimizer named `MyOptimizer`, which has arguments `a`, `b`, and `c`.
You need to create a new directory named `mmdet/engine/optimizers`, and then implement the new optimizer in a file, e.g., `mmdet/engine/optimizers/my_optimizer.py`:
```python
from mmdet.registry import OPTIMIZERS
from torch.optim import Optimizer


@OPTIMIZERS.register_module()
class MyOptimizer(Optimizer):

    def __init__(self, a, b, c):
        ...
```
#### 2. Add the optimizer to registry
To find the module defined above, it should first be imported into the main namespace. There are two options to achieve this.
- Modify `mmdet/engine/optimizers/__init__.py` to import it.
The newly defined module should be imported in `mmdet/engine/optimizers/__init__.py` so that the registry will find the new module and add it:
```python
from .my_optimizer import MyOptimizer
```
- Use `custom_imports` in the config to manually import it
```python
custom_imports = dict(imports=['mmdet.engine.optimizers.my_optimizer'], allow_failed_imports=False)
```
The module `mmdet.engine.optimizers.my_optimizer` will be imported at the beginning of the program, and the class `MyOptimizer` is then automatically registered.
Note that only the module containing the class `MyOptimizer` should be imported;
`mmdet.engine.optimizers.my_optimizer.MyOptimizer` **cannot** be imported directly.
Actually, users can use a totally different file directory structure with this importing method, as long as the module root can be located in `PYTHONPATH` (see the sketch below).
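For example, if the optimizer lived in a standalone package on `PYTHONPATH` (the path below is hypothetical), the import could be configured as:

```python
custom_imports = dict(
    imports=['my_project.optimizers.my_optimizer'],
    allow_failed_imports=False)
```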
#### 3. Specify the optimizer in the config file
Then you can use `MyOptimizer` in the `optimizer` field of `optim_wrapper` in the config files. In the configs, the optimizers are defined by the field `optimizer` as follows:
```python
optim_wrapper = dict(
type='OptimWrapper',
optimizer=dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001))
```
To use your own optimizer, the field can be changed to
```python
optim_wrapper = dict(
type='OptimWrapper',
optimizer=dict(type='MyOptimizer', a=a_value, b=b_value, c=c_value))
```
### Customize optimizer wrapper constructor
Some models may have parameter-specific settings for optimization, e.g. weight decay for BatchNorm layers.
The users can do such fine-grained parameter tuning by customizing the optimizer wrapper constructor.
```python
from typing import Optional

import torch.nn as nn
from mmengine.optim import DefaultOptimWrapperConstructor, OptimWrapper

from mmdet.registry import OPTIM_WRAPPER_CONSTRUCTORS
from .my_optimizer import MyOptimizer


@OPTIM_WRAPPER_CONSTRUCTORS.register_module()
class MyOptimizerWrapperConstructor(DefaultOptimWrapperConstructor):

    def __init__(self,
                 optim_wrapper_cfg: dict,
                 paramwise_cfg: Optional[dict] = None):
        ...

    def __call__(self, model: nn.Module) -> OptimWrapper:
        ...
        return optim_wrapper
```
The default optimizer wrapper constructor is implemented [here](https://github.com/open-mmlab/mmengine/blob/main/mmengine/optim/optimizer/default_constructor.py#L18), which could also serve as a template for the new optimizer wrapper constructor.
### Additional settings
Tricks not implemented by the optimizer should be implemented through the optimizer wrapper constructor (e.g., setting parameter-wise learning rates) or hooks. We list some common settings that could stabilize or accelerate the training. Feel free to create a PR or issue for more settings.
- __Use gradient clip to stabilize training__:
Some models need gradient clipping to stabilize the training process. An example is below:
```python
optim_wrapper = dict(
_delete_=True, clip_grad=dict(max_norm=35, norm_type=2))
```
If your config inherits the base config which already sets the `optim_wrapper`, you might need `_delete_=True` to override the unnecessary settings. See the [config documentation](../user_guides/config.md) for more details.
- __Use momentum schedule to accelerate model convergence__:
We support a momentum scheduler to modify the model's momentum according to the learning rate, which could make the model converge faster.
The momentum scheduler is usually used together with the LR scheduler; for example, the following config is used in [3D detection](https://github.com/open-mmlab/mmdetection3d/blob/dev-1.x/configs/_base_/schedules/cyclic-20e.py) to accelerate convergence.
For more details, please refer to the implementation of [CosineAnnealingLR](https://github.com/open-mmlab/mmengine/blob/main/mmengine/optim/scheduler/lr_scheduler.py#L43) and [CosineAnnealingMomentum](https://github.com/open-mmlab/mmengine/blob/main/mmengine/optim/scheduler/momentum_scheduler.py#L71).
```python
param_scheduler = [
# learning rate scheduler
# During the first 8 epochs, learning rate increases from 0 to lr * 10
# during the next 12 epochs, learning rate decreases from lr * 10 to lr * 1e-4
dict(
type='CosineAnnealingLR',
T_max=8,
eta_min=lr * 10,
begin=0,
end=8,
by_epoch=True,
convert_to_iter_based=True),
dict(
type='CosineAnnealingLR',
T_max=12,
eta_min=lr * 1e-4,
begin=8,
end=20,
by_epoch=True,
convert_to_iter_based=True),
# momentum scheduler
# During the first 8 epochs, momentum increases from 0 to 0.85 / 0.95
# during the next 12 epochs, momentum increases from 0.85 / 0.95 to 1
dict(
type='CosineAnnealingMomentum',
T_max=8,
eta_min=0.85 / 0.95,
begin=0,
end=8,
by_epoch=True,
convert_to_iter_based=True),
dict(
type='CosineAnnealingMomentum',
T_max=12,
eta_min=1,
begin=8,
end=20,
by_epoch=True,
convert_to_iter_based=True)
]
```
## Customize training schedules
By default we use a step learning rate with the 1x schedule; this calls [MultiStepLR](https://github.com/open-mmlab/mmengine/blob/main/mmengine/optim/scheduler/lr_scheduler.py#L139) in MMEngine.
We support many other learning rate schedules [here](https://github.com/open-mmlab/mmengine/blob/main/mmengine/optim/scheduler/lr_scheduler.py), such as the `CosineAnnealingLR` and `PolyLR` schedules. Here are some examples:
- Poly schedule:
```python
param_scheduler = [
dict(
type='PolyLR',
power=0.9,
eta_min=1e-4,
begin=0,
end=8,
by_epoch=True)]
```
- CosineAnnealing schedule:
```python
param_scheduler = [
dict(
type='CosineAnnealingLR',
T_max=8,
eta_min=lr * 1e-5,
begin=0,
end=8,
by_epoch=True)]
```
## Customize train loop
By default, `EpochBasedTrainLoop` is used in `train_cfg` and validation is done after every train epoch, as follows.
```python
train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=12, val_begin=1, val_interval=1)
```
Actually, both [`IterBasedTrainLoop`](https://github.com/open-mmlab/mmengine/blob/main/mmengine/runner/loops.py#L183) and [`EpochBasedTrainLoop`](https://github.com/open-mmlab/mmengine/blob/main/mmengine/runner/loops.py#L18) support dynamic intervals; see the following example.
```python
# Before 365001th iteration, we do evaluation every 5000 iterations.
# After 365000th iteration, we do evaluation every 368750 iterations,
# which means that we do evaluation at the end of training.
interval = 5000
max_iters = 368750
dynamic_intervals = [(max_iters // interval * interval + 1, max_iters)]
train_cfg = dict(
type='IterBasedTrainLoop',
max_iters=max_iters,
val_interval=interval,
dynamic_intervals=dynamic_intervals)
```
## Customize hooks
### Customize self-implemented hooks
#### 1. Implement a new hook
MMEngine provides many useful [hooks](https://mmengine.readthedocs.io/en/latest/tutorials/hooks.html), but there are occasions when users need to implement a new hook. MMDetection has supported customized hooks in training since v3.0. Thus, users can implement a hook directly in mmdet or their mmdet-based codebase and use it by only modifying the training config.
Here we give an example of creating a new hook in mmdet and using it in training.
```python
from typing import Optional, Union

from mmengine.hooks import Hook
from mmdet.registry import HOOKS

DATA_BATCH = Optional[Union[dict, tuple, list]]


@HOOKS.register_module()
class MyHook(Hook):
    def __init__(self, a, b):
        ...
    def before_run(self, runner) -> None:
        ...
    def after_run(self, runner) -> None:
        ...
    def before_train(self, runner) -> None:
        ...
    def after_train(self, runner) -> None:
        ...
    def before_train_epoch(self, runner) -> None:
        ...
    def after_train_epoch(self, runner) -> None:
        ...
    def before_train_iter(self,
                          runner,
                          batch_idx: int,
                          data_batch: DATA_BATCH = None) -> None:
        ...
    def after_train_iter(self,
                         runner,
                         batch_idx: int,
                         data_batch: DATA_BATCH = None,
                         outputs: Optional[dict] = None) -> None:
        ...
```
Depending on the functionality of the hook, the users need to specify what the hook will do at each stage of the training in `before_run`, `after_run`, `before_train`, `after_train` , `before_train_epoch`, `after_train_epoch`, `before_train_iter`, and `after_train_iter`. There are more points where hooks can be inserted, refer to [base hook class](https://github.com/open-mmlab/mmengine/blob/main/mmengine/hooks/hook.py#L9) for more detail.
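As a concrete, illustrative example (the hook name and message below are made up, not part of MMDetection), a hook that logs a message every `interval` training iterations could look like this:

```python
from mmengine.hooks import Hook

from mmdet.registry import HOOKS


@HOOKS.register_module()
class PrintIterHook(Hook):
    """Illustrative hook: log a message every `interval` train iterations."""

    def __init__(self, interval: int = 50):
        self.interval = interval

    def after_train_iter(self, runner, batch_idx, data_batch=None, outputs=None):
        # `every_n_train_iters` is a helper provided by the base Hook class.
        if self.every_n_train_iters(runner, self.interval):
            runner.logger.info(f'finished iteration {runner.iter + 1}')
```

It could then be enabled with `custom_hooks = [dict(type='PrintIterHook', interval=100)]`.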
#### 2. Register the new hook
Then we need to make sure `MyHook` is imported. Assuming the file is `mmdet/engine/hooks/my_hook.py`, there are two ways to do that:
- Modify `mmdet/engine/hooks/__init__.py` to import it.
The newly defined module should be imported in `mmdet/engine/hooks/__init__.py` so that the registry will find the new module and add it:
```python
from .my_hook import MyHook
```
- Use `custom_imports` in the config to manually import it
```python
custom_imports = dict(imports=['mmdet.engine.hooks.my_hook'], allow_failed_imports=False)
```
#### 3. Modify the config
```python
custom_hooks = [
dict(type='MyHook', a=a_value, b=b_value)
]
```
You can also set the priority of the hook by setting the key `priority` to `'NORMAL'` or `'HIGHEST'`, as below:
```python
custom_hooks = [
dict(type='MyHook', a=a_value, b=b_value, priority='NORMAL')
]
```
By default the hook's priority is set as `NORMAL` during registration.
### Use hooks implemented in MMDetection
If the hook is already implemented in MMDetection, you can directly modify the config to use the hook as below.
#### Example: `NumClassCheckHook`
We implement a customized hook named [NumClassCheckHook](../../../mmdet/engine/hooks/num_class_check_hook.py) to check whether the `num_classes` in head matches the length of `classes` in the metainfo of `dataset`.
We set it in [default_runtime.py](../../../configs/_base_/default_runtime.py).
```python
custom_hooks = [dict(type='NumClassCheckHook')]
```
### Modify default runtime hooks
There are some common hooks that are registered through `default_hooks`, they are
- `IterTimerHook`: A hook that logs 'data_time' for loading data and 'time' for a model train step.
- `LoggerHook`: A hook that collects logs from different components of `Runner` and writes them to the terminal, JSON files, TensorBoard, wandb, etc.
- `ParamSchedulerHook`: A hook to update some hyper-parameters in optimizer, e.g., learning rate and momentum.
- `CheckpointHook`: A hook that saves checkpoints periodically.
- `DistSamplerSeedHook`: A hook that sets the seed for sampler and batch_sampler.
- `DetVisualizationHook`: A hook used to visualize the prediction results of validation and testing.
`IterTimerHook`, `ParamSchedulerHook` and `DistSamplerSeedHook` are simple and usually do not need to be modified, so here we describe what we can do with `LoggerHook`, `CheckpointHook` and `DetVisualizationHook`.
#### CheckpointHook
Besides saving checkpoints periodically, [`CheckpointHook`](https://github.com/open-mmlab/mmengine/blob/main/mmengine/hooks/checkpoint_hook.py#L19) provides other options such as `max_keep_ckpts` and `save_optimizer`. The users could set `max_keep_ckpts` to keep only a small number of checkpoints, or decide whether to store the optimizer's state dict with `save_optimizer`. More details of the arguments are [here](https://github.com/open-mmlab/mmengine/blob/main/mmengine/hooks/checkpoint_hook.py#L19).
```python
default_hooks = dict(
checkpoint=dict(
type='CheckpointHook',
interval=1,
max_keep_ckpts=3,
save_optimizer=True))
```
#### LoggerHook
The `LoggerHook` enables setting the logging interval. The detailed usage can be found in the [docstring](https://github.com/open-mmlab/mmengine/blob/main/mmengine/hooks/logger_hook.py#L18).
```python
default_hooks = dict(logger=dict(type='LoggerHook', interval=50))
```
#### DetVisualizationHook
`DetVisualizationHook` uses `DetLocalVisualizer` to visualize prediction results, and `DetLocalVisualizer` currently supports different backends, e.g., `TensorboardVisBackend` and `WandbVisBackend` (see the [docstring](https://github.com/open-mmlab/mmengine/blob/main/mmengine/visualization/vis_backend.py) for more detail). The users could add multiple backends to do visualization, as follows.
```python
default_hooks = dict(
visualization=dict(type='DetVisualizationHook', draw=True))
vis_backends = [dict(type='LocalVisBackend'),
dict(type='TensorboardVisBackend')]
visualizer = dict(
type='DetLocalVisualizer', vis_backends=vis_backends, name='visualizer')
```
# Customize Data Pipelines
1. Write a new transform in a file, e.g., in `my_pipeline.py`. It takes a dict as input and returns a dict.
```python
import random

from mmcv.transforms import BaseTransform

from mmdet.registry import TRANSFORMS


@TRANSFORMS.register_module()
class MyTransform(BaseTransform):
    """Add your transform

    Args:
        prob (float): Probability of shifts. Default 0.5.
    """

    def __init__(self, prob=0.5):
        self.prob = prob

    def transform(self, results):
        if random.random() > self.prob:
            results['dummy'] = True
        return results
```
2. Import and use the pipeline in your config file.
Make sure the import is relative to where your train script is located.
```python
custom_imports = dict(imports=['path.to.my_pipeline'], allow_failed_imports=False)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations', with_bbox=True),
dict(type='Resize', scale=(1333, 800), keep_ratio=True),
dict(type='RandomFlip', prob=0.5),
dict(type='MyTransform', prob=0.2),
dict(type='PackDetInputs')
]
```
3. Visualize the output of your transforms pipeline
To visualize the output of your transforms pipeline, `tools/misc/browse_dataset.py`
can help the user browse a detection dataset (both images and bounding box annotations)
visually, or save the images to a designated directory. More details can be found in the
[visualization documentation](../user_guides/visualization.md).