# Copyright (c) OpenMMLab. All rights reserved.
import argparse
from typing import Tuple

import cv2
import mmcv
import numpy as np
import torch
import torch.nn as nn
from mmcv.transforms import Compose
from mmengine.utils import track_iter_progress

from mmdet.apis import init_detector
from mmdet.registry import VISUALIZERS
from mmdet.structures import DetDataSample

try:
    import ffmpegcv
except ImportError:
    raise ImportError(
        'Please install ffmpegcv with:\n\n pip install ffmpegcv')


def parse_args():
    parser = argparse.ArgumentParser(
        description='MMDetection video demo with GPU acceleration')
    parser.add_argument('video', help='Video file')
    parser.add_argument('config', help='Config file')
    parser.add_argument('checkpoint', help='Checkpoint file')
    parser.add_argument(
        '--device', default='cuda:0', help='Device used for inference')
    parser.add_argument(
        '--score-thr', type=float, default=0.3, help='Bbox score threshold')
    parser.add_argument('--out', type=str, help='Output video file')
    parser.add_argument('--show', action='store_true', help='Show video')
    parser.add_argument(
        '--nvdecode', action='store_true', help='Use NVIDIA decoder')
    parser.add_argument(
        '--wait-time',
        type=float,
        default=1,
        help='The interval of show (s), 0 is block')
    args = parser.parse_args()
    return args


def prefetch_batch_input_shape(model: nn.Module,
                               ori_wh: Tuple[int, int]) -> dict:
    cfg = model.cfg
    w, h = ori_wh
    cfg.test_dataloader.dataset.pipeline[0].type = 'LoadImageFromNDArray'
    test_pipeline = Compose(cfg.test_dataloader.dataset.pipeline)
    data = {'img': np.zeros((h, w, 3), dtype=np.uint8), 'img_id': 0}
    data = test_pipeline(data)
    data['inputs'] = [data['inputs']]
    data['data_samples'] = [data['data_samples']]
    data_sample = model.data_preprocessor(data, False)['data_samples']
    batch_input_shape = data_sample[0].batch_input_shape
    return batch_input_shape


def pack_data(frame_resize: np.ndarray, batch_input_shape: Tuple[int, int],
              ori_shape: Tuple[int, int]) -> dict:
    assert frame_resize.shape[:2] == batch_input_shape
    data_sample = DetDataSample()
    data_sample.set_metainfo({
        'img_shape':
        batch_input_shape,
        'ori_shape':
        ori_shape,
        'scale_factor': (batch_input_shape[0] / ori_shape[0],
                         batch_input_shape[1] / ori_shape[1])
    })
    frame_resize = torch.from_numpy(frame_resize).permute((2, 0, 1)).cuda()
    data = {'inputs': [frame_resize], 'data_samples': [data_sample]}
    return data


def main():
    args = parse_args()
    assert args.out or args.show, \
        ('Please specify at least one operation (save/show the '
         'video) with the argument "--out" or "--show"')

    model = init_detector(args.config, args.checkpoint, device=args.device)

    # init visualizer
    visualizer = VISUALIZERS.build(model.cfg.visualizer)
    # the dataset_meta is loaded from the checkpoint and
    # then pass to the model in init_detector
    visualizer.dataset_meta = model.dataset_meta

    if args.nvdecode:
        VideoCapture = ffmpegcv.VideoCaptureNV
    else:
        VideoCapture = ffmpegcv.VideoCapture
    video_origin = VideoCapture(args.video)

    batch_input_shape = prefetch_batch_input_shape(
        model, (video_origin.width, video_origin.height))
    ori_shape = (video_origin.height, video_origin.width)
    resize_wh = batch_input_shape[::-1]
    video_resize = VideoCapture(
        args.video,
        resize=resize_wh,
        resize_keepratio=True,
        resize_keepratioalign='topleft')

    video_writer = None
    if args.out:
        video_writer = ffmpegcv.VideoWriter(args.out, fps=video_origin.fps)

    with torch.no_grad():
        for i, (frame_resize, frame_origin) in enumerate(
                zip(track_iter_progress(video_resize), video_origin)):
            data = pack_data(frame_resize, batch_input_shape, ori_shape)
            result = model.test_step(data)[0]

            visualizer.add_datasample(
                name='video',
                image=frame_origin,
                data_sample=result,
                draw_gt=False,
                show=False,
                pred_score_thr=args.score_thr)

            frame_mask = visualizer.get_image()

            if args.show:
                cv2.namedWindow('video', 0)
                mmcv.imshow(frame_mask, 'video', args.wait_time)
            if args.out:
                video_writer.write(frame_mask)

    if video_writer:
        video_writer.release()
    video_origin.release()
    video_resize.release()

    cv2.destroyAllWindows()


if __name__ == '__main__':
    main()
# Copyright (c) OpenMMLab. All rights reserved.
import argparse

import cv2
import mmcv
import torch

from mmdet.apis import inference_detector, init_detector
from mmdet.registry import VISUALIZERS


def parse_args():
    parser = argparse.ArgumentParser(description='MMDetection webcam demo')
    parser.add_argument('config', help='test config file path')
    parser.add_argument('checkpoint', help='checkpoint file')
    parser.add_argument(
        '--device', type=str, default='cuda:0', help='CPU/CUDA device option')
    parser.add_argument(
        '--camera-id', type=int, default=0, help='camera device id')
    parser.add_argument(
        '--score-thr', type=float, default=0.5, help='bbox score threshold')
    args = parser.parse_args()
    return args


def main():
    args = parse_args()

    # build the model from a config file and a checkpoint file
    device = torch.device(args.device)
    model = init_detector(args.config, args.checkpoint, device=device)

    # init visualizer
    visualizer = VISUALIZERS.build(model.cfg.visualizer)
    # the dataset_meta is loaded from the checkpoint and
    # then pass to the model in init_detector
    visualizer.dataset_meta = model.dataset_meta

    camera = cv2.VideoCapture(args.camera_id)

    print('Press "Esc", "q" or "Q" to exit.')
    while True:
        ret_val, img = camera.read()
        result = inference_detector(model, img)

        img = mmcv.imconvert(img, 'bgr', 'rgb')
        visualizer.add_datasample(
            name='result',
            image=img,
            data_sample=result,
            draw_gt=False,
            pred_score_thr=args.score_thr,
            show=False)

        img = visualizer.get_image()
        img = mmcv.imconvert(img, 'bgr', 'rgb')
        cv2.imshow('result', img)

        ch = cv2.waitKey(1)
        if ch == 27 or ch == ord('q') or ch == ord('Q'):
            break


if __name__ == '__main__':
    main()
ARG PYTORCH="1.9.0"
ARG CUDA="11.1"
ARG CUDNN="8"
FROM pytorch/pytorch:${PYTORCH}-cuda${CUDA}-cudnn${CUDNN}-devel
ENV TORCH_CUDA_ARCH_LIST="6.0 6.1 7.0 7.5 8.0 8.6+PTX" \
TORCH_NVCC_FLAGS="-Xfatbin -compress-all" \
CMAKE_PREFIX_PATH="$(dirname $(which conda))/../" \
FORCE_CUDA="1"
# Avoid Public GPG key error
# https://github.com/NVIDIA/nvidia-docker/issues/1631
RUN rm /etc/apt/sources.list.d/cuda.list \
&& rm /etc/apt/sources.list.d/nvidia-ml.list \
&& apt-key del 7fa2af80 \
&& apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub \
&& apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/7fa2af80.pub
# (Optional, use Mirror to speed up downloads)
# RUN sed -i 's/http:\/\/archive.ubuntu.com\/ubuntu\//http:\/\/mirrors.aliyun.com\/ubuntu\//g' /etc/apt/sources.list && \
# pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple
# Install the required packages
RUN apt-get update \
&& apt-get install -y ffmpeg libsm6 libxext6 git ninja-build libglib2.0-0 libsm6 libxrender-dev libxext6 \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*
# Install MMEngine and MMCV
RUN pip install openmim && \
mim install "mmengine>=0.7.1" "mmcv>=2.0.0rc4"
# Install MMDetection
RUN conda clean --all \
&& git clone https://github.com/open-mmlab/mmdetection.git /mmdetection \
&& cd /mmdetection \
&& pip install --no-cache-dir -e .
WORKDIR /mmdetection
ARG PYTORCH="1.9.0"
ARG CUDA="11.1"
ARG CUDNN="8"
FROM pytorch/pytorch:${PYTORCH}-cuda${CUDA}-cudnn${CUDNN}-devel
ARG MMCV="2.0.0rc4"
ARG MMDET="3.3.0"
ENV PYTHONUNBUFFERED TRUE
# Avoid Public GPG key error
# https://github.com/NVIDIA/nvidia-docker/issues/1631
RUN rm /etc/apt/sources.list.d/cuda.list \
&& rm /etc/apt/sources.list.d/nvidia-ml.list \
&& apt-key del 7fa2af80 \
&& apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub \
&& apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/7fa2af80.pub
# (Optional, use Mirror to speed up downloads)
# RUN sed -i 's/http:\/\/archive.ubuntu.com\/ubuntu\//http:\/\/mirrors.aliyun.com\/ubuntu\//g' /etc/apt/sources.list
# Install the required packages
RUN apt-get update && \
DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \
ca-certificates \
g++ \
openjdk-11-jre-headless \
# MMDet Requirements
ffmpeg libsm6 libxext6 git ninja-build libglib2.0-0 libsm6 libxrender-dev libxext6 \
&& rm -rf /var/lib/apt/lists/*
ENV PATH="/opt/conda/bin:$PATH" \
FORCE_CUDA="1"
# TORCHSERVE
RUN pip install torchserve torch-model-archiver
# MMLAB
ARG PYTORCH
ARG CUDA
RUN pip install mmengine
RUN ["/bin/bash", "-c", "pip install mmcv==${MMCV} -f https://download.openmmlab.com/mmcv/dist/cu${CUDA//./}/torch${PYTORCH}/index.html"]
RUN pip install mmdet==${MMDET}
RUN useradd -m model-server \
&& mkdir -p /home/model-server/tmp
COPY entrypoint.sh /usr/local/bin/entrypoint.sh
RUN chmod +x /usr/local/bin/entrypoint.sh \
&& chown -R model-server /home/model-server
COPY config.properties /home/model-server/config.properties
RUN mkdir /home/model-server/model-store && chown -R model-server /home/model-server/model-store
EXPOSE 8080 8081 8082
USER model-server
WORKDIR /home/model-server
ENV TEMP=/home/model-server/tmp
ENTRYPOINT ["/usr/local/bin/entrypoint.sh"]
CMD ["serve"]
inference_address=http://0.0.0.0:8080
management_address=http://0.0.0.0:8081
metrics_address=http://0.0.0.0:8082
model_store=/home/model-server/model-store
load_models=all
#!/bin/bash
set -e
if [[ "$1" = "serve" ]]; then
shift 1
torchserve --start --ts-config /home/model-server/config.properties
else
eval "$@"
fi
# prevent docker exit
tail -f /dev/null
ARG PYTORCH="1.9.0"
ARG CUDA="11.1"
ARG CUDNN="8"
FROM pytorch/pytorch:${PYTORCH}-cuda${CUDA}-cudnn${CUDNN}-devel
ARG MMCV="2.0.0rc4"
ARG MMDET="3.3.0"
ENV PYTHONUNBUFFERED TRUE
# Avoid Public GPG key error
# - https://github.com/NVIDIA/nvidia-docker/issues/1631
RUN rm /etc/apt/sources.list.d/cuda.list \
&& rm /etc/apt/sources.list.d/nvidia-ml.list \
&& apt-get update \
&& apt-get install -y wget \
&& rm -rf /var/lib/apt/lists/* \
&& apt-key del 7fa2af80 \
&& apt-get update && apt-get install -y --no-install-recommends wget \
&& wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/cuda-keyring_1.0-1_all.deb \
&& dpkg -i cuda-keyring_1.0-1_all.deb
# (Optional, use Mirror to speed up downloads)
# RUN sed -i 's/http:\/\/archive.ubuntu.com\/ubuntu\//http:\/\/mirrors.aliyun.com\/ubuntu\//g' /etc/apt/sources.list
# Install the required packages
RUN apt-get update && \
DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \
ca-certificates \
g++ \
openjdk-11-jre-headless \
# MMDet Requirements
ffmpeg libsm6 libxext6 git ninja-build libglib2.0-0 libsm6 libxrender-dev libxext6 \
&& rm -rf /var/lib/apt/lists/*
ENV PATH="/opt/conda/bin:$PATH" \
FORCE_CUDA="1"
# TORCHSERVE
RUN pip install torchserve torch-model-archiver nvgpu -i https://pypi.mirrors.ustc.edu.cn/simple/
# MMLAB
ARG PYTORCH
ARG CUDA
RUN pip install mmengine -i https://pypi.mirrors.ustc.edu.cn/simple/
RUN ["/bin/bash", "-c", "pip install mmcv==${MMCV} -f https://download.openmmlab.com/mmcv/dist/cu${CUDA//./}/torch${PYTORCH}/index.html"]
RUN pip install mmdet==${MMDET} -i https://pypi.mirrors.ustc.edu.cn/simple/
RUN useradd -m model-server \
&& mkdir -p /home/model-server/tmp
COPY entrypoint.sh /usr/local/bin/entrypoint.sh
RUN chmod +x /usr/local/bin/entrypoint.sh \
&& chown -R model-server /home/model-server
COPY config.properties /home/model-server/config.properties
RUN mkdir /home/model-server/model-store && chown -R model-server /home/model-server/model-store
EXPOSE 8080 8081 8082
USER model-server
WORKDIR /home/model-server
ENV TEMP=/home/model-server/tmp
ENTRYPOINT ["/usr/local/bin/entrypoint.sh"]
CMD ["serve"]
# Minimal makefile for Sphinx documentation
#
# You can set these variables from the command line, and also
# from the environment for the first two.
SPHINXOPTS ?=
SPHINXBUILD ?= sphinx-build
SOURCEDIR = .
BUILDDIR = _build
# Put it first so that "make" without argument is like "make help".
help:
	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
.PHONY: help Makefile
# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
.header-logo {
background-image: url("../image/mmdet-logo.png");
background-size: 156px 40px;
height: 40px;
width: 156px;
}
# Conventions
Please check the following conventions if you would like to modify MMDetection for your own project.
## About the order of image shape
In OpenMMLab 2.0, to be consistent with the input arguments of OpenCV, the arguments about image shape in the data transformation pipeline are always in `(width, height)` order. In contrast, for computational convenience, the shape fields that flow through the data pipeline and the model are in `(height, width)` order. Specifically, in the results processed by each data transform, the fields and their meanings are as below:
- img_shape: (height, width)
- ori_shape: (height, width)
- pad_shape: (height, width)
- batch_input_shape: (height, width)
As an example, the initialization arguments of `Mosaic` are as below:
```python
@TRANSFORMS.register_module()
class Mosaic(BaseTransform):

    def __init__(self,
                 img_scale: Tuple[int, int] = (640, 640),
                 center_ratio_range: Tuple[float, float] = (0.5, 1.5),
                 bbox_clip_border: bool = True,
                 pad_val: float = 114.0,
                 prob: float = 1.0) -> None:
        ...

        # img_scale order should be (width, height)
        self.img_scale = img_scale

    def transform(self, results: dict) -> dict:
        ...

        results['img'] = mosaic_img
        # (height, width)
        results['img_shape'] = mosaic_img.shape[:2]
```
## Loss
In MMDetection, a `dict` containing losses and metrics will be returned by `model(**data)`.
For example, in bbox head,
```python
class BBoxHead(nn.Module):
    ...

    def loss(self, ...):
        losses = dict()
        # classification loss
        losses['loss_cls'] = self.loss_cls(...)
        # classification accuracy
        losses['acc'] = accuracy(...)
        # bbox regression loss
        losses['loss_bbox'] = self.loss_bbox(...)
        return losses
```
`bbox_head.loss()` will be called during the model forward pass.
The returned dict contains `'loss_bbox'`, `'loss_cls'` and `'acc'`.
Only `'loss_bbox'` and `'loss_cls'` are used during back-propagation;
`'acc'` is only used as a metric to monitor the training process.
By default, only the values whose keys contain `'loss'` will be back-propagated.
This behavior can be changed by modifying `BaseDetector.train_step()`.
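To illustrate this convention, here is a minimal sketch (not the actual MMDetection implementation) of how only the `'loss'`-keyed values are combined for back-propagation:

```python
import torch

losses = {
    'loss_cls': torch.tensor(0.8),
    'loss_bbox': torch.tensor(0.4),
    'acc': torch.tensor(92.5),  # metric only, never back-propagated
}

# Only values whose keys contain 'loss' contribute to the total loss
# that is back-propagated; 'acc' is just logged.
total_loss = sum(v for k, v in losses.items() if 'loss' in k)
print(total_loss)  # tensor(1.2000)
```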
## Empty Proposals
In MMDetection, we have added special handling and unit tests for empty proposals in two-stage detectors. We need to handle empty proposals for both the entire batch and individual images at the same time. For example, in CascadeRoIHead,
```python
# simple_test method
...
# There is no proposal in the whole batch
if rois.shape[0] == 0:
    bbox_results = [[
        np.zeros((0, 5), dtype=np.float32)
        for _ in range(self.bbox_head[-1].num_classes)
    ]] * num_imgs

    if self.with_mask:
        mask_classes = self.mask_head[-1].num_classes
        segm_results = [[[] for _ in range(mask_classes)]
                        for _ in range(num_imgs)]
        results = list(zip(bbox_results, segm_results))
    else:
        results = bbox_results
    return results
...

# There is no proposal in the single image
for i in range(self.num_stages):
    ...
    if i < self.num_stages - 1:
        for j in range(num_imgs):
            # Handle empty proposal
            if rois[j].shape[0] > 0:
                bbox_label = cls_score[j][:, :-1].argmax(dim=1)
                refine_roi = self.bbox_head[i].regress_by_class(
                    rois[j], bbox_label, bbox_pred[j], img_metas[j])
                refine_roi_list.append(refine_roi)
```
If you have customized `RoIHead`, you can refer to the above method to deal with empty proposals.
## COCO Panoptic Dataset

In MMDetection, we support the COCO Panoptic dataset. Here we clarify a few conventions about the implementation of `CocoPanopticDataset`.

1. For mmdet\<=2.16.0, the range of foreground and background labels in semantic segmentation is different from the default setting of MMDetection. The label `0` stands for `VOID` and the category labels start from `1`.
   Since mmdet=2.17.0, the category labels of semantic segmentation start from `0` and label `255` stands for `VOID`, for consistency with the labels of bounding boxes.
   To achieve that, the `Pad` pipeline supports setting the padding value for `seg` (see the sketch after this list).
2. In the evaluation, the panoptic result is a map with the same shape as the original image. Each value in the result map has the format of `instance_id * INSTANCE_OFFSET + category_id`.
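For reference, a `Pad` transform that fills the image and the semantic segmentation map separately can be sketched as below; the keys follow `mmcv.transforms.Pad`, and the concrete values are illustrative:

```python
# Pads the image with 114 and the segmentation map with the VOID label 255.
dict(type='Pad', size_divisor=32, pad_val=dict(img=(114, 114, 114), seg=255))
```

Likewise, a value of the panoptic result map can be decoded back into its category and instance. `INSTANCE_OFFSET` is a constant defined in MMDetection (its module path has moved between versions), and the numbers below are illustrative only:

```python
INSTANCE_OFFSET = 1000  # illustrative; use the constant shipped with MMDetection

pan_value = 3 * INSTANCE_OFFSET + 16        # instance_id = 3, category_id = 16
category_id = pan_value % INSTANCE_OFFSET   # -> 16
instance_id = pan_value // INSTANCE_OFFSET  # -> 3
```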
# Customize Datasets
## Support new data format
To support a new data format, you can either convert it to existing formats (COCO format or PASCAL VOC format) or convert it directly to the middle format. You can also choose to do the conversion offline (before training, with a script) or online (implement a new dataset class and convert at training time). In MMDetection, we recommend converting the data into COCO format and doing the conversion offline, so that you only need to modify the config's data annotation paths and classes after the conversion.
### Reorganize new data formats to existing format
The simplest way is to convert your dataset to existing dataset formats (COCO or PASCAL VOC).
The annotation JSON files in COCO format have the following necessary keys:
```python
'images': [
{
'file_name': 'COCO_val2014_000000001268.jpg',
'height': 427,
'width': 640,
'id': 1268
},
...
],
'annotations': [
{
'segmentation': [[192.81,
247.09,
...
219.03,
249.06]], # If you have mask labels, and it is in polygon XY point coordinate format, you need to ensure that at least 3 point coordinates are included. Otherwise, it is an invalid polygon.
'area': 1035.749,
'iscrowd': 0,
'image_id': 1268,
'bbox': [192.81, 224.8, 74.73, 33.43],
'category_id': 16,
'id': 42986
},
...
],
'categories': [
{'id': 0, 'name': 'car'},
]
```
There are three necessary keys in the JSON file:
- `images`: contains a list of images with their information like `file_name`, `height`, `width`, and `id`.
- `annotations`: contains the list of instance annotations.
- `categories`: contains the list of categories names and their ID.
After the data pre-processing, there are two steps for users to train the customized new dataset with existing format (e.g. COCO format):
1. Modify the config file for using the customized dataset.
2. Check the annotations of the customized dataset.
Here we give an example to show the above two steps, which uses a customized dataset of 5 classes with COCO format to train an existing Cascade Mask R-CNN R50-FPN detector.
#### 1. Modify the config file for using the customized dataset
There are two aspects involved in the modification of config file:
1. The dataloader fields. Specifically, you need to explicitly add the `metainfo=dict(classes=classes)` field to `train_dataloader.dataset`, `val_dataloader.dataset` and `test_dataloader.dataset`, and `classes` must be a tuple.
2. The `num_classes` field in the `model` part. Explicitly overwrite all the `num_classes` fields from the default value (e.g. 80 in COCO) to the number of classes in your dataset.
In `configs/my_custom_config.py`:
```python
# the new config inherits the base configs to highlight the necessary modification
_base_ = './cascade_mask_rcnn_r50_fpn_1x_coco.py'
# 1. dataset settings
dataset_type = 'CocoDataset'
classes = ('a', 'b', 'c', 'd', 'e')
data_root='path/to/your/'
train_dataloader = dict(
batch_size=2,
num_workers=2,
dataset=dict(
type=dataset_type,
# explicitly add your class names to the field `metainfo`
metainfo=dict(classes=classes),
data_root=data_root,
ann_file='train/annotation_data',
data_prefix=dict(img='train/image_data')
)
)
val_dataloader = dict(
batch_size=1,
num_workers=2,
dataset=dict(
type=dataset_type,
test_mode=True,
# explicitly add your class names to the field `metainfo`
metainfo=dict(classes=classes),
data_root=data_root,
ann_file='val/annotation_data',
data_prefix=dict(img='val/image_data')
)
)
test_dataloader = dict(
batch_size=1,
num_workers=2,
dataset=dict(
type=dataset_type,
test_mode=True,
# explicitly add your class names to the field `metainfo`
metainfo=dict(classes=classes),
data_root=data_root,
ann_file='test/annotation_data',
data_prefix=dict(img='test/image_data')
)
)
# 2. model settings
# explicitly over-write all the `num_classes` field from default 80 to 5.
model = dict(
roi_head=dict(
bbox_head=[
dict(
type='Shared2FCBBoxHead',
# explicitly over-write all the `num_classes` field from default 80 to 5.
num_classes=5),
dict(
type='Shared2FCBBoxHead',
# explicitly over-write all the `num_classes` field from default 80 to 5.
num_classes=5),
dict(
type='Shared2FCBBoxHead',
# explicitly over-write all the `num_classes` field from default 80 to 5.
num_classes=5)],
# explicitly over-write all the `num_classes` field from default 80 to 5.
mask_head=dict(num_classes=5)))
```
#### 2. Check the annotations of the customized dataset
Assuming your customized dataset is in COCO format, make sure you have the correct annotations:
1. The length of the `categories` field in the annotations should exactly equal the tuple length of the `classes` field in your config: it is the number of classes (e.g. 5 in this example).
2. The `classes` field in your config should have exactly the same elements, in the same order, as the `name` values in `categories` of the annotations. MMDetection automatically maps the discontinuous `id` values in `categories` to continuous label indices, so the string order of `name` in `categories` affects the order of label indices. Meanwhile, the string order of `classes` in the config affects the label text during visualization of predicted bounding boxes.
3. The `category_id` in `annotations` field should be valid, i.e., all values in `category_id` should belong to `id` in `categories`.
Here is a valid example of annotations:
```python
'annotations': [
{
'segmentation': [[192.81,
247.09,
...
219.03,
249.06]], # if you have mask labels
'area': 1035.749,
'iscrowd': 0,
'image_id': 1268,
'bbox': [192.81, 224.8, 74.73, 33.43],
'category_id': 16,
'id': 42986
},
...
],
# MMDetection automatically maps the uncontinuous `id` to the continuous label indices.
'categories': [
{'id': 1, 'name': 'a'}, {'id': 3, 'name': 'b'}, {'id': 4, 'name': 'c'}, {'id': 16, 'name': 'd'}, {'id': 17, 'name': 'e'},
]
```
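To make the mapping concrete, here is a small sketch (not MMDetection code) of how the discontinuous ids above would map to continuous label indices:

```python
categories = [{'id': 1, 'name': 'a'}, {'id': 3, 'name': 'b'},
              {'id': 4, 'name': 'c'}, {'id': 16, 'name': 'd'},
              {'id': 17, 'name': 'e'}]
cat2label = {cat['id']: idx for idx, cat in enumerate(categories)}
# cat2label == {1: 0, 3: 1, 4: 2, 16: 3, 17: 4}
# e.g. an annotation with 'category_id': 16 is trained with label index 3 ('d').
```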
We use this approach to support the CityScapes dataset. The script is in [cityscapes.py](../../../tools/dataset_converters/cityscapes.py) and we also provide the finetuning [configs](../../../configs/cityscapes).
**Note**
1. For instance segmentation datasets, **MMDetection only supports evaluating the mask AP of datasets in COCO format for now**.
2. It is recommended to convert the data offline before training, so that you can still use `CocoDataset` and only need to modify the annotation paths and the training classes.
### Reorganize new data format to middle format
It is also fine if you do not want to convert the annotation format to COCO or PASCAL format.
Actually, we define a simple annotation format in MMEngine's [BaseDataset](https://github.com/open-mmlab/mmengine/blob/main/mmengine/dataset/base_dataset.py#L116), and all existing datasets are
processed to be compatible with it, either online or offline.
The annotation file must be in `json`, `yaml`/`yml`, or `pickle`/`pkl` format, and the dictionary stored in it must contain two fields: `metainfo` and `data_list`. `metainfo` is a dictionary that contains the metadata of the dataset, such as class information; `data_list` is a list whose elements are dictionaries, each defining the raw data of one image, and each raw data entry contains one or several training/testing samples.
Here is an example.
```python
{
'metainfo':
{
'classes': ('person', 'bicycle', 'car', 'motorcycle'),
...
},
'data_list':
[
{
"img_path": "xxx/xxx_1.jpg",
"height": 604,
"width": 640,
"instances":
[
{
"bbox": [0, 0, 10, 20],
"bbox_label": 1,
"ignore_flag": 0
},
{
"bbox": [10, 10, 110, 120],
"bbox_label": 2,
"ignore_flag": 0
}
]
},
{
"img_path": "xxx/xxx_2.jpg",
"height": 320,
"width": 460,
"instances":
[
{
"bbox": [10, 0, 20, 20],
"bbox_label": 3,
"ignore_flag": 1,
}
]
},
...
]
}
```
Some datasets may provide annotations such as crowd/difficult/ignored bboxes; we use `ignore_flag` to cover them.
After obtaining the above standard data annotation format, you can directly use [BaseDetDataset](../../../mmdet/datasets/base_det_dataset.py#L13) of MMDetection in the configuration, without conversion.
### An example of customized dataset
Assume the annotation is in a new format in text files.
The bounding box annotations are stored in the text file `annotation.txt` as follows:
```
#
000001.jpg
1280 720
2
10 20 40 60 1
20 40 50 60 2
#
000002.jpg
1280 720
3
50 20 40 60 2
20 40 30 45 2
30 40 50 60 3
```
We can create a new dataset in `mmdet/datasets/my_dataset.py` to load the data.
```python
import mmengine

from mmdet.datasets.base_det_dataset import BaseDetDataset
from mmdet.registry import DATASETS


@DATASETS.register_module()
class MyDataset(BaseDetDataset):

    METAINFO = {
        'classes': ('person', 'bicycle', 'car', 'motorcycle'),
        'palette': [(220, 20, 60), (119, 11, 32), (0, 0, 142), (0, 0, 230)]
    }

    def load_data_list(self, ann_file):
        ann_list = mmengine.list_from_file(ann_file)

        data_infos = []
        for i, ann_line in enumerate(ann_list):
            if ann_line != '#':
                continue

            img_shape = ann_list[i + 2].split(' ')
            width = int(img_shape[0])
            height = int(img_shape[1])
            bbox_number = int(ann_list[i + 3])

            instances = []
            for anns in ann_list[i + 4:i + 4 + bbox_number]:
                instance = {}
                instance['bbox'] = [float(ann) for ann in anns.split(' ')[:4]]
                instance['bbox_label'] = int(anns.split(' ')[4])
                instances.append(instance)

            data_infos.append(
                dict(
                    img_path=ann_list[i + 1],
                    img_id=i,
                    width=width,
                    height=height,
                    instances=instances))

        return data_infos
```
Then, to use `MyDataset` in the config, you can modify it as follows:
```python
dataset_A_train = dict(
type='MyDataset',
ann_file = 'image_list.txt',
pipeline=train_pipeline
)
```
## Customize datasets by dataset wrappers
MMEngine also supports many dataset wrappers to mix the dataset or modify the dataset distribution for training.
Currently it supports three dataset wrappers, as below:
- `RepeatDataset`: simply repeat the whole dataset.
- `ClassBalancedDataset`: repeat dataset in a class balanced manner.
- `ConcatDataset`: concat datasets.
For detailed usage, see [MMEngine Dataset Wrapper](#TODO).
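For example, repeating a training dataset can be configured roughly as below; this is a sketch, the wrapped dataset fields are illustrative, and `times` controls how often the dataset is repeated per epoch:

```python
train_dataloader = dict(
    dataset=dict(
        type='RepeatDataset',
        times=3,  # repeat the wrapped dataset 3 times
        dataset=dict(
            type='CocoDataset',
            ann_file='train/annotation_data',
            data_prefix=dict(img='train/image_data'),
            pipeline=train_pipeline)))
```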
## Modify Dataset Classes
With existing dataset types, we can modify their metainfo to train on a subset of the annotations.
For example, if you want to train only three classes of the current dataset,
you can modify the classes of dataset.
The dataset will filter out the ground truth boxes of other classes automatically.
```python
classes = ('person', 'bicycle', 'car')
train_dataloader = dict(
dataset=dict(
metainfo=dict(classes=classes))
)
val_dataloader = dict(
dataset=dict(
metainfo=dict(classes=classes))
)
test_dataloader = dict(
dataset=dict(
metainfo=dict(classes=classes))
)
```
**Note**:
- Before MMDetection v2.5.0, the dataset would automatically filter out images with empty GT if the classes were set, and there was no way to disable that through the config. This was undesirable and confusing, because if the classes were not set, the dataset only filtered the empty-GT images when `filter_empty_gt=True` and `test_mode=False`. Since MMDetection v2.5.0, we have decoupled the image filtering from the classes modification, i.e., the dataset will only filter empty-GT images when `filter_cfg=dict(filter_empty_gt=True)` and `test_mode=False`, no matter whether the classes are set. Thus, setting the classes only influences the annotation classes used for training, and users can decide whether to filter empty-GT images themselves.
- When directly using `BaseDataset` in MMEngine or `BaseDetDataset` in MMDetection, users cannot filter images without GT by modifying the configuration, but it can be solved in an offline way.
- Please remember to modify the `num_classes` in the head when specifying `classes` in the dataset (see the sketch after this list). Since v2.9.0 (after PR #4508), we have implemented [NumClassCheckHook](../../../mmdet/engine/hooks/num_class_check_hook.py) to check whether the numbers are consistent.
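As a minimal sketch of keeping the two in sync (the `bbox_head` key below is illustrative and depends on your model architecture):

```python
classes = ('person', 'bicycle', 'car')
# A hypothetical single-stage model config; two-stage models set
# num_classes under roi_head.bbox_head / mask_head instead.
model = dict(bbox_head=dict(num_classes=len(classes)))  # 3
```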
## COCO Panoptic Dataset
Now we support the COCO Panoptic Dataset; the format of panoptic annotations is different from the COCO format.
Both the foreground and the background will exist in the annotation file.
The annotation JSON files in COCO Panoptic format have the following necessary keys:
```python
'images': [
{
'file_name': '000000001268.jpg',
'height': 427,
'width': 640,
'id': 1268
},
...
]
'annotations': [
{
'filename': '000000001268.jpg',
'image_id': 1268,
'segments_info': [
{
'id':8345037, # One-to-one correspondence with the id in the annotation map.
'category_id': 51,
'iscrowd': 0,
'bbox': (x1, y1, w, h), # The bbox of the background is the outer rectangle of its mask.
'area': 24315
},
...
]
},
...
]
'categories': [ # including both foreground categories and background categories
{'id': 0, 'name': 'person'},
...
]
```
Moreover, the `seg` prefix must be set to the path of the panoptic annotation images.
```python
dataset_type = 'CocoPanopticDataset'
data_root='path/to/your/'
train_dataloader = dict(
dataset=dict(
type=dataset_type,
data_root=data_root,
data_prefix=dict(
img='train/image_data/', seg='train/panoptic/image_annotation_data/')
)
)
val_dataloader = dict(
dataset=dict(
type=dataset_type,
data_root=data_root,
data_prefix=dict(
img='val/image_data/', seg='val/panoptic/image_annotation_data/')
)
)
test_dataloader = dict(
dataset=dict(
type=dataset_type,
data_root=data_root,
data_prefix=dict(
img='test/image_data/', seg='test/panoptic/image_annotation_data/')
)
)
```
# Customize Losses
MMDetection provides users with different loss functions. However, the default configuration may not be applicable to different datasets or models, so users may want to modify a specific loss to adapt it to a new situation.
This tutorial first elaborates on the computation pipeline of losses, then gives some instructions on how to modify each step. The modifications can be categorized as tweaking and weighting.
## Computation pipeline of a loss
Given the input prediction and target, as well as the weights, a loss function maps the input tensor to the final loss scalar. The mapping can be divided into five steps:
1. Set the sampling method to sample positive and negative samples.
2. Get the **element-wise** or **sample-wise** loss from the loss kernel function.
3. Weight the loss **element-wise** with a weight tensor.
4. Reduce the loss tensor to a **scalar**.
5. Weight the loss with a **scalar**.
## Set sampling method (step 1)
For some loss functions, sampling strategies are needed to avoid imbalance between positive and negative samples.
For example, when using `CrossEntropyLoss` in RPN head, we need to set `RandomSampler` in `train_cfg`
```python
train_cfg=dict(
    rpn=dict(
        sampler=dict(
            type='RandomSampler',
            num=256,
            pos_fraction=0.5,
            neg_pos_ub=-1,
            add_gt_as_proposals=False)))
```
For other losses which have their own positive and negative sample balancing mechanism, such as Focal Loss, GHMC, and QualityFocalLoss, the sampler is no longer necessary.
## Tweaking loss
Tweaking a loss is more related to steps 2, 4 and 5, and most modifications can be specified in the config.
Here we take [Focal Loss (FL)](../../../mmdet/models/losses/focal_loss.py) as an example.
The following code snippets are the constructor and the config of FL, respectively; they correspond to each other one to one.
```python
@LOSSES.register_module()
class FocalLoss(nn.Module):

    def __init__(self,
                 use_sigmoid=True,
                 gamma=2.0,
                 alpha=0.25,
                 reduction='mean',
                 loss_weight=1.0):
```
```python
loss_cls=dict(
type='FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=1.0)
```
### Tweaking hyper-parameters (step 2)
`gamma` and `alpha` are two hyper-parameters of the Focal Loss. If we want to change the value of `gamma` to 1.5 and `alpha` to 0.5, we can specify them in the config as follows:
```python
loss_cls=dict(
type='FocalLoss',
use_sigmoid=True,
gamma=1.5,
alpha=0.5,
loss_weight=1.0)
```
### Tweaking the way of reduction (step 4)
The default reduction is `mean` for FL. If we want to change the reduction from `mean` to `sum`, we can specify it in the config as follows:
```python
loss_cls=dict(
type='FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=1.0,
reduction='sum')
```
### Tweaking loss weight (step 5)
The loss weight here is a scalar which controls the weight of different losses in multi-task learning, e.g. classification loss and regression loss. If we want to change the loss weight of the classification loss to 0.5, we can specify it in the config as follows:
```python
loss_cls=dict(
type='FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=0.5)
```
## Weighting loss (step 3)
Weighting a loss means re-weighting it element-wise: we multiply the loss tensor by a weight tensor of the same shape, so different entries of the loss can be scaled differently.
The loss weights vary across models and are highly context related, but overall there are two kinds of loss weights: `label_weights` for the classification loss and `bbox_weights` for the bbox regression loss. You can find them in the `get_targets` method of the corresponding head. Here we take [ATSSHead](../../../mmdet/models/dense_heads/atss_head.py#L322) as an example, which inherits [AnchorHead](../../../mmdet/models/dense_heads/anchor_head.py) but overrides its `get_targets` method to yield different `label_weights` and `bbox_weights`.
```python
class ATSSHead(AnchorHead):

    ...

    def get_targets(self,
                    anchor_list,
                    valid_flag_list,
                    gt_bboxes_list,
                    img_metas,
                    gt_bboxes_ignore_list=None,
                    gt_labels_list=None,
                    label_channels=1,
                    unmap_outputs=True):
```
# Customize Models
We basically categorize model components into 5 types.
- backbone: usually an FCN network to extract feature maps, e.g., ResNet, MobileNet.
- neck: the component between backbones and heads, e.g., FPN, PAFPN.
- head: the component for specific tasks, e.g., bbox prediction and mask prediction.
- roi extractor: the part for extracting RoI features from feature maps, e.g., RoI Align.
- loss: the component in head for calculating losses, e.g., FocalLoss, L1Loss, and GHMLoss.
## Develop new components
### Add a new backbone
Here we show how to develop new components with an example of MobileNet.
#### 1. Define a new backbone (e.g. MobileNet)
Create a new file `mmdet/models/backbones/mobilenet.py`.
```python
import torch.nn as nn

from mmdet.registry import MODELS


@MODELS.register_module()
class MobileNet(nn.Module):

    def __init__(self, arg1, arg2):
        pass

    def forward(self, x):  # should return a tuple
        pass
```
#### 2. Import the module
You can either add the following line to `mmdet/models/backbones/__init__.py`
```python
from .mobilenet import MobileNet
```
or alternatively add
```python
custom_imports = dict(
imports=['mmdet.models.backbones.mobilenet'],
allow_failed_imports=False)
```
to the config file to avoid modifying the original code.
#### 3. Use the backbone in your config file
```python
model = dict(
...
backbone=dict(
type='MobileNet',
arg1=xxx,
arg2=xxx),
...
```
### Add new necks
#### 1. Define a neck (e.g. PAFPN)
Create a new file `mmdet/models/necks/pafpn.py`.
```python
import torch.nn as nn

from mmdet.registry import MODELS


@MODELS.register_module()
class PAFPN(nn.Module):

    def __init__(self,
                 in_channels,
                 out_channels,
                 num_outs,
                 start_level=0,
                 end_level=-1,
                 add_extra_convs=False):
        pass

    def forward(self, inputs):
        # implementation is ignored
        pass
```
#### 2. Import the module
You can either add the following line to `mmdet/models/necks/__init__.py`,
```python
from .pafpn import PAFPN
```
or alternatively add
```python
custom_imports = dict(
imports=['mmdet.models.necks.pafpn'],
allow_failed_imports=False)
```
to the config file and avoid modifying the original code.
#### 3. Modify the config file
```python
neck=dict(
type='PAFPN',
in_channels=[256, 512, 1024, 2048],
out_channels=256,
num_outs=5)
```
### Add new heads
Here we show how to develop a new head with the example of [Double Head R-CNN](https://arxiv.org/abs/1904.06493).
First, add a new bbox head in `mmdet/models/roi_heads/bbox_heads/double_bbox_head.py`.
Double Head R-CNN implements a new bbox head for object detection.
To implement a bbox head, we basically need to implement three functions of the new module, as follows.
```python
from typing import Tuple
import torch.nn as nn
from mmcv.cnn import ConvModule
from mmengine.model import BaseModule, ModuleList
from torch import Tensor
from mmdet.models.backbones.resnet import Bottleneck
from mmdet.registry import MODELS
from mmdet.utils import ConfigType, MultiConfig, OptConfigType, OptMultiConfig
from .bbox_head import BBoxHead
@MODELS.register_module()
class DoubleConvFCBBoxHead(BBoxHead):
r"""Bbox head used in Double-Head R-CNN
.. code-block:: none
/-> cls
/-> shared convs ->
\-> reg
roi features
/-> cls
\-> shared fc ->
\-> reg
""" # noqa: W605
def __init__(self,
num_convs: int = 0,
num_fcs: int = 0,
conv_out_channels: int = 1024,
fc_out_channels: int = 1024,
conv_cfg: OptConfigType = None,
norm_cfg: ConfigType = dict(type='BN'),
init_cfg: MultiConfig = dict(
type='Normal',
override=[
dict(type='Normal', name='fc_cls', std=0.01),
dict(type='Normal', name='fc_reg', std=0.001),
dict(
type='Xavier',
name='fc_branch',
distribution='uniform')
]),
**kwargs) -> None:
kwargs.setdefault('with_avg_pool', True)
super().__init__(init_cfg=init_cfg, **kwargs)
def forward(self, x_cls: Tensor, x_reg: Tensor) -> Tuple[Tensor]:
```
Second, implement a new RoI Head if it is necessary. We plan to inherit the new `DoubleHeadRoIHead` from `StandardRoIHead`. We can find that a `StandardRoIHead` already implements the following functions.
```python
from typing import List, Optional, Tuple
import torch
from torch import Tensor
from mmdet.registry import MODELS, TASK_UTILS
from mmdet.structures import DetDataSample
from mmdet.structures.bbox import bbox2roi
from mmdet.utils import ConfigType, InstanceList
from ..task_modules.samplers import SamplingResult
from ..utils import empty_instances, unpack_gt_instances
from .base_roi_head import BaseRoIHead
@MODELS.register_module()
class StandardRoIHead(BaseRoIHead):
"""Simplest base roi head including one bbox head and one mask head."""
def init_assigner_sampler(self) -> None:
def init_bbox_head(self, bbox_roi_extractor: ConfigType,
bbox_head: ConfigType) -> None:
def init_mask_head(self, mask_roi_extractor: ConfigType,
mask_head: ConfigType) -> None:
def forward(self, x: Tuple[Tensor],
rpn_results_list: InstanceList) -> tuple:
def loss(self, x: Tuple[Tensor], rpn_results_list: InstanceList,
batch_data_samples: List[DetDataSample]) -> dict:
def _bbox_forward(self, x: Tuple[Tensor], rois: Tensor) -> dict:
def bbox_loss(self, x: Tuple[Tensor],
sampling_results: List[SamplingResult]) -> dict:
def mask_loss(self, x: Tuple[Tensor],
sampling_results: List[SamplingResult], bbox_feats: Tensor,
batch_gt_instances: InstanceList) -> dict:
def _mask_forward(self,
x: Tuple[Tensor],
rois: Tensor = None,
pos_inds: Optional[Tensor] = None,
bbox_feats: Optional[Tensor] = None) -> dict:
def predict_bbox(self,
x: Tuple[Tensor],
batch_img_metas: List[dict],
rpn_results_list: InstanceList,
rcnn_test_cfg: ConfigType,
rescale: bool = False) -> InstanceList:
def predict_mask(self,
x: Tuple[Tensor],
batch_img_metas: List[dict],
results_list: InstanceList,
rescale: bool = False) -> InstanceList:
```
Double Head's modification is mainly in the `_bbox_forward` logic, and it inherits the other logic from `StandardRoIHead`. In `mmdet/models/roi_heads/double_roi_head.py`, we implement the new RoI Head as follows:
```python
from typing import Tuple

from torch import Tensor

from mmdet.registry import MODELS
from .standard_roi_head import StandardRoIHead


@MODELS.register_module()
class DoubleHeadRoIHead(StandardRoIHead):
    """RoI head for `Double Head RCNN <https://arxiv.org/abs/1904.06493>`_.

    Args:
        reg_roi_scale_factor (float): The scale factor to extend the rois
            used to extract the regression features.
    """

    def __init__(self, reg_roi_scale_factor: float, **kwargs):
        super().__init__(**kwargs)
        self.reg_roi_scale_factor = reg_roi_scale_factor

    def _bbox_forward(self, x: Tuple[Tensor], rois: Tensor) -> dict:
        """Box head forward function used in both training and testing.

        Args:
            x (tuple[Tensor]): List of multi-level img features.
            rois (Tensor): RoIs with the shape (n, 5) where the first
                column indicates batch id of each RoI.

        Returns:
            dict[str, Tensor]: Usually returns a dictionary with keys:

                - `cls_score` (Tensor): Classification scores.
                - `bbox_pred` (Tensor): Box energies / deltas.
                - `bbox_feats` (Tensor): Extract bbox RoI features.
        """
        bbox_cls_feats = self.bbox_roi_extractor(
            x[:self.bbox_roi_extractor.num_inputs], rois)
        bbox_reg_feats = self.bbox_roi_extractor(
            x[:self.bbox_roi_extractor.num_inputs],
            rois,
            roi_scale_factor=self.reg_roi_scale_factor)
        if self.with_shared_head:
            bbox_cls_feats = self.shared_head(bbox_cls_feats)
            bbox_reg_feats = self.shared_head(bbox_reg_feats)
        cls_score, bbox_pred = self.bbox_head(bbox_cls_feats, bbox_reg_feats)

        bbox_results = dict(
            cls_score=cls_score,
            bbox_pred=bbox_pred,
            bbox_feats=bbox_cls_feats)
        return bbox_results
```
Last, the users need to add the modules in
`mmdet/models/roi_heads/bbox_heads/__init__.py` and `mmdet/models/roi_heads/__init__.py` so that the corresponding registry can find and load them.
Alternatively, the users can add
```python
custom_imports=dict(
imports=['mmdet.models.roi_heads.double_roi_head', 'mmdet.models.roi_heads.bbox_heads.double_bbox_head'])
```
to the config file and achieve the same goal.
The config file of Double Head R-CNN is as follows:
```python
_base_ = '../faster_rcnn/faster-rcnn_r50_fpn_1x_coco.py'
model = dict(
roi_head=dict(
type='DoubleHeadRoIHead',
reg_roi_scale_factor=1.3,
bbox_head=dict(
_delete_=True,
type='DoubleConvFCBBoxHead',
num_convs=4,
num_fcs=2,
in_channels=256,
conv_out_channels=1024,
fc_out_channels=1024,
roi_feat_size=7,
num_classes=80,
bbox_coder=dict(
type='DeltaXYWHBBoxCoder',
target_means=[0., 0., 0., 0.],
target_stds=[0.1, 0.1, 0.2, 0.2]),
reg_class_agnostic=False,
loss_cls=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=2.0),
loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=2.0))))
```
Since MMDetection 2.0, the config system has supported inheriting configs, so that the users can focus on the specific modifications.
Double Head R-CNN mainly uses a new `DoubleHeadRoIHead` and a new `DoubleConvFCBBoxHead`; the arguments are set according to the `__init__` function of each module.
### Add new loss
Assume you want to add a new loss called `MyLoss` for bounding box regression.
To add a new loss function, the users need to implement it in `mmdet/models/losses/my_loss.py`.
The decorator `weighted_loss` enables the loss to be weighted for each element.
```python
import torch
import torch.nn as nn

from mmdet.registry import MODELS
from .utils import weighted_loss


@weighted_loss
def my_loss(pred, target):
    assert pred.size() == target.size() and target.numel() > 0
    loss = torch.abs(pred - target)
    return loss


@MODELS.register_module()
class MyLoss(nn.Module):

    def __init__(self, reduction='mean', loss_weight=1.0):
        super(MyLoss, self).__init__()
        self.reduction = reduction
        self.loss_weight = loss_weight

    def forward(self,
                pred,
                target,
                weight=None,
                avg_factor=None,
                reduction_override=None):
        assert reduction_override in (None, 'none', 'mean', 'sum')
        reduction = (
            reduction_override if reduction_override else self.reduction)
        loss_bbox = self.loss_weight * my_loss(
            pred, target, weight, reduction=reduction, avg_factor=avg_factor)
        return loss_bbox
```
Then the users need to add it in the `mmdet/models/losses/__init__.py`.
```python
from .my_loss import MyLoss, my_loss
```
Alternatively, you can add
```python
custom_imports=dict(
imports=['mmdet.models.losses.my_loss'])
```
to the config file and achieve the same goal.
To use it, modify the `loss_xxx` field.
Since MyLoss is for regression, you need to modify the `loss_bbox` field in the head.
```python
loss_bbox=dict(type='MyLoss', loss_weight=1.0)
```
# Customize Runtime Settings
## Customize optimization settings
Optimization-related configuration is now all managed by `optim_wrapper`, which usually has three fields: `optimizer`, `paramwise_cfg` and `clip_grad`; refer to [OptimWrapper](https://mmengine.readthedocs.io/en/latest/tutorials/optim_wrapper.md) for more detail. In the example below, `AdamW` is used as the `optimizer`, the learning rate of the backbone is reduced by a factor of 10, and gradient clipping is added.
```python
optim_wrapper = dict(
type='OptimWrapper',
# optimizer
optimizer=dict(
type='AdamW',
lr=0.0001,
weight_decay=0.05,
eps=1e-8,
betas=(0.9, 0.999)),
# Parameter-level learning rate and weight decay settings
paramwise_cfg=dict(
custom_keys={
'backbone': dict(lr_mult=0.1, decay_mult=1.0),
},
norm_decay_mult=0.0),
# gradient clipping
clip_grad=dict(max_norm=0.01, norm_type=2))
```
### Customize optimizer supported by PyTorch
We already support using all the optimizers implemented by PyTorch, and the only modification needed is to change the `optimizer` field of `optim_wrapper` in the config files. For example, if you want to use `Adam` (note that the performance could drop a lot), the modification could be as follows.
```python
optim_wrapper = dict(
type='OptimWrapper',
optimizer=dict(type='Adam', lr=0.0003, weight_decay=0.0001))
```
To modify the learning rate of the model, the users only need to modify the `lr` in `optimizer`. The users can directly set arguments following the [API doc](https://pytorch.org/docs/stable/optim.html?highlight=optim#module-torch.optim) of PyTorch.
### Customize self-implemented optimizer
#### 1. Define a new optimizer
A customized optimizer could be defined as follows.
Assume you want to add an optimizer named `MyOptimizer`, which has arguments `a`, `b`, and `c`.
You need to create a new directory named `mmdet/engine/optimizers`, and then implement the new optimizer in a file, e.g., `mmdet/engine/optimizers/my_optimizer.py`:
```python
from mmdet.registry import OPTIMIZERS
from torch.optim import Optimizer


@OPTIMIZERS.register_module()
class MyOptimizer(Optimizer):

    def __init__(self, a, b, c):
        ...
```
#### 2. Add the optimizer to registry
To find the module defined above, it should first be imported into the main namespace. There are two options to achieve this.
- Modify `mmdet/engine/optimizers/__init__.py` to import it.
The newly defined module should be imported in `mmdet/engine/optimizers/__init__.py` so that the registry will find the new module and add it:
```python
from .my_optimizer import MyOptimizer
```
- Use `custom_imports` in the config to manually import it
```python
custom_imports = dict(imports=['mmdet.engine.optimizers.my_optimizer'], allow_failed_imports=False)
```
The module `mmdet.engine.optimizers.my_optimizer` will be imported at the beginning of the program, and the class `MyOptimizer` is then automatically registered.
Note that only the module containing the class `MyOptimizer` should be imported;
`mmdet.engine.optimizers.my_optimizer.MyOptimizer` **cannot** be imported directly.
Actually, users can use a totally different file directory structure with this importing method, as long as the module root can be located in `PYTHONPATH` (see the sketch below).
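For example, if the optimizer lived in a standalone package on `PYTHONPATH` (the path below is hypothetical), the import could be configured as:

```python
custom_imports = dict(
    imports=['my_project.optimizers.my_optimizer'],
    allow_failed_imports=False)
```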
#### 3. Specify the optimizer in the config file
Then you can use `MyOptimizer` in the `optimizer` field of `optim_wrapper` in the config files. In the configs, the optimizers are defined by the field `optimizer` as follows:
```python
optim_wrapper = dict(
type='OptimWrapper',
optimizer=dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001))
```
To use your own optimizer, the field can be changed to
```python
optim_wrapper = dict(
type='OptimWrapper',
optimizer=dict(type='MyOptimizer', a=a_value, b=b_value, c=c_value))
```
### Customize optimizer wrapper constructor
Some models may have parameter-specific settings for optimization, e.g. weight decay for BatchNorm layers.
The users can do such fine-grained parameter tuning by customizing the optimizer wrapper constructor.
```python
from typing import Optional

import torch.nn as nn
from mmengine.optim import DefaultOptimWrapperConstructor, OptimWrapper

from mmdet.registry import OPTIM_WRAPPER_CONSTRUCTORS
from .my_optimizer import MyOptimizer


@OPTIM_WRAPPER_CONSTRUCTORS.register_module()
class MyOptimizerWrapperConstructor(DefaultOptimWrapperConstructor):

    def __init__(self,
                 optim_wrapper_cfg: dict,
                 paramwise_cfg: Optional[dict] = None):
        ...

    def __call__(self, model: nn.Module) -> OptimWrapper:
        ...
        return optim_wrapper
```
The default optimizer wrapper constructor is implemented [here](https://github.com/open-mmlab/mmengine/blob/main/mmengine/optim/optimizer/default_constructor.py#L18), which could also serve as a template for the new optimizer wrapper constructor.
### Additional settings
Tricks not implemented by the optimizer should be implemented through the optimizer wrapper constructor (e.g., setting parameter-wise learning rates) or hooks. We list some common settings that could stabilize or accelerate the training. Feel free to create a PR or issue for more settings.
- __Use gradient clip to stabilize training__:
Some models need gradient clipping to stabilize the training process. An example is below:
```python
optim_wrapper = dict(
_delete_=True, clip_grad=dict(max_norm=35, norm_type=2))
```
If your config inherits the base config which already sets the `optim_wrapper`, you might need `_delete_=True` to override the unnecessary settings. See the [config documentation](../user_guides/config.md) for more details.
- __Use momentum schedule to accelerate model convergence__:
We support a momentum scheduler to modify the model's momentum according to the learning rate, which could make the model converge faster.
The momentum scheduler is usually used together with the LR scheduler; for example, the following config is used in [3D detection](https://github.com/open-mmlab/mmdetection3d/blob/dev-1.x/configs/_base_/schedules/cyclic-20e.py) to accelerate convergence.
For more details, please refer to the implementation of [CosineAnnealingLR](https://github.com/open-mmlab/mmengine/blob/main/mmengine/optim/scheduler/lr_scheduler.py#L43) and [CosineAnnealingMomentum](https://github.com/open-mmlab/mmengine/blob/main/mmengine/optim/scheduler/momentum_scheduler.py#L71).
```python
param_scheduler = [
# learning rate scheduler
# During the first 8 epochs, learning rate increases from 0 to lr * 10
# during the next 12 epochs, learning rate decreases from lr * 10 to lr * 1e-4
dict(
type='CosineAnnealingLR',
T_max=8,
eta_min=lr * 10,
begin=0,
end=8,
by_epoch=True,
convert_to_iter_based=True),
dict(
type='CosineAnnealingLR',
T_max=12,
eta_min=lr * 1e-4,
begin=8,
end=20,
by_epoch=True,
convert_to_iter_based=True),
# momentum scheduler
# During the first 8 epochs, momentum increases from 0 to 0.85 / 0.95
# during the next 12 epochs, momentum increases from 0.85 / 0.95 to 1
dict(
type='CosineAnnealingMomentum',
T_max=8,
eta_min=0.85 / 0.95,
begin=0,
end=8,
by_epoch=True,
convert_to_iter_based=True),
dict(
type='CosineAnnealingMomentum',
T_max=12,
eta_min=1,
begin=8,
end=20,
by_epoch=True,
convert_to_iter_based=True)
]
```
## Customize training schedules
By default we use a step learning rate with the 1x schedule; this calls [MultiStepLR](https://github.com/open-mmlab/mmengine/blob/main/mmengine/optim/scheduler/lr_scheduler.py#L139) in MMEngine.
We support many other learning rate schedules [here](https://github.com/open-mmlab/mmengine/blob/main/mmengine/optim/scheduler/lr_scheduler.py), such as the `CosineAnnealingLR` and `PolyLR` schedules. Here are some examples:
- Poly schedule:
```python
param_scheduler = [
dict(
type='PolyLR',
power=0.9,
eta_min=1e-4,
begin=0,
end=8,
by_epoch=True)]
```
- CosineAnnealing schedule:
```python
param_scheduler = [
dict(
type='CosineAnnealingLR',
T_max=8,
eta_min=lr * 1e-5,
begin=0,
end=8,
by_epoch=True)]
```
## Customize train loop
By default, `EpochBasedTrainLoop` is used in `train_cfg` and validation is done after every train epoch, as follows.
```python
train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=12, val_begin=1, val_interval=1)
```
Actually, both [`IterBasedTrainLoop`](https://github.com/open-mmlab/mmengine/blob/main/mmengine/runner/loops.py#L183) and [`EpochBasedTrainLoop`](https://github.com/open-mmlab/mmengine/blob/main/mmengine/runner/loops.py#L18) support dynamic intervals; see the following example.
```python
# Before 365001th iteration, we do evaluation every 5000 iterations.
# After 365000th iteration, we do evaluation every 368750 iterations,
# which means that we do evaluation at the end of training.
interval = 5000
max_iters = 368750
dynamic_intervals = [(max_iters // interval * interval + 1, max_iters)]
train_cfg = dict(
type='IterBasedTrainLoop',
max_iters=max_iters,
val_interval=interval,
dynamic_intervals=dynamic_intervals)
```
## Customize hooks
### Customize self-implemented hooks
#### 1. Implement a new hook
MMEngine provides many useful [hooks](https://mmengine.readthedocs.io/en/latest/tutorials/hooks.html), but there are occasions when users need to implement a new hook. MMDetection has supported customized hooks in training since v3.0. Thus, users can implement a hook directly in mmdet or their mmdet-based codebase and use it by only modifying the training config.
Here we give an example of creating a new hook in mmdet and using it in training.
```python
from typing import Optional, Union

from mmengine.hooks import Hook
from mmdet.registry import HOOKS

DATA_BATCH = Optional[Union[dict, tuple, list]]


@HOOKS.register_module()
class MyHook(Hook):
    def __init__(self, a, b):
        ...
    def before_run(self, runner) -> None:
        ...
    def after_run(self, runner) -> None:
        ...
    def before_train(self, runner) -> None:
        ...
    def after_train(self, runner) -> None:
        ...
    def before_train_epoch(self, runner) -> None:
        ...
    def after_train_epoch(self, runner) -> None:
        ...
    def before_train_iter(self,
                          runner,
                          batch_idx: int,
                          data_batch: DATA_BATCH = None) -> None:
        ...
    def after_train_iter(self,
                         runner,
                         batch_idx: int,
                         data_batch: DATA_BATCH = None,
                         outputs: Optional[dict] = None) -> None:
        ...
```
Depending on the functionality of the hook, the users need to specify what the hook will do at each stage of the training in `before_run`, `after_run`, `before_train`, `after_train` , `before_train_epoch`, `after_train_epoch`, `before_train_iter`, and `after_train_iter`. There are more points where hooks can be inserted, refer to [base hook class](https://github.com/open-mmlab/mmengine/blob/main/mmengine/hooks/hook.py#L9) for more detail.
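As a concrete, illustrative example (the hook name and message below are made up, not part of MMDetection), a hook that logs a message every `interval` training iterations could look like this:

```python
from mmengine.hooks import Hook

from mmdet.registry import HOOKS


@HOOKS.register_module()
class PrintIterHook(Hook):
    """Illustrative hook: log a message every `interval` train iterations."""

    def __init__(self, interval: int = 50):
        self.interval = interval

    def after_train_iter(self, runner, batch_idx, data_batch=None, outputs=None):
        # `every_n_train_iters` is a helper provided by the base Hook class.
        if self.every_n_train_iters(runner, self.interval):
            runner.logger.info(f'finished iteration {runner.iter + 1}')
```

It could then be enabled with `custom_hooks = [dict(type='PrintIterHook', interval=100)]`.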
#### 2. Register the new hook
Then we need to make sure `MyHook` is imported. Assuming the file is `mmdet/engine/hooks/my_hook.py`, there are two ways to do that:
- Modify `mmdet/engine/hooks/__init__.py` to import it.
The newly defined module should be imported in `mmdet/engine/hooks/__init__.py` so that the registry will find the new module and add it:
```python
from .my_hook import MyHook
```
- Use `custom_imports` in the config to manually import it
```python
custom_imports = dict(imports=['mmdet.engine.hooks.my_hook'], allow_failed_imports=False)
```
#### 3. Modify the config
```python
custom_hooks = [
dict(type='MyHook', a=a_value, b=b_value)
]
```
You can also set the priority of the hook by setting the key `priority` to `'NORMAL'` or `'HIGHEST'`, as below:
```python
custom_hooks = [
dict(type='MyHook', a=a_value, b=b_value, priority='NORMAL')
]
```
By default the hook's priority is set as `NORMAL` during registration.
### Use hooks implemented in MMDetection
If the hook is already implemented in MMDetection, you can directly modify the config to use the hook as below.
#### Example: `NumClassCheckHook`
We implement a customized hook named [NumClassCheckHook](../../../mmdet/engine/hooks/num_class_check_hook.py) to check whether the `num_classes` in head matches the length of `classes` in the metainfo of `dataset`.
We set it in [default_runtime.py](../../../configs/_base_/default_runtime.py).
```python
custom_hooks = [dict(type='NumClassCheckHook')]
```
### Modify default runtime hooks
There are some common hooks that are registered through `default_hooks`, they are
- `IterTimerHook`: A hook that logs 'data_time' for loading data and 'time' for a model train step.
- `LoggerHook`: A hook that collects logs from different components of `Runner` and writes them to the terminal, JSON files, TensorBoard, wandb, etc.
- `ParamSchedulerHook`: A hook to update some hyper-parameters in optimizer, e.g., learning rate and momentum.
- `CheckpointHook`: A hook that saves checkpoints periodically.
- `DistSamplerSeedHook`: A hook that sets the seed for sampler and batch_sampler.
- `DetVisualizationHook`: A hook used to visualize the prediction results of validation and testing.
`IterTimerHook`, `ParamSchedulerHook` and `DistSamplerSeedHook` are simple and usually do not need to be modified, so here we describe what we can do with `LoggerHook`, `CheckpointHook` and `DetVisualizationHook`.
#### CheckpointHook
Besides saving checkpoints periodically, [`CheckpointHook`](https://github.com/open-mmlab/mmengine/blob/main/mmengine/hooks/checkpoint_hook.py#L19) provides other options such as `max_keep_ckpts` and `save_optimizer`. The users could set `max_keep_ckpts` to keep only a small number of checkpoints, or decide whether to store the optimizer's state dict with `save_optimizer`. More details of the arguments are [here](https://github.com/open-mmlab/mmengine/blob/main/mmengine/hooks/checkpoint_hook.py#L19).
```python
default_hooks = dict(
checkpoint=dict(
type='CheckpointHook',
interval=1,
max_keep_ckpts=3,
save_optimizer=True))
```
#### LoggerHook
The `LoggerHook` enables setting the logging interval. The detailed usage can be found in the [docstring](https://github.com/open-mmlab/mmengine/blob/main/mmengine/hooks/logger_hook.py#L18).
```python
default_hooks = dict(logger=dict(type='LoggerHook', interval=50))
```
#### DetVisualizationHook
`DetVisualizationHook` uses `DetLocalVisualizer` to visualize prediction results, and `DetLocalVisualizer` currently supports different backends, e.g., `TensorboardVisBackend` and `WandbVisBackend` (see the [docstring](https://github.com/open-mmlab/mmengine/blob/main/mmengine/visualization/vis_backend.py) for more detail). The users could add multiple backends to do visualization, as follows.
```python
default_hooks = dict(
visualization=dict(type='DetVisualizationHook', draw=True))
vis_backends = [dict(type='LocalVisBackend'),
dict(type='TensorboardVisBackend')]
visualizer = dict(
type='DetLocalVisualizer', vis_backends=vis_backends, name='visualizer')
```
# Customize Data Pipelines
1. Write a new transform in a file, e.g., in `my_pipeline.py`. It takes a dict as input and returns a dict.
```python
import random

from mmcv.transforms import BaseTransform

from mmdet.registry import TRANSFORMS


@TRANSFORMS.register_module()
class MyTransform(BaseTransform):
    """Add your transform

    Args:
        prob (float): Probability of shifts. Default 0.5.
    """

    def __init__(self, prob=0.5):
        self.prob = prob

    def transform(self, results):
        if random.random() > self.prob:
            results['dummy'] = True
        return results
```
2. Import and use the pipeline in your config file.
Make sure the import is relative to where your train script is located.
```python
custom_imports = dict(imports=['path.to.my_pipeline'], allow_failed_imports=False)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations', with_bbox=True),
dict(type='Resize', scale=(1333, 800), keep_ratio=True),
dict(type='RandomFlip', prob=0.5),
dict(type='MyTransform', prob=0.2),
dict(type='PackDetInputs')
]
```
3. Visualize the output of your transforms pipeline
To visualize the output of your transforms pipeline, `tools/misc/browse_dataset.py`
can help the user browse a detection dataset (both images and bounding box annotations)
visually, or save the images to a designated directory. More details can be found in the
[visualization documentation](../user_guides/visualization.md).