Use pre-commit to reformat code
41b18fd8 · zhe chen · ff20ea39 · 41b18fd8 · 41b18fd8 · 41b18fd8
Commit 41b18fd8 authored Jan 06, 2025 by zhe chen
20 changed files
--- a/detection/ops_dcnv3/functions/dcnv3_func.py
+++ b/detection/ops_dcnv3/functions/dcnv3_func.py
@@ -4,16 +4,14 @@
 # Licensed under The MIT License [see LICENSE for details]
 # --------------------------------------------------------

-from __future__ import absolute_import
-from __future__ import print_function
-from __future__ import division
+from __future__ import absolute_import, division, print_function

+import DCNv3
 import torch
 import torch.nn.functional as F
 from torch.autograd import Function
 from torch.autograd.function import once_differentiable
 from torch.cuda.amp import custom_bwd, custom_fwd
-import DCNv3


 class DCNv3Function(Function):
@@ -88,6 +86,7 @@ class DCNv3Function(Function):
            im2col_step_i=int(im2col_step),
        )

+
 def _get_reference_points(spatial_shapes, device, kernel_h, kernel_w, dilation_h, dilation_w, pad_h=0, pad_w=0, stride_h=1, stride_w=1):
    _, H_, W_, _ = spatial_shapes
    H_out = (H_ - (dilation_h * (kernel_h - 1) + 1)) // stride_h + 1

--- a/detection/ops_dcnv3/modules/__init__.py
+++ b/detection/ops_dcnv3/modules/__init__.py
--- a/detection/ops_dcnv3/modules/dcnv3.py
+++ b/detection/ops_dcnv3/modules/dcnv3.py
@@ -4,22 +4,24 @@
 # Licensed under The MIT License [see LICENSE for details]
 # --------------------------------------------------------

-from __future__ import absolute_import
-from __future__ import print_function
-from __future__ import division
+from __future__ import absolute_import, division, print_function

 import warnings
+
 import torch
-from torch import nn
 import torch.nn.functional as F
-from torch.nn.init import xavier_uniform_, constant_
+from torch import nn
+from torch.nn.init import constant_, xavier_uniform_
+
 from ..functions import DCNv3Function, dcnv3_core_pytorch
+
 try:
    from DCNv4.functions import DCNv4Function
 except:
    warnings.warn('Now, we support DCNv4 in InternImage.')
 import math

+
 class to_channels_first(nn.Module):

    def __init__(self):
@@ -76,7 +78,7 @@ def build_act_layer(act_layer):
 def _is_power_of_2(n):
    if (not isinstance(n, int)) or (n < 0):
        raise ValueError(
-            "invalid input for _is_power_of_2: {} (type: {})".format(n, type(n)))
+            'invalid input for _is_power_of_2: {} (type: {})'.format(n, type(n)))

    return (n & (n - 1) == 0) and n != 0

@@ -128,7 +130,7 @@ class DCNv3_pytorch(nn.Module):
        if not _is_power_of_2(_d_per_group):
            warnings.warn(
                "You'd better set channels in DCNv3 to make the dimension of each attention head a power of 2 "
-                "which is more efficient in our CUDA implementation.")
+                'which is more efficient in our CUDA implementation.')

        self.offset_scale = offset_scale
        self.channels = channels
@@ -257,7 +259,7 @@ class DCNv3(nn.Module):
        if not _is_power_of_2(_d_per_group):
            warnings.warn(
                "You'd better set channels in DCNv3 to make the dimension of each attention head a power of 2 "
-                "which is more efficient in our CUDA implementation.")
+                'which is more efficient in our CUDA implementation.')

        self.offset_scale = offset_scale
        self.channels = channels
@@ -351,7 +353,7 @@ class DCNv3(nn.Module):

            # For efficiency, the last dimension of the offset_mask tensor in dcnv4 is a multiple of 8.
            K3 = offset_mask.size(-1)
-            K3_pad = int(math.ceil(K3/8)*8)
+            K3_pad = int(math.ceil(K3 / 8) * 8)
            pad_dim = K3_pad - K3
            offset_mask = torch.cat([offset_mask, offset_mask.new_zeros([*offset_mask.size()[:3], pad_dim])], -1)


--- a/detection/ops_dcnv3/setup.py
+++ b/detection/ops_dcnv3/setup.py
@@ -4,39 +4,34 @@
 # Licensed under The MIT License [see LICENSE for details]
 # --------------------------------------------------------

-import os
 import glob
+import os

 import torch
+from setuptools import find_packages, setup
+from torch.utils.cpp_extension import CUDA_HOME, CppExtension, CUDAExtension

-from torch.utils.cpp_extension import CUDA_HOME
-from torch.utils.cpp_extension import CppExtension
-from torch.utils.cpp_extension import CUDAExtension
-
-from setuptools import find_packages
-from setuptools import setup
-
-requirements = ["torch", "torchvision"]
+requirements = ['torch', 'torchvision']


 def get_extensions():
    this_dir = os.path.dirname(os.path.abspath(__file__))
-    extensions_dir = os.path.join(this_dir, "src")
+    extensions_dir = os.path.join(this_dir, 'src')

-    main_file = glob.glob(os.path.join(extensions_dir, "*.cpp"))
-    source_cpu = glob.glob(os.path.join(extensions_dir, "cpu", "*.cpp"))
-    source_cuda = glob.glob(os.path.join(extensions_dir, "cuda", "*.cu"))
+    main_file = glob.glob(os.path.join(extensions_dir, '*.cpp'))
+    source_cpu = glob.glob(os.path.join(extensions_dir, 'cpu', '*.cpp'))
+    source_cuda = glob.glob(os.path.join(extensions_dir, 'cuda', '*.cu'))

    sources = main_file + source_cpu
    extension = CppExtension
-    extra_compile_args = {"cxx": []}
+    extra_compile_args = {'cxx': []}
    define_macros = []

    if torch.cuda.is_available() and CUDA_HOME is not None:
        extension = CUDAExtension
        sources += source_cuda
-        define_macros += [("WITH_CUDA", None)]
-        extra_compile_args["nvcc"] = [
+        define_macros += [('WITH_CUDA', None)]
+        extra_compile_args['nvcc'] = [
            # "-DCUDA_HAS_FP16=1",
            # "-D__CUDA_NO_HALF_OPERATORS__",
            # "-D__CUDA_NO_HALF_CONVERSIONS__",
@@ -49,7 +44,7 @@ def get_extensions():
    include_dirs = [extensions_dir]
    ext_modules = [
        extension(
-            "DCNv3",
+            'DCNv3',
            sources,
            include_dirs=include_dirs,
            define_macros=define_macros,
@@ -60,16 +55,16 @@ def get_extensions():


 setup(
-    name="DCNv3",
-    version="1.0",
-    author="InternImage",
-    url="https://github.com/OpenGVLab/InternImage",
+    name='DCNv3',
+    version='1.0',
+    author='InternImage',
+    url='https://github.com/OpenGVLab/InternImage',
    description=
-    "PyTorch Wrapper for CUDA Functions of DCNv3",
+    'PyTorch Wrapper for CUDA Functions of DCNv3',
    packages=find_packages(exclude=(
-        "configs",
-        "tests",
+        'configs',
+        'tests',
    )),
    ext_modules=get_extensions(),
-    cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension},
+    cmdclass={'build_ext': torch.utils.cpp_extension.BuildExtension},
 )
--- a/detection/ops_dcnv3/src/cuda/dcnv3_cuda.cu
+++ b/detection/ops_dcnv3/src/cuda/dcnv3_cuda.cu
--- a/detection/ops_dcnv3/src/cuda/dcnv3_im2col_cuda.cuh
+++ b/detection/ops_dcnv3/src/cuda/dcnv3_im2col_cuda.cuh
--- a/detection/ops_dcnv3/test.py
+++ b/detection/ops_dcnv3/test.py
@@ -4,17 +4,15 @@
 # Licensed under The MIT License [see LICENSE for details]
 # --------------------------------------------------------

-from __future__ import absolute_import
-from __future__ import print_function
-from __future__ import division
+from __future__ import absolute_import, division, print_function

+import math
 import time
+
 import torch
 import torch.nn as nn
-import math
-from torch.autograd import gradcheck
-
 from functions.dcnv3_func import DCNv3Function, dcnv3_core_pytorch
+from torch.autograd import gradcheck

 H_in, W_in = 8, 8
 N, M, D = 2, 4, 16

--- a/detection/slurm_train.sh
+++ b/detection/slurm_train.sh
--- a/detection/test.py
+++ b/detection/test.py
@@ -11,6 +11,8 @@ import time
 import warnings

 import mmcv
+import mmcv_custom  # noqa: F401,F403
+import mmdet_custom  # noqa: F401,F403
 import torch
 from mmcv import Config, DictAction
 from mmcv.cnn import fuse_conv_bn
@@ -21,8 +23,6 @@ from mmdet.apis import multi_gpu_test, single_gpu_test
 from mmdet.datasets import (build_dataloader, build_dataset,
                            replace_ImageToTensor)
 from mmdet.models import build_detector
-import mmdet_custom  # noqa: F401,F403
-import mmcv_custom  # noqa: F401,F403


 def parse_args():

--- a/detection/tools/create_crowd_anno.py
+++ b/detection/tools/create_crowd_anno.py
 import argparse
+import concurrent.futures
+import json
 import os
 import pickle as pkl
-import numpy as np
 import random
-from PIL import Image
-import concurrent.futures
-import json
+
 import mmcv
+import numpy as np
+from PIL import Image
+

 def parse_args():
    parser = argparse.ArgumentParser(description='Generate MMDetection Annotations for Crowdhuman-like dataset')
@@ -16,6 +18,7 @@ def parse_args():
    args = parser.parse_args()
    return args.dataset, args.dataset_split

+
 def load_func(fpath):
    assert os.path.exists(fpath)
    with open(fpath, 'r') as fid:
@@ -23,6 +26,7 @@ def load_func(fpath):
    records = [json.loads(line.strip('\n')) for line in lines]
    return records

+
 def decode_annotations(records, dataset_path):
    rec_ids = list(range(len(records)))
    img_list = []
@@ -80,16 +84,17 @@ def decode_annotations(records, dataset_path):
    )
    return json_dict

-if __name__ == "__main__":
+
+if __name__ == '__main__':
    dataset_name, dataset_type = parse_args()
    dataset_path = 'data/%s/' % dataset_name
    ch_file_path = dataset_path + 'annotations/annotation_%s.odgt' % dataset_type
    json_file_path = dataset_path + 'annotations/annotation_%s.json' % dataset_type

    records = load_func(ch_file_path)
-    print("Loading Annotations Done")
+    print('Loading Annotations Done')

    json_dict = decode_annotations(records, dataset_path)

-    print("Parsing Bbox Number: %d" % len(json_dict['annotations']))
+    print('Parsing Bbox Number: %d' % len(json_dict['annotations']))
    mmcv.dump(json_dict, json_file_path)
--- a/detection/tools/evaluate/__init__.py
+++ b/detection/tools/evaluate/__init__.py
--- a/detection/train.py
+++ b/detection/train.py
@@ -12,12 +12,13 @@ import time
 import warnings

 import mmcv
+import mmcv_custom  # noqa: F401,F403
+import mmdet_custom  # noqa: F401,F403
 import torch
 import torch.distributed as dist
 from mmcv import Config, DictAction
 from mmcv.runner import get_dist_info, init_dist
 from mmcv.utils import get_git_hash
-
 from mmdet import __version__
 from mmdet.apis import init_random_seed, set_random_seed, train_detector
 from mmdet.datasets import build_dataset
@@ -25,8 +26,6 @@ from mmdet.models import build_detector
 from mmdet.utils import (collect_env, get_device, get_root_logger,
                         replace_cfg_vals, setup_multi_processes,
                         update_data_root)
-import mmcv_custom  # noqa: F401,F403
-import mmdet_custom  # noqa: F401,F403


 def parse_args():

--- a/sam/engine.py
+++ b/sam/engine.py
@@ -5,16 +5,13 @@ import shutil
 import tempfile
 import time

+import mmcv
 import numpy as np
-
 import torch
 import torch.distributed as dist
 import torch.nn.functional as F
-
-import mmcv
 from mmcv.image import tensor2imgs
 from mmcv.runner import get_dist_info
-
 from mmdet.core import encode_mask_results


@@ -42,11 +39,11 @@ def prompt_sam_with_bboxes(sam_predictor, data, box_result):
        # `antialias=True` is provided in official implementation of SAM,
        # which may raise TypeError in PyTorch of previous versions.
        transformed_img = F.interpolate(
-            img, target_size, mode="bilinear",
+            img, target_size, mode='bilinear',
            align_corners=False, antialias=True)
    except TypeError:
        transformed_img = F.interpolate(
-            img, target_size, mode="bilinear", align_corners=False)
+            img, target_size, mode='bilinear', align_corners=False)
    # Pad to 1024 x 1024
    h, w = transformed_img.shape[-2:]
    pad_h = sam_predictor.model.image_encoder.img_size - h
@@ -152,4 +149,3 @@ def single_gpu_test(model,
        for _ in range(batch_size):
            prog_bar.update()
    return results
-
--- a/sam/main_zero_shot_instance_seg.py
+++ b/sam/main_zero_shot_instance_seg.py
@@ -17,14 +17,15 @@ from mmcv.cnn import fuse_conv_bn
 from mmcv.parallel import MMDataParallel, MMDistributedDataParallel
 from mmcv.runner import (get_dist_info, init_dist, load_checkpoint,
                         wrap_fp16_model)
+from mmdet.apis import multi_gpu_test
 from mmdet.datasets import (build_dataloader, build_dataset,
                            replace_ImageToTensor)
 from mmdet.models import build_detector
-from mmdet.apis import multi_gpu_test
-import detection.mmdet_custom  # noqa: F401,F403
+from segment_anything import SamPredictor, sam_model_registry
+
 import detection.mmcv_custom  # noqa: F401,F403
+import detection.mmdet_custom  # noqa: F401,F403

-from segment_anything import sam_model_registry, SamPredictor
 try:
    from .engine import single_gpu_test
 except ImportError:

--- a/segmentation/README.md
+++ b/segmentation/README.md
@@ -27,6 +27,7 @@ conda activate internimage
 - Install `PyTorch>=1.10.0` and `torchvision>=0.9.0` with `CUDA>=10.2`:

 For example, to install torch==1.11 with CUDA==11.3 and nvcc:
+
 ```bash
 conda install pytorch==1.11.0 torchvision==0.12.0 torchaudio==0.11.0 cudatoolkit=11.3 -c pytorch -y
 conda install -c conda-forge cudatoolkit-dev=11.3 -y # to install nvcc
@@ -41,7 +42,7 @@ conda install -c conda-forge termcolor yacs pyyaml scipy pip -y
 pip install opencv-python
 ```

- Install `timm` and `mmcv-full` and `mmsegmentation':
+- Install `timm`, `mmcv-full` and `mmsegmentation`:

 ```bash
 pip install -U openmim
@@ -51,20 +52,21 @@ pip install timm==0.6.11 mmdet==2.28.1
 ```

 - Compile CUDA operators
+
 ```bash
 cd ./ops_dcnv3
 sh ./make.sh
 # unit test (should see all checking is True)
 python test.py
 ```
+
 - You can also install the operator using .whl files
-[DCNv3-1.0-whl](https://github.com/OpenGVLab/InternImage/releases/tag/whl_files)
+  [DCNv3-1.0-whl](https://github.com/OpenGVLab/InternImage/releases/tag/whl_files)

 ### Data Preparation

 Prepare datasets according to the [guidelines](https://github.com/open-mmlab/mmsegmentation/blob/master/docs/en/dataset_prepare.md#prepare-datasets) in MMSegmentation.

-
 ### Evaluation

 To evaluate our `InternImage` on ADE20K val, run:
@@ -72,6 +74,7 @@ To evaluate our `InternImage` on ADE20K val, run:
 ```bash
 sh dist_test.sh <config-file> <checkpoint> <gpu-num> --eval mIoU
 ```
+
 You can download checkpoint files from [here](https://huggingface.co/OpenGVLab/InternImage/tree/fc1e4e7e01c3e7a39a3875bdebb6577a7256ff91). Then place them in `segmentation/checkpoint_dir/seg`.

 For example, to evaluate the `InternImage-T` with a single GPU:
@@ -109,8 +112,10 @@ GPUS=8 sh slurm_train.sh <partition> <job-name> configs/ade20k/upernet_internima
 ```

 ### Image Demo
+
 To run inference on a single image or on multiple images, use the command below.
 If you specify a directory containing images instead of a single image, all images in that directory will be processed:
+
 ```
 CUDA_VISIBLE_DEVICES=0 python image_demo.py \
  data/ade/ADEChallengeData2016/images/validation/ADE_val_00000591.jpg \
@@ -122,6 +127,7 @@ CUDA_VISIBLE_DEVICES=0 python image_demo.py \
 ### Export

 To export a segmentation model from PyTorch to TensorRT, run:
+
 ```shell
 MODEL="model_name"
 CKPT_PATH="/path/to/model/ckpt.pth"
@@ -137,6 +143,7 @@ python deploy.py \
 ```

 For example, to export `upernet_internimage_t_512_160k_ade20k` from PyTorch to TensorRT, run:
+
 ```shell
 MODEL="upernet_internimage_t_512_160k_ade20k"
 CKPT_PATH="/path/to/model/ckpt/upernet_internimage_t_512_160k_ade20k.pth"

--- a/segmentation/configs/_base_/models/mask2former_beit.py
+++ b/segmentation/configs/_base_/models/mask2former_beit.py
--- a/segmentation/configs/_base_/models/segformer_mit-b0.py
+++ b/segmentation/configs/_base_/models/segformer_mit-b0.py
--- a/segmentation/configs/ade20k/README.md
+++ b/segmentation/configs/ade20k/README.md
@@ -4,18 +4,16 @@ Introduced by Zhou et al. in [Scene Parsing Through ADE20K Dataset](https://pape

 The ADE20K semantic segmentation dataset contains more than 20K scene-centric images exhaustively annotated with pixel-level objects and object parts labels. There are totally 150 semantic categories, which include stuffs like sky, road, grass, and discrete objects like person, car, bed.

-
 ## Model Zoo

 ### UperNet + InternImage

-
 |    backbone    | resolution | mIoU (ss/ms) | train speed  | train time | #param | FLOPs |                        Config                         |                                                                                                           Download                                                                                                           |
-|:--------------:|:----------:|:-----------:|:-----------:|:----------:|:-------:|:-----:|:-----:|:-------------------:|
+| :------------: | :--------: | :----------: | :----------: | :--------: | :----: | :---: | :---------------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
 | InternImage-T  |  512x512   | 47.9 / 48.1  | 0.23s / iter |   10.5h    |  59M   | 944G  | [config](./upernet_internimage_t_512_160k_ade20k.py)  |  [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/upernet_internimage_t_512_160k_ade20k.pth) \| [log](https://huggingface.co/OpenGVLab/InternImage/raw/main/upernet_internimage_t_512_160k_ade20k.log.json)  |
 | InternImage-S  |  512x512   | 50.1 / 50.9  | 0.25s / iter |   11.5h    |  80M   | 1017G | [config](./upernet_internimage_s_512_160k_ade20k.py)  |  [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/upernet_internimage_s_512_160k_ade20k.pth) \| [log](https://huggingface.co/OpenGVLab/InternImage/raw/main/upernet_internimage_s_512_160k_ade20k.log.json)  |
 | InternImage-B  |  512x512   | 50.8 / 51.3  | 0.26s / iter |    12h     |  128M  | 1185G | [config](./upernet_internimage_b_512_160k_ade20k.py)  |  [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/upernet_internimage_b_512_160k_ade20k.pth) \| [log](https://huggingface.co/OpenGVLab/InternImage/raw/main/upernet_internimage_b_512_160k_ade20k.log.json)  |
-| InternImage-L  | 640x640    | 53.9 / 54.1  | 0.42s / iter       | 19h        | 256M    | 2526G | [config](./upernet_internimage_l_640_160k_ade20k.py)| [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/upernet_internimage_l_640_160k_ade20k.pth) \| [log](https://huggingface.co/OpenGVLab/InternImage/raw/main/upernet_internimage_l_640_160k_ade20k.log.json)  | 
+| InternImage-L  |  640x640   | 53.9 / 54.1  | 0.42s / iter |    19h     |  256M  | 2526G | [config](./upernet_internimage_l_640_160k_ade20k.py)  |  [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/upernet_internimage_l_640_160k_ade20k.pth) \| [log](https://huggingface.co/OpenGVLab/InternImage/raw/main/upernet_internimage_l_640_160k_ade20k.log.json)  |
 | InternImage-XL |  640x640   | 55.0 / 55.3  | 0.47s / iter |    22h     |  368M  | 3142G | [config](./upernet_internimage_xl_640_160k_ade20k.py) | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/upernet_internimage_xl_640_160k_ade20k.pth) \| [log](https://huggingface.co/OpenGVLab/InternImage/raw/main/upernet_internimage_xl_640_160k_ade20k.log.json) |
 | InternImage-H  |  896x896   | 59.9 / 60.3  | 0.94s / iter |  2d (2n)   | 1.12B  | 3566G | [config](./upernet_internimage_h_896_160k_ade20k.py)  |  [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/upernet_internimage_h_896_160k_ade20k.pth) \| [log](https://huggingface.co/OpenGVLab/InternImage/raw/main/upernet_internimage_h_896_160k_ade20k.log.json)  |

@@ -23,9 +21,8 @@ The ADE20K semantic segmentation dataset contains more than 20K scene-centric im
 - Please set `with_cp=True` to save memory if you meet `out-of-memory` issues.
 - The logs are our recent newly trained ones. There are slight differences between the results in logs and our paper.

-
 ### Mask2Former + InternImage

 |   backbone    | resolution | mIoU (ss/ms) | train speed  | train time | #param | FLOPs |                                Config                                |                                                                                                                       Download                                                                                                                       |
-|:--------------:|:----------:|:-----------:|:-----------:|:----------:|:-------:|:-----:|:-----:|:-------------------:|
+| :-----------: | :--------: | :----------: | :----------: | :--------: | :----: | :---: | :------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
 | InternImage-H |  896x896   | 62.6 / 62.9  | 1.21s / iter | 1.5d (2n)  | 1.31B  | 4635G | [config](./mask2former_internimage_h_896_80k_cocostuff2ade20k_ss.py) | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/mask2former_internimage_h_896_80k_cocostuff2ade20k.pth) \| [log](https://huggingface.co/OpenGVLab/InternImage/raw/main/mask2former_internimage_h_896_80k_cocostuff2ade20k.log.json) |
--- a/segmentation/configs/ade20k/mask2former_internimage_h_896_80k_cocostuff2ade20k_ms.py
+++ b/segmentation/configs/ade20k/mask2former_internimage_h_896_80k_cocostuff2ade20k_ms.py
@@ -161,4 +161,3 @@ optimizer_config = dict(_delete_=True, grad_clip=dict(max_norm=0.1, norm_type=2)
 checkpoint_config = dict(by_epoch=False, interval=1000, max_keep_ckpts=1)
 evaluation = dict(interval=2000, metric='mIoU', save_best='mIoU')
 # fp16 = dict(loss_scale=dict(init_scale=512))
-
--- a/segmentation/configs/cityscapes/README.md
+++ b/segmentation/configs/cityscapes/README.md
@@ -9,12 +9,12 @@ Cityscapes is a large-scale database which focuses on semantic understanding of
 ### UperNet + InternImage

 |    backbone    | resolution | mIoU (ss/ms)  | train speed  | train time | #params | FLOPs |                             Config                             |                                                                                                                    Download                                                                                                                    |
-|:--------------:|:----------:|:------------:|:-----------:|:----------:|:-------:|:-----:|:----:|:----:|
+| :------------: | :--------: | :-----------: | :----------: | :--------: | :-----: | :---: | :------------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
 | InternImage-T  |  512x1024  | 82.58 / 83.40 | 0.32s / iter |   14.5h    |   59M   | 1889G | [config](./upernet_internimage_t_512x1024_160k_cityscapes.py)  |  [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/upernet_internimage_t_512x1024_160k_cityscapes.pth) \| [log](https://huggingface.co/OpenGVLab/InternImage/raw/main/upernet_internimage_t_512x1024_160k_cityscapes.log.json)  |
-| InternImage-S  | 512x1024   |   82.74 / 83.45    | 0.36s / iter       | 16.5h      | 80M     | 2035G | [config](./upernet_internimage_s_512x1024_160k_cityscapes.py) |[ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/upernet_internimage_s_512x1024_160k_cityscapes.pth) \| [log](https://huggingface.co/OpenGVLab/InternImage/raw/main/upernet_internimage_s_512x1024_160k_cityscapes.log.json)  |
-| InternImage-B  | 512x1024   |   83.18 / 83.97    | 0.39s / iter       | 17h        | 128M    | 2369G | [config](./upernet_internimage_b_512x1024_160k_cityscapes.py) |[ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/upernet_internimage_b_512x1024_160k_cityscapes.pth) \| [log](https://huggingface.co/OpenGVLab/InternImage/raw/main/upernet_internimage_b_512x1024_160k_cityscapes.log.json)  |
-| InternImage-L  | 512x1024   |    83.68 / 84.41   | 0.50s / iter       | 23h        | 256M    | 3234G | [config](./upernet_internimage_l_512x1024_160k_cityscapes.py) |[ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/upernet_internimage_l_512x1024_160k_cityscapes.pth) \| [log](https://huggingface.co/OpenGVLab/InternImage/raw/main/upernet_internimage_l_512x1024_160k_cityscapes.log.json)  |
-| InternImage-XL | 512x1024   |    83.62 / 84.28   | 0.56s / iter       | 26h       | 368M    | 4022G | [config](./upernet_internimage_xl_512x1024_160k_cityscapes.py) |[ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/upernet_internimage_xl_512x1024_160k_cityscapes.pth) \| [log](https://huggingface.co/OpenGVLab/InternImage/raw/main/upernet_internimage_xl_512x1024_160k_cityscapes.log.json) |
+| InternImage-S  |  512x1024  | 82.74 / 83.45 | 0.36s / iter |   16.5h    |   80M   | 2035G | [config](./upernet_internimage_s_512x1024_160k_cityscapes.py)  |  [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/upernet_internimage_s_512x1024_160k_cityscapes.pth) \| [log](https://huggingface.co/OpenGVLab/InternImage/raw/main/upernet_internimage_s_512x1024_160k_cityscapes.log.json)  |
+| InternImage-B  |  512x1024  | 83.18 / 83.97 | 0.39s / iter |    17h     |  128M   | 2369G | [config](./upernet_internimage_b_512x1024_160k_cityscapes.py)  |  [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/upernet_internimage_b_512x1024_160k_cityscapes.pth) \| [log](https://huggingface.co/OpenGVLab/InternImage/raw/main/upernet_internimage_b_512x1024_160k_cityscapes.log.json)  |
+| InternImage-L  |  512x1024  | 83.68 / 84.41 | 0.50s / iter |    23h     |  256M   | 3234G | [config](./upernet_internimage_l_512x1024_160k_cityscapes.py)  |  [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/upernet_internimage_l_512x1024_160k_cityscapes.pth) \| [log](https://huggingface.co/OpenGVLab/InternImage/raw/main/upernet_internimage_l_512x1024_160k_cityscapes.log.json)  |
+| InternImage-XL |  512x1024  | 83.62 / 84.28 | 0.56s / iter |    26h     |  368M   | 4022G | [config](./upernet_internimage_xl_512x1024_160k_cityscapes.py) | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/upernet_internimage_xl_512x1024_160k_cityscapes.pth) \| [log](https://huggingface.co/OpenGVLab/InternImage/raw/main/upernet_internimage_xl_512x1024_160k_cityscapes.log.json) |

 - Training speed is measured with A100 GPU.
 - Please set `with_cp=True` to save memory if you meet `out-of-memory` issues.
@@ -24,7 +24,7 @@ Cityscapes is a large-scale database which focuses on semantic understanding of
 Mapillary 80k + Cityscapes (w/ coarse data) 160k

 |    backbone    | resolution | mIoU (ss/ms)  | train speed  | train time | #params | FLOPs |                                  Config                                  |                                                                                                                              Download                                                                                                                              |
-|:--------------:|:----------:|:------------:|:-----------:|:-----------:|:-------:|:-----:|:------:|:------------:|
+| :------------: | :--------: | :-----------: | :----------: | :--------: | :-----: | :---: | :----------------------------------------------------------------------: | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
 | InternImage-L  |  512x1024  | 85.94 / 86.22 | 0.50s / iter |    23h     |  256M   | 3234G | [config](./upernet_internimage_l_512x1024_160k_mapillary2cityscapes.py)  | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/upernet_internimage_l_512x1024_160k_mapillary2cityscapes.pth)  \| [log](https://huggingface.co/OpenGVLab/InternImage/raw/main/upernet_internimage_l_512x1024_160k_mapillary2cityscapes.log.json)  |
 | InternImage-XL |  512x1024  | 86.20 / 86.42 | 0.56s / iter |    26h     |  368M   | 4022G | [config](./upernet_internimage_xl_512x1024_160k_mapillary2cityscapes.py) | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/upernet_internimage_xl_512x1024_160k_mapillary2cityscapes.pth) \| [log](https://huggingface.co/OpenGVLab/InternImage/raw/main/upernet_internimage_xl_512x1024_160k_mapillary2cityscapes.log.json) |

@@ -33,6 +33,6 @@ Mapillary 80k + Cityscapes (w/ coarse data) 160k
 Mapillary 80k + Cityscapes (w/ coarse data) 160k

 |    backbone    | resolution | mIoU (ss/ms)  | train speed  | train time | #params | FLOPs |                                   Config                                   |                                                                                                                                Download                                                                                                                                |
-|:--------------:|:----------:|:------------:|:-----------:|:-----------:|:-------:|:-----:|:-----:|:---------:|
+| :------------: | :--------: | :-----------: | :----------: | :--------: | :-----: | :---: | :------------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
 | InternImage-L  |  512x1024  | 85.16 / 85.67 | 0.37s / iter |    17h     |  220M   | 1580G | [config](./segformer_internimage_l_512x1024_160k_mapillary2cityscapes.py)  |  [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/segformer_internimage_l_512x1024_160k_mapillary2cityscapes.pth) \| [log](https://huggingface.co/OpenGVLab/InternImage/raw/main/segformer_internimage_l_512x1024_160k_mapillary2cityscapes.log.json)  |
 | InternImage-XL |  512x1024  | 85.41 / 85.93 | 0.43s / iter |   19.5h    |  330M   | 2364G | [config](./segformer_internimage_xl_512x1024_160k_mapillary2cityscapes.py) | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/segformer_internimage_xl_512x1024_160k_mapillary2cityscapes.pth) \| [log](https://huggingface.co/OpenGVLab/InternImage/raw/main/segformer_internimage_xl_512x1024_160k_mapillary2cityscapes.log.json) |