"docs/zh_cn/user_guides/index.rst" did not exist on "fdfe3c4f8ba935ae428a8a496ce57755d5b2ea98"
Commit 41b18fd8 authored by zhe chen's avatar zhe chen
Browse files

Use pre-commit to reformat code


Use pre-commit to reformat code
parent ff20ea39
......@@ -4,16 +4,14 @@
# Licensed under The MIT License [see LICENSE for details]
# --------------------------------------------------------
from __future__ import absolute_import
from __future__ import print_function
from __future__ import division
from __future__ import absolute_import, division, print_function
import DCNv3
import torch
import torch.nn.functional as F
from torch.autograd import Function
from torch.autograd.function import once_differentiable
from torch.cuda.amp import custom_bwd, custom_fwd
import DCNv3
class DCNv3Function(Function):
......@@ -88,6 +86,7 @@ class DCNv3Function(Function):
im2col_step_i=int(im2col_step),
)
def _get_reference_points(spatial_shapes, device, kernel_h, kernel_w, dilation_h, dilation_w, pad_h=0, pad_w=0, stride_h=1, stride_w=1):
_, H_, W_, _ = spatial_shapes
H_out = (H_ - (dilation_h * (kernel_h - 1) + 1)) // stride_h + 1
......
......@@ -4,22 +4,24 @@
# Licensed under The MIT License [see LICENSE for details]
# --------------------------------------------------------
from __future__ import absolute_import
from __future__ import print_function
from __future__ import division
from __future__ import absolute_import, division, print_function
import warnings
import torch
from torch import nn
import torch.nn.functional as F
from torch.nn.init import xavier_uniform_, constant_
from torch import nn
from torch.nn.init import constant_, xavier_uniform_
from ..functions import DCNv3Function, dcnv3_core_pytorch
try:
from DCNv4.functions import DCNv4Function
except:
warnings.warn('Now, we support DCNv4 in InternImage.')
import math
class to_channels_first(nn.Module):
def __init__(self):
......@@ -76,7 +78,7 @@ def build_act_layer(act_layer):
def _is_power_of_2(n):
if (not isinstance(n, int)) or (n < 0):
raise ValueError(
"invalid input for _is_power_of_2: {} (type: {})".format(n, type(n)))
'invalid input for _is_power_of_2: {} (type: {})'.format(n, type(n)))
return (n & (n - 1) == 0) and n != 0
......@@ -128,7 +130,7 @@ class DCNv3_pytorch(nn.Module):
if not _is_power_of_2(_d_per_group):
warnings.warn(
"You'd better set channels in DCNv3 to make the dimension of each attention head a power of 2 "
"which is more efficient in our CUDA implementation.")
'which is more efficient in our CUDA implementation.')
self.offset_scale = offset_scale
self.channels = channels
......@@ -257,7 +259,7 @@ class DCNv3(nn.Module):
if not _is_power_of_2(_d_per_group):
warnings.warn(
"You'd better set channels in DCNv3 to make the dimension of each attention head a power of 2 "
"which is more efficient in our CUDA implementation.")
'which is more efficient in our CUDA implementation.')
self.offset_scale = offset_scale
self.channels = channels
......@@ -351,7 +353,7 @@ class DCNv3(nn.Module):
# For efficiency, the last dimension of the offset_mask tensor in dcnv4 is a multiple of 8.
K3 = offset_mask.size(-1)
K3_pad = int(math.ceil(K3/8)*8)
K3_pad = int(math.ceil(K3 / 8) * 8)
pad_dim = K3_pad - K3
offset_mask = torch.cat([offset_mask, offset_mask.new_zeros([*offset_mask.size()[:3], pad_dim])], -1)
......
......@@ -4,39 +4,34 @@
# Licensed under The MIT License [see LICENSE for details]
# --------------------------------------------------------
import os
import glob
import os
import torch
from setuptools import find_packages, setup
from torch.utils.cpp_extension import CUDA_HOME, CppExtension, CUDAExtension
from torch.utils.cpp_extension import CUDA_HOME
from torch.utils.cpp_extension import CppExtension
from torch.utils.cpp_extension import CUDAExtension
from setuptools import find_packages
from setuptools import setup
requirements = ["torch", "torchvision"]
requirements = ['torch', 'torchvision']
def get_extensions():
this_dir = os.path.dirname(os.path.abspath(__file__))
extensions_dir = os.path.join(this_dir, "src")
extensions_dir = os.path.join(this_dir, 'src')
main_file = glob.glob(os.path.join(extensions_dir, "*.cpp"))
source_cpu = glob.glob(os.path.join(extensions_dir, "cpu", "*.cpp"))
source_cuda = glob.glob(os.path.join(extensions_dir, "cuda", "*.cu"))
main_file = glob.glob(os.path.join(extensions_dir, '*.cpp'))
source_cpu = glob.glob(os.path.join(extensions_dir, 'cpu', '*.cpp'))
source_cuda = glob.glob(os.path.join(extensions_dir, 'cuda', '*.cu'))
sources = main_file + source_cpu
extension = CppExtension
extra_compile_args = {"cxx": []}
extra_compile_args = {'cxx': []}
define_macros = []
if torch.cuda.is_available() and CUDA_HOME is not None:
extension = CUDAExtension
sources += source_cuda
define_macros += [("WITH_CUDA", None)]
extra_compile_args["nvcc"] = [
define_macros += [('WITH_CUDA', None)]
extra_compile_args['nvcc'] = [
# "-DCUDA_HAS_FP16=1",
# "-D__CUDA_NO_HALF_OPERATORS__",
# "-D__CUDA_NO_HALF_CONVERSIONS__",
......@@ -49,7 +44,7 @@ def get_extensions():
include_dirs = [extensions_dir]
ext_modules = [
extension(
"DCNv3",
'DCNv3',
sources,
include_dirs=include_dirs,
define_macros=define_macros,
......@@ -60,16 +55,16 @@ def get_extensions():
setup(
name="DCNv3",
version="1.0",
author="InternImage",
url="https://github.com/OpenGVLab/InternImage",
name='DCNv3',
version='1.0',
author='InternImage',
url='https://github.com/OpenGVLab/InternImage',
description=
"PyTorch Wrapper for CUDA Functions of DCNv3",
'PyTorch Wrapper for CUDA Functions of DCNv3',
packages=find_packages(exclude=(
"configs",
"tests",
'configs',
'tests',
)),
ext_modules=get_extensions(),
cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension},
cmdclass={'build_ext': torch.utils.cpp_extension.BuildExtension},
)
......@@ -4,17 +4,15 @@
# Licensed under The MIT License [see LICENSE for details]
# --------------------------------------------------------
from __future__ import absolute_import
from __future__ import print_function
from __future__ import division
from __future__ import absolute_import, division, print_function
import math
import time
import torch
import torch.nn as nn
import math
from torch.autograd import gradcheck
from functions.dcnv3_func import DCNv3Function, dcnv3_core_pytorch
from torch.autograd import gradcheck
H_in, W_in = 8, 8
N, M, D = 2, 4, 16
......
......@@ -11,6 +11,8 @@ import time
import warnings
import mmcv
import mmcv_custom # noqa: F401,F403
import mmdet_custom # noqa: F401,F403
import torch
from mmcv import Config, DictAction
from mmcv.cnn import fuse_conv_bn
......@@ -21,8 +23,6 @@ from mmdet.apis import multi_gpu_test, single_gpu_test
from mmdet.datasets import (build_dataloader, build_dataset,
replace_ImageToTensor)
from mmdet.models import build_detector
import mmdet_custom # noqa: F401,F403
import mmcv_custom # noqa: F401,F403
def parse_args():
......
import argparse
import concurrent.futures
import json
import os
import pickle as pkl
import numpy as np
import random
from PIL import Image
import concurrent.futures
import json
import mmcv
import numpy as np
from PIL import Image
def parse_args():
parser = argparse.ArgumentParser(description='Generate MMDetection Annotations for Crowdhuman-like dataset')
......@@ -16,6 +18,7 @@ def parse_args():
args = parser.parse_args()
return args.dataset, args.dataset_split
def load_func(fpath):
assert os.path.exists(fpath)
with open(fpath, 'r') as fid:
......@@ -23,6 +26,7 @@ def load_func(fpath):
records = [json.loads(line.strip('\n')) for line in lines]
return records
def decode_annotations(records, dataset_path):
rec_ids = list(range(len(records)))
img_list = []
......@@ -80,16 +84,17 @@ def decode_annotations(records, dataset_path):
)
return json_dict
if __name__ == "__main__":
if __name__ == '__main__':
dataset_name, dataset_type = parse_args()
dataset_path = 'data/%s/' % dataset_name
ch_file_path = dataset_path + 'annotations/annotation_%s.odgt' % dataset_type
json_file_path = dataset_path + 'annotations/annotation_%s.json' % dataset_type
records = load_func(ch_file_path)
print("Loading Annotations Done")
print('Loading Annotations Done')
json_dict = decode_annotations(records, dataset_path)
print("Parsing Bbox Number: %d" % len(json_dict['annotations']))
print('Parsing Bbox Number: %d' % len(json_dict['annotations']))
mmcv.dump(json_dict, json_file_path)
......@@ -12,12 +12,13 @@ import time
import warnings
import mmcv
import mmcv_custom # noqa: F401,F403
import mmdet_custom # noqa: F401,F403
import torch
import torch.distributed as dist
from mmcv import Config, DictAction
from mmcv.runner import get_dist_info, init_dist
from mmcv.utils import get_git_hash
from mmdet import __version__
from mmdet.apis import init_random_seed, set_random_seed, train_detector
from mmdet.datasets import build_dataset
......@@ -25,8 +26,6 @@ from mmdet.models import build_detector
from mmdet.utils import (collect_env, get_device, get_root_logger,
replace_cfg_vals, setup_multi_processes,
update_data_root)
import mmcv_custom # noqa: F401,F403
import mmdet_custom # noqa: F401,F403
def parse_args():
......
......@@ -5,16 +5,13 @@ import shutil
import tempfile
import time
import mmcv
import numpy as np
import torch
import torch.distributed as dist
import torch.nn.functional as F
import mmcv
from mmcv.image import tensor2imgs
from mmcv.runner import get_dist_info
from mmdet.core import encode_mask_results
......@@ -42,11 +39,11 @@ def prompt_sam_with_bboxes(sam_predictor, data, box_result):
# `antialias=True` is provided in official implementation of SAM,
# which may raise TypeError in PyTorch of previous versions.
transformed_img = F.interpolate(
img, target_size, mode="bilinear",
img, target_size, mode='bilinear',
align_corners=False, antialias=True)
except TypeError:
transformed_img = F.interpolate(
img, target_size, mode="bilinear", align_corners=False)
img, target_size, mode='bilinear', align_corners=False)
# Pad to 1024 x 1024
h, w = transformed_img.shape[-2:]
pad_h = sam_predictor.model.image_encoder.img_size - h
......@@ -152,4 +149,3 @@ def single_gpu_test(model,
for _ in range(batch_size):
prog_bar.update()
return results
......@@ -17,14 +17,15 @@ from mmcv.cnn import fuse_conv_bn
from mmcv.parallel import MMDataParallel, MMDistributedDataParallel
from mmcv.runner import (get_dist_info, init_dist, load_checkpoint,
wrap_fp16_model)
from mmdet.apis import multi_gpu_test
from mmdet.datasets import (build_dataloader, build_dataset,
replace_ImageToTensor)
from mmdet.models import build_detector
from mmdet.apis import multi_gpu_test
import detection.mmdet_custom # noqa: F401,F403
from segment_anything import SamPredictor, sam_model_registry
import detection.mmcv_custom # noqa: F401,F403
import detection.mmdet_custom # noqa: F401,F403
from segment_anything import sam_model_registry, SamPredictor
try:
from .engine import single_gpu_test
except ImportError:
......
......@@ -27,6 +27,7 @@ conda activate internimage
- Install `PyTorch>=1.10.0` and `torchvision>=0.9.0` with `CUDA>=10.2`:
For examples, to install torch==1.11 with CUDA==11.3 and nvcc:
```bash
conda install pytorch==1.11.0 torchvision==0.12.0 torchaudio==0.11.0 cudatoolkit=11.3 -c pytorch -y
conda install -c conda-forge cudatoolkit-dev=11.3 -y # to install nvcc
......@@ -41,7 +42,7 @@ conda install -c conda-forge termcolor yacs pyyaml scipy pip -y
pip install opencv-python
```
- Install `timm` and `mmcv-full` and `mmsegmentation':
- Install `timm` and `mmcv-full` and \`mmsegmentation':
```bash
pip install -U openmim
......@@ -51,20 +52,21 @@ pip install timm==0.6.11 mmdet==2.28.1
```
- Compile CUDA operators
```bash
cd ./ops_dcnv3
sh ./make.sh
# unit test (should see all checking is True)
python test.py
```
- You can also install the operator using .whl files
[DCNv3-1.0-whl](https://github.com/OpenGVLab/InternImage/releases/tag/whl_files)
[DCNv3-1.0-whl](https://github.com/OpenGVLab/InternImage/releases/tag/whl_files)
### Data Preparation
Prepare datasets according to the [guidelines](https://github.com/open-mmlab/mmsegmentation/blob/master/docs/en/dataset_prepare.md#prepare-datasets) in MMSegmentation.
### Evaluation
To evaluate our `InternImage` on ADE20K val, run:
......@@ -72,6 +74,7 @@ To evaluate our `InternImage` on ADE20K val, run:
```bash
sh dist_test.sh <config-file> <checkpoint> <gpu-num> --eval mIoU
```
You can download checkpoint files from [here](https://huggingface.co/OpenGVLab/InternImage/tree/fc1e4e7e01c3e7a39a3875bdebb6577a7256ff91). Then place it to segmentation/checkpoint_dir/seg.
For example, to evaluate the `InternImage-T` with a single GPU:
......@@ -109,8 +112,10 @@ GPUS=8 sh slurm_train.sh <partition> <job-name> configs/ade20k/upernet_internima
```
### Image Demo
To inference a single/multiple image like this.
If you specify image containing directory instead of a single image, it will process all the images in the directory.:
```
CUDA_VISIBLE_DEVICES=0 python image_demo.py \
data/ade/ADEChallengeData2016/images/validation/ADE_val_00000591.jpg \
......@@ -122,6 +127,7 @@ CUDA_VISIBLE_DEVICES=0 python image_demo.py \
### Export
To export a segmentation model from PyTorch to TensorRT, run:
```shell
MODEL="model_name"
CKPT_PATH="/path/to/model/ckpt.pth"
......@@ -137,6 +143,7 @@ python deploy.py \
```
For example, to export `upernet_internimage_t_512_160k_ade20k` from PyTorch to TensorRT, run:
```shell
MODEL="upernet_internimage_t_512_160k_ade20k"
CKPT_PATH="/path/to/model/ckpt/upernet_internimage_t_512_160k_ade20k.pth"
......
......@@ -4,18 +4,16 @@ Introduced by Zhou et al. in [Scene Parsing Through ADE20K Dataset](https://pape
The ADE20K semantic segmentation dataset contains more than 20K scene-centric images exhaustively annotated with pixel-level objects and object parts labels. There are totally 150 semantic categories, which include stuffs like sky, road, grass, and discrete objects like person, car, bed.
## Model Zoo
### UperNet + InternImage
| backbone | resolution | mIoU (ss/ms) | train speed | train time | #param | FLOPs | Config | Download |
|:--------------:|:----------:|:-----------:|:-----------:|:----------:|:-------:|:-----:|:-----:|:-------------------:|
| :------------: | :--------: | :----------: | :----------: | :--------: | :----: | :---: | :---------------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
| InternImage-T | 512x512 | 47.9 / 48.1 | 0.23s / iter | 10.5h | 59M | 944G | [config](./upernet_internimage_t_512_160k_ade20k.py) | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/upernet_internimage_t_512_160k_ade20k.pth) \| [log](https://huggingface.co/OpenGVLab/InternImage/raw/main/upernet_internimage_t_512_160k_ade20k.log.json) |
| InternImage-S | 512x512 | 50.1 / 50.9 | 0.25s / iter | 11.5h | 80M | 1017G | [config](./upernet_internimage_s_512_160k_ade20k.py) | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/upernet_internimage_s_512_160k_ade20k.pth) \| [log](https://huggingface.co/OpenGVLab/InternImage/raw/main/upernet_internimage_s_512_160k_ade20k.log.json) |
| InternImage-B | 512x512 | 50.8 / 51.3 | 0.26s / iter | 12h | 128M | 1185G | [config](./upernet_internimage_b_512_160k_ade20k.py) | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/upernet_internimage_b_512_160k_ade20k.pth) \| [log](https://huggingface.co/OpenGVLab/InternImage/raw/main/upernet_internimage_b_512_160k_ade20k.log.json) |
| InternImage-L | 640x640 | 53.9 / 54.1 | 0.42s / iter | 19h | 256M | 2526G | [config](./upernet_internimage_l_640_160k_ade20k.py)| [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/upernet_internimage_l_640_160k_ade20k.pth) \| [log](https://huggingface.co/OpenGVLab/InternImage/raw/main/upernet_internimage_l_640_160k_ade20k.log.json) |
| InternImage-L | 640x640 | 53.9 / 54.1 | 0.42s / iter | 19h | 256M | 2526G | [config](./upernet_internimage_l_640_160k_ade20k.py) | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/upernet_internimage_l_640_160k_ade20k.pth) \| [log](https://huggingface.co/OpenGVLab/InternImage/raw/main/upernet_internimage_l_640_160k_ade20k.log.json) |
| InternImage-XL | 640x640 | 55.0 / 55.3 | 0.47s / iter | 22h | 368M | 3142G | [config](./upernet_internimage_xl_640_160k_ade20k.py) | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/upernet_internimage_xl_640_160k_ade20k.pth) \| [log](https://huggingface.co/OpenGVLab/InternImage/raw/main/upernet_internimage_xl_640_160k_ade20k.log.json) |
| InternImage-H | 896x896 | 59.9 / 60.3 | 0.94s / iter | 2d (2n) | 1.12B | 3566G | [config](./upernet_internimage_h_896_160k_ade20k.py) | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/upernet_internimage_h_896_160k_ade20k.pth) \| [log](https://huggingface.co/OpenGVLab/InternImage/raw/main/upernet_internimage_h_896_160k_ade20k.log.json) |
......@@ -23,9 +21,8 @@ The ADE20K semantic segmentation dataset contains more than 20K scene-centric im
- Please set `with_cp=True` to save memory if you meet `out-of-memory` issues.
- The logs are our recent newly trained ones. There are slight differences between the results in logs and our paper.
### Mask2Former + InternImage
| backbone | resolution | mIoU (ss/ms) | train speed | train time | #param | FLOPs | Config | Download |
|:--------------:|:----------:|:-----------:|:-----------:|:----------:|:-------:|:-----:|:-----:|:-------------------:|
| :-----------: | :--------: | :----------: | :----------: | :--------: | :----: | :---: | :------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
| InternImage-H | 896x896 | 62.6 / 62.9 | 1.21s / iter | 1.5d (2n) | 1.31B | 4635G | [config](./mask2former_internimage_h_896_80k_cocostuff2ade20k_ss.py) | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/mask2former_internimage_h_896_80k_cocostuff2ade20k.pth) \| [log](https://huggingface.co/OpenGVLab/InternImage/raw/main/mask2former_internimage_h_896_80k_cocostuff2ade20k.log.json) |
......@@ -161,4 +161,3 @@ optimizer_config = dict(_delete_=True, grad_clip=dict(max_norm=0.1, norm_type=2)
checkpoint_config = dict(by_epoch=False, interval=1000, max_keep_ckpts=1)
evaluation = dict(interval=2000, metric='mIoU', save_best='mIoU')
# fp16 = dict(loss_scale=dict(init_scale=512))
......@@ -9,12 +9,12 @@ Cityscapes is a large-scale database which focuses on semantic understanding of
### UperNet + InternImage
| backbone | resolution | mIoU (ss/ms) | train speed | train time | #params | FLOPs | Config | Download |
|:--------------:|:----------:|:------------:|:-----------:|:----------:|:-------:|:-----:|:----:|:----:|
| :------------: | :--------: | :-----------: | :----------: | :--------: | :-----: | :---: | :------------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
| InternImage-T | 512x1024 | 82.58 / 83.40 | 0.32s / iter | 14.5h | 59M | 1889G | [config](./upernet_internimage_t_512x1024_160k_cityscapes.py) | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/upernet_internimage_t_512x1024_160k_cityscapes.pth) \| [log](https://huggingface.co/OpenGVLab/InternImage/raw/main/upernet_internimage_t_512x1024_160k_cityscapes.log.json) |
| InternImage-S | 512x1024 | 82.74 / 83.45 | 0.36s / iter | 16.5h | 80M | 2035G | [config](./upernet_internimage_s_512x1024_160k_cityscapes.py) |[ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/upernet_internimage_s_512x1024_160k_cityscapes.pth) \| [log](https://huggingface.co/OpenGVLab/InternImage/raw/main/upernet_internimage_s_512x1024_160k_cityscapes.log.json) |
| InternImage-B | 512x1024 | 83.18 / 83.97 | 0.39s / iter | 17h | 128M | 2369G | [config](./upernet_internimage_b_512x1024_160k_cityscapes.py) |[ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/upernet_internimage_b_512x1024_160k_cityscapes.pth) \| [log](https://huggingface.co/OpenGVLab/InternImage/raw/main/upernet_internimage_b_512x1024_160k_cityscapes.log.json) |
| InternImage-L | 512x1024 | 83.68 / 84.41 | 0.50s / iter | 23h | 256M | 3234G | [config](./upernet_internimage_l_512x1024_160k_cityscapes.py) |[ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/upernet_internimage_l_512x1024_160k_cityscapes.pth) \| [log](https://huggingface.co/OpenGVLab/InternImage/raw/main/upernet_internimage_l_512x1024_160k_cityscapes.log.json) |
| InternImage-XL | 512x1024 | 83.62 / 84.28 | 0.56s / iter | 26h | 368M | 4022G | [config](./upernet_internimage_xl_512x1024_160k_cityscapes.py) |[ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/upernet_internimage_xl_512x1024_160k_cityscapes.pth) \| [log](https://huggingface.co/OpenGVLab/InternImage/raw/main/upernet_internimage_xl_512x1024_160k_cityscapes.log.json) |
| InternImage-S | 512x1024 | 82.74 / 83.45 | 0.36s / iter | 16.5h | 80M | 2035G | [config](./upernet_internimage_s_512x1024_160k_cityscapes.py) | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/upernet_internimage_s_512x1024_160k_cityscapes.pth) \| [log](https://huggingface.co/OpenGVLab/InternImage/raw/main/upernet_internimage_s_512x1024_160k_cityscapes.log.json) |
| InternImage-B | 512x1024 | 83.18 / 83.97 | 0.39s / iter | 17h | 128M | 2369G | [config](./upernet_internimage_b_512x1024_160k_cityscapes.py) | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/upernet_internimage_b_512x1024_160k_cityscapes.pth) \| [log](https://huggingface.co/OpenGVLab/InternImage/raw/main/upernet_internimage_b_512x1024_160k_cityscapes.log.json) |
| InternImage-L | 512x1024 | 83.68 / 84.41 | 0.50s / iter | 23h | 256M | 3234G | [config](./upernet_internimage_l_512x1024_160k_cityscapes.py) | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/upernet_internimage_l_512x1024_160k_cityscapes.pth) \| [log](https://huggingface.co/OpenGVLab/InternImage/raw/main/upernet_internimage_l_512x1024_160k_cityscapes.log.json) |
| InternImage-XL | 512x1024 | 83.62 / 84.28 | 0.56s / iter | 26h | 368M | 4022G | [config](./upernet_internimage_xl_512x1024_160k_cityscapes.py) | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/upernet_internimage_xl_512x1024_160k_cityscapes.pth) \| [log](https://huggingface.co/OpenGVLab/InternImage/raw/main/upernet_internimage_xl_512x1024_160k_cityscapes.log.json) |
- Training speed is measured with A100 GPU.
- Please set `with_cp=True` to save memory if you meet `out-of-memory` issues.
......@@ -24,7 +24,7 @@ Cityscapes is a large-scale database which focuses on semantic understanding of
Mapillary 80k + Cityscapes (w/ coarse data) 160k
| backbone | resolution | mIoU (ss/ms) | train speed | train time | #params | FLOPs | Config | Download |
|:--------------:|:----------:|:------------:|:-----------:|:-----------:|:-------:|:-----:|:------:|:------------:|
| :------------: | :--------: | :-----------: | :----------: | :--------: | :-----: | :---: | :----------------------------------------------------------------------: | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
| InternImage-L | 512x1024 | 85.94 / 86.22 | 0.50s / iter | 23h | 256M | 3234G | [config](./upernet_internimage_l_512x1024_160k_mapillary2cityscapes.py) | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/upernet_internimage_l_512x1024_160k_mapillary2cityscapes.pth) \| [log](https://huggingface.co/OpenGVLab/InternImage/raw/main/upernet_internimage_l_512x1024_160k_mapillary2cityscapes.log.json) |
| InternImage-XL | 512x1024 | 86.20 / 86.42 | 0.56s / iter | 26h | 368M | 4022G | [config](./upernet_internimage_xl_512x1024_160k_mapillary2cityscapes.py) | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/upernet_internimage_xl_512x1024_160k_mapillary2cityscapes.pth) \| [log](https://huggingface.co/OpenGVLab/InternImage/raw/main/upernet_internimage_xl_512x1024_160k_mapillary2cityscapes.log.json) |
......@@ -33,6 +33,6 @@ Mapillary 80k + Cityscapes (w/ coarse data) 160k
Mapillary 80k + Cityscapes (w/ coarse data) 160k
| backbone | resolution | mIoU (ss/ms) | train speed | train time | #params | FLOPs | Config | Download |
|:--------------:|:----------:|:------------:|:-----------:|:-----------:|:-------:|:-----:|:-----:|:---------:|
| :------------: | :--------: | :-----------: | :----------: | :--------: | :-----: | :---: | :------------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
| InternImage-L | 512x1024 | 85.16 / 85.67 | 0.37s / iter | 17h | 220M | 1580G | [config](./segformer_internimage_l_512x1024_160k_mapillary2cityscapes.py) | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/segformer_internimage_l_512x1024_160k_mapillary2cityscapes.pth) \| [log](https://huggingface.co/OpenGVLab/InternImage/raw/main/segformer_internimage_l_512x1024_160k_mapillary2cityscapes.log.json) |
| InternImage-XL | 512x1024 | 85.41 / 85.93 | 0.43s / iter | 19.5h | 330M | 2364G | [config](./segformer_internimage_xl_512x1024_160k_mapillary2cityscapes.py) | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/segformer_internimage_xl_512x1024_160k_mapillary2cityscapes.pth) \| [log](https://huggingface.co/OpenGVLab/InternImage/raw/main/segformer_internimage_xl_512x1024_160k_mapillary2cityscapes.log.json) |
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment