Commit 41b18fd8 authored by zhe chen's avatar zhe chen
Browse files

Use pre-commit to reformat code


Use pre-commit to reformat code
parent ff20ea39
...@@ -4,16 +4,14 @@ ...@@ -4,16 +4,14 @@
# Licensed under The MIT License [see LICENSE for details] # Licensed under The MIT License [see LICENSE for details]
# -------------------------------------------------------- # --------------------------------------------------------
from __future__ import absolute_import from __future__ import absolute_import, division, print_function
from __future__ import print_function
from __future__ import division
import DCNv3
import torch import torch
import torch.nn.functional as F import torch.nn.functional as F
from torch.autograd import Function from torch.autograd import Function
from torch.autograd.function import once_differentiable from torch.autograd.function import once_differentiable
from torch.cuda.amp import custom_bwd, custom_fwd from torch.cuda.amp import custom_bwd, custom_fwd
import DCNv3
class DCNv3Function(Function): class DCNv3Function(Function):
...@@ -88,6 +86,7 @@ class DCNv3Function(Function): ...@@ -88,6 +86,7 @@ class DCNv3Function(Function):
im2col_step_i=int(im2col_step), im2col_step_i=int(im2col_step),
) )
def _get_reference_points(spatial_shapes, device, kernel_h, kernel_w, dilation_h, dilation_w, pad_h=0, pad_w=0, stride_h=1, stride_w=1): def _get_reference_points(spatial_shapes, device, kernel_h, kernel_w, dilation_h, dilation_w, pad_h=0, pad_w=0, stride_h=1, stride_w=1):
_, H_, W_, _ = spatial_shapes _, H_, W_, _ = spatial_shapes
H_out = (H_ - (dilation_h * (kernel_h - 1) + 1)) // stride_h + 1 H_out = (H_ - (dilation_h * (kernel_h - 1) + 1)) // stride_h + 1
......
...@@ -4,4 +4,4 @@ ...@@ -4,4 +4,4 @@
# Licensed under The MIT License [see LICENSE for details] # Licensed under The MIT License [see LICENSE for details]
# -------------------------------------------------------- # --------------------------------------------------------
from .dcnv3 import DCNv3, DCNv3_pytorch from .dcnv3 import DCNv3, DCNv3_pytorch
\ No newline at end of file
...@@ -4,22 +4,24 @@ ...@@ -4,22 +4,24 @@
# Licensed under The MIT License [see LICENSE for details] # Licensed under The MIT License [see LICENSE for details]
# -------------------------------------------------------- # --------------------------------------------------------
from __future__ import absolute_import from __future__ import absolute_import, division, print_function
from __future__ import print_function
from __future__ import division
import warnings import warnings
import torch import torch
from torch import nn
import torch.nn.functional as F import torch.nn.functional as F
from torch.nn.init import xavier_uniform_, constant_ from torch import nn
from torch.nn.init import constant_, xavier_uniform_
from ..functions import DCNv3Function, dcnv3_core_pytorch from ..functions import DCNv3Function, dcnv3_core_pytorch
try: try:
from DCNv4.functions import DCNv4Function from DCNv4.functions import DCNv4Function
except: except:
warnings.warn('Now, we support DCNv4 in InternImage.') warnings.warn('Now, we support DCNv4 in InternImage.')
import math import math
class to_channels_first(nn.Module): class to_channels_first(nn.Module):
def __init__(self): def __init__(self):
...@@ -76,7 +78,7 @@ def build_act_layer(act_layer): ...@@ -76,7 +78,7 @@ def build_act_layer(act_layer):
def _is_power_of_2(n): def _is_power_of_2(n):
if (not isinstance(n, int)) or (n < 0): if (not isinstance(n, int)) or (n < 0):
raise ValueError( raise ValueError(
"invalid input for _is_power_of_2: {} (type: {})".format(n, type(n))) 'invalid input for _is_power_of_2: {} (type: {})'.format(n, type(n)))
return (n & (n - 1) == 0) and n != 0 return (n & (n - 1) == 0) and n != 0
...@@ -128,7 +130,7 @@ class DCNv3_pytorch(nn.Module): ...@@ -128,7 +130,7 @@ class DCNv3_pytorch(nn.Module):
if not _is_power_of_2(_d_per_group): if not _is_power_of_2(_d_per_group):
warnings.warn( warnings.warn(
"You'd better set channels in DCNv3 to make the dimension of each attention head a power of 2 " "You'd better set channels in DCNv3 to make the dimension of each attention head a power of 2 "
"which is more efficient in our CUDA implementation.") 'which is more efficient in our CUDA implementation.')
self.offset_scale = offset_scale self.offset_scale = offset_scale
self.channels = channels self.channels = channels
...@@ -165,7 +167,7 @@ class DCNv3_pytorch(nn.Module): ...@@ -165,7 +167,7 @@ class DCNv3_pytorch(nn.Module):
self.input_proj = nn.Linear(channels, channels) self.input_proj = nn.Linear(channels, channels)
self.output_proj = nn.Linear(channels, channels) self.output_proj = nn.Linear(channels, channels)
self._reset_parameters() self._reset_parameters()
if center_feature_scale: if center_feature_scale:
self.center_feature_scale_proj_weight = nn.Parameter( self.center_feature_scale_proj_weight = nn.Parameter(
torch.zeros((group, channels), dtype=torch.float)) torch.zeros((group, channels), dtype=torch.float))
...@@ -234,7 +236,7 @@ class DCNv3(nn.Module): ...@@ -234,7 +236,7 @@ class DCNv3(nn.Module):
norm_layer='LN', norm_layer='LN',
center_feature_scale=False, center_feature_scale=False,
use_dcn_v4_op=False, use_dcn_v4_op=False,
): ):
""" """
DCNv3 Module DCNv3 Module
:param channels :param channels
...@@ -257,7 +259,7 @@ class DCNv3(nn.Module): ...@@ -257,7 +259,7 @@ class DCNv3(nn.Module):
if not _is_power_of_2(_d_per_group): if not _is_power_of_2(_d_per_group):
warnings.warn( warnings.warn(
"You'd better set channels in DCNv3 to make the dimension of each attention head a power of 2 " "You'd better set channels in DCNv3 to make the dimension of each attention head a power of 2 "
"which is more efficient in our CUDA implementation.") 'which is more efficient in our CUDA implementation.')
self.offset_scale = offset_scale self.offset_scale = offset_scale
self.channels = channels self.channels = channels
...@@ -270,7 +272,7 @@ class DCNv3(nn.Module): ...@@ -270,7 +272,7 @@ class DCNv3(nn.Module):
self.group_channels = channels // group self.group_channels = channels // group
self.offset_scale = offset_scale self.offset_scale = offset_scale
self.center_feature_scale = center_feature_scale self.center_feature_scale = center_feature_scale
self.use_dcn_v4_op = use_dcn_v4_op self.use_dcn_v4_op = use_dcn_v4_op
self.dw_conv = nn.Sequential( self.dw_conv = nn.Sequential(
...@@ -296,7 +298,7 @@ class DCNv3(nn.Module): ...@@ -296,7 +298,7 @@ class DCNv3(nn.Module):
self.input_proj = nn.Linear(channels, channels) self.input_proj = nn.Linear(channels, channels)
self.output_proj = nn.Linear(channels, channels) self.output_proj = nn.Linear(channels, channels)
self._reset_parameters() self._reset_parameters()
if center_feature_scale: if center_feature_scale:
self.center_feature_scale_proj_weight = nn.Parameter( self.center_feature_scale_proj_weight = nn.Parameter(
torch.zeros((group, channels), dtype=torch.float)) torch.zeros((group, channels), dtype=torch.float))
...@@ -329,7 +331,7 @@ class DCNv3(nn.Module): ...@@ -329,7 +331,7 @@ class DCNv3(nn.Module):
x1 = self.dw_conv(x1) x1 = self.dw_conv(x1)
offset = self.offset(x1) offset = self.offset(x1)
mask = self.mask(x1).reshape(N, H, W, self.group, -1) mask = self.mask(x1).reshape(N, H, W, self.group, -1)
if not self.use_dcn_v4_op: if not self.use_dcn_v4_op:
mask = F.softmax(mask, -1).reshape(N, H, W, -1).type(dtype) mask = F.softmax(mask, -1).reshape(N, H, W, -1).type(dtype)
x = DCNv3Function.apply( x = DCNv3Function.apply(
...@@ -349,12 +351,12 @@ class DCNv3(nn.Module): ...@@ -349,12 +351,12 @@ class DCNv3(nn.Module):
mask = mask.view(N, H, W, self.group, -1) mask = mask.view(N, H, W, self.group, -1)
offset_mask = torch.cat([offset, mask], -1).view(N, H, W, -1).contiguous() offset_mask = torch.cat([offset, mask], -1).view(N, H, W, -1).contiguous()
# For efficiency, the last dimension of the offset_mask tensor in dcnv4 is a multiple of 8. # For efficiency, the last dimension of the offset_mask tensor in dcnv4 is a multiple of 8.
K3 = offset_mask.size(-1) K3 = offset_mask.size(-1)
K3_pad = int(math.ceil(K3/8)*8) K3_pad = int(math.ceil(K3 / 8) * 8)
pad_dim = K3_pad - K3 pad_dim = K3_pad - K3
offset_mask = torch.cat([offset_mask, offset_mask.new_zeros([*offset_mask.size()[:3], pad_dim])], -1) offset_mask = torch.cat([offset_mask, offset_mask.new_zeros([*offset_mask.size()[:3], pad_dim])], -1)
x = DCNv4Function.apply( x = DCNv4Function.apply(
x, offset_mask, x, offset_mask,
self.kernel_size, self.kernel_size, self.kernel_size, self.kernel_size,
......
...@@ -4,39 +4,34 @@ ...@@ -4,39 +4,34 @@
# Licensed under The MIT License [see LICENSE for details] # Licensed under The MIT License [see LICENSE for details]
# -------------------------------------------------------- # --------------------------------------------------------
import os
import glob import glob
import os
import torch import torch
from setuptools import find_packages, setup
from torch.utils.cpp_extension import CUDA_HOME, CppExtension, CUDAExtension
from torch.utils.cpp_extension import CUDA_HOME requirements = ['torch', 'torchvision']
from torch.utils.cpp_extension import CppExtension
from torch.utils.cpp_extension import CUDAExtension
from setuptools import find_packages
from setuptools import setup
requirements = ["torch", "torchvision"]
def get_extensions(): def get_extensions():
this_dir = os.path.dirname(os.path.abspath(__file__)) this_dir = os.path.dirname(os.path.abspath(__file__))
extensions_dir = os.path.join(this_dir, "src") extensions_dir = os.path.join(this_dir, 'src')
main_file = glob.glob(os.path.join(extensions_dir, "*.cpp")) main_file = glob.glob(os.path.join(extensions_dir, '*.cpp'))
source_cpu = glob.glob(os.path.join(extensions_dir, "cpu", "*.cpp")) source_cpu = glob.glob(os.path.join(extensions_dir, 'cpu', '*.cpp'))
source_cuda = glob.glob(os.path.join(extensions_dir, "cuda", "*.cu")) source_cuda = glob.glob(os.path.join(extensions_dir, 'cuda', '*.cu'))
sources = main_file + source_cpu sources = main_file + source_cpu
extension = CppExtension extension = CppExtension
extra_compile_args = {"cxx": []} extra_compile_args = {'cxx': []}
define_macros = [] define_macros = []
if torch.cuda.is_available() and CUDA_HOME is not None: if torch.cuda.is_available() and CUDA_HOME is not None:
extension = CUDAExtension extension = CUDAExtension
sources += source_cuda sources += source_cuda
define_macros += [("WITH_CUDA", None)] define_macros += [('WITH_CUDA', None)]
extra_compile_args["nvcc"] = [ extra_compile_args['nvcc'] = [
# "-DCUDA_HAS_FP16=1", # "-DCUDA_HAS_FP16=1",
# "-D__CUDA_NO_HALF_OPERATORS__", # "-D__CUDA_NO_HALF_OPERATORS__",
# "-D__CUDA_NO_HALF_CONVERSIONS__", # "-D__CUDA_NO_HALF_CONVERSIONS__",
...@@ -49,7 +44,7 @@ def get_extensions(): ...@@ -49,7 +44,7 @@ def get_extensions():
include_dirs = [extensions_dir] include_dirs = [extensions_dir]
ext_modules = [ ext_modules = [
extension( extension(
"DCNv3", 'DCNv3',
sources, sources,
include_dirs=include_dirs, include_dirs=include_dirs,
define_macros=define_macros, define_macros=define_macros,
...@@ -60,16 +55,16 @@ def get_extensions(): ...@@ -60,16 +55,16 @@ def get_extensions():
setup( setup(
name="DCNv3", name='DCNv3',
version="1.0", version='1.0',
author="InternImage", author='InternImage',
url="https://github.com/OpenGVLab/InternImage", url='https://github.com/OpenGVLab/InternImage',
description= description=
"PyTorch Wrapper for CUDA Functions of DCNv3", 'PyTorch Wrapper for CUDA Functions of DCNv3',
packages=find_packages(exclude=( packages=find_packages(exclude=(
"configs", 'configs',
"tests", 'tests',
)), )),
ext_modules=get_extensions(), ext_modules=get_extensions(),
cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension}, cmdclass={'build_ext': torch.utils.cpp_extension.BuildExtension},
) )
...@@ -171,4 +171,4 @@ dcnv3_cuda_backward(const at::Tensor &input, const at::Tensor &offset, ...@@ -171,4 +171,4 @@ dcnv3_cuda_backward(const at::Tensor &input, const at::Tensor &offset,
} else { } else {
return {grad_input, grad_offset, grad_mask}; return {grad_input, grad_offset, grad_mask};
} }
} }
\ No newline at end of file
...@@ -1042,4 +1042,4 @@ void dcnv3_col2im_cuda( ...@@ -1042,4 +1042,4 @@ void dcnv3_col2im_cuda(
if (err != cudaSuccess) { if (err != cudaSuccess) {
printf("error in dcnv3_col2im_cuda: %s\n", cudaGetErrorString(err)); printf("error in dcnv3_col2im_cuda: %s\n", cudaGetErrorString(err));
} }
} }
\ No newline at end of file
...@@ -4,17 +4,15 @@ ...@@ -4,17 +4,15 @@
# Licensed under The MIT License [see LICENSE for details] # Licensed under The MIT License [see LICENSE for details]
# -------------------------------------------------------- # --------------------------------------------------------
from __future__ import absolute_import from __future__ import absolute_import, division, print_function
from __future__ import print_function
from __future__ import division
import math
import time import time
import torch import torch
import torch.nn as nn import torch.nn as nn
import math
from torch.autograd import gradcheck
from functions.dcnv3_func import DCNv3Function, dcnv3_core_pytorch from functions.dcnv3_func import DCNv3Function, dcnv3_core_pytorch
from torch.autograd import gradcheck
H_in, W_in = 8, 8 H_in, W_in = 8, 8
N, M, D = 2, 4, 16 N, M, D = 2, 4, 16
......
...@@ -22,4 +22,4 @@ srun -p ${PARTITION} \ ...@@ -22,4 +22,4 @@ srun -p ${PARTITION} \
--kill-on-bad-exit=1 \ --kill-on-bad-exit=1 \
--quotatype=spot \ --quotatype=spot \
${SRUN_ARGS} \ ${SRUN_ARGS} \
python -u train.py ${CONFIG} --work-dir=${WORK_DIR} --launcher="slurm" ${PY_ARGS} python -u train.py ${CONFIG} --work-dir=${WORK_DIR} --launcher="slurm" ${PY_ARGS}
\ No newline at end of file
...@@ -11,6 +11,8 @@ import time ...@@ -11,6 +11,8 @@ import time
import warnings import warnings
import mmcv import mmcv
import mmcv_custom # noqa: F401,F403
import mmdet_custom # noqa: F401,F403
import torch import torch
from mmcv import Config, DictAction from mmcv import Config, DictAction
from mmcv.cnn import fuse_conv_bn from mmcv.cnn import fuse_conv_bn
...@@ -21,8 +23,6 @@ from mmdet.apis import multi_gpu_test, single_gpu_test ...@@ -21,8 +23,6 @@ from mmdet.apis import multi_gpu_test, single_gpu_test
from mmdet.datasets import (build_dataloader, build_dataset, from mmdet.datasets import (build_dataloader, build_dataset,
replace_ImageToTensor) replace_ImageToTensor)
from mmdet.models import build_detector from mmdet.models import build_detector
import mmdet_custom # noqa: F401,F403
import mmcv_custom # noqa: F401,F403
def parse_args(): def parse_args():
......
import argparse import argparse
import concurrent.futures
import json
import os import os
import pickle as pkl import pickle as pkl
import numpy as np
import random import random
from PIL import Image
import concurrent.futures
import json
import mmcv import mmcv
import numpy as np
from PIL import Image
def parse_args(): def parse_args():
parser = argparse.ArgumentParser(description='Generate MMDetection Annotations for Crowdhuman-like dataset') parser = argparse.ArgumentParser(description='Generate MMDetection Annotations for Crowdhuman-like dataset')
...@@ -16,6 +18,7 @@ def parse_args(): ...@@ -16,6 +18,7 @@ def parse_args():
args = parser.parse_args() args = parser.parse_args()
return args.dataset, args.dataset_split return args.dataset, args.dataset_split
def load_func(fpath): def load_func(fpath):
assert os.path.exists(fpath) assert os.path.exists(fpath)
with open(fpath, 'r') as fid: with open(fpath, 'r') as fid:
...@@ -23,6 +26,7 @@ def load_func(fpath): ...@@ -23,6 +26,7 @@ def load_func(fpath):
records = [json.loads(line.strip('\n')) for line in lines] records = [json.loads(line.strip('\n')) for line in lines]
return records return records
def decode_annotations(records, dataset_path): def decode_annotations(records, dataset_path):
rec_ids = list(range(len(records))) rec_ids = list(range(len(records)))
img_list = [] img_list = []
...@@ -80,16 +84,17 @@ def decode_annotations(records, dataset_path): ...@@ -80,16 +84,17 @@ def decode_annotations(records, dataset_path):
) )
return json_dict return json_dict
if __name__ == "__main__":
if __name__ == '__main__':
dataset_name, dataset_type = parse_args() dataset_name, dataset_type = parse_args()
dataset_path = 'data/%s/' % dataset_name dataset_path = 'data/%s/' % dataset_name
ch_file_path = dataset_path + 'annotations/annotation_%s.odgt' % dataset_type ch_file_path = dataset_path + 'annotations/annotation_%s.odgt' % dataset_type
json_file_path = dataset_path + 'annotations/annotation_%s.json' % dataset_type json_file_path = dataset_path + 'annotations/annotation_%s.json' % dataset_type
records = load_func(ch_file_path) records = load_func(ch_file_path)
print("Loading Annotations Done") print('Loading Annotations Done')
json_dict = decode_annotations(records, dataset_path) json_dict = decode_annotations(records, dataset_path)
print("Parsing Bbox Number: %d" % len(json_dict['annotations'])) print('Parsing Bbox Number: %d' % len(json_dict['annotations']))
mmcv.dump(json_dict, json_file_path) mmcv.dump(json_dict, json_file_path)
from .compute_APMR import compute_APMR from .compute_APMR import compute_APMR
from .compute_JI import compute_JI_with_ignore from .compute_JI import compute_JI_with_ignore
\ No newline at end of file
...@@ -12,12 +12,13 @@ import time ...@@ -12,12 +12,13 @@ import time
import warnings import warnings
import mmcv import mmcv
import mmcv_custom # noqa: F401,F403
import mmdet_custom # noqa: F401,F403
import torch import torch
import torch.distributed as dist import torch.distributed as dist
from mmcv import Config, DictAction from mmcv import Config, DictAction
from mmcv.runner import get_dist_info, init_dist from mmcv.runner import get_dist_info, init_dist
from mmcv.utils import get_git_hash from mmcv.utils import get_git_hash
from mmdet import __version__ from mmdet import __version__
from mmdet.apis import init_random_seed, set_random_seed, train_detector from mmdet.apis import init_random_seed, set_random_seed, train_detector
from mmdet.datasets import build_dataset from mmdet.datasets import build_dataset
...@@ -25,8 +26,6 @@ from mmdet.models import build_detector ...@@ -25,8 +26,6 @@ from mmdet.models import build_detector
from mmdet.utils import (collect_env, get_device, get_root_logger, from mmdet.utils import (collect_env, get_device, get_root_logger,
replace_cfg_vals, setup_multi_processes, replace_cfg_vals, setup_multi_processes,
update_data_root) update_data_root)
import mmcv_custom # noqa: F401,F403
import mmdet_custom # noqa: F401,F403
def parse_args(): def parse_args():
...@@ -244,4 +243,4 @@ def main(): ...@@ -244,4 +243,4 @@ def main():
if __name__ == '__main__': if __name__ == '__main__':
main() main()
\ No newline at end of file
# Copyright (c) OpenMMLab. All rights reserved. # Copyright (c) OpenMMLab. All rights reserved.
import os.path as osp import os.path as osp
import pickle import pickle
import shutil import shutil
import tempfile import tempfile
import time import time
import numpy as np import mmcv
import numpy as np
import torch import torch
import torch.distributed as dist import torch.distributed as dist
import torch.nn.functional as F import torch.nn.functional as F
from mmcv.image import tensor2imgs
import mmcv from mmcv.runner import get_dist_info
from mmcv.image import tensor2imgs from mmdet.core import encode_mask_results
from mmcv.runner import get_dist_info
from mmdet.core import encode_mask_results def prompt_sam_with_bboxes(sam_predictor, data, box_result):
# process detector prediction
# (x1, y1, x2, y2), rescaled in original image space
def prompt_sam_with_bboxes(sam_predictor, data, box_result): bboxes = np.concatenate(box_result, axis=0)[..., :4]
# process detector prediction if len(bboxes) == 0:
# (x1, y1, x2, y2), rescaled in original image space return [[] for _ in range(len(box_result))]
bboxes = np.concatenate(box_result, axis=0)[..., :4] labels = np.concatenate([[i] * len(boxes) for i, boxes in enumerate(box_result)])
if len(bboxes) == 0:
return [[] for _ in range(len(box_result))] # prepare shapes
labels = np.concatenate([[i] * len(boxes) for i, boxes in enumerate(box_result)]) img_metas = data['img_metas'][0].data[0][0]
original_size = img_metas['ori_shape'][:2]
# prepare shapes
img_metas = data['img_metas'][0].data[0][0] # prepare input img of sam
original_size = img_metas['ori_shape'][:2] sam_predictor.reset_image()
# img has been normed (NOTE 2.x norm img in pipeline)
# prepare input img of sam img = data['img'][0] .to(sam_predictor.model.device)
sam_predictor.reset_image() # resize max length to 1024 and keep aspect ratio (ViT image encoder limitation)
# img has been normed (NOTE 2.x norm img in pipeline) target_size = sam_predictor.transform.get_preprocess_shape(
img = data['img'][0] .to(sam_predictor.model.device) img.shape[2], img.shape[3],
# resize max length to 1024 and keep aspect ratio (ViT image encoder limitation) sam_predictor.transform.target_length)
target_size = sam_predictor.transform.get_preprocess_shape( try:
img.shape[2], img.shape[3], # `antialias=True` is provided in official implementation of SAM,
sam_predictor.transform.target_length) # which may raise TypeError in PyTorch of previous versions.
try: transformed_img = F.interpolate(
# `antialias=True` is provided in official implementation of SAM, img, target_size, mode='bilinear',
# which may raise TypeError in PyTorch of previous versions. align_corners=False, antialias=True)
transformed_img = F.interpolate( except TypeError:
img, target_size, mode="bilinear", transformed_img = F.interpolate(
align_corners=False, antialias=True) img, target_size, mode='bilinear', align_corners=False)
except TypeError: # Pad to 1024 x 1024
transformed_img = F.interpolate( h, w = transformed_img.shape[-2:]
img, target_size, mode="bilinear", align_corners=False) pad_h = sam_predictor.model.image_encoder.img_size - h
# Pad to 1024 x 1024 pad_w = sam_predictor.model.image_encoder.img_size - w
h, w = transformed_img.shape[-2:] transformed_img = F.pad(transformed_img, (0, pad_w, 0, pad_h))
pad_h = sam_predictor.model.image_encoder.img_size - h
pad_w = sam_predictor.model.image_encoder.img_size - w # extract img feature
transformed_img = F.pad(transformed_img, (0, pad_w, 0, pad_h)) sam_predictor.features = sam_predictor.model.image_encoder(
transformed_img).to(sam_predictor.model.device)
# extract img feature
sam_predictor.features = sam_predictor.model.image_encoder( # set attributes
transformed_img).to(sam_predictor.model.device) sam_predictor.original_size = original_size
sam_predictor.input_size = tuple(transformed_img.shape[-2:])
# set attributes sam_predictor.is_image_set = True
sam_predictor.original_size = original_size
sam_predictor.input_size = tuple(transformed_img.shape[-2:]) # prepare bboxes and rescale bboxes to relative coordinates
sam_predictor.is_image_set = True bboxes_tensor = torch.from_numpy(bboxes).to(sam_predictor.model.device)
transformed_boxes = sam_predictor.transform.apply_boxes_torch(bboxes_tensor, original_size)
# prepare bboxes and rescale bboxes to relative coordinates
bboxes_tensor = torch.from_numpy(bboxes).to(sam_predictor.model.device) # prompt with bboxes
transformed_boxes = sam_predictor.transform.apply_boxes_torch(bboxes_tensor, original_size) batch_masks, _, _ = sam_predictor.predict_torch(
point_coords=None,
# prompt with bboxes point_labels=None,
batch_masks, _, _ = sam_predictor.predict_torch( boxes=transformed_boxes,
point_coords=None, multimask_output=False)
point_labels=None,
boxes=transformed_boxes, batch_masks = batch_masks.squeeze(1).cpu().numpy()
multimask_output=False)
mask_results = [[*batch_masks[labels == i]] for i in range(len(box_result))]
batch_masks = batch_masks.squeeze(1).cpu().numpy()
return mask_results
mask_results = [[*batch_masks[labels == i]] for i in range(len(box_result))]
return mask_results def single_gpu_test(model,
sam_predictor,
data_loader,
def single_gpu_test(model, show=False,
sam_predictor, out_dir=None,
data_loader, show_score_thr=0.3):
show=False, model.eval()
out_dir=None, results = []
show_score_thr=0.3): dataset = data_loader.dataset
model.eval() PALETTE = getattr(dataset, 'PALETTE', None)
results = [] prog_bar = mmcv.ProgressBar(len(dataset))
dataset = data_loader.dataset for i, data in enumerate(data_loader):
PALETTE = getattr(dataset, 'PALETTE', None) with torch.no_grad():
prog_bar = mmcv.ProgressBar(len(dataset)) # For instance segmentor, only the box results is used in the
for i, data in enumerate(data_loader): # second stage (prompt sam with box). NOTE the mask_head is still
with torch.no_grad(): # calculated, hence the FPS, FLOPS, maybe not accurate.
# For instance segmentor, only the box results is used in the result = model(return_loss=False, rescale=True, **data)
# second stage (prompt sam with box). NOTE the mask_head is still if getattr(model.module, 'with_mask', False):
# calculated, hence the FPS, FLOPS, maybe not accurate. box_result = result[0][0] # simple_test supported
result = model(return_loss=False, rescale=True, **data) mask_result = prompt_sam_with_bboxes(sam_predictor, data, box_result)
if getattr(model.module, 'with_mask', False): result = [(box_result, mask_result)]
box_result = result[0][0] # simple_test supported else:
mask_result = prompt_sam_with_bboxes(sam_predictor, data, box_result) raise NotImplementedError('WIP!')
result = [(box_result, mask_result)]
else: batch_size = len(result)
raise NotImplementedError('WIP!') if show or out_dir:
if batch_size == 1 and isinstance(data['img'][0], torch.Tensor):
batch_size = len(result) img_tensor = data['img'][0]
if show or out_dir: else:
if batch_size == 1 and isinstance(data['img'][0], torch.Tensor): img_tensor = data['img'][0].data[0]
img_tensor = data['img'][0] img_metas = data['img_metas'][0].data[0]
else: imgs = tensor2imgs(img_tensor, **img_metas[0]['img_norm_cfg'])
img_tensor = data['img'][0].data[0] assert len(imgs) == len(img_metas)
img_metas = data['img_metas'][0].data[0]
imgs = tensor2imgs(img_tensor, **img_metas[0]['img_norm_cfg']) for i, (img, img_meta) in enumerate(zip(imgs, img_metas)):
assert len(imgs) == len(img_metas) h, w, _ = img_meta['img_shape']
img_show = img[:h, :w, :]
for i, (img, img_meta) in enumerate(zip(imgs, img_metas)):
h, w, _ = img_meta['img_shape'] ori_h, ori_w = img_meta['ori_shape'][:-1]
img_show = img[:h, :w, :] img_show = mmcv.imresize(img_show, (ori_w, ori_h))
ori_h, ori_w = img_meta['ori_shape'][:-1] if out_dir:
img_show = mmcv.imresize(img_show, (ori_w, ori_h)) out_file = osp.join(out_dir, img_meta['ori_filename'])
else:
if out_dir: out_file = None
out_file = osp.join(out_dir, img_meta['ori_filename'])
else: model.module.show_result(
out_file = None img_show,
result[i],
model.module.show_result( bbox_color=PALETTE,
img_show, text_color=PALETTE,
result[i], mask_color=PALETTE,
bbox_color=PALETTE, show=show,
text_color=PALETTE, out_file=out_file,
mask_color=PALETTE, score_thr=show_score_thr)
show=show,
out_file=out_file, # encode mask results
score_thr=show_score_thr) if isinstance(result[0], tuple):
result = [(bbox_results, encode_mask_results(mask_results))
# encode mask results for bbox_results, mask_results in result]
if isinstance(result[0], tuple): # This logic is only used in panoptic segmentation test.
result = [(bbox_results, encode_mask_results(mask_results)) elif isinstance(result[0], dict) and 'ins_results' in result[0]:
for bbox_results, mask_results in result] for j in range(len(result)):
# This logic is only used in panoptic segmentation test. bbox_results, mask_results = result[j]['ins_results']
elif isinstance(result[0], dict) and 'ins_results' in result[0]: result[j]['ins_results'] = (bbox_results,
for j in range(len(result)): encode_mask_results(mask_results))
bbox_results, mask_results = result[j]['ins_results']
result[j]['ins_results'] = (bbox_results, results.extend(result)
encode_mask_results(mask_results))
for _ in range(batch_size):
results.extend(result) prog_bar.update()
return results
for _ in range(batch_size):
prog_bar.update()
return results
# -------------------------------------------------------- # --------------------------------------------------------
# InternImage # InternImage
# Copyright (c) 2022 OpenGVLab # Copyright (c) 2022 OpenGVLab
# Licensed under The MIT License [see LICENSE for details] # Licensed under The MIT License [see LICENSE for details]
# -------------------------------------------------------- # --------------------------------------------------------
import argparse import argparse
import os import os
import os.path as osp import os.path as osp
import time import time
import warnings import warnings
import mmcv import mmcv
import torch import torch
from mmcv import Config, DictAction from mmcv import Config, DictAction
from mmcv.cnn import fuse_conv_bn from mmcv.cnn import fuse_conv_bn
from mmcv.parallel import MMDataParallel, MMDistributedDataParallel from mmcv.parallel import MMDataParallel, MMDistributedDataParallel
from mmcv.runner import (get_dist_info, init_dist, load_checkpoint, from mmcv.runner import (get_dist_info, init_dist, load_checkpoint,
wrap_fp16_model) wrap_fp16_model)
from mmdet.datasets import (build_dataloader, build_dataset, from mmdet.apis import multi_gpu_test
replace_ImageToTensor) from mmdet.datasets import (build_dataloader, build_dataset,
from mmdet.models import build_detector replace_ImageToTensor)
from mmdet.apis import multi_gpu_test from mmdet.models import build_detector
import detection.mmdet_custom # noqa: F401,F403 from segment_anything import SamPredictor, sam_model_registry
import detection.mmcv_custom # noqa: F401,F403
import detection.mmcv_custom # noqa: F401,F403
from segment_anything import sam_model_registry, SamPredictor import detection.mmdet_custom # noqa: F401,F403
try:
from .engine import single_gpu_test try:
except ImportError: from .engine import single_gpu_test
from sam.engine import single_gpu_test except ImportError:
from sam.engine import single_gpu_test
def parse_args():
    """Parse command-line arguments for zero-shot SAM instance-segmentation
    evaluation driven by an MMDet detector.

    Returns:
        argparse.Namespace: parsed arguments. When only the deprecated
        ``--options`` flag is given, its value is copied into
        ``args.eval_options``.

    Raises:
        ValueError: if both ``--options`` and ``--eval-options`` are given.
    """
    parser = argparse.ArgumentParser(
        description='Zero-shot instance segmentation evaluation for '
        'SAM prompted by MMDet detector')
    parser.add_argument('detector_cfg_path',
                        help='test config file path of MMDet detector')
    parser.add_argument('detector_ckpt_path',
                        help='checkpoint file path of MMDet detector')
    # NOTE: positional arguments are always required, so the previous
    # `default='vit_b'` here was dead (and misleading) and has been removed.
    parser.add_argument('sam_ckpt_path',
                        help='checkpoint file path of SAM')
    # Help text fixed: it was copy-pasted from the detector config argument.
    parser.add_argument('--sam_type', default='vit_b',
                        help='SAM model type (e.g. vit_b, vit_l, vit_h)')

    parser.add_argument('--data_type', default='test', choices=['val', 'test'],
                        help='run val set or test set')
    parser.add_argument(
        '--work-dir',
        help='the directory to save the file containing evaluation metrics')
    parser.add_argument('--out', help='output result file in pickle format')
    parser.add_argument(
        '--fuse-conv-bn',
        action='store_true',
        help='Whether to fuse conv and bn, this will slightly increase'
        'the inference speed')
    parser.add_argument('--gpu-ids',
                        type=int,
                        nargs='+',
                        help='ids of gpus to use '
                        '(only applicable to non-distributed testing)')
    parser.add_argument(
        '--format-only',
        action='store_true',
        help='Format the output results without perform evaluation. It is'
        'useful when you want to format the result to a specific format and '
        'submit it to the test server')
    parser.add_argument(
        '--eval',
        type=str,
        nargs='+',
        help='evaluation metrics, which depends on the dataset, e.g., "bbox",'
        ' "segm", "proposal" for COCO, and "mAP", "recall" for PASCAL VOC')
    parser.add_argument('--show', action='store_true', help='show results')
    parser.add_argument('--show-dir',
                        help='directory where painted images will be saved')
    parser.add_argument('--show-score-thr',
                        type=float,
                        default=0.3,
                        help='score threshold (default: 0.3)')
    parser.add_argument('--gpu-collect',
                        action='store_true',
                        help='whether to use gpu to collect results.')
    parser.add_argument(
        '--tmpdir',
        help='tmp directory used for collecting results from multiple '
        'workers, available when gpu-collect is not specified')
    parser.add_argument(
        '--cfg-options',
        nargs='+',
        action=DictAction,
        help='override some settings in the used config, the key-value pair '
        'in xxx=yyy format will be merged into config file. If the value to '
        'be overwritten is a list, it should be like key="[a,b]" or key=a,b '
        'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" '
        'Note that the quotation marks are necessary and that no white space '
        'is allowed.')
    parser.add_argument(
        '--options',
        nargs='+',
        action=DictAction,
        help='custom options for evaluation, the key-value pair in xxx=yyy '
        'format will be kwargs for dataset.evaluate() function (deprecate), '
        'change to --eval-options instead.')
    parser.add_argument(
        '--eval-options',
        nargs='+',
        action=DictAction,
        help='custom options for evaluation, the key-value pair in xxx=yyy '
        'format will be kwargs for dataset.evaluate() function')
    parser.add_argument('--launcher',
                        choices=['none', 'pytorch', 'slurm', 'mpi'],
                        default='none',
                        help='job launcher')
    parser.add_argument('--local_rank', type=int, default=0)
    args = parser.parse_args()
    # Propagate the launcher-provided rank so downstream dist init works even
    # when the environment variable was not set by the launcher itself.
    if 'LOCAL_RANK' not in os.environ:
        os.environ['LOCAL_RANK'] = str(args.local_rank)

    if args.options and args.eval_options:
        raise ValueError(
            '--options and --eval-options cannot be both '
            'specified, --options is deprecated in favor of --eval-options')
    if args.options:
        warnings.warn('--options is deprecated in favor of --eval-options')
        args.eval_options = args.options
    return args
def main():
    """Entry point: evaluate zero-shot instance segmentation where an MMDet
    detector provides prompts for SAM mask prediction.

    Loads the detector config/checkpoint, builds the test dataloader, wraps
    the detector and a SAM predictor, runs single-GPU inference, and finally
    dumps / formats / evaluates the results according to the CLI flags.
    """
    args = parse_args()

    assert args.out or args.eval or args.format_only or args.show \
        or args.show_dir, \
        ('Please specify at least one operation (save/eval/format/show the '
         'results / save the results) with the argument "--out", "--eval"'
         ', "--format-only", "--show" or "--show-dir"')

    if args.eval and args.format_only:
        raise ValueError('--eval and --format_only cannot be both specified')

    if args.out is not None and not args.out.endswith(('.pkl', '.pickle')):
        raise ValueError('The output file must be a pkl file.')

    cfg = Config.fromfile(args.detector_cfg_path)
    if args.cfg_options is not None:
        cfg.merge_from_dict(args.cfg_options)
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True

    # Test-time: never load backbone pretrained weights; checkpoint has them.
    cfg.model.pretrained = None
    if cfg.model.get('neck'):
        if isinstance(cfg.model.neck, list):
            for neck_cfg in cfg.model.neck:
                if neck_cfg.get('rfp_backbone'):
                    if neck_cfg.rfp_backbone.get('pretrained'):
                        neck_cfg.rfp_backbone.pretrained = None
        elif cfg.model.neck.get('rfp_backbone'):
            if cfg.model.neck.rfp_backbone.get('pretrained'):
                cfg.model.neck.rfp_backbone.pretrained = None

    # in case the test dataset is concatenated
    samples_per_gpu = 1
    if isinstance(cfg.data.test, dict):
        cfg.data.test.test_mode = True
        samples_per_gpu = cfg.data.test.pop('samples_per_gpu', 1)
        if samples_per_gpu > 1:
            # Replace 'ImageToTensor' to 'DefaultFormatBundle'
            cfg.data.test.pipeline = replace_ImageToTensor(
                cfg.data.test.pipeline)
    elif isinstance(cfg.data.test, list):
        for ds_cfg in cfg.data.test:
            ds_cfg.test_mode = True
        samples_per_gpu = max(
            [ds_cfg.pop('samples_per_gpu', 1) for ds_cfg in cfg.data.test])
        if samples_per_gpu > 1:
            for ds_cfg in cfg.data.test:
                ds_cfg.pipeline = replace_ImageToTensor(ds_cfg.pipeline)

    if args.gpu_ids is not None:
        cfg.gpu_ids = args.gpu_ids
    else:
        cfg.gpu_ids = range(1)

    # init distributed env first, since logger depends on the dist info.
    if args.launcher == 'none':
        distributed = False
        if len(cfg.gpu_ids) > 1:
            warnings.warn(
                f'We treat {cfg.gpu_ids} as gpu-ids, and reset to '
                f'{cfg.gpu_ids[0:1]} as gpu-ids to avoid potential error in '
                'non-distribute testing time.')
            cfg.gpu_ids = cfg.gpu_ids[0:1]
    else:
        distributed = True
        init_dist(args.launcher, **cfg.dist_params)

    rank, _ = get_dist_info()
    # Only rank 0 creates the work dir / metrics file to avoid races.
    if args.work_dir is not None and rank == 0:
        mmcv.mkdir_or_exist(osp.abspath(args.work_dir))
        timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
        json_file = osp.join(args.work_dir, f'eval_{timestamp}.json')

    # build the dataloader
    dataset = build_dataset(cfg.data.test)
    data_loader = build_dataloader(dataset,
                                   samples_per_gpu=samples_per_gpu,
                                   workers_per_gpu=cfg.data.workers_per_gpu,
                                   dist=distributed,
                                   shuffle=False)

    # build the detector and load checkpoint
    cfg.model.train_cfg = None
    model = build_detector(cfg.model, test_cfg=cfg.get('test_cfg'))
    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is not None:
        wrap_fp16_model(model)
    checkpoint = load_checkpoint(model, args.detector_ckpt_path,
                                 map_location='cpu')

    if args.fuse_conv_bn:
        model = fuse_conv_bn(model)
    # old versions did not save class info in checkpoints, this workaround is
    # for backward compatibility
    if 'CLASSES' in checkpoint.get('meta', {}):
        model.CLASSES = checkpoint['meta']['CLASSES']
    else:
        model.CLASSES = dataset.CLASSES

    if not distributed:
        model = MMDataParallel(model, device_ids=cfg.gpu_ids)
        # The SamPredictor will be invalid If model is wrapped using MMDataParallel
        # A better implementation will be not to use the provided SamPredictor API
        sam = sam_model_registry[args.sam_type](
            checkpoint=args.sam_ckpt_path).to(
                list(model.module.parameters())[0].device)
        sam_predictor = SamPredictor(sam)
        outputs = single_gpu_test(model, sam_predictor, data_loader, args.show,
                                  args.show_dir, args.show_score_thr)
    else:
        # Distributed evaluation with SAM prompting is not implemented yet;
        # the code below is kept as a template and is intentionally
        # unreachable.
        raise NotImplementedError
        model = MMDistributedDataParallel(
            model.cuda(),
            device_ids=[torch.cuda.current_device()],
            broadcast_buffers=False)
        outputs = multi_gpu_test(model, data_loader, args.tmpdir,
                                 args.gpu_collect)

    rank, _ = get_dist_info()
    if rank == 0:
        if args.out:
            print(f'\nwriting results to {args.out}')
            mmcv.dump(outputs, args.out)
        kwargs = {} if args.eval_options is None else args.eval_options
        if args.format_only:
            dataset.format_results(outputs, **kwargs)
        if args.eval:
            eval_kwargs = cfg.get('evaluation', {}).copy()
            # hard-code way to remove EvalHook args
            for key in [
                    'interval', 'tmpdir', 'start', 'gpu_collect', 'save_best',
                    'rule', 'dynamic_intervals'
            ]:
                eval_kwargs.pop(key, None)
            eval_kwargs.update(dict(metric=args.eval, **kwargs))
            metric = dataset.evaluate(outputs, **eval_kwargs)
            print(metric)
            metric_dict = dict(config=args.detector_cfg_path, metric=metric)
            if args.work_dir is not None and rank == 0:
                mmcv.dump(metric_dict, json_file)
# Script entry point: run evaluation only when executed directly, not when
# imported as a module.
if __name__ == '__main__':
    main()
# InternImage for Semantic Segmentation # InternImage for Semantic Segmentation
This folder contains the implementation of the InternImage for semantic segmentation. This folder contains the implementation of the InternImage for semantic segmentation.
Our segmentation code is developed on top of [MMSegmentation v0.27.0](https://github.com/open-mmlab/mmsegmentation/tree/v0.27.0). Our segmentation code is developed on top of [MMSegmentation v0.27.0](https://github.com/open-mmlab/mmsegmentation/tree/v0.27.0).
...@@ -27,6 +27,7 @@ conda activate internimage ...@@ -27,6 +27,7 @@ conda activate internimage
- Install `PyTorch>=1.10.0` and `torchvision>=0.9.0` with `CUDA>=10.2`: - Install `PyTorch>=1.10.0` and `torchvision>=0.9.0` with `CUDA>=10.2`:
For examples, to install torch==1.11 with CUDA==11.3 and nvcc: For examples, to install torch==1.11 with CUDA==11.3 and nvcc:
```bash ```bash
conda install pytorch==1.11.0 torchvision==0.12.0 torchaudio==0.11.0 cudatoolkit=11.3 -c pytorch -y conda install pytorch==1.11.0 torchvision==0.12.0 torchaudio==0.11.0 cudatoolkit=11.3 -c pytorch -y
conda install -c conda-forge cudatoolkit-dev=11.3 -y # to install nvcc conda install -c conda-forge cudatoolkit-dev=11.3 -y # to install nvcc
...@@ -34,14 +35,14 @@ conda install -c conda-forge cudatoolkit-dev=11.3 -y # to install nvcc ...@@ -34,14 +35,14 @@ conda install -c conda-forge cudatoolkit-dev=11.3 -y # to install nvcc
- Install other requirements: - Install other requirements:
note: conda opencv will break torchvision as not to support GPU, so we need to install opencv using pip. note: conda opencv will break torchvision as not to support GPU, so we need to install opencv using pip.
```bash ```bash
conda install -c conda-forge termcolor yacs pyyaml scipy pip -y conda install -c conda-forge termcolor yacs pyyaml scipy pip -y
pip install opencv-python pip install opencv-python
``` ```
- Install `timm`, `mmcv-full` and `mmsegmentation`:
```bash ```bash
pip install -U openmim pip install -U openmim
...@@ -51,20 +52,21 @@ pip install timm==0.6.11 mmdet==2.28.1 ...@@ -51,20 +52,21 @@ pip install timm==0.6.11 mmdet==2.28.1
``` ```
- Compile CUDA operators - Compile CUDA operators
```bash ```bash
cd ./ops_dcnv3 cd ./ops_dcnv3
sh ./make.sh sh ./make.sh
# unit test (should see all checking is True) # unit test (should see all checking is True)
python test.py python test.py
``` ```
- You can also install the operator using .whl files - You can also install the operator using .whl files
[DCNv3-1.0-whl](https://github.com/OpenGVLab/InternImage/releases/tag/whl_files) [DCNv3-1.0-whl](https://github.com/OpenGVLab/InternImage/releases/tag/whl_files)
### Data Preparation ### Data Preparation
Prepare datasets according to the [guidelines](https://github.com/open-mmlab/mmsegmentation/blob/master/docs/en/dataset_prepare.md#prepare-datasets) in MMSegmentation. Prepare datasets according to the [guidelines](https://github.com/open-mmlab/mmsegmentation/blob/master/docs/en/dataset_prepare.md#prepare-datasets) in MMSegmentation.
### Evaluation ### Evaluation
To evaluate our `InternImage` on ADE20K val, run: To evaluate our `InternImage` on ADE20K val, run:
...@@ -72,6 +74,7 @@ To evaluate our `InternImage` on ADE20K val, run: ...@@ -72,6 +74,7 @@ To evaluate our `InternImage` on ADE20K val, run:
```bash ```bash
sh dist_test.sh <config-file> <checkpoint> <gpu-num> --eval mIoU sh dist_test.sh <config-file> <checkpoint> <gpu-num> --eval mIoU
``` ```
You can download checkpoint files from [here](https://huggingface.co/OpenGVLab/InternImage/tree/fc1e4e7e01c3e7a39a3875bdebb6577a7256ff91). Then place it to segmentation/checkpoint_dir/seg. You can download checkpoint files from [here](https://huggingface.co/OpenGVLab/InternImage/tree/fc1e4e7e01c3e7a39a3875bdebb6577a7256ff91). Then place it to segmentation/checkpoint_dir/seg.
For example, to evaluate the `InternImage-T` with a single GPU: For example, to evaluate the `InternImage-T` with a single GPU:
...@@ -109,19 +112,22 @@ GPUS=8 sh slurm_train.sh <partition> <job-name> configs/ade20k/upernet_internima ...@@ -109,19 +112,22 @@ GPUS=8 sh slurm_train.sh <partition> <job-name> configs/ade20k/upernet_internima
``` ```
### Image Demo ### Image Demo
To run inference on a single image or on multiple images, use the command below.
If you specify a directory containing images instead of a single image, all images in the directory will be processed:
``` ```
CUDA_VISIBLE_DEVICES=0 python image_demo.py \ CUDA_VISIBLE_DEVICES=0 python image_demo.py \
data/ade/ADEChallengeData2016/images/validation/ADE_val_00000591.jpg \ data/ade/ADEChallengeData2016/images/validation/ADE_val_00000591.jpg \
configs/ade20k/upernet_internimage_t_512_160k_ade20k.py \ configs/ade20k/upernet_internimage_t_512_160k_ade20k.py \
checkpoint_dir/seg/upernet_internimage_t_512_160k_ade20k.pth \ checkpoint_dir/seg/upernet_internimage_t_512_160k_ade20k.pth \
--palette ade20k --palette ade20k
``` ```
### Export ### Export
To export a segmentation model from PyTorch to TensorRT, run: To export a segmentation model from PyTorch to TensorRT, run:
```shell ```shell
MODEL="model_name" MODEL="model_name"
CKPT_PATH="/path/to/model/ckpt.pth" CKPT_PATH="/path/to/model/ckpt.pth"
...@@ -137,6 +143,7 @@ python deploy.py \ ...@@ -137,6 +143,7 @@ python deploy.py \
``` ```
For example, to export `upernet_internimage_t_512_160k_ade20k` from PyTorch to TensorRT, run: For example, to export `upernet_internimage_t_512_160k_ade20k` from PyTorch to TensorRT, run:
```shell ```shell
MODEL="upernet_internimage_t_512_160k_ade20k" MODEL="upernet_internimage_t_512_160k_ade20k"
CKPT_PATH="/path/to/model/ckpt/upernet_internimage_t_512_160k_ade20k.pth" CKPT_PATH="/path/to/model/ckpt/upernet_internimage_t_512_160k_ade20k.pth"
......
...@@ -135,4 +135,4 @@ model = dict( ...@@ -135,4 +135,4 @@ model = dict(
filter_low_score=True), filter_low_score=True),
init_cfg=None) init_cfg=None)
# find_unused_parameters = True # find_unused_parameters = True
\ No newline at end of file
...@@ -31,4 +31,4 @@ model = dict( ...@@ -31,4 +31,4 @@ model = dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
# model training and testing settings # model training and testing settings
train_cfg=dict(), train_cfg=dict(),
test_cfg=dict(mode='whole')) test_cfg=dict(mode='whole'))
\ No newline at end of file
...@@ -4,28 +4,25 @@ Introduced by Zhou et al. in [Scene Parsing Through ADE20K Dataset](https://pape ...@@ -4,28 +4,25 @@ Introduced by Zhou et al. in [Scene Parsing Through ADE20K Dataset](https://pape
The ADE20K semantic segmentation dataset contains more than 20K scene-centric images exhaustively annotated with pixel-level objects and object parts labels. There are totally 150 semantic categories, which include stuffs like sky, road, grass, and discrete objects like person, car, bed. The ADE20K semantic segmentation dataset contains more than 20K scene-centric images exhaustively annotated with pixel-level objects and object parts labels. There are totally 150 semantic categories, which include stuffs like sky, road, grass, and discrete objects like person, car, bed.
## Model Zoo ## Model Zoo
### UperNet + InternImage ### UperNet + InternImage
| backbone | resolution | mIoU (ss/ms) | train speed | train time | #param | FLOPs | Config | Download |
| backbone | resolution | mIoU (ss/ms) | train speed | train time | #param | FLOPs | Config | Download | | :------------: | :--------: | :----------: | :----------: | :--------: | :----: | :---: | :---------------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
|:--------------:|:----------:|:-----------:|:-----------:|:----------:|:-------:|:-----:|:-----:|:-------------------:| | InternImage-T | 512x512 | 47.9 / 48.1 | 0.23s / iter | 10.5h | 59M | 944G | [config](./upernet_internimage_t_512_160k_ade20k.py) | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/upernet_internimage_t_512_160k_ade20k.pth) \| [log](https://huggingface.co/OpenGVLab/InternImage/raw/main/upernet_internimage_t_512_160k_ade20k.log.json) |
| InternImage-T | 512x512 | 47.9 / 48.1 | 0.23s / iter | 10.5h | 59M | 944G | [config](./upernet_internimage_t_512_160k_ade20k.py) | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/upernet_internimage_t_512_160k_ade20k.pth) \| [log](https://huggingface.co/OpenGVLab/InternImage/raw/main/upernet_internimage_t_512_160k_ade20k.log.json) | | InternImage-S | 512x512 | 50.1 / 50.9 | 0.25s / iter | 11.5h | 80M | 1017G | [config](./upernet_internimage_s_512_160k_ade20k.py) | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/upernet_internimage_s_512_160k_ade20k.pth) \| [log](https://huggingface.co/OpenGVLab/InternImage/raw/main/upernet_internimage_s_512_160k_ade20k.log.json) |
| InternImage-S | 512x512 | 50.1 / 50.9 | 0.25s / iter | 11.5h | 80M | 1017G | [config](./upernet_internimage_s_512_160k_ade20k.py) | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/upernet_internimage_s_512_160k_ade20k.pth) \| [log](https://huggingface.co/OpenGVLab/InternImage/raw/main/upernet_internimage_s_512_160k_ade20k.log.json) | | InternImage-B | 512x512 | 50.8 / 51.3 | 0.26s / iter | 12h | 128M | 1185G | [config](./upernet_internimage_b_512_160k_ade20k.py) | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/upernet_internimage_b_512_160k_ade20k.pth) \| [log](https://huggingface.co/OpenGVLab/InternImage/raw/main/upernet_internimage_b_512_160k_ade20k.log.json) |
| InternImage-B | 512x512 | 50.8 / 51.3 | 0.26s / iter | 12h | 128M | 1185G | [config](./upernet_internimage_b_512_160k_ade20k.py) | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/upernet_internimage_b_512_160k_ade20k.pth) \| [log](https://huggingface.co/OpenGVLab/InternImage/raw/main/upernet_internimage_b_512_160k_ade20k.log.json) | | InternImage-L | 640x640 | 53.9 / 54.1 | 0.42s / iter | 19h | 256M | 2526G | [config](./upernet_internimage_l_640_160k_ade20k.py) | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/upernet_internimage_l_640_160k_ade20k.pth) \| [log](https://huggingface.co/OpenGVLab/InternImage/raw/main/upernet_internimage_l_640_160k_ade20k.log.json) |
| InternImage-L | 640x640 | 53.9 / 54.1 | 0.42s / iter | 19h | 256M | 2526G | [config](./upernet_internimage_l_640_160k_ade20k.py)| [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/upernet_internimage_l_640_160k_ade20k.pth) \| [log](https://huggingface.co/OpenGVLab/InternImage/raw/main/upernet_internimage_l_640_160k_ade20k.log.json) | | InternImage-XL | 640x640 | 55.0 / 55.3 | 0.47s / iter | 22h | 368M | 3142G | [config](./upernet_internimage_xl_640_160k_ade20k.py) | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/upernet_internimage_xl_640_160k_ade20k.pth) \| [log](https://huggingface.co/OpenGVLab/InternImage/raw/main/upernet_internimage_xl_640_160k_ade20k.log.json) |
| InternImage-XL | 640x640 | 55.0 / 55.3 | 0.47s / iter | 22h | 368M | 3142G | [config](./upernet_internimage_xl_640_160k_ade20k.py) | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/upernet_internimage_xl_640_160k_ade20k.pth) \| [log](https://huggingface.co/OpenGVLab/InternImage/raw/main/upernet_internimage_xl_640_160k_ade20k.log.json) | | InternImage-H | 896x896 | 59.9 / 60.3 | 0.94s / iter | 2d (2n) | 1.12B | 3566G | [config](./upernet_internimage_h_896_160k_ade20k.py) | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/upernet_internimage_h_896_160k_ade20k.pth) \| [log](https://huggingface.co/OpenGVLab/InternImage/raw/main/upernet_internimage_h_896_160k_ade20k.log.json) |
| InternImage-H | 896x896 | 59.9 / 60.3 | 0.94s / iter | 2d (2n) | 1.12B | 3566G | [config](./upernet_internimage_h_896_160k_ade20k.py) | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/upernet_internimage_h_896_160k_ade20k.pth) \| [log](https://huggingface.co/OpenGVLab/InternImage/raw/main/upernet_internimage_h_896_160k_ade20k.log.json) |
- Training speed is measured with A100 GPU. - Training speed is measured with A100 GPU.
- Please set `with_cp=True` to save memory if you meet `out-of-memory` issues. - Please set `with_cp=True` to save memory if you meet `out-of-memory` issues.
- The logs are our recent newly trained ones. There are slight differences between the results in logs and our paper. - The logs are our recent newly trained ones. There are slight differences between the results in logs and our paper.
### Mask2Former + InternImage ### Mask2Former + InternImage
| backbone | resolution | mIoU (ss/ms) | train speed | train time | #param | FLOPs | Config | Download | | backbone | resolution | mIoU (ss/ms) | train speed | train time | #param | FLOPs | Config | Download |
|:--------------:|:----------:|:-----------:|:-----------:|:----------:|:-------:|:-----:|:-----:|:-------------------:| | :-----------: | :--------: | :----------: | :----------: | :--------: | :----: | :---: | :------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
| InternImage-H | 896x896 | 62.6 / 62.9 | 1.21s / iter | 1.5d (2n) | 1.31B | 4635G | [config](./mask2former_internimage_h_896_80k_cocostuff2ade20k_ss.py) | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/mask2former_internimage_h_896_80k_cocostuff2ade20k.pth) \| [log](https://huggingface.co/OpenGVLab/InternImage/raw/main/mask2former_internimage_h_896_80k_cocostuff2ade20k.log.json) | | InternImage-H | 896x896 | 62.6 / 62.9 | 1.21s / iter | 1.5d (2n) | 1.31B | 4635G | [config](./mask2former_internimage_h_896_80k_cocostuff2ade20k_ss.py) | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/mask2former_internimage_h_896_80k_cocostuff2ade20k.pth) \| [log](https://huggingface.co/OpenGVLab/InternImage/raw/main/mask2former_internimage_h_896_80k_cocostuff2ade20k.log.json) |
...@@ -161,4 +161,3 @@ optimizer_config = dict(_delete_=True, grad_clip=dict(max_norm=0.1, norm_type=2) ...@@ -161,4 +161,3 @@ optimizer_config = dict(_delete_=True, grad_clip=dict(max_norm=0.1, norm_type=2)
checkpoint_config = dict(by_epoch=False, interval=1000, max_keep_ckpts=1) checkpoint_config = dict(by_epoch=False, interval=1000, max_keep_ckpts=1)
evaluation = dict(interval=2000, metric='mIoU', save_best='mIoU') evaluation = dict(interval=2000, metric='mIoU', save_best='mIoU')
# fp16 = dict(loss_scale=dict(init_scale=512)) # fp16 = dict(loss_scale=dict(init_scale=512))
Cityscapes is a large-scale database which focuses on semantic understanding of urban street scenes.

### UperNet + InternImage
| backbone | resolution | mIoU (ss/ms) | train speed | train time | #params | FLOPs | Config | Download |
| :------------: | :--------: | :-----------: | :----------: | :--------: | :-----: | :---: | :---: | :---: |
| InternImage-T | 512x1024 | 82.58 / 83.40 | 0.32s / iter | 14.5h | 59M | 1889G | [config](./upernet_internimage_t_512x1024_160k_cityscapes.py) | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/upernet_internimage_t_512x1024_160k_cityscapes.pth) \| [log](https://huggingface.co/OpenGVLab/InternImage/raw/main/upernet_internimage_t_512x1024_160k_cityscapes.log.json) |
| InternImage-S | 512x1024 | 82.74 / 83.45 | 0.36s / iter | 16.5h | 80M | 2035G | [config](./upernet_internimage_s_512x1024_160k_cityscapes.py) | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/upernet_internimage_s_512x1024_160k_cityscapes.pth) \| [log](https://huggingface.co/OpenGVLab/InternImage/raw/main/upernet_internimage_s_512x1024_160k_cityscapes.log.json) |
| InternImage-B | 512x1024 | 83.18 / 83.97 | 0.39s / iter | 17h | 128M | 2369G | [config](./upernet_internimage_b_512x1024_160k_cityscapes.py) | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/upernet_internimage_b_512x1024_160k_cityscapes.pth) \| [log](https://huggingface.co/OpenGVLab/InternImage/raw/main/upernet_internimage_b_512x1024_160k_cityscapes.log.json) |
| InternImage-L | 512x1024 | 83.68 / 84.41 | 0.50s / iter | 23h | 256M | 3234G | [config](./upernet_internimage_l_512x1024_160k_cityscapes.py) | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/upernet_internimage_l_512x1024_160k_cityscapes.pth) \| [log](https://huggingface.co/OpenGVLab/InternImage/raw/main/upernet_internimage_l_512x1024_160k_cityscapes.log.json) |
| InternImage-XL | 512x1024 | 83.62 / 84.28 | 0.56s / iter | 26h | 368M | 4022G | [config](./upernet_internimage_xl_512x1024_160k_cityscapes.py) | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/upernet_internimage_xl_512x1024_160k_cityscapes.pth) \| [log](https://huggingface.co/OpenGVLab/InternImage/raw/main/upernet_internimage_xl_512x1024_160k_cityscapes.log.json) |
- Training speed is measured with A100 GPU.
- Please set `with_cp=True` to save memory if you meet `out-of-memory` issues.
### UperNet + InternImage (with additional data)

Mapillary 80k + Cityscapes (w/ coarse data) 160k

| backbone | resolution | mIoU (ss/ms) | train speed | train time | #params | FLOPs | Config | Download |
| :------------: | :--------: | :-----------: | :----------: | :--------: | :-----: | :---: | :---: | :---: |
| InternImage-L | 512x1024 | 85.94 / 86.22 | 0.50s / iter | 23h | 256M | 3234G | [config](./upernet_internimage_l_512x1024_160k_mapillary2cityscapes.py) | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/upernet_internimage_l_512x1024_160k_mapillary2cityscapes.pth) \| [log](https://huggingface.co/OpenGVLab/InternImage/raw/main/upernet_internimage_l_512x1024_160k_mapillary2cityscapes.log.json) |
| InternImage-XL | 512x1024 | 86.20 / 86.42 | 0.56s / iter | 26h | 368M | 4022G | [config](./upernet_internimage_xl_512x1024_160k_mapillary2cityscapes.py) | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/upernet_internimage_xl_512x1024_160k_mapillary2cityscapes.pth) \| [log](https://huggingface.co/OpenGVLab/InternImage/raw/main/upernet_internimage_xl_512x1024_160k_mapillary2cityscapes.log.json) |
### SegFormerHead + InternImage (with additional data)

Mapillary 80k + Cityscapes (w/ coarse data) 160k

| backbone | resolution | mIoU (ss/ms) | train speed | train time | #params | FLOPs | Config | Download |
| :------------: | :--------: | :-----------: | :----------: | :--------: | :-----: | :---: | :---: | :---: |
| InternImage-L | 512x1024 | 85.16 / 85.67 | 0.37s / iter | 17h | 220M | 1580G | [config](./segformer_internimage_l_512x1024_160k_mapillary2cityscapes.py) | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/segformer_internimage_l_512x1024_160k_mapillary2cityscapes.pth) \| [log](https://huggingface.co/OpenGVLab/InternImage/raw/main/segformer_internimage_l_512x1024_160k_mapillary2cityscapes.log.json) |
| InternImage-XL | 512x1024 | 85.41 / 85.93 | 0.43s / iter | 19.5h | 330M | 2364G | [config](./segformer_internimage_xl_512x1024_160k_mapillary2cityscapes.py) | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/segformer_internimage_xl_512x1024_160k_mapillary2cityscapes.pth) \| [log](https://huggingface.co/OpenGVLab/InternImage/raw/main/segformer_internimage_xl_512x1024_160k_mapillary2cityscapes.log.json) |
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment.