Unverified Commit 6efefa27 authored by Kai Chen's avatar Kai Chen Committed by GitHub
Browse files

Merge pull request #20 from open-mmlab/dev

Initial public release
parents 2cf13281 54b54d88
# Build script for the standalone `roi_pool_cuda` extension.
# Build in place with: python setup.py build_ext --inplace
from setuptools import setup
from torch.utils.cpp_extension import BuildExtension, CUDAExtension

setup(
    name='roi_pool',
    ext_modules=[
        # C++ Python binding plus the CUDA kernel implementation.
        CUDAExtension('roi_pool_cuda', [
            'src/roi_pool_cuda.cpp',
            'src/roi_pool_kernel.cu',
        ])
    ],
    # BuildExtension supplies the correct compiler/nvcc flags for PyTorch.
    cmdclass={'build_ext': BuildExtension})
#include <torch/torch.h>
#include <cmath>
#include <vector>
// Launches the forward RoI max-pooling kernel (implemented in the .cu file).
// Writes pooled values into `output` and the winning flat indices into
// `argmax` (used by the backward pass). Returns 1 on success.
int ROIPoolForwardLaucher(const at::Tensor features, const at::Tensor rois,
                          const float spatial_scale, const int channels,
                          const int height, const int width, const int num_rois,
                          const int pooled_h, const int pooled_w,
                          at::Tensor output, at::Tensor argmax);
// Launches the backward RoI max-pooling kernel: scatters `top_grad` back to
// `bottom_grad` at the positions recorded in `argmax`. Returns 1 on success.
int ROIPoolBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois,
                           const at::Tensor argmax, const float spatial_scale,
                           const int batch_size, const int channels,
                           const int height, const int width,
                           const int num_rois, const int pooled_h,
                           const int pooled_w, at::Tensor bottom_grad);
// Input-validation helpers: every tensor handed to these entry points must
// live on the GPU and be contiguous in memory.
#define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ")
#define CHECK_CONTIGUOUS(x) \
  AT_CHECK(x.is_contiguous(), #x, " must be contiguous ")
#define CHECK_INPUT(x) \
  CHECK_CUDA(x);       \
  CHECK_CONTIGUOUS(x)
// Python-facing entry point for the RoI pooling forward pass.
// Validates the inputs, recovers the feature-map geometry from the tensor
// shapes and forwards everything to the CUDA launcher.
// Returns 0 (after printing a message) when the RoI tensor does not have
// 5 columns, 1 otherwise.
int roi_pooling_forward_cuda(at::Tensor features, at::Tensor rois,
                             int pooled_height, int pooled_width,
                             float spatial_scale, at::Tensor output,
                             at::Tensor argmax) {
  CHECK_INPUT(features);
  CHECK_INPUT(rois);
  CHECK_INPUT(output);
  CHECK_INPUT(argmax);

  // RoIs are expected as (num_rois, 5): batch index + 4 box coordinates.
  if (rois.size(1) != 5) {
    printf("wrong roi size\n");
    return 0;
  }
  const int num_rois = rois.size(0);
  const int num_channels = features.size(1);
  const int data_height = features.size(2);
  const int data_width = features.size(3);

  ROIPoolForwardLaucher(features, rois, spatial_scale, num_channels,
                        data_height, data_width, num_rois, pooled_height,
                        pooled_width, output, argmax);
  return 1;
}
// Python-facing entry point for the RoI pooling backward pass.
// All spatial sizes are recovered from the gradient tensors themselves:
// pooled output size from `top_grad`, input geometry from `bottom_grad`.
// Returns 0 on malformed RoIs (anything but 5 columns), 1 on success.
int roi_pooling_backward_cuda(at::Tensor top_grad, at::Tensor rois,
                              at::Tensor argmax, float spatial_scale,
                              at::Tensor bottom_grad) {
  CHECK_INPUT(top_grad);
  CHECK_INPUT(rois);
  CHECK_INPUT(argmax);
  CHECK_INPUT(bottom_grad);

  if (rois.size(1) != 5) {
    printf("wrong roi size\n");
    return 0;
  }
  const int num_rois = rois.size(0);
  const int pooled_height = top_grad.size(2);
  const int pooled_width = top_grad.size(3);
  const int batch_size = bottom_grad.size(0);
  const int num_channels = bottom_grad.size(1);
  const int data_height = bottom_grad.size(2);
  const int data_width = bottom_grad.size(3);

  ROIPoolBackwardLaucher(top_grad, rois, argmax, spatial_scale, batch_size,
                         num_channels, data_height, data_width, num_rois,
                         pooled_height, pooled_width, bottom_grad);
  return 1;
}
// Expose the forward/backward entry points to Python as
// roi_pool_cuda.forward / roi_pool_cuda.backward.
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
  m.def("forward", &roi_pooling_forward_cuda, "Roi_Pooling forward (CUDA)");
  m.def("backward", &roi_pooling_backward_cuda, "Roi_Pooling backward (CUDA)");
}
#include <ATen/ATen.h>
#include <THC/THCAtomics.cuh>
using namespace at; // temporal fix for pytorch<=0.4.1 (see #9848)
// Grid-stride loop: each thread handles indices i, i+stride, i+2*stride, ...
// so a bounded grid can cover an arbitrarily large problem size.
#define CUDA_1D_KERNEL_LOOP(i, n)                            \
  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \
       i += blockDim.x * gridDim.x)

#define THREADS_PER_BLOCK 1024

// Number of blocks needed to cover N elements, capped at 65000 to stay
// within the grid-dimension limit assumed here.
inline int GET_BLOCKS(const int N) {
  int optimal_block_num = (N + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK;
  int max_block_num = 65000;
  return min(optimal_block_num, max_block_num);
}
// Forward pass of RoI max-pooling.
// Each thread computes one output element (n, c, ph, pw): it maps that
// pooled cell back to a bin on the input feature map, takes the max over
// the bin, and records the winning flat index in argmax_data so the
// backward pass can route the gradient.
template <typename scalar_t>
__global__ void ROIPoolForward(const int nthreads, const scalar_t *bottom_data,
                               const scalar_t *rois,
                               const scalar_t spatial_scale, const int channels,
                               const int height, const int width,
                               const int pooled_h, const int pooled_w,
                               scalar_t *top_data, int *argmax_data) {
  CUDA_1D_KERNEL_LOOP(index, nthreads) {
    // (n, c, ph, pw) is an element in the pooled output
    int pw = index % pooled_w;
    int ph = (index / pooled_w) % pooled_h;
    int c = (index / pooled_w / pooled_h) % channels;
    int n = index / pooled_w / pooled_h / channels;

    // Each RoI row is (batch_ind, x1, y1, x2, y2) in input-image coords.
    const scalar_t *offset_rois = rois + n * 5;
    int roi_batch_ind = offset_rois[0];
    // calculate the roi region on feature maps (the +1 makes x2/y2 inclusive)
    scalar_t roi_x1 = offset_rois[1] * spatial_scale;
    scalar_t roi_y1 = offset_rois[2] * spatial_scale;
    scalar_t roi_x2 = (offset_rois[3] + 1) * spatial_scale;
    scalar_t roi_y2 = (offset_rois[4] + 1) * spatial_scale;

    // skip degenerate rois entirely
    scalar_t roi_w = roi_x2 - roi_x1;
    scalar_t roi_h = roi_y2 - roi_y1;
    if (roi_w <= 0 || roi_h <= 0) continue;

    scalar_t bin_size_w = roi_w / static_cast<scalar_t>(pooled_w);
    scalar_t bin_size_h = roi_h / static_cast<scalar_t>(pooled_h);

    // the corresponding bin region on the feature map
    int bin_x1 = floor(static_cast<scalar_t>(pw) * bin_size_w + roi_x1);
    int bin_y1 = floor(static_cast<scalar_t>(ph) * bin_size_h + roi_y1);
    int bin_x2 = ceil(static_cast<scalar_t>(pw + 1) * bin_size_w + roi_x1);
    int bin_y2 = ceil(static_cast<scalar_t>(ph + 1) * bin_size_h + roi_y1);

    // clip the bin to the input boundaries
    bin_x1 = min(max(bin_x1, 0), width);
    bin_y1 = min(max(bin_y1, 0), height);
    bin_x2 = min(max(bin_x2, 0), width);
    bin_y2 = min(max(bin_y2, 0), height);

    bool is_empty = (bin_y2 <= bin_y1) || (bin_x2 <= bin_x1);
    // If nothing is pooled, argmax = -1 causes nothing to be backprop'd
    int max_idx = -1;

    // BUGFIX: use a local offset pointer instead of `bottom_data += ...`.
    // Mutating the parameter accumulated the channel offset across
    // grid-stride loop iterations, so any thread that processed more than
    // one output element read from the wrong input location.
    const scalar_t *offset_data =
        bottom_data + (roi_batch_ind * channels + c) * height * width;

    // Define an empty pooling region to be zero; otherwise seed the max
    // strictly below the first candidate so the loop always records an index.
    scalar_t max_val = is_empty ? static_cast<scalar_t>(0)
                                : offset_data[bin_y1 * width + bin_x1] - 1;
    for (int h = bin_y1; h < bin_y2; ++h) {
      for (int w = bin_x1; w < bin_x2; ++w) {
        int offset = h * width + w;
        if (offset_data[offset] > max_val) {
          max_val = offset_data[offset];
          max_idx = offset;
        }
      }
    }
    top_data[index] = max_val;
    if (argmax_data != NULL) argmax_data[index] = max_idx;
  }
}
// Host-side launcher for ROIPoolForward.
// Dispatches over the floating-point dtype of `features` (including half)
// and launches one CUDA thread per pooled output element.
int ROIPoolForwardLaucher(const at::Tensor features, const at::Tensor rois,
                          const float spatial_scale, const int channels,
                          const int height, const int width, const int num_rois,
                          const int pooled_h, const int pooled_w,
                          at::Tensor output, at::Tensor argmax) {
  // Total number of output elements == total units of work.
  const int output_size = num_rois * channels * pooled_h * pooled_w;
  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      features.type(), "ROIPoolLaucherForward", ([&] {
        const scalar_t *bottom_data = features.data<scalar_t>();
        const scalar_t *rois_data = rois.data<scalar_t>();
        scalar_t *top_data = output.data<scalar_t>();
        int *argmax_data = argmax.data<int>();
        ROIPoolForward<scalar_t>
            <<<GET_BLOCKS(output_size), THREADS_PER_BLOCK>>>(
                output_size, bottom_data, rois_data, scalar_t(spatial_scale),
                channels, height, width, pooled_h, pooled_w, top_data,
                argmax_data);
      }));
  // Surface launch failures immediately rather than at the next sync point.
  cudaError_t err = cudaGetLastError();
  if (cudaSuccess != err) {
    fprintf(stderr, "cudaCheckError() failed : %s\n", cudaGetErrorString(err));
    exit(-1);
  }
  return 1;
}
// Backward pass of RoI max-pooling: routes each output gradient back to the
// single input location that won the max in the forward pass.
template <typename scalar_t>
__global__ void ROIPoolBackward(const int nthreads, const scalar_t *top_diff,
                                const scalar_t *rois, const int *argmax_data,
                                const scalar_t spatial_scale,
                                const int channels, const int height,
                                const int width, const int pooled_h,
                                const int pooled_w, scalar_t *bottom_diff) {
  CUDA_1D_KERNEL_LOOP(index, nthreads) {
    // (n, c, ph, pw) is an element of the pooled gradient.
    int pw = index % pooled_w;
    int ph = (index / pooled_w) % pooled_h;
    int c = (index / pooled_w / pooled_h) % channels;
    int n = index / pooled_w / pooled_h / channels;

    int roi_batch_ind = rois[n * 5];
    int bottom_index = argmax_data[(n * channels + c) * pooled_h * pooled_w +
                                   ph * pooled_w + pw];

    // BUGFIX: the forward pass stores argmax = -1 for empty pooling
    // regions; the original code still performed the atomicAdd for those
    // elements, writing one element before the start of the feature-map
    // slice (out-of-bounds for the first channel of batch 0).
    if (bottom_index != -1) {
      // Overlapping RoIs can select the same input cell, so accumulate
      // gradients atomically.
      atomicAdd(bottom_diff + (roi_batch_ind * channels + c) * height * width +
                    bottom_index,
                top_diff[index]);
    }
  }
}
// Host-side launcher for ROIPoolBackward.
// Dispatches over the floating-point dtype of `top_grad` and launches one
// CUDA thread per pooled gradient element.
int ROIPoolBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois,
                           const at::Tensor argmax, const float spatial_scale,
                           const int batch_size, const int channels,
                           const int height, const int width,
                           const int num_rois, const int pooled_h,
                           const int pooled_w, at::Tensor bottom_grad) {
  const int output_size = num_rois * pooled_h * pooled_w * channels;
  // TODO: use AT_DISPATCH_FLOATING_TYPES_AND_HALF when atomicAdd is resolved
  AT_DISPATCH_FLOATING_TYPES(
      top_grad.type(), "ROIPoolLaucherBackward", ([&] {
        const scalar_t *top_diff = top_grad.data<scalar_t>();
        const scalar_t *rois_data = rois.data<scalar_t>();
        const int *argmax_data = argmax.data<int>();
        scalar_t *bottom_diff = bottom_grad.data<scalar_t>();
        // Reject double at runtime: the kernel relies on atomicAdd, which
        // is presumably unavailable for double on the targeted hardware —
        // TODO confirm against the supported compute capabilities.
        if (sizeof(scalar_t) == sizeof(double)) {
          fprintf(stderr, "double is not supported\n");
          exit(-1);
        }
        ROIPoolBackward<scalar_t>
            <<<GET_BLOCKS(output_size), THREADS_PER_BLOCK>>>(
                output_size, top_diff, rois_data, argmax_data,
                scalar_t(spatial_scale), channels, height, width, pooled_h,
                pooled_w, bottom_diff);
      }));
  // Surface launch failures immediately rather than at the next sync point.
  cudaError_t err = cudaGetLastError();
  if (cudaSuccess != err) {
    fprintf(stderr, "cudaCheckError() failed : %s\n", cudaGetErrorString(err));
    exit(-1);
  }
  return 1;
}
import os
import subprocess
import time
from setuptools import find_packages, setup
def readme():
    """Return the contents of README.md for use as the long description."""
    with open('README.md') as f:
        return f.read()
# Semantic version of the package; SUFFIX carries pre-release tags (e.g. 'rc1').
MAJOR = 0
MINOR = 5
PATCH = 0
SUFFIX = ''
SHORT_VERSION = '{}.{}.{}{}'.format(MAJOR, MINOR, PATCH, SUFFIX)

# Path of the auto-generated module that records the built version + git hash.
version_file = 'mmdet/version.py'
def get_git_hash():
    """Return the current git commit SHA, or ``'unknown'`` if git fails."""

    def _run_git(cmd):
        # Run `cmd` under a minimal, locale-neutral environment so the
        # output is stable across machines and locales.
        env = {key: os.environ[key]
               for key in ('SYSTEMROOT', 'PATH', 'HOME')
               if os.environ.get(key) is not None}
        # LANGUAGE is used on win32
        env.update(LANGUAGE='C', LANG='C', LC_ALL='C')
        proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, env=env)
        return proc.communicate()[0]

    try:
        out = _run_git(['git', 'rev-parse', 'HEAD'])
        sha = out.strip().decode('ascii')
    except OSError:
        # git binary missing or not executable.
        sha = 'unknown'
    return sha
def get_hash():
    """Return a short identifier for the source revision being built.

    Prefers the live git checkout; falls back to the hash recorded in a
    previously generated version file; otherwise ``'unknown'``.
    """
    if os.path.exists('.git'):
        # Building from a git checkout: abbreviate the commit SHA.
        return get_git_hash()[:7]
    if os.path.exists(version_file):
        try:
            from mmdet.version import __version__
        except ImportError:
            raise ImportError('Unable to get git version')
        # The generated __version__ is '<short>+<sha>'.
        return __version__.split('+')[-1]
    return 'unknown'
def write_version_py():
    """Generate mmdet/version.py embedding the full and short versions."""
    template = """# GENERATED VERSION FILE
# TIME: {}
__version__ = '{}'
short_version = '{}'
"""
    # Full version is '<short>+<git hash>' so installs are traceable.
    full_version = SHORT_VERSION + '+' + get_hash()
    with open(version_file, 'w') as f:
        f.write(template.format(time.asctime(), full_version, SHORT_VERSION))
def get_version():
    """Read ``__version__`` back out of the generated version file.

    The file is executed in an explicit, isolated namespace. The original
    implementation relied on ``exec`` mutating the function's ``locals()``
    and then reading ``locals()['__version__']``, which is
    implementation-defined in Python 3 (``exec`` cannot reliably modify
    function locals); executing into a fresh dict is guaranteed to work.
    """
    namespace = {}
    with open(version_file, 'r') as f:
        exec(compile(f.read(), version_file, 'exec'), namespace)
    return namespace['__version__']
if __name__ == '__main__':
    # Regenerate mmdet/version.py first so the installed package reports
    # the exact revision it was built from.
    write_version_py()
    setup(
        name='mmdet',
        version=get_version(),
        description='Open MMLab Detection Toolbox',
        long_description=readme(),
        keywords='computer vision, object detection',
        url='https://github.com/open-mmlab/mmdetection',
        packages=find_packages(),
        # Ship pre-built C++/CUDA extension binaries with the package.
        package_data={'mmdet.ops': ['*/*.so']},
        classifiers=[
            'Development Status :: 4 - Beta',
            'License :: OSI Approved :: GNU General Public License v3 (GPLv3)',
            'Operating System :: OS Independent',
            'Programming Language :: Python :: 2',
            'Programming Language :: Python :: 2.7',
            'Programming Language :: Python :: 3',
            'Programming Language :: Python :: 3.4',
            'Programming Language :: Python :: 3.5',
            'Programming Language :: Python :: 3.6',
        ],
        license='GPLv3',
        setup_requires=['pytest-runner'],
        tests_require=['pytest'],
        install_requires=[
            'mmcv', 'numpy', 'matplotlib', 'six', 'terminaltables',
            'pycocotools'
        ],
        zip_safe=False)
from argparse import ArgumentParser
from mmdet.core import coco_eval
def main():
    """CLI entry point: evaluate a detection result file with COCO metrics."""
    parser = ArgumentParser(description='COCO Evaluation')
    parser.add_argument('result', help='result file path')
    parser.add_argument('--ann', help='annotation file path')
    result_types = ['proposal_fast', 'proposal', 'bbox', 'segm', 'keypoint']
    parser.add_argument(
        '--types', type=str, nargs='+', choices=result_types,
        default=['bbox'], help='result types')
    parser.add_argument(
        '--max-dets', type=int, nargs='+', default=[100, 300, 1000],
        help='proposal numbers, only used for recall evaluation')
    args = parser.parse_args()
    coco_eval(args.result, args.types, args.ann, args.max_dets)


if __name__ == '__main__':
    main()
#!/usr/bin/env bash
# Distributed training launcher.
# Usage: ./dist_train.sh <config> <num_gpus> [extra train.py args...]
PYTHON=${PYTHON:-"python"}

# $1: config file, $2: number of processes (one per GPU); any remaining
# arguments are forwarded verbatim to train.py.
$PYTHON -m torch.distributed.launch --nproc_per_node=$2 $(dirname "$0")/train.py $1 --launcher pytorch ${@:3}
import argparse
import torch
import mmcv
from mmcv.runner import load_checkpoint, parallel_test, obj_from_dict
from mmcv.parallel import scatter, collate, MMDataParallel
from mmdet import datasets
from mmdet.core import results2json, coco_eval
from mmdet.datasets import build_dataloader
from mmdet.models import build_detector, detectors
def single_test(model, data_loader, show=False):
    """Run inference over a dataset on a single GPU.

    Returns a list with one result per image; optionally visualizes each
    prediction as it is produced.
    """
    model.eval()
    collected = []
    progress = mmcv.ProgressBar(len(data_loader.dataset))
    for batch in data_loader:
        with torch.no_grad():
            # Rescale boxes back to the original image size unless we are
            # about to draw them on the (resized) network input.
            output = model(return_loss=False, rescale=not show, **batch)
        collected.append(output)
        if show:
            model.module.show_result(batch, output,
                                     data_loader.dataset.img_norm_cfg)
        # One progress tick per image in the batch.
        for _ in range(batch['img'][0].size(0)):
            progress.update()
    return collected
def _data_func(data, device_id):
    """Collate a single sample and move it onto `device_id` for parallel_test."""
    batch = collate([data], samples_per_gpu=1)
    inputs = scatter(batch, [device_id])[0]
    return dict(return_loss=False, rescale=True, **inputs)
def parse_args():
    """Parse command-line arguments for the test script."""
    parser = argparse.ArgumentParser(description='MMDet test detector')
    parser.add_argument('config', help='test config file path')
    parser.add_argument('checkpoint', help='checkpoint file')
    parser.add_argument(
        '--gpus', default=1, type=int, help='GPU number used for testing')
    parser.add_argument(
        '--proc_per_gpu', default=1, type=int,
        help='Number of processes per GPU')
    parser.add_argument('--out', help='output result file')
    parser.add_argument(
        '--eval', type=str, nargs='+',
        choices=['proposal', 'proposal_fast', 'bbox', 'segm', 'keypoints'],
        help='eval types')
    parser.add_argument('--show', action='store_true', help='show results')
    return parser.parse_args()
def main():
    """Entry point: run single- or multi-GPU inference, then optionally
    dump results and evaluate them with COCO metrics."""
    args = parse_args()

    # Results are pickled, so insist on a pickle-style extension up front.
    if args.out is not None and not args.out.endswith(('.pkl', '.pickle')):
        raise ValueError('The output file must be a pkl file.')

    cfg = mmcv.Config.fromfile(args.config)
    # Weights come from the checkpoint, so skip pretrained-backbone loading,
    # and put the test dataset into test mode.
    cfg.model.pretrained = None
    cfg.data.test.test_mode = True
    dataset = obj_from_dict(cfg.data.test, datasets, dict(test_mode=True))
    if args.gpus == 1:
        # Single GPU: build the model in-process and iterate the loader.
        model = build_detector(
            cfg.model, train_cfg=None, test_cfg=cfg.test_cfg)
        load_checkpoint(model, args.checkpoint)
        model = MMDataParallel(model, device_ids=[0])

        data_loader = build_dataloader(
            dataset,
            imgs_per_gpu=1,
            workers_per_gpu=cfg.data.workers_per_gpu,
            num_gpus=1,
            dist=False,
            shuffle=False)
        outputs = single_test(model, data_loader, args.show)
    else:
        # Multi GPU: hand the model class + kwargs to mmcv's parallel_test,
        # which constructs one model per worker process.
        model_args = cfg.model.copy()
        model_args.update(train_cfg=None, test_cfg=cfg.test_cfg)
        model_type = getattr(detectors, model_args.pop('type'))
        outputs = parallel_test(
            model_type,
            model_args,
            args.checkpoint,
            dataset,
            _data_func,
            range(args.gpus),
            workers_per_gpu=args.proc_per_gpu)

    if args.out:
        print('writing results to {}'.format(args.out))
        mmcv.dump(outputs, args.out)
        eval_types = args.eval
        if eval_types:
            print('Starting evaluate {}'.format(' and '.join(eval_types)))
            if eval_types == ['proposal_fast']:
                # proposal_fast evaluates directly from the pickled results.
                result_file = args.out
            else:
                # Other metrics go through a COCO-format json file.
                result_file = args.out + '.json'
                results2json(dataset, outputs, result_file)
            coco_eval(result_file, eval_types, dataset.coco)


if __name__ == '__main__':
    main()
from __future__ import division
import argparse
from mmcv import Config
from mmcv.runner import obj_from_dict
from mmdet import datasets, __version__
from mmdet.apis import (train_detector, init_dist, get_root_logger,
set_random_seed)
from mmdet.models import build_detector
def parse_args():
    """Parse command-line arguments for the training script."""
    parser = argparse.ArgumentParser(description='Train a detector')
    parser.add_argument('config', help='train config file path')
    parser.add_argument('--work_dir', help='the dir to save logs and models')
    parser.add_argument(
        '--validate', action='store_true',
        help='whether to evaluate the checkpoint during training')
    parser.add_argument(
        '--gpus', type=int, default=1,
        help='number of gpus to use '
        '(only applicable to non-distributed training)')
    parser.add_argument('--seed', type=int, default=None, help='random seed')
    parser.add_argument(
        '--launcher', choices=['none', 'pytorch', 'slurm', 'mpi'],
        default='none', help='job launcher')
    # Consumed by torch.distributed.launch.
    parser.add_argument('--local_rank', type=int, default=0)
    return parser.parse_args()
def main():
    """Entry point: load the config, set up the (possibly distributed)
    environment, build the model/dataset and start training."""
    args = parse_args()

    cfg = Config.fromfile(args.config)
    # update configs according to CLI args
    if args.work_dir is not None:
        cfg.work_dir = args.work_dir
    cfg.gpus = args.gpus
    if cfg.checkpoint_config is not None:
        # save mmdet version in checkpoints as meta data
        cfg.checkpoint_config.meta = dict(
            mmdet_version=__version__, config=cfg.text)

    # init distributed env first, since logger depends on the dist info.
    if args.launcher == 'none':
        distributed = False
    else:
        distributed = True
        init_dist(args.launcher, **cfg.dist_params)

    # init logger before other steps
    logger = get_root_logger(cfg.log_level)
    logger.info('Distributed training: {}'.format(distributed))

    # set random seeds for reproducibility when requested
    if args.seed is not None:
        logger.info('Set random seed to {}'.format(args.seed))
        set_random_seed(args.seed)

    model = build_detector(
        cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg)
    train_dataset = obj_from_dict(cfg.data.train, datasets)
    train_detector(
        model,
        train_dataset,
        cfg,
        distributed=distributed,
        validate=args.validate,
        logger=logger)


if __name__ == '__main__':
    main()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment