Unverified Commit 6efefa27 authored by Kai Chen's avatar Kai Chen Committed by GitHub
Browse files

Merge pull request #20 from open-mmlab/dev

Initial public release
parents 2cf13281 54b54d88
# Build script for the standalone `roi_pool_cuda` extension.
# Build in place with: python setup.py build_ext --inplace
from setuptools import setup
from torch.utils.cpp_extension import BuildExtension, CUDAExtension

setup(
    name='roi_pool',
    ext_modules=[
        # C++ Python binding plus the CUDA kernel implementation.
        CUDAExtension('roi_pool_cuda', [
            'src/roi_pool_cuda.cpp',
            'src/roi_pool_kernel.cu',
        ])
    ],
    # BuildExtension supplies the correct compiler/nvcc flags for PyTorch.
    cmdclass={'build_ext': BuildExtension})
#include <torch/torch.h>
#include <cmath>
#include <vector>
// Launches the forward RoI max-pooling kernel (implemented in the .cu file).
// Writes pooled values into `output` and the winning flat indices into
// `argmax` (used by the backward pass). Returns 1 on success.
int ROIPoolForwardLaucher(const at::Tensor features, const at::Tensor rois,
                          const float spatial_scale, const int channels,
                          const int height, const int width, const int num_rois,
                          const int pooled_h, const int pooled_w,
                          at::Tensor output, at::Tensor argmax);
// Launches the backward RoI max-pooling kernel: scatters `top_grad` back to
// `bottom_grad` at the positions recorded in `argmax`. Returns 1 on success.
int ROIPoolBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois,
                           const at::Tensor argmax, const float spatial_scale,
                           const int batch_size, const int channels,
                           const int height, const int width,
                           const int num_rois, const int pooled_h,
                           const int pooled_w, at::Tensor bottom_grad);
// Input-validation helpers: every tensor handed to these entry points must
// live on the GPU and be contiguous in memory.
#define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ")
#define CHECK_CONTIGUOUS(x) \
  AT_CHECK(x.is_contiguous(), #x, " must be contiguous ")
#define CHECK_INPUT(x) \
  CHECK_CUDA(x);       \
  CHECK_CONTIGUOUS(x)
// Python-facing entry point for the RoI pooling forward pass.
// Validates the inputs, recovers the feature-map geometry from the tensor
// shapes and forwards everything to the CUDA launcher.
// Returns 0 (after printing a message) when the RoI tensor does not have
// 5 columns, 1 otherwise.
int roi_pooling_forward_cuda(at::Tensor features, at::Tensor rois,
                             int pooled_height, int pooled_width,
                             float spatial_scale, at::Tensor output,
                             at::Tensor argmax) {
  CHECK_INPUT(features);
  CHECK_INPUT(rois);
  CHECK_INPUT(output);
  CHECK_INPUT(argmax);

  // RoIs are expected as (num_rois, 5): batch index + 4 box coordinates.
  if (rois.size(1) != 5) {
    printf("wrong roi size\n");
    return 0;
  }
  const int num_rois = rois.size(0);
  const int num_channels = features.size(1);
  const int data_height = features.size(2);
  const int data_width = features.size(3);

  ROIPoolForwardLaucher(features, rois, spatial_scale, num_channels,
                        data_height, data_width, num_rois, pooled_height,
                        pooled_width, output, argmax);
  return 1;
}
// Python-facing entry point for the RoI pooling backward pass.
// All spatial sizes are recovered from the gradient tensors themselves:
// pooled output size from `top_grad`, input geometry from `bottom_grad`.
// Returns 0 on malformed RoIs (anything but 5 columns), 1 on success.
int roi_pooling_backward_cuda(at::Tensor top_grad, at::Tensor rois,
                              at::Tensor argmax, float spatial_scale,
                              at::Tensor bottom_grad) {
  CHECK_INPUT(top_grad);
  CHECK_INPUT(rois);
  CHECK_INPUT(argmax);
  CHECK_INPUT(bottom_grad);

  if (rois.size(1) != 5) {
    printf("wrong roi size\n");
    return 0;
  }
  const int num_rois = rois.size(0);
  const int pooled_height = top_grad.size(2);
  const int pooled_width = top_grad.size(3);
  const int batch_size = bottom_grad.size(0);
  const int num_channels = bottom_grad.size(1);
  const int data_height = bottom_grad.size(2);
  const int data_width = bottom_grad.size(3);

  ROIPoolBackwardLaucher(top_grad, rois, argmax, spatial_scale, batch_size,
                         num_channels, data_height, data_width, num_rois,
                         pooled_height, pooled_width, bottom_grad);
  return 1;
}
// Expose the forward/backward entry points to Python as
// roi_pool_cuda.forward / roi_pool_cuda.backward.
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
  m.def("forward", &roi_pooling_forward_cuda, "Roi_Pooling forward (CUDA)");
  m.def("backward", &roi_pooling_backward_cuda, "Roi_Pooling backward (CUDA)");
}
#include <ATen/ATen.h>
#include <THC/THCAtomics.cuh>
using namespace at; // temporal fix for pytorch<=0.4.1 (see #9848)
// Grid-stride loop: each thread handles indices i, i+stride, i+2*stride, ...
// so a bounded grid can cover an arbitrarily large problem size.
#define CUDA_1D_KERNEL_LOOP(i, n)                            \
  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \
       i += blockDim.x * gridDim.x)

#define THREADS_PER_BLOCK 1024

// Number of blocks needed to cover N elements, capped at 65000 to stay
// within the grid-dimension limit assumed here.
inline int GET_BLOCKS(const int N) {
  int optimal_block_num = (N + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK;
  int max_block_num = 65000;
  return min(optimal_block_num, max_block_num);
}
// Forward pass of RoI max-pooling.
// Each thread computes one output element (n, c, ph, pw): it maps that
// pooled cell back to a bin on the input feature map, takes the max over
// the bin, and records the winning flat index in argmax_data so the
// backward pass can route the gradient.
template <typename scalar_t>
__global__ void ROIPoolForward(const int nthreads, const scalar_t *bottom_data,
                               const scalar_t *rois,
                               const scalar_t spatial_scale, const int channels,
                               const int height, const int width,
                               const int pooled_h, const int pooled_w,
                               scalar_t *top_data, int *argmax_data) {
  CUDA_1D_KERNEL_LOOP(index, nthreads) {
    // (n, c, ph, pw) is an element in the pooled output
    int pw = index % pooled_w;
    int ph = (index / pooled_w) % pooled_h;
    int c = (index / pooled_w / pooled_h) % channels;
    int n = index / pooled_w / pooled_h / channels;

    // Each RoI row is (batch_ind, x1, y1, x2, y2) in input-image coords.
    const scalar_t *offset_rois = rois + n * 5;
    int roi_batch_ind = offset_rois[0];
    // calculate the roi region on feature maps (the +1 makes x2/y2 inclusive)
    scalar_t roi_x1 = offset_rois[1] * spatial_scale;
    scalar_t roi_y1 = offset_rois[2] * spatial_scale;
    scalar_t roi_x2 = (offset_rois[3] + 1) * spatial_scale;
    scalar_t roi_y2 = (offset_rois[4] + 1) * spatial_scale;

    // skip degenerate rois entirely
    scalar_t roi_w = roi_x2 - roi_x1;
    scalar_t roi_h = roi_y2 - roi_y1;
    if (roi_w <= 0 || roi_h <= 0) continue;

    scalar_t bin_size_w = roi_w / static_cast<scalar_t>(pooled_w);
    scalar_t bin_size_h = roi_h / static_cast<scalar_t>(pooled_h);

    // the corresponding bin region on the feature map
    int bin_x1 = floor(static_cast<scalar_t>(pw) * bin_size_w + roi_x1);
    int bin_y1 = floor(static_cast<scalar_t>(ph) * bin_size_h + roi_y1);
    int bin_x2 = ceil(static_cast<scalar_t>(pw + 1) * bin_size_w + roi_x1);
    int bin_y2 = ceil(static_cast<scalar_t>(ph + 1) * bin_size_h + roi_y1);

    // clip the bin to the input boundaries
    bin_x1 = min(max(bin_x1, 0), width);
    bin_y1 = min(max(bin_y1, 0), height);
    bin_x2 = min(max(bin_x2, 0), width);
    bin_y2 = min(max(bin_y2, 0), height);

    bool is_empty = (bin_y2 <= bin_y1) || (bin_x2 <= bin_x1);
    // If nothing is pooled, argmax = -1 causes nothing to be backprop'd
    int max_idx = -1;

    // BUGFIX: use a local offset pointer instead of `bottom_data += ...`.
    // Mutating the parameter accumulated the channel offset across
    // grid-stride loop iterations, so any thread that processed more than
    // one output element read from the wrong input location.
    const scalar_t *offset_data =
        bottom_data + (roi_batch_ind * channels + c) * height * width;

    // Define an empty pooling region to be zero; otherwise seed the max
    // strictly below the first candidate so the loop always records an index.
    scalar_t max_val = is_empty ? static_cast<scalar_t>(0)
                                : offset_data[bin_y1 * width + bin_x1] - 1;
    for (int h = bin_y1; h < bin_y2; ++h) {
      for (int w = bin_x1; w < bin_x2; ++w) {
        int offset = h * width + w;
        if (offset_data[offset] > max_val) {
          max_val = offset_data[offset];
          max_idx = offset;
        }
      }
    }
    top_data[index] = max_val;
    if (argmax_data != NULL) argmax_data[index] = max_idx;
  }
}
// Host-side launcher for ROIPoolForward.
// Dispatches over the floating-point dtype of `features` (including half)
// and launches one CUDA thread per pooled output element.
int ROIPoolForwardLaucher(const at::Tensor features, const at::Tensor rois,
                          const float spatial_scale, const int channels,
                          const int height, const int width, const int num_rois,
                          const int pooled_h, const int pooled_w,
                          at::Tensor output, at::Tensor argmax) {
  // Total number of output elements == total units of work.
  const int output_size = num_rois * channels * pooled_h * pooled_w;
  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      features.type(), "ROIPoolLaucherForward", ([&] {
        const scalar_t *bottom_data = features.data<scalar_t>();
        const scalar_t *rois_data = rois.data<scalar_t>();
        scalar_t *top_data = output.data<scalar_t>();
        int *argmax_data = argmax.data<int>();
        ROIPoolForward<scalar_t>
            <<<GET_BLOCKS(output_size), THREADS_PER_BLOCK>>>(
                output_size, bottom_data, rois_data, scalar_t(spatial_scale),
                channels, height, width, pooled_h, pooled_w, top_data,
                argmax_data);
      }));
  // Surface launch failures immediately rather than at the next sync point.
  cudaError_t err = cudaGetLastError();
  if (cudaSuccess != err) {
    fprintf(stderr, "cudaCheckError() failed : %s\n", cudaGetErrorString(err));
    exit(-1);
  }
  return 1;
}
// Backward pass of RoI max-pooling: routes each output gradient back to the
// single input location that won the max in the forward pass.
template <typename scalar_t>
__global__ void ROIPoolBackward(const int nthreads, const scalar_t *top_diff,
                                const scalar_t *rois, const int *argmax_data,
                                const scalar_t spatial_scale,
                                const int channels, const int height,
                                const int width, const int pooled_h,
                                const int pooled_w, scalar_t *bottom_diff) {
  CUDA_1D_KERNEL_LOOP(index, nthreads) {
    // (n, c, ph, pw) is an element of the pooled gradient.
    int pw = index % pooled_w;
    int ph = (index / pooled_w) % pooled_h;
    int c = (index / pooled_w / pooled_h) % channels;
    int n = index / pooled_w / pooled_h / channels;

    int roi_batch_ind = rois[n * 5];
    int bottom_index = argmax_data[(n * channels + c) * pooled_h * pooled_w +
                                   ph * pooled_w + pw];

    // BUGFIX: the forward pass stores argmax = -1 for empty pooling
    // regions; the original code still performed the atomicAdd for those
    // elements, writing one element before the start of the feature-map
    // slice (out-of-bounds for the first channel of batch 0).
    if (bottom_index != -1) {
      // Overlapping RoIs can select the same input cell, so accumulate
      // gradients atomically.
      atomicAdd(bottom_diff + (roi_batch_ind * channels + c) * height * width +
                    bottom_index,
                top_diff[index]);
    }
  }
}
// Host-side launcher for ROIPoolBackward.
// Dispatches over the floating-point dtype of `top_grad` and launches one
// CUDA thread per pooled gradient element.
int ROIPoolBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois,
                           const at::Tensor argmax, const float spatial_scale,
                           const int batch_size, const int channels,
                           const int height, const int width,
                           const int num_rois, const int pooled_h,
                           const int pooled_w, at::Tensor bottom_grad) {
  const int output_size = num_rois * pooled_h * pooled_w * channels;
  // TODO: use AT_DISPATCH_FLOATING_TYPES_AND_HALF when atomicAdd is resolved
  AT_DISPATCH_FLOATING_TYPES(
      top_grad.type(), "ROIPoolLaucherBackward", ([&] {
        const scalar_t *top_diff = top_grad.data<scalar_t>();
        const scalar_t *rois_data = rois.data<scalar_t>();
        const int *argmax_data = argmax.data<int>();
        scalar_t *bottom_diff = bottom_grad.data<scalar_t>();
        // Reject double at runtime: the kernel relies on atomicAdd, which
        // is presumably unavailable for double on the targeted hardware —
        // TODO confirm against the supported compute capabilities.
        if (sizeof(scalar_t) == sizeof(double)) {
          fprintf(stderr, "double is not supported\n");
          exit(-1);
        }
        ROIPoolBackward<scalar_t>
            <<<GET_BLOCKS(output_size), THREADS_PER_BLOCK>>>(
                output_size, top_diff, rois_data, argmax_data,
                scalar_t(spatial_scale), channels, height, width, pooled_h,
                pooled_w, bottom_diff);
      }));
  // Surface launch failures immediately rather than at the next sync point.
  cudaError_t err = cudaGetLastError();
  if (cudaSuccess != err) {
    fprintf(stderr, "cudaCheckError() failed : %s\n", cudaGetErrorString(err));
    exit(-1);
  }
  return 1;
}
import os
import subprocess
import time
from setuptools import find_packages, setup
def readme():
    """Return the contents of README.md for use as the long description."""
    with open('README.md') as f:
        return f.read()
# Semantic version of the package; SUFFIX carries pre-release tags (e.g. 'rc1').
MAJOR = 0
MINOR = 5
PATCH = 0
SUFFIX = ''
SHORT_VERSION = '{}.{}.{}{}'.format(MAJOR, MINOR, PATCH, SUFFIX)

# Path of the auto-generated module that records the built version + git hash.
version_file = 'mmdet/version.py'
def get_git_hash():
    """Return the current git commit SHA, or ``'unknown'`` if git fails."""

    def _run_git(cmd):
        # Run `cmd` under a minimal, locale-neutral environment so the
        # output is stable across machines and locales.
        env = {key: os.environ[key]
               for key in ('SYSTEMROOT', 'PATH', 'HOME')
               if os.environ.get(key) is not None}
        # LANGUAGE is used on win32
        env.update(LANGUAGE='C', LANG='C', LC_ALL='C')
        proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, env=env)
        return proc.communicate()[0]

    try:
        out = _run_git(['git', 'rev-parse', 'HEAD'])
        sha = out.strip().decode('ascii')
    except OSError:
        # git binary missing or not executable.
        sha = 'unknown'
    return sha
def get_hash():
    """Return a short identifier for the source revision being built.

    Prefers the live git checkout; falls back to the hash recorded in a
    previously generated version file; otherwise ``'unknown'``.
    """
    if os.path.exists('.git'):
        # Building from a git checkout: abbreviate the commit SHA.
        return get_git_hash()[:7]
    if os.path.exists(version_file):
        try:
            from mmdet.version import __version__
        except ImportError:
            raise ImportError('Unable to get git version')
        # The generated __version__ is '<short>+<sha>'.
        return __version__.split('+')[-1]
    return 'unknown'
def write_version_py():
    """Generate mmdet/version.py embedding the full and short versions."""
    template = """# GENERATED VERSION FILE
# TIME: {}
__version__ = '{}'
short_version = '{}'
"""
    # Full version is '<short>+<git hash>' so installs are traceable.
    full_version = SHORT_VERSION + '+' + get_hash()
    with open(version_file, 'w') as f:
        f.write(template.format(time.asctime(), full_version, SHORT_VERSION))
def get_version():
    """Read ``__version__`` back out of the generated version file.

    The file is executed in an explicit, isolated namespace. The original
    implementation relied on ``exec`` mutating the function's ``locals()``
    and then reading ``locals()['__version__']``, which is
    implementation-defined in Python 3 (``exec`` cannot reliably modify
    function locals); executing into a fresh dict is guaranteed to work.
    """
    namespace = {}
    with open(version_file, 'r') as f:
        exec(compile(f.read(), version_file, 'exec'), namespace)
    return namespace['__version__']
if __name__ == '__main__':
    # Regenerate mmdet/version.py first so the installed package reports
    # the exact revision it was built from.
    write_version_py()
    setup(
        name='mmdet',
        version=get_version(),
        description='Open MMLab Detection Toolbox',
        long_description=readme(),
        keywords='computer vision, object detection',
        url='https://github.com/open-mmlab/mmdetection',
        packages=find_packages(),
        # Ship pre-built C++/CUDA extension binaries with the package.
        package_data={'mmdet.ops': ['*/*.so']},
        classifiers=[
            'Development Status :: 4 - Beta',
            'License :: OSI Approved :: GNU General Public License v3 (GPLv3)',
            'Operating System :: OS Independent',
            'Programming Language :: Python :: 2',
            'Programming Language :: Python :: 2.7',
            'Programming Language :: Python :: 3',
            'Programming Language :: Python :: 3.4',
            'Programming Language :: Python :: 3.5',
            'Programming Language :: Python :: 3.6',
        ],
        license='GPLv3',
        setup_requires=['pytest-runner'],
        tests_require=['pytest'],
        install_requires=[
            'mmcv', 'numpy', 'matplotlib', 'six', 'terminaltables',
            'pycocotools'
        ],
        zip_safe=False)
from argparse import ArgumentParser
from mmdet.core import coco_eval
def main():
    """CLI entry point: evaluate a detection result file with COCO metrics."""
    parser = ArgumentParser(description='COCO Evaluation')
    parser.add_argument('result', help='result file path')
    parser.add_argument('--ann', help='annotation file path')
    result_types = ['proposal_fast', 'proposal', 'bbox', 'segm', 'keypoint']
    parser.add_argument(
        '--types', type=str, nargs='+', choices=result_types,
        default=['bbox'], help='result types')
    parser.add_argument(
        '--max-dets', type=int, nargs='+', default=[100, 300, 1000],
        help='proposal numbers, only used for recall evaluation')
    args = parser.parse_args()
    coco_eval(args.result, args.types, args.ann, args.max_dets)


if __name__ == '__main__':
    main()
#!/usr/bin/env bash
# Distributed training launcher.
# Usage: ./dist_train.sh <config> <num_gpus> [extra train.py args...]
PYTHON=${PYTHON:-"python"}

# $1: config file, $2: number of processes (one per GPU); any remaining
# arguments are forwarded verbatim to train.py.
$PYTHON -m torch.distributed.launch --nproc_per_node=$2 $(dirname "$0")/train.py $1 --launcher pytorch ${@:3}
import argparse
import torch
import mmcv
from mmcv.runner import load_checkpoint, parallel_test, obj_from_dict
from mmcv.parallel import scatter, collate, MMDataParallel
from mmdet import datasets
from mmdet.core import results2json, coco_eval
from mmdet.datasets import build_dataloader
from mmdet.models import build_detector, detectors
def single_test(model, data_loader, show=False):
    """Run inference over a dataset on a single GPU.

    Returns a list with one result per image; optionally visualizes each
    prediction as it is produced.
    """
    model.eval()
    collected = []
    progress = mmcv.ProgressBar(len(data_loader.dataset))
    for batch in data_loader:
        with torch.no_grad():
            # Rescale boxes back to the original image size unless we are
            # about to draw them on the (resized) network input.
            output = model(return_loss=False, rescale=not show, **batch)
        collected.append(output)
        if show:
            model.module.show_result(batch, output,
                                     data_loader.dataset.img_norm_cfg)
        # One progress tick per image in the batch.
        for _ in range(batch['img'][0].size(0)):
            progress.update()
    return collected
def _data_func(data, device_id):
    """Collate a single sample and move it onto `device_id` for parallel_test."""
    batch = collate([data], samples_per_gpu=1)
    inputs = scatter(batch, [device_id])[0]
    return dict(return_loss=False, rescale=True, **inputs)
def parse_args():
    """Parse command-line arguments for the test script."""
    parser = argparse.ArgumentParser(description='MMDet test detector')
    parser.add_argument('config', help='test config file path')
    parser.add_argument('checkpoint', help='checkpoint file')
    parser.add_argument(
        '--gpus', default=1, type=int, help='GPU number used for testing')
    parser.add_argument(
        '--proc_per_gpu', default=1, type=int,
        help='Number of processes per GPU')
    parser.add_argument('--out', help='output result file')
    parser.add_argument(
        '--eval', type=str, nargs='+',
        choices=['proposal', 'proposal_fast', 'bbox', 'segm', 'keypoints'],
        help='eval types')
    parser.add_argument('--show', action='store_true', help='show results')
    return parser.parse_args()
def main():
    """Entry point: run single- or multi-GPU inference, then optionally
    dump results and evaluate them with COCO metrics."""
    args = parse_args()

    # Results are pickled, so insist on a pickle-style extension up front.
    if args.out is not None and not args.out.endswith(('.pkl', '.pickle')):
        raise ValueError('The output file must be a pkl file.')

    cfg = mmcv.Config.fromfile(args.config)
    # Weights come from the checkpoint, so skip pretrained-backbone loading,
    # and put the test dataset into test mode.
    cfg.model.pretrained = None
    cfg.data.test.test_mode = True
    dataset = obj_from_dict(cfg.data.test, datasets, dict(test_mode=True))
    if args.gpus == 1:
        # Single GPU: build the model in-process and iterate the loader.
        model = build_detector(
            cfg.model, train_cfg=None, test_cfg=cfg.test_cfg)
        load_checkpoint(model, args.checkpoint)
        model = MMDataParallel(model, device_ids=[0])

        data_loader = build_dataloader(
            dataset,
            imgs_per_gpu=1,
            workers_per_gpu=cfg.data.workers_per_gpu,
            num_gpus=1,
            dist=False,
            shuffle=False)
        outputs = single_test(model, data_loader, args.show)
    else:
        # Multi GPU: hand the model class + kwargs to mmcv's parallel_test,
        # which constructs one model per worker process.
        model_args = cfg.model.copy()
        model_args.update(train_cfg=None, test_cfg=cfg.test_cfg)
        model_type = getattr(detectors, model_args.pop('type'))
        outputs = parallel_test(
            model_type,
            model_args,
            args.checkpoint,
            dataset,
            _data_func,
            range(args.gpus),
            workers_per_gpu=args.proc_per_gpu)

    if args.out:
        print('writing results to {}'.format(args.out))
        mmcv.dump(outputs, args.out)
        eval_types = args.eval
        if eval_types:
            print('Starting evaluate {}'.format(' and '.join(eval_types)))
            if eval_types == ['proposal_fast']:
                # proposal_fast evaluates directly from the pickled results.
                result_file = args.out
            else:
                # Other metrics go through a COCO-format json file.
                result_file = args.out + '.json'
                results2json(dataset, outputs, result_file)
            coco_eval(result_file, eval_types, dataset.coco)


if __name__ == '__main__':
    main()
from __future__ import division
import argparse
from mmcv import Config
from mmcv.runner import obj_from_dict
from mmdet import datasets, __version__
from mmdet.apis import (train_detector, init_dist, get_root_logger,
set_random_seed)
from mmdet.models import build_detector
def parse_args():
    """Parse command-line arguments for the training script."""
    parser = argparse.ArgumentParser(description='Train a detector')
    parser.add_argument('config', help='train config file path')
    parser.add_argument('--work_dir', help='the dir to save logs and models')
    parser.add_argument(
        '--validate', action='store_true',
        help='whether to evaluate the checkpoint during training')
    parser.add_argument(
        '--gpus', type=int, default=1,
        help='number of gpus to use '
        '(only applicable to non-distributed training)')
    parser.add_argument('--seed', type=int, default=None, help='random seed')
    parser.add_argument(
        '--launcher', choices=['none', 'pytorch', 'slurm', 'mpi'],
        default='none', help='job launcher')
    # Consumed by torch.distributed.launch.
    parser.add_argument('--local_rank', type=int, default=0)
    return parser.parse_args()
def main():
    """Entry point: load the config, set up the (possibly distributed)
    environment, build the model/dataset and start training."""
    args = parse_args()

    cfg = Config.fromfile(args.config)
    # update configs according to CLI args
    if args.work_dir is not None:
        cfg.work_dir = args.work_dir
    cfg.gpus = args.gpus
    if cfg.checkpoint_config is not None:
        # save mmdet version in checkpoints as meta data
        cfg.checkpoint_config.meta = dict(
            mmdet_version=__version__, config=cfg.text)

    # init distributed env first, since logger depends on the dist info.
    if args.launcher == 'none':
        distributed = False
    else:
        distributed = True
        init_dist(args.launcher, **cfg.dist_params)

    # init logger before other steps
    logger = get_root_logger(cfg.log_level)
    logger.info('Distributed training: {}'.format(distributed))

    # set random seeds for reproducibility when requested
    if args.seed is not None:
        logger.info('Set random seed to {}'.format(args.seed))
        set_random_seed(args.seed)

    model = build_detector(
        cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg)
    train_dataset = obj_from_dict(cfg.data.train, datasets)
    train_detector(
        model,
        train_dataset,
        cfg,
        distributed=distributed,
        validate=args.validate,
        logger=logger)


if __name__ == '__main__':
    main()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment