Unverified commit 7d343fd2, authored by Kai Chen and committed by GitHub

Merge pull request #8 from hellock/dev

API cleaning and code refactoring (WIP)
parents 0e0b9246 630687f4
......@@ -17,9 +17,9 @@ int ROIAlignBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois,
const int pooled_height, const int pooled_width,
at::Tensor bottom_grad);
#define CHECK_CUDA(x) AT_ASSERT(x.type().is_cuda(), #x " must be a CUDAtensor ")
#define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ")
#define CHECK_CONTIGUOUS(x) \
AT_ASSERT(x.is_contiguous(), #x " must be contiguous ")
AT_CHECK(x.is_contiguous(), #x, " must be contiguous ")
#define CHECK_INPUT(x) \
CHECK_CUDA(x); \
CHECK_CONTIGUOUS(x)
......
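Switching CHECK_CUDA and CHECK_CONTIGUOUS from AT_ASSERT to AT_CHECK means a failed device or contiguity check now raises an error that propagates to Python instead of tripping a bare assertion. A minimal Python-side sketch of that behaviour, assuming the op is reached through the RoIPool wrapper that appears later in this diff (the import path and constructor arguments are example assumptions, and the wrapper is assumed to forward the tensors to the extension unchanged):

import torch
from mmdet.ops import RoIPool  # import path assumed

roi_pool = RoIPool(7, 1.0 / 8)           # (out_size, spatial_scale), example values
feat_cpu = torch.randn(1, 16, 32, 32)    # CPU tensor, so CHECK_CUDA fails
rois = torch.Tensor([[0, 4, 4, 20, 20]]).cuda()
try:
    roi_pool(feat_cpu, rois)
except RuntimeError as err:
    print(err)                           # "... must be a CUDAtensor ..."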
#include <ATen/ATen.h>
#include <THC/THCAtomics.cuh>
#include <cuda.h>
#include <cuda_runtime.h>
using namespace at; // temporary fix for pytorch<=0.4.1 (see #9848)
#include <math.h>
#include <stdio.h>
#include <vector>
#define CUDA_1D_KERNEL_LOOP(i, n) \
for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \
#define CUDA_1D_KERNEL_LOOP(i, n) \
for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \
i += blockDim.x * gridDim.x)
#define THREADS_PER_BLOCK 1024
......@@ -28,10 +24,8 @@ __device__ scalar_t bilinear_interpolate(const scalar_t *bottom_data,
return 0;
}
if (y <= 0)
y = 0;
if (x <= 0)
x = 0;
if (y <= 0) y = 0;
if (x <= 0) x = 0;
int y_low = (int)y;
int x_low = (int)x;
......@@ -69,12 +63,13 @@ __device__ scalar_t bilinear_interpolate(const scalar_t *bottom_data,
}
template <typename scalar_t>
__global__ void
ROIAlignForward(const int nthreads, const scalar_t *bottom_data,
const scalar_t *bottom_rois, const scalar_t spatial_scale,
const int sample_num, const int channels, const int height,
const int width, const int pooled_height,
const int pooled_width, scalar_t *top_data) {
__global__ void ROIAlignForward(const int nthreads, const scalar_t *bottom_data,
const scalar_t *bottom_rois,
const scalar_t spatial_scale,
const int sample_num, const int channels,
const int height, const int width,
const int pooled_height, const int pooled_width,
scalar_t *top_data) {
CUDA_1D_KERNEL_LOOP(index, nthreads) {
// (n, c, ph, pw) is an element in the aligned output
int pw = index % pooled_width;
......@@ -101,7 +96,7 @@ ROIAlignForward(const int nthreads, const scalar_t *bottom_data,
int sample_num_h = (sample_num > 0)
? sample_num
: ceil(roi_height / pooled_height); // e.g., = 2
: ceil(roi_height / pooled_height); // e.g., = 2
int sample_num_w =
(sample_num > 0) ? sample_num : ceil(roi_width / pooled_width);
......@@ -137,17 +132,17 @@ int ROIAlignForwardLaucher(const at::Tensor features, const at::Tensor rois,
const int pooled_height, const int pooled_width,
at::Tensor output) {
const int output_size = num_rois * pooled_height * pooled_width * channels;
AT_DISPATCH_FLOATING_TYPES(
AT_DISPATCH_FLOATING_TYPES_AND_HALF(
features.type(), "ROIAlignLaucherForward", ([&] {
const scalar_t *bottom_data = features.data<scalar_t>();
const scalar_t *rois_data = rois.data<scalar_t>();
scalar_t *top_data = output.data<scalar_t>();
ROIAlignForward<
scalar_t><<<GET_BLOCKS(output_size), THREADS_PER_BLOCK>>>(
output_size, bottom_data, rois_data, scalar_t(spatial_scale),
sample_num, channels, height, width, pooled_height, pooled_width,
top_data);
ROIAlignForward<scalar_t>
<<<GET_BLOCKS(output_size), THREADS_PER_BLOCK>>>(
output_size, bottom_data, rois_data, scalar_t(spatial_scale),
sample_num, channels, height, width, pooled_height,
pooled_width, top_data);
}));
cudaError_t err = cudaGetLastError();
if (cudaSuccess != err) {
......@@ -159,11 +154,12 @@ int ROIAlignForwardLaucher(const at::Tensor features, const at::Tensor rois,
}
template <typename scalar_t>
__device__ void
bilinear_interpolate_gradient(const int height, const int width, scalar_t y,
scalar_t x, scalar_t &w1, scalar_t &w2,
scalar_t &w3, scalar_t &w4, int &x_low,
int &x_high, int &y_low, int &y_high) {
__device__ void bilinear_interpolate_gradient(const int height, const int width,
scalar_t y, scalar_t x,
scalar_t &w1, scalar_t &w2,
scalar_t &w3, scalar_t &w4,
int &x_low, int &x_high,
int &y_low, int &y_high) {
// deal with cases that inverse elements are out of feature map boundary
if (y < -1.0 || y > height || x < -1.0 || x > width) {
w1 = w2 = w3 = w4 = 0.;
......@@ -171,10 +167,8 @@ bilinear_interpolate_gradient(const int height, const int width, scalar_t y,
return;
}
if (y <= 0)
y = 0;
if (x <= 0)
x = 0;
if (y <= 0) y = 0;
if (x <= 0) x = 0;
y_low = (int)y;
x_low = (int)x;
......@@ -204,12 +198,11 @@ bilinear_interpolate_gradient(const int height, const int width, scalar_t y,
}
template <typename scalar_t>
__global__ void
ROIAlignBackward(const int nthreads, const scalar_t *top_diff,
const scalar_t *bottom_rois, const scalar_t spatial_scale,
const int sample_num, const int channels, const int height,
const int width, const int pooled_height,
const int pooled_width, scalar_t *bottom_diff) {
__global__ void ROIAlignBackward(
const int nthreads, const scalar_t *top_diff, const scalar_t *bottom_rois,
const scalar_t spatial_scale, const int sample_num, const int channels,
const int height, const int width, const int pooled_height,
const int pooled_width, scalar_t *bottom_diff) {
CUDA_1D_KERNEL_LOOP(index, nthreads) {
// (n, c, ph, pw) is an element in the aligned output
int pw = index % pooled_width;
......@@ -239,7 +232,7 @@ ROIAlignBackward(const int nthreads, const scalar_t *top_diff,
int sample_num_h = (sample_num > 0)
? sample_num
: ceil(roi_height / pooled_height); // e.g., = 2
: ceil(roi_height / pooled_height); // e.g., = 2
int sample_num_w =
(sample_num > 0) ? sample_num : ceil(roi_width / pooled_width);
......@@ -279,13 +272,6 @@ ROIAlignBackward(const int nthreads, const scalar_t *top_diff,
}
}
template <>
__global__ void ROIAlignBackward<double>(
const int nthreads, const double *top_diff, const double *bottom_rois,
const double spatial_scale, const int sample_num, const int channels,
const int height, const int width, const int pooled_height,
const int pooled_width, double *bottom_diff) {}
int ROIAlignBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois,
const float spatial_scale, const int sample_num,
const int channels, const int height,
......@@ -294,6 +280,7 @@ int ROIAlignBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois,
at::Tensor bottom_grad) {
const int output_size = num_rois * pooled_height * pooled_width * channels;
// TODO: use AT_DISPATCH_FLOATING_TYPES_AND_HALF when atomicAdd is resolved
AT_DISPATCH_FLOATING_TYPES(
top_grad.type(), "ROIAlignLaucherBackward", ([&] {
const scalar_t *top_diff = top_grad.data<scalar_t>();
......@@ -304,10 +291,11 @@ int ROIAlignBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois,
exit(-1);
}
ROIAlignBackward<
scalar_t><<<GET_BLOCKS(output_size), THREADS_PER_BLOCK>>>(
output_size, top_diff, rois_data, spatial_scale, sample_num,
channels, height, width, pooled_height, pooled_width, bottom_diff);
ROIAlignBackward<scalar_t>
<<<GET_BLOCKS(output_size), THREADS_PER_BLOCK>>>(
output_size, top_diff, rois_data, spatial_scale, sample_num,
channels, height, width, pooled_height, pooled_width,
bottom_diff);
}));
cudaError_t err = cudaGetLastError();
if (cudaSuccess != err) {
......
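When sample_num <= 0, the forward and backward kernels above fall back to an adaptive grid of ceil(roi_height / pooled_height) x ceil(roi_width / pooled_width) bilinear samples per output bin. A small pure-Python illustration of that rule with example ROI coordinates (any coordinate offset handling not shown in these hunks is ignored):

import math

spatial_scale = 1.0 / 16
pooled_height = pooled_width = 7
x1, y1, x2, y2 = 32.0, 48.0, 160.0, 208.0     # example ROI in image coordinates

roi_height = (y2 - y1) * spatial_scale        # 10.0 on the feature map
roi_width = (x2 - x1) * spatial_scale         # 8.0

sample_num = 0                                # <= 0 selects the adaptive rule
sample_num_h = sample_num if sample_num > 0 else math.ceil(roi_height / pooled_height)
sample_num_w = sample_num if sample_num > 0 else math.ceil(roi_width / pooled_width)
print(sample_num_h, sample_num_w)             # 2 2, i.e. 4 samples averaged per bin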
from .functions.roi_pool import roi_pool
from .modules.roi_pool import RoIPool
__all__ = ['roi_pool', 'RoIPool']
......@@ -4,7 +4,7 @@ from torch.autograd import gradcheck
import os.path as osp
import sys
sys.path.append(osp.abspath(osp.join(__file__, '../../')))
from roi_pooling import RoIPool
from roi_pool import RoIPool # noqa: E402
feat = torch.randn(4, 16, 15, 15, requires_grad=True).cuda()
rois = torch.Tensor([[0, 0, 0, 50, 50], [0, 10, 30, 43, 55],
......
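The truncated test above builds a random feature map and ROIs and runs torch.autograd.gradcheck against the RoIPool module; note that this diff also removes the empty double specialization of ROIPoolBackward, so a double-precision gradcheck would now hit a real kernel rather than a no-op. A self-contained sketch of how such a check is typically finished (pooled size, spatial scale and tolerances are example values, not necessarily those in the actual test file):

import torch
from torch.autograd import gradcheck
from roi_pool import RoIPool  # same import as in the test above

feat = torch.randn(4, 16, 15, 15, requires_grad=True).cuda()
rois = torch.Tensor([[0, 0, 0, 50, 50], [0, 10, 30, 43, 55]]).cuda()
froipool = RoIPool(4, 1.0 / 8)               # example (out_size, spatial_scale)
print('gradcheck passed:', gradcheck(froipool, (feat, rois), eps=1e-5, atol=1e-3))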
......@@ -16,9 +16,9 @@ int ROIPoolBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois,
const int num_rois, const int pooled_h,
const int pooled_w, at::Tensor bottom_grad);
#define CHECK_CUDA(x) AT_ASSERT(x.type().is_cuda(), #x " must be a CUDAtensor ")
#define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ")
#define CHECK_CONTIGUOUS(x) \
AT_ASSERT(x.is_contiguous(), #x " must be contiguous ")
AT_CHECK(x.is_contiguous(), #x, " must be contiguous ")
#define CHECK_INPUT(x) \
CHECK_CUDA(x); \
CHECK_CONTIGUOUS(x)
......
#include <ATen/ATen.h>
#include <THC/THCAtomics.cuh>
#include <cuda.h>
#include <cuda_runtime.h>
using namespace at; // temporary fix for pytorch<=0.4.1 (see #9848)
#include <math.h>
#include <stdio.h>
#include <vector>
#define CUDA_1D_KERNEL_LOOP(i, n) \
for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \
#define CUDA_1D_KERNEL_LOOP(i, n) \
for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \
i += blockDim.x * gridDim.x)
#define THREADS_PER_BLOCK 1024
......@@ -44,8 +40,7 @@ __global__ void ROIPoolForward(const int nthreads, const scalar_t *bottom_data,
// force malformed rois to be 1x1
scalar_t roi_w = roi_x2 - roi_x1;
scalar_t roi_h = roi_y2 - roi_y1;
if (roi_w <= 0 || roi_h <= 0)
continue;
if (roi_w <= 0 || roi_h <= 0) continue;
scalar_t bin_size_w = roi_w / static_cast<scalar_t>(pooled_w);
scalar_t bin_size_h = roi_h / static_cast<scalar_t>(pooled_h);
......@@ -68,7 +63,8 @@ __global__ void ROIPoolForward(const int nthreads, const scalar_t *bottom_data,
bottom_data += (roi_batch_ind * channels + c) * height * width;
// Define an empty pooling region to be zero
scalar_t max_val = is_empty ? 0 : bottom_data[bin_y1 * width + bin_x1] - 1;
scalar_t max_val = is_empty ? static_cast<scalar_t>(0)
: bottom_data[bin_y1 * width + bin_x1] - 1;
for (int h = bin_y1; h < bin_y2; ++h) {
for (int w = bin_x1; w < bin_x2; ++w) {
......@@ -80,8 +76,7 @@ __global__ void ROIPoolForward(const int nthreads, const scalar_t *bottom_data,
}
}
top_data[index] = max_val;
if (argmax_data != NULL)
argmax_data[index] = max_idx;
if (argmax_data != NULL) argmax_data[index] = max_idx;
}
}
......@@ -92,17 +87,18 @@ int ROIPoolForwardLaucher(const at::Tensor features, const at::Tensor rois,
at::Tensor output, at::Tensor argmax) {
const int output_size = num_rois * channels * pooled_h * pooled_w;
AT_DISPATCH_FLOATING_TYPES(
AT_DISPATCH_FLOATING_TYPES_AND_HALF(
features.type(), "ROIPoolLaucherForward", ([&] {
const scalar_t *bottom_data = features.data<scalar_t>();
const scalar_t *rois_data = rois.data<scalar_t>();
scalar_t *top_data = output.data<scalar_t>();
int *argmax_data = argmax.data<int>();
ROIPoolForward<
scalar_t><<<GET_BLOCKS(output_size), THREADS_PER_BLOCK>>>(
output_size, bottom_data, rois_data, scalar_t(spatial_scale),
channels, height, width, pooled_h, pooled_w, top_data, argmax_data);
ROIPoolForward<scalar_t>
<<<GET_BLOCKS(output_size), THREADS_PER_BLOCK>>>(
output_size, bottom_data, rois_data, scalar_t(spatial_scale),
channels, height, width, pooled_h, pooled_w, top_data,
argmax_data);
}));
cudaError_t err = cudaGetLastError();
if (cudaSuccess != err) {
......@@ -135,28 +131,6 @@ __global__ void ROIPoolBackward(const int nthreads, const scalar_t *top_diff,
}
}
template <>
__global__ void
ROIPoolBackward<double>(const int nthreads, const double *top_diff,
const double *rois, const int *argmax_data,
const double spatial_scale, const int channels,
const int height, const int width, const int pooled_h,
const int pooled_w, double *bottom_diff) {
// CUDA_1D_KERNEL_LOOP(index, nthreads) {
// int pw = index % pooled_w;
// int ph = (index / pooled_w) % pooled_h;
// int c = (index / pooled_w / pooled_h) % channels;
// int n = index / pooled_w / pooled_h / channels;
// int roi_batch_ind = rois[n * 5];
// int bottom_index = argmax_data[(n * channels + c) * pooled_h * pooled_w +
// ph * pooled_w + pw];
// *(bottom_diff + (roi_batch_ind * channels + c) * height * width +
// bottom_index) +=top_diff[index];
// }
}
int ROIPoolBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois,
const at::Tensor argmax, const float spatial_scale,
const int batch_size, const int channels,
......@@ -165,6 +139,7 @@ int ROIPoolBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois,
const int pooled_w, at::Tensor bottom_grad) {
const int output_size = num_rois * pooled_h * pooled_w * channels;
// TODO: use AT_DISPATCH_FLOATING_TYPES_AND_HALF when atomicAdd is resolved
AT_DISPATCH_FLOATING_TYPES(
top_grad.type(), "ROIPoolLaucherBackward", ([&] {
const scalar_t *top_diff = top_grad.data<scalar_t>();
......@@ -177,11 +152,11 @@ int ROIPoolBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois,
exit(-1);
}
ROIPoolBackward<
scalar_t><<<GET_BLOCKS(output_size), THREADS_PER_BLOCK>>>(
output_size, top_diff, rois_data, argmax_data,
scalar_t(spatial_scale), channels, height, width, pooled_h,
pooled_w, bottom_diff);
ROIPoolBackward<scalar_t>
<<<GET_BLOCKS(output_size), THREADS_PER_BLOCK>>>(
output_size, top_diff, rois_data, argmax_data,
scalar_t(spatial_scale), channels, height, width, pooled_h,
pooled_w, bottom_diff);
}));
cudaError_t err = cudaGetLastError();
if (cudaSuccess != err) {
......
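Both forward launchers in this diff now dispatch AT_DISPATCH_FLOATING_TYPES_AND_HALF, while the backward launchers keep AT_DISPATCH_FLOATING_TYPES until half-precision atomicAdd is resolved (see the TODO comments above). A sketch of the practical consequence on the Python side; the import path and constructor arguments are assumptions, and the wrapper is assumed not to cast dtypes itself:

import torch
from mmdet.ops import RoIPool  # import path assumed

roi_pool = RoIPool(7, 1.0 / 16)                        # example (out_size, spatial_scale)
rois = torch.Tensor([[0, 10, 10, 100, 120]]).cuda()

# Forward-only use in half precision is dispatched to the new half kernel:
feat_fp16 = torch.randn(1, 256, 50, 50).cuda().half()
out = roi_pool(feat_fp16, rois.half())

# Training still needs float features so the backward dispatch succeeds:
feat_fp32 = torch.randn(1, 256, 50, 50).cuda().requires_grad_()
roi_pool(feat_fp32, rois).sum().backward()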
__version__ = '0.5.0'
import os
import subprocess
import time
from setuptools import find_packages, setup
......@@ -7,34 +10,102 @@ def readme():
return content
MAJOR = 0
MINOR = 5
PATCH = 0
SUFFIX = ''
SHORT_VERSION = '{}.{}.{}{}'.format(MAJOR, MINOR, PATCH, SUFFIX)
version_file = 'mmdet/version.py'
def get_git_hash():
def _minimal_ext_cmd(cmd):
# construct minimal environment
env = {}
for k in ['SYSTEMROOT', 'PATH', 'HOME']:
v = os.environ.get(k)
if v is not None:
env[k] = v
# LANGUAGE is used on win32
env['LANGUAGE'] = 'C'
env['LANG'] = 'C'
env['LC_ALL'] = 'C'
out = subprocess.Popen(
cmd, stdout=subprocess.PIPE, env=env).communicate()[0]
return out
try:
out = _minimal_ext_cmd(['git', 'rev-parse', 'HEAD'])
sha = out.strip().decode('ascii')
except OSError:
sha = 'unknown'
return sha
def get_hash():
if os.path.exists('.git'):
sha = get_git_hash()[:7]
elif os.path.exists(version_file):
try:
from mmdet.version import __version__
sha = __version__.split('+')[-1]
except ImportError:
raise ImportError('Unable to get git version')
else:
sha = 'unknown'
return sha
def write_version_py():
content = """# GENERATED VERSION FILE
# TIME: {}
__version__ = '{}'
short_version = '{}'
"""
sha = get_hash()
VERSION = SHORT_VERSION + '+' + sha
with open(version_file, 'w') as f:
f.write(content.format(time.asctime(), VERSION, SHORT_VERSION))
def get_version():
version_file = 'mmdet/version.py'
with open(version_file, 'r') as f:
exec(compile(f.read(), version_file, 'exec'))
return locals()['__version__']
setup(
name='mmdet',
version=get_version(),
description='Open MMLab Detection Toolbox',
long_description=readme(),
keywords='computer vision, object detection',
packages=find_packages(),
classifiers=[
'Development Status :: 4 - Beta',
'License :: OSI Approved :: GNU General Public License v3 (GPLv3)',
'Operating System :: OS Independent',
'Programming Language :: Python :: 2',
'Programming Language :: Python :: 2.7',
'Programming Language :: Python :: 3',
'Programming Language :: Python :: 3.4',
'Programming Language :: Python :: 3.5',
'Programming Language :: Python :: 3.6',
'Topic :: Utilities',
],
license='GPLv3',
setup_requires=['pytest-runner'],
tests_require=['pytest'],
install_requires=['numpy', 'matplotlib', 'six', 'terminaltables'],
zip_safe=False)
if __name__ == '__main__':
write_version_py()
setup(
name='mmdet',
version=get_version(),
description='Open MMLab Detection Toolbox',
long_description=readme(),
keywords='computer vision, object detection',
url='https://github.com/open-mmlab/mmdetection',
packages=find_packages(),
package_data={'mmdet.ops': ['*/*.so']},
classifiers=[
'Development Status :: 4 - Beta',
'License :: OSI Approved :: GNU General Public License v3 (GPLv3)',
'Operating System :: OS Independent',
'Programming Language :: Python :: 2',
'Programming Language :: Python :: 2.7',
'Programming Language :: Python :: 3',
'Programming Language :: Python :: 3.4',
'Programming Language :: Python :: 3.5',
'Programming Language :: Python :: 3.6',
],
license='GPLv3',
setup_requires=['pytest-runner'],
tests_require=['pytest'],
install_requires=[
'numpy', 'matplotlib', 'six', 'terminaltables', 'pycocotools'
],
zip_safe=False)
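For reference, write_version_py() above stamps mmdet/version.py with the short version plus the current git hash, and get_version() recovers it by exec-ing that file. A sketch of the generated file and the read-back (timestamp and hash are placeholders):

# mmdet/version.py as generated by write_version_py():
#
#   # GENERATED VERSION FILE
#   # TIME: Sun Oct  7 12:00:00 2018
#   __version__ = '0.5.0+7d343fd'
#   short_version = '0.5.0'

version_file = 'mmdet/version.py'
with open(version_file, 'r') as f:
    exec(compile(f.read(), version_file, 'exec'))
print(locals()['__version__'])   # e.g. '0.5.0+7d343fd'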
from argparse import ArgumentParser
from mmdet.core import coco_eval
def main():
parser = ArgumentParser(description='COCO Evaluation')
parser.add_argument('result', help='result file path')
parser.add_argument('--ann', help='annotation file path')
parser.add_argument(
'--types',
type=str,
nargs='+',
choices=['proposal_fast', 'proposal', 'bbox', 'segm', 'keypoint'],
default=['bbox'],
help='result types')
parser.add_argument(
'--max-dets',
type=int,
nargs='+',
default=[100, 300, 1000],
help='proposal numbers, only used for recall evaluation')
args = parser.parse_args()
coco_eval(args.result, args.types, args.ann, args.max_dets)
if __name__ == '__main__':
main()
#!/usr/bin/env bash
PYTHON=${PYTHON:-"python"}
$PYTHON -m torch.distributed.launch --nproc_per_node=$2 $(dirname "$0")/train.py $1 --launcher pytorch ${@:3}
from argparse import ArgumentParser
from multiprocessing import Pool
import matplotlib.pyplot as plt
import numpy as np
import copy
import os
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
def generate_area_range(splitRng=32, stop_size=128):
areaRng = [[0**2, 1e5**2], [0**2, 32**2], [32**2, 96**2], [96**2, 1e5**2]]
start = 0
while start < stop_size:
end = start + splitRng
areaRng.append([start * start, end * end])
start = end
areaRng.append([start * start, 1e5**2])
return areaRng
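For the defaults used further down (splitRng=32, stop_size=128), this helper returns the four standard COCO area ranges followed by 32-pixel-wide area bands; a worked example of its output (values written as plain numbers for readability):

print(generate_area_range(32, 128))
# [[0, 1e10], [0, 1024], [1024, 9216], [9216, 1e10],      # all / small / medium / large
#  [0, 1024], [1024, 4096], [4096, 9216], [9216, 16384],  # 32-px-wide bands
#  [16384, 1e10]]                                         # everything above stop_size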
def print_summarize(iouThr=None,
iouThrs=None,
precision=None,
recall=None,
areaRng_id=4,
areaRngs=None,
maxDets_id=2,
maxDets=None):
assert (precision is not None) or (recall is not None)
iStr = ' {:<18} {} @[ IoU={:<9} | size={:>5}-{:>5} | maxDets={:>3d} ] = {:0.3f}'
titleStr = 'Average Precision' if precision is not None else 'Average Recall'
typeStr = '(AP)' if precision is not None else '(AR)'
iouStr = '{:0.2f}:{:0.2f}'.format(iouThrs[0], iouThrs[-1]) \
if iouThr is None else '{:0.2f}'.format(iouThr)
aind = [areaRng_id]
mind = [maxDets_id]
if precision is not None:
# dimension of precision: [TxRxKxAxM]
s = precision
# IoU
if iouThr is not None:
t = np.where(iouThr == iouThrs)[0]
s = s[t]
s = s[:, :, :, aind, mind]
else:
# dimension of recall: [TxKxAxM]
s = recall
if iouThr is not None:
t = np.where(iouThr == iouThrs)[0]
s = s[t]
s = s[:, :, aind, mind]
if len(s[s > -1]) == 0:
mean_s = -1
else:
mean_s = np.mean(s[s > -1])
print(
iStr.format(
titleStr, typeStr, iouStr, np.sqrt(areaRngs[areaRng_id][0]),
np.sqrt(areaRngs[areaRng_id][1])
if np.sqrt(areaRngs[areaRng_id][1]) < 999 else 'max',
maxDets[maxDets_id], mean_s))
def eval_results(res_file, ann_file, res_types, splitRng):
for res_type in res_types:
assert res_type in ['proposal', 'bbox', 'segm', 'keypoints']
areaRng = generate_area_range(splitRng)
cocoGt = COCO(ann_file)
cocoDt = cocoGt.loadRes(res_file)
imgIds = cocoGt.getImgIds()
for res_type in res_types:
iou_type = 'bbox' if res_type == 'proposal' else res_type
cocoEval = COCOeval(cocoGt, cocoDt, iou_type)
cocoEval.params.imgIds = imgIds
if res_type == 'proposal':
cocoEval.params.useCats = 0
cocoEval.params.maxDets = [100, 300, 1000]
cocoEval.params.areaRng = areaRng
cocoEval.evaluate()
cocoEval.accumulate()
cocoEval.summarize()
ps = cocoEval.eval['precision']
rc = cocoEval.eval['recall']
for i in range(len(areaRng)):
print_summarize(None, cocoEval.params.iouThrs, ps, None, i,
areaRng, 2, cocoEval.params.maxDets)
def makeplot(rs, ps, outDir, class_name):
cs = np.vstack([
np.ones((2, 3)),
np.array([.31, .51, .74]),
np.array([.75, .31, .30]),
np.array([.36, .90, .38]),
np.array([.50, .39, .64]),
np.array([1, .6, 0])
])
areaNames = ['all', 'small', 'medium', 'large']
types = ['C75', 'C50', 'Loc', 'Sim', 'Oth', 'BG', 'FN']
for i in range(len(areaNames)):
area_ps = ps[..., i, 0]
figure_tile = class_name + '-' + areaNames[i]
aps = [ps_.mean() for ps_ in area_ps]
ps_curve = [
ps_.mean(axis=1) if ps_.ndim > 1 else ps_ for ps_ in area_ps
]
ps_curve.insert(0, np.zeros(ps_curve[0].shape))
fig = plt.figure()
ax = plt.subplot(111)
for k in range(len(types)):
ax.plot(rs, ps_curve[k + 1], color=[0, 0, 0], linewidth=0.5)
ax.fill_between(
rs,
ps_curve[k],
ps_curve[k + 1],
color=cs[k],
label=str('[{:.3f}'.format(aps[k]) + ']' + types[k]))
plt.xlabel('recall')
plt.ylabel('precision')
plt.xlim(0, 1.)
plt.ylim(0, 1.)
plt.title(figure_tile)
plt.legend()
# plt.show()
fig.savefig(outDir + '/{}.png'.format(figure_tile))
plt.close(fig)
def analyze_individual_category(k, cocoDt, cocoGt, catId, iou_type):
nm = cocoGt.loadCats(catId)[0]
print('--------------analyzing {}-{}---------------'.format(
k + 1, nm['name']))
ps_ = {}
dt = copy.deepcopy(cocoDt)
nm = cocoGt.loadCats(catId)[0]
imgIds = cocoGt.getImgIds()
dt_anns = dt.dataset['annotations']
select_dt_anns = []
for ann in dt_anns:
if ann['category_id'] == catId:
select_dt_anns.append(ann)
dt.dataset['annotations'] = select_dt_anns
dt.createIndex()
# compute precision but ignore superclass confusion
gt = copy.deepcopy(cocoGt)
child_catIds = gt.getCatIds(supNms=[nm['supercategory']])
for idx, ann in enumerate(gt.dataset['annotations']):
if (ann['category_id'] in child_catIds
and ann['category_id'] != catId):
gt.dataset['annotations'][idx]['ignore'] = 1
gt.dataset['annotations'][idx]['iscrowd'] = 1
gt.dataset['annotations'][idx]['category_id'] = catId
cocoEval = COCOeval(gt, copy.deepcopy(dt), iou_type)
cocoEval.params.imgIds = imgIds
cocoEval.params.maxDets = [100]
cocoEval.params.iouThrs = [.1]
cocoEval.params.useCats = 1
cocoEval.evaluate()
cocoEval.accumulate()
ps_supercategory = cocoEval.eval['precision'][0, :, k, :, :]
ps_['ps_supercategory'] = ps_supercategory
# compute precision but ignore any class confusion
gt = copy.deepcopy(cocoGt)
for idx, ann in enumerate(gt.dataset['annotations']):
if ann['category_id'] != catId:
gt.dataset['annotations'][idx]['ignore'] = 1
gt.dataset['annotations'][idx]['iscrowd'] = 1
gt.dataset['annotations'][idx]['category_id'] = catId
cocoEval = COCOeval(gt, copy.deepcopy(dt), iou_type)
cocoEval.params.imgIds = imgIds
cocoEval.params.maxDets = [100]
cocoEval.params.iouThrs = [.1]
cocoEval.params.useCats = 1
cocoEval.evaluate()
cocoEval.accumulate()
ps_allcategory = cocoEval.eval['precision'][0, :, k, :, :]
ps_['ps_allcategory'] = ps_allcategory
return k, ps_
def analyze_results(res_file, ann_file, res_types, out_dir):
for res_type in res_types:
assert res_type in ['bbox', 'segm']
directory = os.path.dirname(out_dir + '/')
if not os.path.exists(directory):
print('-------------create {}-----------------'.format(out_dir))
os.makedirs(directory)
cocoGt = COCO(ann_file)
cocoDt = cocoGt.loadRes(res_file)
imgIds = cocoGt.getImgIds()
for res_type in res_types:
iou_type = res_type
cocoEval = COCOeval(
copy.deepcopy(cocoGt), copy.deepcopy(cocoDt), iou_type)
cocoEval.params.imgIds = imgIds
cocoEval.params.iouThrs = [.75, .5, .1]
cocoEval.params.maxDets = [100]
cocoEval.evaluate()
cocoEval.accumulate()
ps = cocoEval.eval['precision']
ps = np.vstack([ps, np.zeros((4, *ps.shape[1:]))])
catIds = cocoGt.getCatIds()
recThrs = cocoEval.params.recThrs
with Pool(processes=48) as pool:
args = [(k, cocoDt, cocoGt, catId, iou_type)
for k, catId in enumerate(catIds)]
analyze_results = pool.starmap(analyze_individual_category, args)
for k, catId in enumerate(catIds):
nm = cocoGt.loadCats(catId)[0]
print('--------------saving {}-{}---------------'.format(
k + 1, nm['name']))
analyze_result = analyze_results[k]
assert k == analyze_result[0]
ps_supercategory = analyze_result[1]['ps_supercategory']
ps_allcategory = analyze_result[1]['ps_allcategory']
# compute precision but ignore superclass confusion
ps[3, :, k, :, :] = ps_supercategory
# compute precision but ignore any class confusion
ps[4, :, k, :, :] = ps_allcategory
# fill in background and false negative errors and plot
ps[ps == -1] = 0
ps[5, :, k, :, :] = (ps[4, :, k, :, :] > 0)
ps[6, :, k, :, :] = 1.0
makeplot(recThrs, ps[:, :, k], out_dir, nm['name'])
makeplot(recThrs, ps, out_dir, 'all')
def main():
parser = ArgumentParser(description='COCO Evaluation')
parser.add_argument('result', help='result file path')
parser.add_argument(
'--ann',
default='/mnt/SSD/dataset/coco/annotations/instances_minival2017.json',
help='annotation file path')
parser.add_argument(
'--types', type=str, nargs='+', default=['bbox'], help='result types')
parser.add_argument(
'--analyze', action='store_true', help='whether to analyze results')
parser.add_argument(
'--out_dir',
type=str,
default=None,
help='dir to save analyze result images')
parser.add_argument(
'--splitRng',
type=int,
default=32,
help='range to split area in evaluation')
args = parser.parse_args()
if not args.analyze:
eval_results(args.result, args.ann, args.types, splitRng=args.splitRng)
else:
assert args.out_dir is not None
analyze_results(
args.result, args.ann, args.types, out_dir=args.out_dir)
if __name__ == '__main__':
main()
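The two entry points above can also be driven directly from Python without the CLI; a hypothetical example (the module name and file paths are placeholders):

from coco_errors import eval_results, analyze_results  # module name assumed

eval_results('results.bbox.json', 'instances_val2017.json', ['bbox'], splitRng=32)
analyze_results('results.bbox.json', 'instances_val2017.json', ['bbox'],
                out_dir='./coco_error_analysis')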
import os.path as osp
import sys
sys.path.append(osp.abspath(osp.join(__file__, '../../')))
sys.path.append('/mnt/lustre/pangjiangmiao/sensenet_folder/mmcv')
import argparse
import numpy as np
import torch
import mmcv
from mmcv import Config
from mmcv.torchpack import load_checkpoint, parallel_test
from mmdet.core import _data_func, results2json
from mmdet.datasets import CocoDataset
from mmdet.datasets.data_engine import build_data
from mmdet.models import Detector
from mmcv.runner import load_checkpoint, parallel_test, obj_from_dict
from mmcv.parallel import scatter, MMDataParallel
from mmdet import datasets
from mmdet.core import results2json, coco_eval
from mmdet.datasets import collate, build_dataloader
from mmdet.models import build_detector, detectors
def single_test(model, data_loader, show=False):
model.eval()
results = []
prog_bar = mmcv.ProgressBar(len(data_loader.dataset))
for i, data in enumerate(data_loader):
with torch.no_grad():
result = model(**data, return_loss=False, rescale=not show)
results.append(result)
if show:
model.module.show_result(data, result,
data_loader.dataset.img_norm_cfg)
batch_size = data['img'][0].size(0)
for _ in range(batch_size):
prog_bar.update()
return results
def _data_func(data, device_id):
data = scatter(collate([data], samples_per_gpu=1), [device_id])[0]
return dict(**data, return_loss=False, rescale=True)
def parse_args():
parser = argparse.ArgumentParser(description='MMDet test detector')
parser.add_argument('config', help='test config file path')
parser.add_argument('checkpoint', help='checkpoint file')
parser.add_argument('--world_size', default=1, type=int)
parser.add_argument('--gpus', default=1, type=int)
parser.add_argument('--out', help='output result file')
parser.add_argument(
'--out_json', action='store_true', help='get json output file')
'--eval',
type=str,
nargs='+',
choices=['proposal', 'proposal_fast', 'bbox', 'segm', 'keypoints'],
help='eval types')
parser.add_argument('--show', action='store_true', help='show results')
args = parser.parse_args()
return args
args = parse_args()
def main():
args = parse_args()
cfg = mmcv.Config.fromfile(args.config)
cfg.model.pretrained = None
cfg.data.test.test_mode = True
def main():
cfg = Config.fromfile(args.config)
cfg.model['pretrained'] = None
# TODO this img_per_gpu
cfg.img_per_gpu == 1
if args.world_size == 1:
# TODO verify this part
args.dist = False
args.img_per_gpu = cfg.img_per_gpu
args.data_workers = cfg.data_workers
model = Detector(**cfg.model, **meta_params)
dataset = obj_from_dict(cfg.data.test, datasets, dict(test_mode=True))
if args.gpus == 1:
model = build_detector(
cfg.model, train_cfg=None, test_cfg=cfg.test_cfg)
load_checkpoint(model, args.checkpoint)
test_loader = build_data(cfg.test_dataset, args)
model = torch.nn.DataParallel(model, device_ids=0)
# TODO write single_test
outputs = single_test(test_loader, model)
model = MMDataParallel(model, device_ids=[0])
data_loader = build_dataloader(
dataset,
imgs_per_gpu=1,
workers_per_gpu=cfg.data.workers_per_gpu,
num_gpus=1,
dist=False,
shuffle=False)
outputs = single_test(model, data_loader, args.show)
else:
test_dataset = CocoDataset(**cfg.test_dataset)
model = dict(cfg.model, **cfg.meta_params)
outputs = parallel_test(Detector, model,
args.checkpoint, test_dataset, _data_func,
range(args.world_size))
model_args = cfg.model.copy()
model_args.update(train_cfg=None, test_cfg=cfg.test_cfg)
model_type = getattr(detectors, model_args.pop('type'))
outputs = parallel_test(model_type, model_args, args.checkpoint,
dataset, _data_func, range(args.gpus))
if args.out:
mmcv.dump(outputs, args.out, protocol=4)
if args.out_json:
results2json(test_dataset, outputs, args.out + '.json')
mmcv.dump(outputs, args.out)
if args.eval:
json_file = args.out + '.json'
results2json(dataset, outputs, json_file)
coco_eval(json_file, args.eval, dataset.coco)
if __name__ == '__main__':
......
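Condensing the interleaved old and new lines above, the refactored single-GPU test path boils down to the following sketch (config and checkpoint paths are placeholders, and single_test is the function defined at the top of the script):

import mmcv
from mmcv.runner import load_checkpoint, obj_from_dict
from mmcv.parallel import MMDataParallel
from mmdet import datasets
from mmdet.datasets import build_dataloader
from mmdet.models import build_detector

cfg = mmcv.Config.fromfile('configs/some_config.py')             # placeholder
cfg.model.pretrained = None
cfg.data.test.test_mode = True

dataset = obj_from_dict(cfg.data.test, datasets, dict(test_mode=True))
model = build_detector(cfg.model, train_cfg=None, test_cfg=cfg.test_cfg)
load_checkpoint(model, 'work_dirs/some_run/latest.pth')          # placeholder
model = MMDataParallel(model, device_ids=[0])

data_loader = build_dataloader(dataset, imgs_per_gpu=1,
                               workers_per_gpu=cfg.data.workers_per_gpu,
                               num_gpus=1, dist=False, shuffle=False)
outputs = single_test(model, data_loader, show=False)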
from __future__ import division
import argparse
import sys
import os.path as osp
sys.path.append(osp.abspath(osp.join(__file__, '../../')))
sys.path.append('/mnt/lustre/pangjiangmiao/sensenet_folder/mmcv')
import logging
import random
from collections import OrderedDict
import numpy as np
import torch
import torch.multiprocessing as mp
from mmcv import Config
from mmcv.torchpack import Runner
from mmdet.core import (batch_processor, init_dist, broadcast_params,
DistOptimizerStepperHook, DistSamplerSeedHook)
from mmdet.datasets.data_engine import build_data
from mmdet.models import Detector
from mmdet.nn.parallel import MMDataParallel
from mmcv.runner import Runner, obj_from_dict, DistSamplerSeedHook
from mmcv.parallel import MMDataParallel, MMDistributedDataParallel
from mmdet import datasets, __version__
from mmdet.core import (init_dist, DistOptimizerHook, CocoDistEvalRecallHook,
CocoDistEvalmAPHook)
from mmdet.datasets import build_dataloader
from mmdet.models import build_detector, RPN
def parse_losses(losses):
log_vars = OrderedDict()
for loss_name, loss_value in losses.items():
if isinstance(loss_value, torch.Tensor):
log_vars[loss_name] = loss_value.mean()
elif isinstance(loss_value, list):
log_vars[loss_name] = sum(_loss.mean() for _loss in loss_value)
else:
raise TypeError(
'{} is not a tensor or list of tensors'.format(loss_name))
loss = sum(_value for _key, _value in log_vars.items() if 'loss' in _key)
log_vars['loss'] = loss
for name in log_vars:
log_vars[name] = log_vars[name].item()
return loss, log_vars
def batch_processor(model, data, train_mode):
losses = model(**data)
loss, log_vars = parse_losses(losses)
outputs = dict(
loss=loss, log_vars=log_vars, num_samples=len(data['img'].data))
return outputs
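A worked example of what parse_losses does to a typical losses dict (key names and values are made up): every key containing 'loss' is summed into the total, everything else is only logged.

import torch

losses = dict(
    loss_rpn_cls=torch.tensor([0.8, 1.2]),                 # tensor -> mean() = 1.0
    loss_rpn_bbox=[torch.tensor(0.3), torch.tensor(0.5)],  # list -> sum of means = 0.8
    acc=torch.tensor(95.0),                                # no 'loss' in key: logged only
)
loss, log_vars = parse_losses(losses)
# loss == 1.8; log_vars == {'loss_rpn_cls': 1.0, 'loss_rpn_bbox': 0.8,
#                           'acc': 95.0, 'loss': 1.8}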
def get_logger(log_level):
logging.basicConfig(
format='%(asctime)s - %(levelname)s - %(message)s', level=log_level)
logger = logging.getLogger()
return logger
def set_random_seed(seed):
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
def parse_args():
parser = argparse.ArgumentParser(description='MMDet train val detector')
parser = argparse.ArgumentParser(description='Train a detector')
parser.add_argument('config', help='train config file path')
parser.add_argument('--validate', action='store_true', help='validate')
parser.add_argument('--work_dir', help='the dir to save logs and models')
parser.add_argument(
'--validate',
action='store_true',
help='whether to add a validate phase')
parser.add_argument(
'--dist', action='store_true', help='distributed training or not')
parser.add_argument('--world_size', default=1, type=int)
parser.add_argument('--rank', default=0, type=int)
'--gpus', type=int, default=1, help='number of gpus to use')
parser.add_argument('--seed', type=int, help='random seed')
parser.add_argument(
'--launcher',
choices=['none', 'pytorch', 'slurm', 'mpi'],
default='none',
help='job launcher')
parser.add_argument('--local_rank', type=int, default=0)
args = parser.parse_args()
return args
args = parse_args()
def main():
# Enable distributed training or not
if args.dist:
print('Enable distributed training.')
mp.set_start_method("spawn", force=True)
init_dist(
args.world_size,
args.rank,
**cfg.dist_params)
else:
print('Disabled distributed training.')
args = parse_args()
# Fetch config information
cfg = Config.fromfile(args.config)
# TODO more flexible
args.img_per_gpu = cfg.img_per_gpu
args.data_workers = cfg.data_workers
if args.work_dir is not None:
cfg.work_dir = args.work_dir
cfg.gpus = args.gpus
# save mmdet version in checkpoint as meta data
cfg.checkpoint_config.meta = dict(
mmdet_version=__version__, config=cfg.text)
logger = get_logger(cfg.log_level)
# prepare training loader
train_loader = [build_data(cfg.train_dataset, args)]
if args.validate:
val_loader = build_data(cfg.val_dataset, args)
train_loader.append(val_loader)
# set random seed if specified
if args.seed is not None:
logger.info('Set random seed to {}'.format(args.seed))
set_random_seed(args.seed)
# init distributed environment if necessary
if args.launcher == 'none':
dist = False
logger.info('Non-distributed training.')
else:
dist = True
init_dist(args.launcher, **cfg.dist_params)
if torch.distributed.get_rank() != 0:
logger.setLevel('ERROR')
logger.info('Distributed training.')
# prepare data loaders
train_dataset = obj_from_dict(cfg.data.train, datasets)
data_loaders = [
build_dataloader(train_dataset, cfg.data.imgs_per_gpu,
cfg.data.workers_per_gpu, cfg.gpus, dist)
]
# build model
model = Detector(**cfg.model, **cfg.meta_params)
if args.dist:
model = model.cuda()
broadcast_params(model)
model = build_detector(
cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg)
if dist:
model = MMDistributedDataParallel(model.cuda())
else:
device_ids = args.rank % torch.cuda.device_count()
model = MMDataParallel(model, device_ids=device_ids).cuda()
model = MMDataParallel(model, device_ids=range(cfg.gpus)).cuda()
# register hooks
# build runner
runner = Runner(model, batch_processor, cfg.optimizer, cfg.work_dir,
cfg.log_level)
optimizer_stepper = DistOptimizerStepperHook(
**cfg.grad_clip_config) if args.dist else cfg.grad_clip_config
runner.register_training_hooks(cfg.lr_policy, optimizer_stepper,
# register hooks
optimizer_config = DistOptimizerHook(
**cfg.optimizer_config) if dist else cfg.optimizer_config
runner.register_training_hooks(cfg.lr_config, optimizer_config,
cfg.checkpoint_config, cfg.log_config)
if args.dist:
if dist:
runner.register_hook(DistSamplerSeedHook())
# register eval hooks
if args.validate:
if isinstance(model.module, RPN):
runner.register_hook(CocoDistEvalRecallHook(cfg.data.val))
elif cfg.data.val.type == 'CocoDataset':
runner.register_hook(CocoDistEvalmAPHook(cfg.data.val))
if cfg.resume_from:
runner.resume(cfg.resume_from)
elif cfg.load_from:
runner.load_checkpoint(cfg.load_from)
runner.run(train_loader, cfg.workflow, cfg.max_epoch, args=args)
runner.run(data_loaders, cfg.workflow, cfg.total_epochs)
if __name__ == "__main__":
if __name__ == '__main__':
main()