set up the codebase skeleton (WIP)

108fc9e1 · Kai Chen · 6985ef31 · 108fc9e1 · 108fc9e1 · 108fc9e1
Commit 108fc9e1 authored Sep 02, 2018 by Kai Chen
20 changed files
--- a/mmdet/nn/parallel/scatter_gather.py
+++ b/mmdet/nn/parallel/scatter_gather.py
+import torch
+from ._functions import Scatter
+from torch.nn.parallel._functions import Scatter as OrigScatter
+from detkit.datasets.utils import DataContainer
+
+
+def scatter(inputs, target_gpus, dim=0):
+    """Scatter inputs to target gpus.
+
+    The only difference from original :func:`scatter` is to add support for
+    :type:`~mmdet.DataContainer`.
+    """
+
+    def scatter_map(obj):
+        if isinstance(obj, torch.Tensor):
+            return OrigScatter.apply(target_gpus, None, dim, obj)
+        if isinstance(obj, DataContainer) and isinstance(obj.data, list):
+            return Scatter.forward(target_gpus, obj.data)
+        if isinstance(obj, tuple) and len(obj) > 0:
+            return list(zip(*map(scatter_map, obj)))
+        if isinstance(obj, list) and len(obj) > 0:
+            return list(map(list, zip(*map(scatter_map, obj))))
+        if isinstance(obj, dict) and len(obj) > 0:
+            return list(map(type(obj), zip(*map(scatter_map, obj.items()))))
+        return [obj for targets in target_gpus]
+
+    # After scatter_map is called, a scatter_map cell will exist. This cell
+    # has a reference to the actual function scatter_map, which has references
+    # to a closure that has a reference to the scatter_map cell (because the
+    # fn is recursive). To avoid this reference cycle, we set the function to
+    # None, clearing the cell
+    try:
+        return scatter_map(inputs)
+    finally:
+        scatter_map = None
+
+
+def scatter_kwargs(inputs, kwargs, target_gpus, dim=0):
+    """Scatter with support for kwargs dictionary"""
+    inputs = scatter(inputs, target_gpus, dim) if inputs else []
+    kwargs = scatter(kwargs, target_gpus, dim) if kwargs else []
+    if len(inputs) < len(kwargs):
+        inputs.extend([() for _ in range(len(kwargs) - len(inputs))])
+    elif len(kwargs) < len(inputs):
+        kwargs.extend([{} for _ in range(len(inputs) - len(kwargs))])
+    inputs = tuple(inputs)
+    kwargs = tuple(kwargs)
+    return inputs, kwargs
--- a/mmdet/ops/__init__.py
+++ b/mmdet/ops/__init__.py
+from .nms import nms, soft_nms
+from .roi_align import RoIAlign, roi_align
+from .roi_pool import RoIPool, roi_pool
--- a/mmdet/ops/nms/.gitignore
+++ b/mmdet/ops/nms/.gitignore
+*.cpp
--- a/mmdet/ops/nms/Makefile
+++ b/mmdet/ops/nms/Makefile
+# Interpreter used to build the extension. `?=` only assigns when PYTHON is
+# not already set (environment or `make PYTHON=...`). The shell-style
+# `${PYTHON:-python}` is not make syntax and expands to an empty string.
+PYTHON ?= python
+
+.PHONY: all clean
+
+# Build the nms extension modules next to their sources.
+all:
+	echo "Compiling nms kernels..."
+	$(PYTHON) setup.py build_ext --inplace
+
+# Remove built extension modules; -f keeps this from failing when none exist.
+clean:
+	rm -f *.so
--- a/mmdet/ops/nms/__init__.py
+++ b/mmdet/ops/nms/__init__.py
+from .nms_wrapper import nms, soft_nms
--- a/mmdet/ops/nms/cpu_nms.pyx
+++ b/mmdet/ops/nms/cpu_nms.pyx
+# --------------------------------------------------------
+# Fast R-CNN
+# Copyright (c) 2015 Microsoft
+# Licensed under The MIT License [see LICENSE for details]
+# Written by Ross Girshick
+# --------------------------------------------------------
+
+import numpy as np
+cimport numpy as np
+
+cdef inline np.float32_t max(np.float32_t a, np.float32_t b):
+    # Larger of the two float32 operands; `a` is returned when a >= b.
+    if a >= b:
+        return a
+    return b
+
+cdef inline np.float32_t min(np.float32_t a, np.float32_t b):
+    # Smaller of the two float32 operands; `a` is returned when a <= b.
+    if a <= b:
+        return a
+    return b
+
+def cpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh):
+    """Greedy non-maximum suppression on the CPU.
+
+    Args:
+        dets: 2-D float32 array; columns 0-3 are read as x1, y1, x2, y2 and
+            column 4 as the score.
+        thresh: overlap threshold. A box is suppressed when its IoU with an
+            already kept, higher-ranked box is >= thresh.
+
+    Returns:
+        list of int: row indices into ``dets`` of the kept boxes, in the
+        order they were visited (descending score).
+
+    Widths and heights are computed as ``x2 - x1 + 1`` / ``y2 - y1 + 1``.
+    """
+    cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0]
+    cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1]
+    cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2]
+    cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3]
+    cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4]
+
+    cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1)
+    # argsort() produces intp indices, so the buffer is typed as intp_t; a
+    # C-long buffer (np.int_t) is rejected where long is narrower than intp.
+    cdef np.ndarray[np.intp_t, ndim=1] order = scores.argsort()[::-1]
+
+    cdef int ndets = dets.shape[0]
+    # One-byte flags with a fixed-width dtype; the former `np.int` alias no
+    # longer exists in current NumPy releases.
+    cdef np.ndarray[np.uint8_t, ndim=1] suppressed = \
+            np.zeros(ndets, dtype=np.uint8)
+
+    # nominal indices
+    cdef int _i, _j
+    # sorted indices
+    cdef int i, j
+    # temp variables for box i's (the box currently under consideration)
+    cdef np.float32_t ix1, iy1, ix2, iy2, iarea
+    # variables for computing overlap with box j (lower scoring box)
+    cdef np.float32_t xx1, yy1, xx2, yy2
+    cdef np.float32_t w, h
+    cdef np.float32_t inter, ovr
+
+    keep = []
+    for _i in range(ndets):
+        i = order[_i]
+        if suppressed[i] == 1:
+            continue
+        keep.append(i)
+        ix1 = x1[i]
+        iy1 = y1[i]
+        ix2 = x2[i]
+        iy2 = y2[i]
+        iarea = areas[i]
+        # Only boxes ranked after i can still be suppressed by it.
+        for _j in range(_i + 1, ndets):
+            j = order[_j]
+            if suppressed[j] == 1:
+                continue
+            xx1 = max(ix1, x1[j])
+            yy1 = max(iy1, y1[j])
+            xx2 = min(ix2, x2[j])
+            yy2 = min(iy2, y2[j])
+            # Clamp at zero so disjoint boxes contribute no intersection.
+            w = max(0.0, xx2 - xx1 + 1)
+            h = max(0.0, yy2 - yy1 + 1)
+            inter = w * h
+            ovr = inter / (iarea + areas[j] - inter)
+            if ovr >= thresh:
+                suppressed[j] = 1
+
+    return keep
--- a/mmdet/ops/nms/cpu_soft_nms.pyx
+++ b/mmdet/ops/nms/cpu_soft_nms.pyx
+# ----------------------------------------------------------
+# Soft-NMS: Improving Object Detection With One Line of Code
+# Copyright (c) University of Maryland, College Park
+# Licensed under The MIT License [see LICENSE for details]
+# Written by Navaneeth Bodla and Bharat Singh
+# ----------------------------------------------------------
+
+import numpy as np
+cimport numpy as np
+
+
+cdef inline np.float32_t max(np.float32_t a, np.float32_t b):
+    # Larger of the two float32 operands; `a` is returned when a >= b.
+    if a >= b:
+        return a
+    return b
+
+cdef inline np.float32_t min(np.float32_t a, np.float32_t b):
+    # Smaller of the two float32 operands; `a` is returned when a <= b.
+    if a <= b:
+        return a
+    return b
+
+def cpu_soft_nms(
+    np.ndarray[float, ndim=2] boxes_in,
+    float sigma=0.5,
+    float Nt=0.3,
+    float threshold=0.001,
+    unsigned int method=0
+):
+    """Soft-NMS: rescore overlapping boxes instead of always dropping them.
+
+    Works on a copy of ``boxes_in``; the input array is not written to.
+
+    Args:
+        boxes_in: 2-D float array; columns 0-3 are read as x1, y1, x2, y2
+            and column 4 as the score.
+        sigma: divisor in the gaussian weight ``exp(-(ov * ov) / sigma)``.
+        Nt: overlap threshold used by the linear and original-NMS methods.
+        threshold: a box whose rescored value drops below this is discarded.
+        method: 1 = linear, 2 = gaussian, any other value = original NMS.
+
+    Returns:
+        tuple: ``(boxes[:N], inds[:N])`` - the surviving (reordered and
+        rescored) rows and, for each, its row index in ``boxes_in``.
+    """
+    boxes = boxes_in.copy()
+    cdef unsigned int N = boxes.shape[0]
+    cdef float iw, ih, box_area
+    cdef float ua
+    cdef int pos = 0
+    cdef float maxscore = 0
+    cdef int maxpos = 0
+    cdef float x1, x2, y1, y2, tx1, tx2, ty1, ty2, ts, area, weight, ov
+    # inds is permuted in lock-step with boxes so original rows can be traced.
+    inds = np.arange(N)
+
+    for i in range(N):
+        maxscore = boxes[i, 4]
+        maxpos = i
+
+        # Remember row i so it can be moved into the max box's slot below.
+        tx1 = boxes[i,0]
+        ty1 = boxes[i,1]
+        tx2 = boxes[i,2]
+        ty2 = boxes[i,3]
+        ts = boxes[i,4]
+        ti = inds[i]
+
+        pos = i + 1
+        # get max box
+        while pos < N:
+            if maxscore < boxes[pos, 4]:
+                maxscore = boxes[pos, 4]
+                maxpos = pos
+            pos = pos + 1
+
+        # add max box as a detection
+        boxes[i,0] = boxes[maxpos,0]
+        boxes[i,1] = boxes[maxpos,1]
+        boxes[i,2] = boxes[maxpos,2]
+        boxes[i,3] = boxes[maxpos,3]
+        boxes[i,4] = boxes[maxpos,4]
+        inds[i] = inds[maxpos]
+
+        # swap ith box with position of max box
+        boxes[maxpos,0] = tx1
+        boxes[maxpos,1] = ty1
+        boxes[maxpos,2] = tx2
+        boxes[maxpos,3] = ty2
+        boxes[maxpos,4] = ts
+        inds[maxpos] = ti
+
+        # From here on t* describes the selected max box, now stored in row i.
+        tx1 = boxes[i,0]
+        ty1 = boxes[i,1]
+        tx2 = boxes[i,2]
+        ty2 = boxes[i,3]
+        ts = boxes[i,4]
+
+        pos = i + 1
+        # NMS iterations, note that N changes if detection boxes fall below
+        # threshold
+        while pos < N:
+            x1 = boxes[pos, 0]
+            y1 = boxes[pos, 1]
+            x2 = boxes[pos, 2]
+            y2 = boxes[pos, 3]
+            s = boxes[pos, 4]
+
+            area = (x2 - x1 + 1) * (y2 - y1 + 1)
+            iw = (min(tx2, x2) - max(tx1, x1) + 1)
+            # Scores are only touched when the boxes overlap in both axes.
+            if iw > 0:
+                ih = (min(ty2, y2) - max(ty1, y1) + 1)
+                if ih > 0:
+                    ua = float((tx2 - tx1 + 1) * (ty2 - ty1 + 1) + area - iw * ih)
+                    ov = iw * ih / ua # IoU between max box and detection box
+
+                    if method == 1: # linear
+                        if ov > Nt:
+                            weight = 1 - ov
+                        else:
+                            weight = 1
+                    elif method == 2: # gaussian
+                        weight = np.exp(-(ov * ov)/sigma)
+                    else: # original NMS
+                        if ov > Nt:
+                            weight = 0
+                        else:
+                            weight = 1
+
+                    boxes[pos, 4] = weight*boxes[pos, 4]
+
+                    # if box score falls below threshold, discard the box by
+                    # overwriting it with the last box and shrinking N; pos is
+                    # stepped back so the moved-in box is examined next
+                    if boxes[pos, 4] < threshold:
+                        boxes[pos,0] = boxes[N-1, 0]
+                        boxes[pos,1] = boxes[N-1, 1]
+                        boxes[pos,2] = boxes[N-1, 2]
+                        boxes[pos,3] = boxes[N-1, 3]
+                        boxes[pos,4] = boxes[N-1, 4]
+                        inds[pos] = inds[N-1]
+                        N = N - 1
+                        pos = pos - 1
+
+            pos = pos + 1
+
+    return boxes[:N], inds[:N]
\ No newline at end of file
--- a/mmdet/ops/nms/gpu_nms.hpp
+++ b/mmdet/ops/nms/gpu_nms.hpp
+// Entry points of the GPU NMS implementation. gpu_nms.pyx declares the same
+// two functions through `cdef extern from "gpu_nms.hpp"`.
+//
+// _nms: runs NMS on `boxes_num` boxes of `boxes_dim` floats each, read from
+// host memory at `boxes_host`, on device `device_id`. `base` is the value
+// obtained from nms_Malloc().
+// NOTE(review): judging by the caller in gpu_nms.pyx, `keep_out` needs room
+// for `boxes_num` entries and `*num_out` receives how many of them are
+// valid - confirm against nms_kernel.cu.
+void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num,
+          int boxes_dim, float nms_overlap_thresh, int device_id, size_t base);
+// nms_Malloc: returns the `base` handle that _nms expects.
+// NOTE(review): presumably a device allocation - confirm in nms_kernel.cu.
+size_t nms_Malloc();
--- a/mmdet/ops/nms/gpu_nms.pyx
+++ b/mmdet/ops/nms/gpu_nms.pyx
+# --------------------------------------------------------
+# Faster R-CNN
+# Copyright (c) 2015 Microsoft
+# Licensed under The MIT License [see LICENSE for details]
+# Written by Ross Girshick
+# --------------------------------------------------------
+
+import numpy as np
+cimport numpy as np
+
+# The extern declaration below passes np.int32_t* where the C++ prototype
+# takes int*, so both types must have the same size.
+assert sizeof(int) == sizeof(np.int32_t)
+
+# Both functions are declared nogil so gpu_nms() can call them with the GIL
+# released.
+cdef extern from "gpu_nms.hpp":
+    void _nms(np.int32_t*, int*, np.float32_t*, int, int, float, int, size_t) nogil
+    size_t nms_Malloc() nogil
+
+# device_id -> value returned by nms_Malloc(); filled lazily by gpu_nms().
+memory_pool = {}
+
+def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh,
+            np.int32_t device_id=0):
+    """Run non-maximum suppression through the external ``_nms`` routine.
+
+    Args:
+        dets: 2-D float32 array; column 4 is used as the score for sorting.
+        thresh: overlap threshold forwarded to ``_nms`` as a C float.
+        device_id: device passed to ``_nms``; also the key under which the
+            ``nms_Malloc()`` result is cached in ``memory_pool``.
+
+    Returns:
+        list: row indices into ``dets`` of the boxes reported by ``_nms``.
+        An empty list is returned when ``dets`` has no rows.
+    """
+    cdef int boxes_num = dets.shape[0]
+    cdef int boxes_dim = dets.shape[1]
+    cdef int num_out
+    cdef size_t base
+    cdef np.ndarray[np.int32_t, ndim=1] \
+        keep = np.zeros(boxes_num, dtype=np.int32)
+    cdef np.ndarray[np.float32_t, ndim=1] \
+        scores = dets[:, 4]
+    # argsort() produces intp indices, so the buffer is typed as intp_t; a
+    # C-long buffer (np.int_t) is rejected where long is narrower than intp.
+    cdef np.ndarray[np.intp_t, ndim=1] \
+        order = scores.argsort()[::-1]
+    # _nms receives the boxes already sorted by descending score.
+    cdef np.ndarray[np.float32_t, ndim=2] \
+        sorted_dets = dets[order, :]
+    cdef float cthresh = thresh
+    # With zero rows there is nothing to suppress, and taking &keep[0] or
+    # &sorted_dets[0, 0] below would index into empty buffers.
+    if boxes_num == 0:
+        return []
+    if device_id not in memory_pool:
+        with nogil:
+            base = nms_Malloc()
+        memory_pool[device_id] = base
+    base = memory_pool[device_id]
+    with nogil:
+        _nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, cthresh, device_id, base)
+    keep = keep[:num_out]
+    # keep holds positions in the sorted array; map them back to dets rows.
+    return list(order[keep])
--- a/mmdet/ops/nms/nms_kernel.cu
+++ b/mmdet/ops/nms/nms_kernel.cu
--- a/mmdet/ops/nms/nms_wrapper.py
+++ b/mmdet/ops/nms/nms_wrapper.py
--- a/mmdet/ops/nms/setup.py
+++ b/mmdet/ops/nms/setup.py
--- a/mmdet/ops/roi_align/__init__.py
+++ b/mmdet/ops/roi_align/__init__.py
+from .functions.roi_align import roi_align
+from .modules.roi_align import RoIAlign
--- a/mmdet/ops/roi_align/functions/__init__.py
+++ b/mmdet/ops/roi_align/functions/__init__.py
--- a/mmdet/ops/roi_align/functions/roi_align.py
+++ b/mmdet/ops/roi_align/functions/roi_align.py
--- a/mmdet/ops/roi_align/gradcheck.py
+++ b/mmdet/ops/roi_align/gradcheck.py
--- a/mmdet/ops/roi_align/modules/__init__.py
+++ b/mmdet/ops/roi_align/modules/__init__.py
--- a/mmdet/ops/roi_align/modules/roi_align.py
+++ b/mmdet/ops/roi_align/modules/roi_align.py
+from torch.nn.modules.module import Module
+from ..functions.roi_align import RoIAlignFunction
+
+
+class RoIAlign(Module):
+
+    def __init__(self, out_size, spatial_scale, sample_num=0):
+        super(RoIAlign, self).__init__()
+
+        self.out_size = out_size
+        self.spatial_scale = float(spatial_scale)
+        self.sample_num = int(sample_num)
+
+    def forward(self, features, rois):
+        return RoIAlignFunction.apply(features, rois, self.out_size,
+                                      self.spatial_scale, self.sample_num)
--- a/mmdet/ops/roi_align/setup.py
+++ b/mmdet/ops/roi_align/setup.py
--- a/mmdet/ops/roi_align/src/roi_align_cuda.cpp
+++ b/mmdet/ops/roi_align/src/roi_align_cuda.cpp