"git@developer.sourcefind.cn:zhaoyu6/sglang.git" did not exist on "3f41b48c4079d0adaf17ce7adf308c6a0d947ad0"
Unverified commit ba73bcc5 authored by Kai Chen, committed by GitHub

Merge pull request #257 from open-mmlab/pytorch-1.0

Support Pytorch 1.0
parents b6561a1a e83e5d0f
......@@ -4,13 +4,13 @@
- Linux (tested on Ubuntu 16.04 and CentOS 7.2)
- Python 3.4+
- PyTorch 0.4.1
- PyTorch 1.0
- Cython
- [mmcv](https://github.com/open-mmlab/mmcv)
- [mmcv](https://github.com/open-mmlab/mmcv) >= 0.2.2
### Install mmdetection
a. Install PyTorch 0.4.1 and torchvision following the [official instructions](https://pytorch.org/).
a. Install PyTorch 1.0 and torchvision following the [official instructions](https://pytorch.org/).
b. Clone the mmdetection repository.
......
......@@ -10,11 +10,17 @@
### Software environment
- Python 3.6 / 3.7
- PyTorch 0.4.1
- PyTorch 1.0
- CUDA 9.0.176
- CUDNN 7.0.4
- NCCL 2.1.15
Note: the training time was measured with PyTorch 0.4.1. We will update the numbers later; with PyTorch 1.0 they should be about 0.02 s ~ 0.05 s faster.
## Mirror sites
We use AWS as the main site to host our model zoo, and maintain a mirror on Aliyun.
You can replace `https://s3.ap-northeast-2.amazonaws.com` with `https://open-mmlab.oss-cn-beijing.aliyuncs.com` in model urls.
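As a minimal sketch of that substitution (the checkpoint path below is made up for illustration, not an actual model-zoo entry):

```python
# Minimal sketch: switching a model-zoo URL to the Aliyun mirror is a
# plain string replacement. The checkpoint path here is hypothetical.
aws_prefix = 'https://s3.ap-northeast-2.amazonaws.com'
aliyun_prefix = 'https://open-mmlab.oss-cn-beijing.aliyuncs.com'

url = aws_prefix + '/open-mmlab/mmdetection/models/some_model.pth'
mirror_url = url.replace(aws_prefix, aliyun_prefix)
print(mirror_url)
```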
## Common settings
......
......@@ -3,6 +3,9 @@
## Introduction
The master branch works with **PyTorch 1.0**. If you would like to use PyTorch 0.4.1,
please check out the [pytorch-0.4.1](https://github.com/open-mmlab/mmdetection/tree/pytorch-0.4.1) branch.
mmdetection is an open source object detection toolbox based on PyTorch. It is
a part of the open-mmlab project developed by [Multimedia Laboratory, CUHK](http://mmlab.ie.cuhk.edu.hk/).
......@@ -36,6 +39,9 @@ This project is released under the [Apache 2.0 license](LICENSE).
## Updates
v0.6rc0 (06/02/2019)
- Migrate to PyTorch 1.0.
v0.5.7 (06/02/2019)
- Add support for Deformable ConvNet v2. (Many thanks to the authors and [@chengdazhi](https://github.com/chengdazhi))
- This is the last release based on PyTorch 0.4.1.
......
......@@ -34,13 +34,21 @@ def sigmoid_focal_loss(pred,
weight,
gamma=2.0,
alpha=0.25,
reduction='elementwise_mean'):
reduction='mean'):
pred_sigmoid = pred.sigmoid()
pt = (1 - pred_sigmoid) * target + pred_sigmoid * (1 - target)
weight = (alpha * target + (1 - alpha) * (1 - target)) * weight
weight = weight * pt.pow(gamma)
return F.binary_cross_entropy_with_logits(
pred, target, weight, reduction=reduction)
loss = F.binary_cross_entropy_with_logits(
pred, target, reduction='none') * weight
reduction_enum = F._Reduction.get_enum(reduction)
# none: 0, mean:1, sum: 2
if reduction_enum == 0:
return loss
elif reduction_enum == 1:
return loss.mean()
elif reduction_enum == 2:
return loss.sum()
def weighted_sigmoid_focal_loss(pred,
......@@ -62,22 +70,22 @@ def mask_cross_entropy(pred, target, label):
inds = torch.arange(0, num_rois, dtype=torch.long, device=pred.device)
pred_slice = pred[inds, label].squeeze(1)
return F.binary_cross_entropy_with_logits(
pred_slice, target, reduction='elementwise_mean')[None]
pred_slice, target, reduction='mean')[None]
def smooth_l1_loss(pred, target, beta=1.0, reduction='elementwise_mean'):
def smooth_l1_loss(pred, target, beta=1.0, reduction='mean'):
assert beta > 0
assert pred.size() == target.size() and target.numel() > 0
diff = torch.abs(pred - target)
loss = torch.where(diff < beta, 0.5 * diff * diff / beta,
diff - 0.5 * beta)
reduction = F._Reduction.get_enum(reduction)
# none: 0, elementwise_mean:1, sum: 2
if reduction == 0:
reduction_enum = F._Reduction.get_enum(reduction)
# none: 0, mean:1, sum: 2
if reduction_enum == 0:
return loss
elif reduction == 1:
elif reduction_enum == 1:
return loss.sum() / pred.numel()
elif reduction == 2:
elif reduction_enum == 2:
return loss.sum()
......
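The reduction changes above track PyTorch 1.0, where `reduction='elementwise_mean'` was renamed to `'mean'` and the focal weight is now applied to the element-wise loss before reducing. A self-contained sketch of that pattern, using hypothetical input tensors rather than the library's actual call sites:

```python
# Sketch of the reduction pattern used above, with hypothetical inputs;
# not the library implementation itself.
import torch
import torch.nn.functional as F

def focal_loss_sketch(pred, target, weight, gamma=2.0, alpha=0.25, reduction='mean'):
    pred_sigmoid = pred.sigmoid()
    # pt is the probability mass on the wrong class, per element.
    pt = (1 - pred_sigmoid) * target + pred_sigmoid * (1 - target)
    focal_weight = (alpha * target + (1 - alpha) * (1 - target)) * weight
    focal_weight = focal_weight * pt.pow(gamma)
    # Compute the element-wise loss first, then weight and reduce explicitly.
    loss = F.binary_cross_entropy_with_logits(pred, target, reduction='none') * focal_weight
    if reduction == 'none':
        return loss
    if reduction == 'mean':
        return loss.mean()
    return loss.sum()

pred = torch.randn(8, 4)
target = torch.randint(0, 2, (8, 4)).float()
weight = torch.ones_like(pred)
print(focal_loss_sketch(pred, target, weight))
```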
// modify from
// https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/blob/mmdetection/mmdet/ops/dcn/src/modulated_dcn_cuda.c
// based on
// author: Charles Shang
// https://github.com/torch/cunn/blob/master/lib/THCUNN/generic/SpatialConvolutionMM.cu
// modify from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/blob/mmdetection/mmdet/ops/dcn/src/modulated_dcn_cuda.c
#include <torch/torch.h>
#include <torch/extension.h>
#include <cmath>
#include <vector>
void DeformablePSROIPoolForward(const at::Tensor data,
const at::Tensor bbox,
const at::Tensor trans,
at::Tensor out,
at::Tensor top_count,
const int batch,
const int channels,
const int height,
const int width,
const int num_bbox,
const int channels_trans,
const int no_trans,
const float spatial_scale,
const int output_dim,
const int group_size,
const int pooled_size,
const int part_size,
const int sample_per_part,
const float trans_std);
void DeformablePSROIPoolForward(
const at::Tensor data, const at::Tensor bbox, const at::Tensor trans,
at::Tensor out, at::Tensor top_count, const int batch, const int channels,
const int height, const int width, const int num_bbox,
const int channels_trans, const int no_trans, const float spatial_scale,
const int output_dim, const int group_size, const int pooled_size,
const int part_size, const int sample_per_part, const float trans_std);
void DeformablePSROIPoolBackwardAcc(const at::Tensor out_grad,
const at::Tensor data,
const at::Tensor bbox,
const at::Tensor trans,
const at::Tensor top_count,
at::Tensor in_grad,
at::Tensor trans_grad,
const int batch,
const int channels,
const int height,
const int width,
const int num_bbox,
const int channels_trans,
const int no_trans,
const float spatial_scale,
const int output_dim,
const int group_size,
const int pooled_size,
const int part_size,
const int sample_per_part,
const float trans_std);
void DeformablePSROIPoolBackwardAcc(
const at::Tensor out_grad, const at::Tensor data, const at::Tensor bbox,
const at::Tensor trans, const at::Tensor top_count, at::Tensor in_grad,
at::Tensor trans_grad, const int batch, const int channels,
const int height, const int width, const int num_bbox,
const int channels_trans, const int no_trans, const float spatial_scale,
const int output_dim, const int group_size, const int pooled_size,
const int part_size, const int sample_per_part, const float trans_std);
void deform_psroi_pooling_cuda_forward(at::Tensor input, at::Tensor bbox,
at::Tensor trans,
at::Tensor out, at::Tensor top_count,
const int no_trans,
const float spatial_scale,
const int output_dim,
const int group_size,
const int pooled_size,
const int part_size,
const int sample_per_part,
const float trans_std)
{
void deform_psroi_pooling_cuda_forward(
at::Tensor input, at::Tensor bbox, at::Tensor trans, at::Tensor out,
at::Tensor top_count, const int no_trans, const float spatial_scale,
const int output_dim, const int group_size, const int pooled_size,
const int part_size, const int sample_per_part, const float trans_std) {
AT_CHECK(input.is_contiguous(), "input tensor has to be contiguous");
const int batch = input.size(0);
......@@ -75,33 +45,18 @@ void deform_psroi_pooling_cuda_forward(at::Tensor input, at::Tensor bbox,
AT_ERROR("Output shape and bbox number wont match: (%d vs %d).",
out.size(0), num_bbox);
DeformablePSROIPoolForward(input, bbox, trans, out, top_count,
batch, channels, height, width,
num_bbox,
channels_trans,
no_trans,
spatial_scale,
output_dim,
group_size,
pooled_size,
part_size,
sample_per_part,
trans_std);
DeformablePSROIPoolForward(
input, bbox, trans, out, top_count, batch, channels, height, width,
num_bbox, channels_trans, no_trans, spatial_scale, output_dim, group_size,
pooled_size, part_size, sample_per_part, trans_std);
}
void deform_psroi_pooling_cuda_backward(at::Tensor out_grad,
at::Tensor input, at::Tensor bbox,
at::Tensor trans, at::Tensor top_count,
at::Tensor input_grad, at::Tensor trans_grad,
const int no_trans,
const float spatial_scale,
const int output_dim,
const int group_size,
const int pooled_size,
const int part_size,
const int sample_per_part,
const float trans_std)
{
void deform_psroi_pooling_cuda_backward(
at::Tensor out_grad, at::Tensor input, at::Tensor bbox, at::Tensor trans,
at::Tensor top_count, at::Tensor input_grad, at::Tensor trans_grad,
const int no_trans, const float spatial_scale, const int output_dim,
const int group_size, const int pooled_size, const int part_size,
const int sample_per_part, const float trans_std) {
AT_CHECK(out_grad.is_contiguous(), "out_grad tensor has to be contiguous");
AT_CHECK(input.is_contiguous(), "input tensor has to be contiguous");
......@@ -116,29 +71,17 @@ void deform_psroi_pooling_cuda_backward(at::Tensor out_grad,
AT_ERROR("Output shape and bbox number wont match: (%d vs %d).",
out_grad.size(0), num_bbox);
DeformablePSROIPoolBackwardAcc(out_grad,
input,
bbox,
trans,
top_count,
input_grad,
trans_grad,
batch, channels, height, width, num_bbox,
channels_trans,
no_trans,
spatial_scale,
output_dim,
group_size,
pooled_size,
part_size,
sample_per_part,
trans_std);
DeformablePSROIPoolBackwardAcc(
out_grad, input, bbox, trans, top_count, input_grad, trans_grad, batch,
channels, height, width, num_bbox, channels_trans, no_trans,
spatial_scale, output_dim, group_size, pooled_size, part_size,
sample_per_part, trans_std);
}
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m)
{
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
m.def("deform_psroi_pooling_cuda_forward", &deform_psroi_pooling_cuda_forward,
"deform psroi pooling forward(CUDA)");
m.def("deform_psroi_pooling_cuda_backward", &deform_psroi_pooling_cuda_backward,
m.def("deform_psroi_pooling_cuda_backward",
&deform_psroi_pooling_cuda_backward,
"deform psroi pooling backward(CUDA)");
}
\ No newline at end of file
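The header switch from `<torch/torch.h>` to `<torch/extension.h>` above follows the PyTorch 1.0 C++ extension convention. As a hedged sketch (the source file names are assumptions, not the repository's actual build setup), such an extension can be JIT-compiled from Python:

```python
# Hedged sketch: JIT-compiling a PyTorch 1.0 C++/CUDA extension with
# torch.utils.cpp_extension.load. The source file names are assumptions.
from torch.utils.cpp_extension import load

deform_pool = load(
    name='deform_pool_cuda',
    sources=['deform_pool_cuda.cpp', 'deform_pool_cuda_kernel.cu'],
    verbose=True,
)
# The resulting module exposes the pybind11-bound entry points, e.g.
# deform_pool.deform_psroi_pooling_cuda_forward(...).
```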
......@@ -5,6 +5,8 @@
# Written by Ross Girshick
# --------------------------------------------------------
# cython: language_level=3, boundscheck=False
import numpy as np
cimport numpy as np
......
......@@ -6,6 +6,8 @@
# Modified by Kai Chen
# ----------------------------------------------------------
# cython: language_level=3, boundscheck=False
import numpy as np
cimport numpy as np
......
......@@ -5,6 +5,8 @@
# Written by Ross Girshick
# --------------------------------------------------------
# cython: language_level=3, boundscheck=False
import numpy as np
cimport numpy as np
......
from torch.autograd import Function, Variable
from torch.autograd import Function
from .. import roi_align_cuda
......@@ -49,11 +49,11 @@ class RoIAlignFunction(Function):
grad_input = grad_rois = None
if ctx.needs_input_grad[0]:
grad_input = Variable(
rois.new(batch_size, num_channels, data_height, data_width)
.zero_())
roi_align_cuda.backward(grad_output, rois, out_h, out_w,
spatial_scale, sample_num, grad_input)
grad_input = rois.new_zeros(batch_size, num_channels, data_height,
data_width)
roi_align_cuda.backward(grad_output.contiguous(), rois, out_h,
out_w, spatial_scale, sample_num,
grad_input)
return grad_input, grad_rois, None, None, None
......
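The rewrite above drops the deprecated `Variable(...)` wrapper in favour of `Tensor.new_zeros`, which allocates zeros with the dtype and device of the source tensor. A minimal, standalone illustration (shapes are arbitrary):

```python
# new_zeros allocates zeros that inherit dtype and device from the source
# tensor, replacing Variable(rois.new(...).zero_()). Shapes are arbitrary.
import torch

rois = torch.randn(4, 5)
grad_input = rois.new_zeros(2, 3, 7, 7)
print(grad_input.shape, grad_input.dtype, grad_input.device)
```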
#include <torch/torch.h>
#include <torch/extension.h>
#include <cmath>
#include <vector>
......
#include <ATen/ATen.h>
#include <THC/THCAtomics.cuh>
using namespace at; // temporary fix for pytorch<=0.4.1 (see #9848)
#define CUDA_1D_KERNEL_LOOP(i, n) \
for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \
i += blockDim.x * gridDim.x)
......@@ -144,12 +142,7 @@ int ROIAlignForwardLaucher(const at::Tensor features, const at::Tensor rois,
sample_num, channels, height, width, pooled_height,
pooled_width, top_data);
}));
cudaError_t err = cudaGetLastError();
if (cudaSuccess != err) {
fprintf(stderr, "cudaCheckError() failed : %s\n", cudaGetErrorString(err));
exit(-1);
}
THCudaCheck(cudaGetLastError());
return 1;
}
......@@ -280,8 +273,7 @@ int ROIAlignBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois,
at::Tensor bottom_grad) {
const int output_size = num_rois * pooled_height * pooled_width * channels;
// TODO: use AT_DISPATCH_FLOATING_TYPES_AND_HALF when atomicAdd is resolved
AT_DISPATCH_FLOATING_TYPES(
AT_DISPATCH_FLOATING_TYPES_AND_HALF(
top_grad.type(), "ROIAlignLaucherBackward", ([&] {
const scalar_t *top_diff = top_grad.data<scalar_t>();
const scalar_t *rois_data = rois.data<scalar_t>();
......@@ -297,11 +289,6 @@ int ROIAlignBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois,
channels, height, width, pooled_height, pooled_width,
bottom_diff);
}));
cudaError_t err = cudaGetLastError();
if (cudaSuccess != err) {
fprintf(stderr, "cudaCheckError() failed : %s\n", cudaGetErrorString(err));
exit(-1);
}
THCudaCheck(cudaGetLastError());
return 1;
}
......@@ -24,9 +24,8 @@ class RoIPoolFunction(Function):
num_channels = features.size(1)
num_rois = rois.size(0)
out_size = (num_rois, num_channels, out_h, out_w)
output = features.new_zeros(*out_size)
argmax = features.new_zeros(*out_size, dtype=torch.int)
output = features.new_zeros(out_size)
argmax = features.new_zeros(out_size, dtype=torch.int)
roi_pool_cuda.forward(features, rois, out_h, out_w, spatial_scale,
output, argmax)
ctx.spatial_scale = spatial_scale
......@@ -46,9 +45,9 @@ class RoIPoolFunction(Function):
grad_input = grad_rois = None
if ctx.needs_input_grad[0]:
grad_input = grad_output.new(feature_size).zero_()
roi_pool_cuda.backward(grad_output, rois, argmax, spatial_scale,
grad_input)
grad_input = grad_output.new_zeros(feature_size)
roi_pool_cuda.backward(grad_output.contiguous(), rois, argmax,
spatial_scale, grad_input)
return grad_input, grad_rois, None, None
......
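As with RoIAlign above, `new_zeros` here takes the size tuple directly, and `grad_output.contiguous()` guarantees the contiguous layout the CUDA kernels assert on. A small sketch with made-up shapes:

```python
# Sketch with made-up shapes: new_zeros also accepts a size tuple directly,
# and .contiguous() yields the layout the kernels check via AT_CHECK.
import torch

features = torch.randn(2, 256, 32, 32)
out_size = (8, 256, 7, 7)
output = features.new_zeros(out_size)
argmax = features.new_zeros(out_size, dtype=torch.int)

grad_output = output.transpose(2, 3)             # a non-contiguous view
print(grad_output.is_contiguous())               # False
print(grad_output.contiguous().is_contiguous())  # True
```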
#include <torch/torch.h>
#include <torch/extension.h>
#include <cmath>
#include <vector>
......
#include <ATen/ATen.h>
#include <THC/THCAtomics.cuh>
using namespace at; // temporary fix for pytorch<=0.4.1 (see #9848)
#define CUDA_1D_KERNEL_LOOP(i, n) \
for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \
i += blockDim.x * gridDim.x)
......@@ -100,11 +98,7 @@ int ROIPoolForwardLaucher(const at::Tensor features, const at::Tensor rois,
channels, height, width, pooled_h, pooled_w, top_data,
argmax_data);
}));
cudaError_t err = cudaGetLastError();
if (cudaSuccess != err) {
fprintf(stderr, "cudaCheckError() failed : %s\n", cudaGetErrorString(err));
exit(-1);
}
THCudaCheck(cudaGetLastError());
return 1;
}
......@@ -139,8 +133,7 @@ int ROIPoolBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois,
const int pooled_w, at::Tensor bottom_grad) {
const int output_size = num_rois * pooled_h * pooled_w * channels;
// TODO: use AT_DISPATCH_FLOATING_TYPES_AND_HALF when atomicAdd is resolved
AT_DISPATCH_FLOATING_TYPES(
AT_DISPATCH_FLOATING_TYPES_AND_HALF(
top_grad.type(), "ROIPoolLaucherBackward", ([&] {
const scalar_t *top_diff = top_grad.data<scalar_t>();
const scalar_t *rois_data = rois.data<scalar_t>();
......@@ -158,11 +151,6 @@ int ROIPoolBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois,
scalar_t(spatial_scale), channels, height, width, pooled_h,
pooled_w, bottom_diff);
}));
cudaError_t err = cudaGetLastError();
if (cudaSuccess != err) {
fprintf(stderr, "cudaCheckError() failed : %s\n", cudaGetErrorString(err));
exit(-1);
}
THCudaCheck(cudaGetLastError());
return 1;
}
......@@ -11,8 +11,8 @@ def readme():
MAJOR = 0
MINOR = 5
PATCH = 7
MINOR = 6
PATCH = 'rc0'
SUFFIX = ''
SHORT_VERSION = '{}.{}.{}{}'.format(MAJOR, MINOR, PATCH, SUFFIX)
......
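For reference, evaluating the version template above with the bumped fields is plain string formatting, nothing repository-specific:

```python
# What the version template above evaluates to with the bumped fields.
MAJOR, MINOR, PATCH, SUFFIX = 0, 6, 'rc0', ''
SHORT_VERSION = '{}.{}.{}{}'.format(MAJOR, MINOR, PATCH, SUFFIX)
print(SHORT_VERSION)  # -> 0.6.rc0
```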