Unverified commit ce461dae authored by Hang Zhang, committed by GitHub

V1.0.0 (#156)

* v1.0
parent c2cb2aab
#include <torch/tensor.h>
#include <torch/extension.h>
#include <ATen/ATen.h>
#include <ATen/NativeFunctions.h>
......
#include <torch/torch.h>
// CPU declarations
at::Tensor ROIAlignForwardCPU(
const at::Tensor& input,
const at::Tensor& bottom_rois,
int64_t pooled_height,
int64_t pooled_width,
double spatial_scale,
int64_t sampling_ratio);
at::Tensor ROIAlignBackwardCPU(
const at::Tensor& bottom_rois,
const at::Tensor& grad_output, // gradient of the output of the layer
int64_t b_size,
int64_t channels,
int64_t height,
int64_t width,
int64_t pooled_height,
int64_t pooled_width,
double spatial_scale,
int64_t sampling_ratio);
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
m.def("roi_align_forward", &ROIAlignForwardCPU, "ROI Align forward (CPU)");
m.def("roi_align_backward", &ROIAlignBackwardCPU, "ROI Align backward (CPU)");
}
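For orientation, a minimal sketch of how these CPU bindings can be reached from Python once the extension is compiled. The module name enclib_cpu, the source file names, and the RoI layout (batch_index, x1, y1, x2, y2) are assumptions for illustration, not taken from this diff.

```python
# Hypothetical usage of the roi_align_forward binding declared above.
import torch
from torch.utils.cpp_extension import load

# JIT-compile the extension; file names here are placeholders.
enclib_cpu = load(name='enclib_cpu',
                  sources=['operator.cpp', 'roi_align_cpu.cpp'])

features = torch.randn(2, 256, 64, 64)         # (N, C, H, W) feature map
rois = torch.tensor([[0., 4., 4., 60., 60.]])  # assumed (batch_idx, x1, y1, x2, y2)
out = enclib_cpu.roi_align_forward(
    features, rois,
    7, 7,        # pooled_height, pooled_width
    1.0 / 16.0,  # spatial_scale
    2)           # sampling_ratio
print(out.shape)  # expected: (num_rois, C, 7, 7)
```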
#include <torch/tensor.h>
#include <torch/extension.h>
#include <ATen/ATen.h>
//#include <omp.h>
......
#include <torch/tensor.h>
#include <torch/extension.h>
#include <ATen/ATen.h>
#include <vector>
......
#include <vector>
#include <torch/extension.h>
#include <ATen/ATen.h>
// #include <ATen/cuda/CUDAContext.h>
#include <cuda_runtime_api.h>
#include <thrust/device_ptr.h>
#include <thrust/transform.h>
namespace {
template<typename T>
inline void leaky_relu_backward_impl(T *z, T *dz, float slope, int64_t count) {
// Create thrust pointers
thrust::device_ptr<T> th_z = thrust::device_pointer_cast(z);
thrust::device_ptr<T> th_dz = thrust::device_pointer_cast(dz);
thrust::transform_if(th_dz, th_dz + count, th_z, th_dz,
[slope] __device__ (const T& dz) { return dz * slope; },
[] __device__ (const T& z) { return z < 0; });
thrust::transform_if(th_z, th_z + count, th_z,
[slope] __device__ (const T& z) { return z / slope; },
[] __device__ (const T& z) { return z < 0; });
}
}
void LeakyRelu_Forward_CUDA(at::Tensor z, float slope) {
at::leaky_relu_(z, slope);
}
void LeakyRelu_Backward_CUDA(at::Tensor z, at::Tensor dz, float slope) {
int64_t count = z.numel();
AT_DISPATCH_FLOATING_TYPES(z.type(), "LeakyRelu_Backward_CUDA", ([&] {
leaky_relu_backward_impl<scalar_t>(z.data<scalar_t>(), dz.data<scalar_t>(), slope, count);
}));
/*
// unstable after scaling
at::leaky_relu_(z, 1.0 / slope);
at::leaky_relu_backward(dz, z, slope);
*/
}
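The backward runs on the saved output in place: where the output is negative, the incoming gradient is multiplied by the slope, and the output is divided by the slope to recover the input. The commented-out variant notes that routing this through at::leaky_relu_ with a 1/slope rescale was numerically unstable. Below is a minimal PyTorch sketch of the same arithmetic, checked against autograd (an illustration, not the extension code):

```python
import torch
import torch.nn.functional as F

def leaky_relu_backward_inplace(z, dz, slope):
    """z is the output of an in-place leaky ReLU; dz is the gradient w.r.t. z.
    Afterwards dz holds the gradient w.r.t. the input, and z holds the input."""
    neg = z < 0          # output < 0 iff input < 0 (for slope > 0)
    dz[neg] *= slope     # chain rule through y = slope * x on the negative side
    z[neg] /= slope      # invert the forward to recover the input
    return z, dz

x = torch.randn(5, requires_grad=True)
y = F.leaky_relu(x, 0.1)
g = torch.randn_like(y)
(y * g).sum().backward()                 # reference gradient from autograd

z, dz = y.detach().clone(), g.clone()
leaky_relu_backward_inplace(z, dz, 0.1)
assert torch.allclose(dz, x.grad) and torch.allclose(z, x.detach())
```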
#include <vector>
#include <torch/tensor.h>
#include <torch/extension.h>
#include <ATen/ATen.h>
#include <ATen/cuda/CUDAContext.h>
......
#include <vector>
#include <torch/tensor.h>
#include <torch/extension.h>
#include <ATen/ATen.h>
#include <ATen/Functions.h>
#include <ATen/cuda/CUDAContext.h>
......
#include <torch/tensor.h>
#include <torch/extension.h>
#include <ATen/ATen.h>
#include "ATen/NativeFunctions.h"
#include <ATen/cuda/CUDAContext.h>
......
@@ -9,9 +9,13 @@ PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
m.def("scaled_l2_forward", &ScaledL2_Forward_CUDA, "ScaledL2 forward (CUDA)");
m.def("scaled_l2_backward", &ScaledL2_Backward_CUDA, "ScaledL2 backward (CUDA)");
m.def("batchnorm_forward", &BatchNorm_Forward_CUDA, "BatchNorm forward (CUDA)");
m.def("batchnorm_inp_forward", &BatchNorm_Forward_Inp_CUDA, "BatchNorm forward (CUDA)");
m.def("batchnorm_backward", &BatchNorm_Backward_CUDA, "BatchNorm backward (CUDA)");
m.def("sumsquare_forward", &Sum_Square_Forward_CUDA, "SumSqu forward (CUDA)");
m.def("sumsquare_backward", &Sum_Square_Backward_CUDA, "SumSqu backward (CUDA)");
m.def("batchnorm_inp_backward", &BatchNorm_Inp_Backward_CUDA, "BatchNorm backward (CUDA)");
m.def("expectation_forward", &Expectation_Forward_CUDA, "Expectation forward (CUDA)");
m.def("expectation_backward", &Expectation_Backward_CUDA, "Expectation backward (CUDA)");
m.def("expectation_inp_backward", &Expectation_Inp_Backward_CUDA,
"Inplace Expectation backward (CUDA)");
m.def("encoding_dist_forward", &Encoding_Dist_Forward_CUDA, "EncDist forward (CUDA)");
m.def("encoding_dist_backward", &Encoding_Dist_Backward_CUDA, "Assign backward (CUDA)");
m.def("encoding_dist_inference_forward", &Encoding_Dist_Inference_Forward_CUDA,
@@ -20,4 +24,6 @@ PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
"Assign Inference backward (CUDA)");
m.def("aggregatev2_forward", &AggregateV2_Forward_CUDA, "AggregateV2 forward (CUDA)");
m.def("aggregatev2_backward", &AggregateV2_Backward_CUDA, "AggregateV2 backward (CUDA)");
m.def("leaky_relu_forward", &LeakyRelu_Forward_CUDA, "Learky ReLU forward (CUDA)");
m.def("leaky_relu_backward", &LeakyRelu_Backward_CUDA, "Learky ReLU backward (CUDA)");
}
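For context, a hedged sketch of the Python surface these GPU bindings expose; the import name enclib_gpu matches the setup.py fragment later in this diff, but the call sequence and shapes are illustrative assumptions (a CUDA build is required):

```python
# Hypothetical call sequence through the GPU bindings registered above.
import torch
import enclib_gpu

x = torch.randn(4, 3, 10, device='cuda')     # (N, C, L) layout used by the kernels
gamma = torch.ones(3, device='cuda')
beta = torch.zeros(3, device='cuda')

ex, exs = enclib_gpu.expectation_forward(x)  # per-channel E[x] and E[x^2]
y = enclib_gpu.batchnorm_forward(x, ex, exs, gamma, beta, 1e-5)
```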
#include <torch/torch.h>
#include <torch/extension.h>
#include <vector>
at::Tensor ROIAlign_Forward_CUDA(
@@ -54,24 +54,53 @@ at::Tensor BatchNorm_Forward_CUDA(
const at::Tensor mean_,
const at::Tensor std_,
const at::Tensor gamma_,
const at::Tensor beta_);
const at::Tensor beta_,
float eps);
at::Tensor BatchNorm_Forward_Inp_CUDA(
const at::Tensor input_,
const at::Tensor ex_,
const at::Tensor exs_,
const at::Tensor gamma_,
const at::Tensor beta_,
float eps);
std::vector<at::Tensor> BatchNorm_Backward_CUDA(
const at::Tensor gradoutput_,
const at::Tensor input_,
const at::Tensor mean_,
const at::Tensor std_,
const at::Tensor ex_,
const at::Tensor exs_,
const at::Tensor gamma_,
const at::Tensor beta_,
bool train);
float eps);
std::vector<at::Tensor> Sum_Square_Forward_CUDA(
std::vector<at::Tensor> BatchNorm_Inp_Backward_CUDA(
const at::Tensor gradoutput_,
const at::Tensor output_,
const at::Tensor ex_,
const at::Tensor exs_,
const at::Tensor gamma_,
const at::Tensor beta_,
float eps);
std::vector<at::Tensor> Expectation_Forward_CUDA(
const at::Tensor input_);
at::Tensor Sum_Square_Backward_CUDA(
at::Tensor Expectation_Backward_CUDA(
const at::Tensor input_,
const at::Tensor gradSum_,
const at::Tensor gradSquare_);
const at::Tensor gradEx_,
const at::Tensor gradExs_);
at::Tensor Expectation_Inp_Backward_CUDA(
const at::Tensor gradInput_,
const at::Tensor output_,
const at::Tensor gradEx_,
const at::Tensor gradExs_,
const at::Tensor ex_,
const at::Tensor exs_,
const at::Tensor gamma_,
const at::Tensor beta_,
float eps);
at::Tensor Encoding_Dist_Inference_Forward_CUDA(
const at::Tensor X_,
@@ -111,3 +140,7 @@ std::vector<at::Tensor> AggregateV2_Backward_CUDA(
const at::Tensor X_,
const at::Tensor C_,
const at::Tensor STD_);
void LeakyRelu_Forward_CUDA(at::Tensor z, float slope);
void LeakyRelu_Backward_CUDA(at::Tensor z, at::Tensor dz, float slope);
#include <torch/tensor.h>
#include <torch/extension.h>
#include <ATen/ATen.h>
#include <ATen/cuda/CUDAContext.h>
......
@@ -6,6 +6,7 @@ setup(
ext_modules=[
CUDAExtension('enclib_gpu', [
'operator.cpp',
'activation_kernel.cu',
'encoding_kernel.cu',
'encodingv2_kernel.cu',
'syncbn_kernel.cu',
......
#include <vector>
#include <torch/tensor.h>
#include <torch/extension.h>
#include <ATen/ATen.h>
#include <ATen/cuda/CUDAContext.h>
@@ -11,14 +11,14 @@ namespace {
template <typename DType, typename Acctype, typename DeviceTensor3>
struct GradOp {
__device__ GradOp(Acctype m, const DeviceTensor3 i, const DeviceTensor3 g)
: mean(m), input(i), gradOutput(g) {}
: beta(m), output(i), gradOutput(g) {}
__device__ __forceinline__ Float2<DType, Acctype> operator()(int batch, int plane, int n) {
DType g = gradOutput[batch][plane][n];
DType c = ScalarConvert<Acctype, DType>::to(input[batch][plane][n] - mean);
DType c = ScalarConvert<Acctype, DType>::to(output[batch][plane][n] - beta);
return Float2<DType, Acctype>(g, g * c);
}
const Acctype mean;
const DeviceTensor3 input;
const Acctype beta;
const DeviceTensor3 output;
const DeviceTensor3 gradOutput;
};
@@ -88,6 +88,72 @@ __global__ void BatchNorm_Forward_kernel (
}
}
template <typename DType>
__global__ void BatchNorm_Forward_Inp_kernel (
DeviceTensor<DType, 3> input,
DeviceTensor<DType, 1> mean,
DeviceTensor<DType, 1> std,
DeviceTensor<DType, 1> gamma,
DeviceTensor<DType, 1> beta) {
int c = blockIdx.x;
/* main operation */
for (int b = 0; b < input.getSize(0); ++b) {
for (int x = threadIdx.x; x < input.getSize(2); x += blockDim.x) {
DType inp = input[b][c][x];
input[b][c][x] = gamma[c] * (inp - mean[c]) /
std[c] + beta[c];
}
}
}
template <typename DType>
__global__ void BatchNorm_Backward_Inp_kernel (
DeviceTensor<DType, 3> gradoutput,
DeviceTensor<DType, 3> output,
DeviceTensor<DType, 3> gradinput,
DeviceTensor<DType, 1> gradgamma,
DeviceTensor<DType, 1> gradbeta,
DeviceTensor<DType, 1> mean,
DeviceTensor<DType, 1> std,
DeviceTensor<DType, 1> gamma,
DeviceTensor<DType, 1> beta,
DeviceTensor<DType, 1> gradEx,
DeviceTensor<DType, 1> gradExs) {
/* declarations of the variables */
/* Get the index and channels */
int c = blockIdx.x;
/* main operation */
GradOp<DType, DType, DeviceTensor<DType, 3>> g(beta[c], output, gradoutput);
Float2<DType, DType> res = reduce<Float2<DType, DType>,
GradOp<DType, DType, DeviceTensor<DType, 3>>,
DeviceTensor<DType, 3>>(g, gradoutput, c);
DType gradOutputSum = res.v1;
DType dotP = res.v2;
DType invstd = DType(1.0) / std[c];
DType gradScale = invstd * gamma[c];
if (threadIdx.x == 0) {
gradEx[c] = - gradOutputSum * gradScale + mean[c] * invstd * invstd * dotP;
gradExs[c] = - 0.5 * invstd * invstd * dotP;
}
if (gradinput.numElements() > 0) {
for (int batch = 0; batch < gradoutput.getSize(0); ++batch) {
for (int x = threadIdx.x; x < gradoutput.getSize(2); x += blockDim.x) {
gradinput[batch][c][x] = gradoutput[batch][c][x] * gradScale;
}
}
}
if (gradgamma.numElements() > 0) {
if (threadIdx.x == 0) {
gradgamma[c] += dotP / gamma[c];
}
}
if (gradbeta.numElements() > 0) {
if (threadIdx.x == 0) {
gradbeta[c] += gradOutputSum;
}
}
}
template <typename DType>
__global__ void BatchNorm_Backward_kernel (
DeviceTensor<DType, 3> gradoutput,
@@ -99,9 +165,8 @@ __global__ void BatchNorm_Backward_kernel (
DeviceTensor<DType, 1> std,
DeviceTensor<DType, 1> gamma,
DeviceTensor<DType, 1> beta,
DeviceTensor<DType, 1> gradMean,
DeviceTensor<DType, 1> gradStd,
bool train) {
DeviceTensor<DType, 1> gradEx,
DeviceTensor<DType, 1> gradExs) {
/* declarations of the variables */
/* Get the index and channels */
int c = blockIdx.x;
@@ -114,9 +179,9 @@ __global__ void BatchNorm_Backward_kernel (
DType dotP = res.v2;
DType invstd = DType(1.0) / std[c];
DType gradScale = invstd * gamma[c];
if (train && threadIdx.x == 0) {
gradMean[c] = - gradOutputSum * gamma[c] * invstd;
gradStd[c] = - dotP * gamma[c] * invstd * invstd;
if (threadIdx.x == 0) {
gradEx[c] = - gradOutputSum * gradScale + mean[c] * invstd * invstd * dotP * gradScale;
gradExs[c] = - 0.5 * invstd * invstd * dotP * gradScale;
}
if (gradinput.numElements() > 0) {
for (int batch = 0; batch < gradoutput.getSize(0); ++batch) {
@@ -139,10 +204,11 @@ __global__ void BatchNorm_Backward_kernel (
template <typename DType>
__global__ void Sum_Square_Forward_kernel (
__global__ void Expectation_Forward_kernel (
DeviceTensor<DType, 3> input,
DeviceTensor<DType, 1> sum,
DeviceTensor<DType, 1> square) {
DeviceTensor<DType, 1> ex,
DeviceTensor<DType, 1> exs,
DType norm) {
int c = blockIdx.x;
/* main operation */
SumOp<DType, DType> g(input);
@@ -151,37 +217,60 @@ __global__ void Sum_Square_Forward_kernel (
DType xsum = res.v1;
DType xsquare = res.v2;
if (threadIdx.x == 0) {
sum[c] = xsum;
square[c] = xsquare;
ex[c] = xsum * norm;
exs[c] = xsquare * norm;
}
}
template <typename DType>
__global__ void Sum_Square_Backward_kernel (
__global__ void Expectation_Backward_kernel (
DeviceTensor<DType, 3> gradInput,
DeviceTensor<DType, 3> input,
DeviceTensor<DType, 1> gradSum,
DeviceTensor<DType, 1> gradSquare) {
DeviceTensor<DType, 1> gradEx,
DeviceTensor<DType, 1> gradExs,
DType norm) {
int c = blockIdx.x;
/* main operation */
for (int batch = 0; batch < gradInput.getSize(0); ++batch) {
for (int x = threadIdx.x; x < gradInput.getSize(2); x += blockDim.x)
{
gradInput[batch][c][x] = gradSum[c] + 2 * gradSquare[c] *
input[batch][c][x];
for (int x = threadIdx.x; x < gradInput.getSize(2); x += blockDim.x) {
gradInput[batch][c][x] = gradEx[c] * norm + 2 * gradExs[c] *
input[batch][c][x] * norm;
}
}
}
} // namespace
template <typename DType>
__global__ void Expectation_Backward_Inp_kernel (
DeviceTensor<DType, 3> gradInput,
DeviceTensor<DType, 3> output,
DeviceTensor<DType, 1> gradEx,
DeviceTensor<DType, 1> gradExs,
DeviceTensor<DType, 1> mean,
DeviceTensor<DType, 1> std,
DeviceTensor<DType, 1> gamma,
DeviceTensor<DType, 1> beta,
DType norm) {
int c = blockIdx.x;
/* main operation */
for (int batch = 0; batch < gradInput.getSize(0); ++batch) {
for (int x = threadIdx.x; x < gradInput.getSize(2); x += blockDim.x) {
gradInput[batch][c][x] += gradEx[c] * norm + 2 * gradExs[c] *
((output[batch][c][x] - beta[c]) / gamma[c] * std[c] + mean[c]) * norm;
}
}
}
} // namespace
at::Tensor BatchNorm_Forward_CUDA(
const at::Tensor input_,
const at::Tensor mean_,
const at::Tensor std_,
const at::Tensor ex_,
const at::Tensor exs_,
const at::Tensor gamma_,
const at::Tensor beta_) {
const at::Tensor beta_,
float eps) {
auto output_ = at::zeros_like(input_);
auto std_ = (exs_ - ex_ * ex_ + eps).sqrt();
cudaStream_t stream = at::cuda::getCurrentCUDAStream();
dim3 blocks(input_.size(1));
dim3 threads(getNumThreads(input_.size(2)));
@@ -189,85 +278,157 @@ at::Tensor BatchNorm_Forward_CUDA(
/* Device tensors */
DeviceTensor<scalar_t, 3> output = devicetensor<scalar_t, 3>(output_);
DeviceTensor<scalar_t, 3> input = devicetensor<scalar_t, 3>(input_);
DeviceTensor<scalar_t, 1> mean = devicetensor<scalar_t, 1>(mean_);
DeviceTensor<scalar_t, 1> ex = devicetensor<scalar_t, 1>(ex_);
DeviceTensor<scalar_t, 1> std = devicetensor<scalar_t, 1>(std_);
DeviceTensor<scalar_t, 1> gamma = devicetensor<scalar_t, 1>(gamma_);
DeviceTensor<scalar_t, 1> beta = devicetensor<scalar_t, 1>(beta_);
/* kernel function */
BatchNorm_Forward_kernel<scalar_t><<<blocks, threads, 0, stream>>>(
output, input, mean, std, gamma, beta);
output, input, ex, std, gamma, beta);
}));
AT_ASSERT(cudaGetLastError() == cudaSuccess);
return output_;
}
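The forward no longer receives a precomputed std; it takes the running expectations ex = E[x] and exs = E[x^2] and derives std = sqrt(exs - ex^2 + eps) on the host. A sketch checking that expression against torch.nn.functional.batch_norm with biased variance (shapes are illustrative, not from this diff):

```python
import torch
import torch.nn.functional as F

N, C, L, eps = 4, 3, 10, 1e-5
x = torch.randn(N, C, L)
gamma, beta = torch.randn(C), torch.randn(C)

ex = x.mean(dim=(0, 2))                 # E[x] per channel
exs = (x * x).mean(dim=(0, 2))          # E[x^2] per channel
std = (exs - ex * ex + eps).sqrt()      # same expression as the host code

def bc(v):                              # broadcast (C,) over (N, C, L)
    return v.view(1, C, 1)

y_kernel = bc(gamma) * (x - bc(ex)) / bc(std) + bc(beta)
y_ref = F.batch_norm(x, ex, exs - ex * ex, gamma, beta,
                     training=False, eps=eps)
assert torch.allclose(y_kernel, y_ref, atol=1e-5)
```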
at::Tensor BatchNorm_Forward_Inp_CUDA(
const at::Tensor input_,
const at::Tensor ex_,
const at::Tensor exs_,
const at::Tensor gamma_,
const at::Tensor beta_,
float eps) {
auto std_ = (exs_ - ex_ * ex_ + eps).sqrt();
cudaStream_t stream = at::cuda::getCurrentCUDAStream();
dim3 blocks(input_.size(1));
dim3 threads(getNumThreads(input_.size(2)));
AT_DISPATCH_FLOATING_TYPES(input_.type(), "BatchNorm_Forward_CUDA", ([&] {
/* Device tensors */
DeviceTensor<scalar_t, 3> input = devicetensor<scalar_t, 3>(input_);
DeviceTensor<scalar_t, 1> ex = devicetensor<scalar_t, 1>(ex_);
DeviceTensor<scalar_t, 1> std = devicetensor<scalar_t, 1>(std_);
DeviceTensor<scalar_t, 1> gamma = devicetensor<scalar_t, 1>(gamma_);
DeviceTensor<scalar_t, 1> beta = devicetensor<scalar_t, 1>(beta_);
/* kernel function */
BatchNorm_Forward_Inp_kernel<scalar_t><<<blocks, threads, 0, stream>>>(
input, ex, std, gamma, beta);
}));
AT_ASSERT(cudaGetLastError() == cudaSuccess);
return input_;
}
std::vector<at::Tensor> BatchNorm_Inp_Backward_CUDA(
const at::Tensor gradoutput_,
const at::Tensor output_,
const at::Tensor ex_,
const at::Tensor exs_,
const at::Tensor gamma_,
const at::Tensor beta_,
float eps) {
/* outputs*/
auto std_ = (exs_ - ex_ * ex_ + eps).sqrt();
auto gradinput_ = at::zeros_like(output_);
auto gradgamma_ = at::zeros_like(gamma_);
auto gradbeta_ = at::zeros_like(beta_);
auto gradEx_ = at::zeros_like(ex_);
auto gradExs_ = at::zeros_like(std_);
/* cuda utils*/
cudaStream_t stream = at::cuda::getCurrentCUDAStream();
dim3 blocks(output_.size(1));
dim3 threads(getNumThreads(output_.size(2)));
AT_DISPATCH_FLOATING_TYPES(output_.type(), "BatchNorm_Inp_Backward_CUDA", ([&] {
/* Device tensors */
DeviceTensor<scalar_t, 3> gradoutput = devicetensor<scalar_t, 3>(gradoutput_);
DeviceTensor<scalar_t, 3> output = devicetensor<scalar_t, 3>(output_);
DeviceTensor<scalar_t, 3> gradinput = devicetensor<scalar_t, 3>(gradinput_);
DeviceTensor<scalar_t, 1> gradgamma = devicetensor<scalar_t, 1>(gradgamma_);
DeviceTensor<scalar_t, 1> gradbeta = devicetensor<scalar_t, 1>(gradbeta_);
DeviceTensor<scalar_t, 1> ex = devicetensor<scalar_t, 1>(ex_);
DeviceTensor<scalar_t, 1> std = devicetensor<scalar_t, 1>(std_);
DeviceTensor<scalar_t, 1> gamma = devicetensor<scalar_t, 1>(gamma_);
DeviceTensor<scalar_t, 1> beta = devicetensor<scalar_t, 1>(beta_);
DeviceTensor<scalar_t, 1> gradEx = devicetensor<scalar_t, 1>(gradEx_);
DeviceTensor<scalar_t, 1> gradExs = devicetensor<scalar_t, 1>(gradExs_);
/* kernel function */
BatchNorm_Backward_Inp_kernel<scalar_t>
<<<blocks, threads, 0, stream>>>(
gradoutput, output, gradinput, gradgamma, gradbeta, ex, std,
gamma, beta, gradEx, gradExs);
}));
AT_ASSERT(cudaGetLastError() == cudaSuccess);
return {gradinput_, gradEx_, gradExs_, gradgamma_, gradbeta_};
}
std::vector<at::Tensor> BatchNorm_Backward_CUDA(
const at::Tensor gradoutput_,
const at::Tensor input_,
const at::Tensor mean_,
const at::Tensor std_,
const at::Tensor ex_,
const at::Tensor exs_,
const at::Tensor gamma_,
const at::Tensor beta_,
bool train) {
float eps) {
/* outputs*/
at::Tensor gradinput_ = at::zeros_like(input_);
at::Tensor gradgamma_ = at::zeros_like(gamma_);
at::Tensor gradbeta_ = at::zeros_like(beta_);
at::Tensor gradMean_ = at::zeros_like(mean_);
at::Tensor gradStd_ = at::zeros_like(std_);
auto std_ = (exs_ - ex_ * ex_ + eps).sqrt();
auto gradinput_ = at::zeros_like(input_);
auto gradgamma_ = at::zeros_like(gamma_);
auto gradbeta_ = at::zeros_like(beta_);
auto gradEx_ = at::zeros_like(ex_);
auto gradExs_ = at::zeros_like(std_);
/* cuda utils*/
cudaStream_t stream = at::cuda::getCurrentCUDAStream();
dim3 blocks(input_.size(1));
dim3 threads(getNumThreads(input_.size(2)));
AT_DISPATCH_FLOATING_TYPES(input_.type(), "BatchNorm_Backward_CUDA", ([&] {
AT_DISPATCH_FLOATING_TYPES(input_.type(), "BatchNorm_Inp_Backward_CUDA", ([&] {
/* Device tensors */
DeviceTensor<scalar_t, 3> gradoutput = devicetensor<scalar_t, 3>(gradoutput_);
DeviceTensor<scalar_t, 3> input = devicetensor<scalar_t, 3>(input_);
DeviceTensor<scalar_t, 3> gradinput = devicetensor<scalar_t, 3>(gradinput_);
DeviceTensor<scalar_t, 1> gradgamma = devicetensor<scalar_t, 1>(gradgamma_);
DeviceTensor<scalar_t, 1> gradbeta = devicetensor<scalar_t, 1>(gradbeta_);
DeviceTensor<scalar_t, 1> mean = devicetensor<scalar_t, 1>(mean_);
DeviceTensor<scalar_t, 1> ex = devicetensor<scalar_t, 1>(ex_);
DeviceTensor<scalar_t, 1> std = devicetensor<scalar_t, 1>(std_);
DeviceTensor<scalar_t, 1> gamma = devicetensor<scalar_t, 1>(gamma_);
DeviceTensor<scalar_t, 1> beta = devicetensor<scalar_t, 1>(beta_);
DeviceTensor<scalar_t, 1> gradMean = devicetensor<scalar_t, 1>(gradMean_);
DeviceTensor<scalar_t, 1> gradStd = devicetensor<scalar_t, 1>(gradStd_);
DeviceTensor<scalar_t, 1> gradEx = devicetensor<scalar_t, 1>(gradEx_);
DeviceTensor<scalar_t, 1> gradExs = devicetensor<scalar_t, 1>(gradExs_);
/* kernel function */
BatchNorm_Backward_kernel<scalar_t>
<<<blocks, threads, 0, stream>>>(
gradoutput, input, gradinput, gradgamma, gradbeta, mean, std,
gamma, beta, gradMean, gradStd, train);
gradoutput, input, gradinput, gradgamma, gradbeta, ex, std,
gamma, beta, gradEx, gradExs);
}));
AT_ASSERT(cudaGetLastError() == cudaSuccess);
return {gradinput_, gradMean_, gradStd_, gradgamma_, gradbeta_};
return {gradinput_, gradEx_, gradExs_, gradgamma_, gradbeta_};
}
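The gradEx/gradExs expressions in the kernel follow from differentiating y = gamma * (x - ex) / std + beta with std = sqrt(exs - ex^2 + eps). A hedged autograd check of the closed form, using the kernel's names (gradScale = gamma / std, dotP = sum of g * (x - ex)); shapes are illustrative:

```python
import torch

N, C, L, eps = 4, 3, 10, 1e-5
x = torch.randn(N, C, L)
g = torch.randn(N, C, L)                      # upstream gradient
gamma, beta = torch.randn(C), torch.randn(C)

ex = x.mean(dim=(0, 2)).requires_grad_()
exs = (x * x).mean(dim=(0, 2)).requires_grad_()
std = (exs - ex * ex + eps).sqrt()
y = gamma.view(1, C, 1) * (x - ex.view(1, C, 1)) / std.view(1, C, 1) \
    + beta.view(1, C, 1)
y.backward(g)

invstd = 1.0 / std.detach()
grad_scale = invstd * gamma
grad_sum = g.sum(dim=(0, 2))                                   # gradOutputSum
dot_p = (g * (x - ex.detach().view(1, C, 1))).sum(dim=(0, 2))  # dotP

grad_ex = -grad_sum * grad_scale + ex.detach() * invstd ** 2 * dot_p * grad_scale
grad_exs = -0.5 * invstd ** 2 * dot_p * grad_scale
assert torch.allclose(ex.grad, grad_ex, atol=1e-4)
assert torch.allclose(exs.grad, grad_exs, atol=1e-4)
```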
std::vector<at::Tensor> Sum_Square_Forward_CUDA(
std::vector<at::Tensor> Expectation_Forward_CUDA(
const at::Tensor input_) {
/* outputs */
at::Tensor sum_ = torch::zeros({input_.size(1)}, input_.options());
at::Tensor square_ = torch::zeros({input_.size(1)}, input_.options());
auto ex_ = torch::zeros({input_.size(1)}, input_.options());
auto exs_ = torch::zeros({input_.size(1)}, input_.options());
/* cuda utils*/
cudaStream_t stream = at::cuda::getCurrentCUDAStream();
dim3 blocks(input_.size(1));
dim3 threads(getNumThreads(input_.size(2)));
AT_DISPATCH_FLOATING_TYPES(input_.type(), "SumSquare_forward_CUDA", ([&] {
scalar_t norm = scalar_t(1) / (input_.size(0) * input_.size(2));
/* Device tensors */
DeviceTensor<scalar_t, 3> input = devicetensor<scalar_t, 3>(input_);
DeviceTensor<scalar_t, 1> sum = devicetensor<scalar_t, 1>(sum_);
DeviceTensor<scalar_t, 1> square = devicetensor<scalar_t, 1>(square_);
DeviceTensor<scalar_t, 1> ex = devicetensor<scalar_t, 1>(ex_);
DeviceTensor<scalar_t, 1> exs = devicetensor<scalar_t, 1>(exs_);
/* kernel function */
Sum_Square_Forward_kernel<scalar_t>
<<<blocks, threads, 0, stream>>>(input, sum, square);
Expectation_Forward_kernel<scalar_t>
<<<blocks, threads, 0, stream>>>(input, ex, exs, norm);
}));
AT_ASSERT(cudaGetLastError() == cudaSuccess);
return {sum_, square_};
return {ex_, exs_};
}
at::Tensor Sum_Square_Backward_CUDA(
at::Tensor Expectation_Backward_CUDA(
const at::Tensor input_,
const at::Tensor gradSum_,
const at::Tensor gradSquare_) {
const at::Tensor gradEx_,
const at::Tensor gradExs_) {
/* outputs */
at::Tensor gradInput_ = at::zeros_like(input_);
/* cuda utils*/
@@ -275,14 +436,52 @@ at::Tensor Sum_Square_Backward_CUDA(
dim3 blocks(input_.size(1));
dim3 threads(getNumThreads(input_.size(2)));
AT_DISPATCH_FLOATING_TYPES(input_.type(), "SumSquare_Backward_CUDA", ([&] {
scalar_t norm = scalar_t(1) / (input_.size(0) * input_.size(2));
/* Device tensors */
DeviceTensor<scalar_t, 3> gradInput = devicetensor<scalar_t, 3>(gradInput_);
DeviceTensor<scalar_t, 3> input = devicetensor<scalar_t, 3>(input_);
DeviceTensor<scalar_t, 1> gradSum = devicetensor<scalar_t, 1>(gradSum_);
DeviceTensor<scalar_t, 1> gradSquare = devicetensor<scalar_t, 1>(gradSquare_);
DeviceTensor<scalar_t, 1> gradEx = devicetensor<scalar_t, 1>(gradEx_);
DeviceTensor<scalar_t, 1> gradExs = devicetensor<scalar_t, 1>(gradExs_);
/* kernel function */
Expectation_Backward_kernel<scalar_t>
<<<blocks, threads, 0, stream>>>(gradInput, input, gradEx, gradExs, norm);
}));
AT_ASSERT(cudaGetLastError() == cudaSuccess);
return gradInput_;
}
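Expectation_Forward folds the 1/(N*L) normalization into the reduction, so ex and exs are per-channel means of x and x^2, and the backward distributes gradEx and gradExs to each element as (gradEx + 2 * gradExs * x) * norm. A small autograd check of that pair (a sketch with illustrative shapes):

```python
import torch

N, C, L = 4, 3, 10
x = torch.randn(N, C, L, requires_grad=True)
norm = 1.0 / (N * L)

ex = x.sum(dim=(0, 2)) * norm          # what Expectation_Forward_kernel stores
exs = (x * x).sum(dim=(0, 2)) * norm

grad_ex, grad_exs = torch.randn(C), torch.randn(C)
(ex * grad_ex + exs * grad_exs).sum().backward()

# Per-element formula used by Expectation_Backward_kernel:
ref = (grad_ex.view(1, C, 1) + 2 * grad_exs.view(1, C, 1) * x.detach()) * norm
assert torch.allclose(x.grad, ref, atol=1e-6)
```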
at::Tensor Expectation_Inp_Backward_CUDA(
const at::Tensor gradInput_,
const at::Tensor output_,
const at::Tensor gradEx_,
const at::Tensor gradExs_,
const at::Tensor ex_,
const at::Tensor exs_,
const at::Tensor gamma_,
const at::Tensor beta_,
float eps) {
/* outputs */
//auto gradInput_ = at::zeros_like(output_);
auto std_ = (exs_ - ex_ * ex_ + eps).sqrt();
/* cuda utils*/
cudaStream_t stream = at::cuda::getCurrentCUDAStream();
dim3 blocks(output_.size(1));
dim3 threads(getNumThreads(output_.size(2)));
AT_DISPATCH_FLOATING_TYPES(output_.type(), "SumSquare_Backward_CUDA", ([&] {
scalar_t norm = scalar_t(1) / (output_.size(0) * output_.size(2));
/* Device tensors */
DeviceTensor<scalar_t, 3> gradInput = devicetensor<scalar_t, 3>(gradInput_);
DeviceTensor<scalar_t, 3> input = devicetensor<scalar_t, 3>(output_);
DeviceTensor<scalar_t, 1> gradEx = devicetensor<scalar_t, 1>(gradEx_);
DeviceTensor<scalar_t, 1> gradExs = devicetensor<scalar_t, 1>(gradExs_);
DeviceTensor<scalar_t, 1> ex = devicetensor<scalar_t, 1>(ex_);
DeviceTensor<scalar_t, 1> std = devicetensor<scalar_t, 1>(std_);
DeviceTensor<scalar_t, 1> gamma = devicetensor<scalar_t, 1>(gamma_);
DeviceTensor<scalar_t, 1> beta = devicetensor<scalar_t, 1>(beta_);
/* kernel function */
Sum_Square_Backward_kernel<scalar_t>
<<<blocks, threads, 0, stream>>>(gradInput, input, gradSum, gradSquare);
Expectation_Backward_Inp_kernel<scalar_t>
<<<blocks, threads, 0, stream>>>(gradInput, input, gradEx, gradExs,
ex, std, gamma, beta, norm);
}));
AT_ASSERT(cudaGetLastError() == cudaSuccess);
return gradInput_;
......
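The in-place variant no longer has the original input, so the kernel reconstructs it from the normalized output via x = (y - beta) / gamma * std + mean before applying the same per-element expectation gradient. A sketch of that inversion (values and shapes are illustrative):

```python
import torch

N, C, L, eps = 4, 3, 10, 1e-5
x = torch.randn(N, C, L)
mean = x.mean(dim=(0, 2))
std = ((x * x).mean(dim=(0, 2)) - mean * mean + eps).sqrt()
gamma = torch.randn(C).abs() + 0.1     # keep away from zero for the division
beta = torch.randn(C)

def bc(v):                             # broadcast (C,) over (N, C, L)
    return v.view(1, C, 1)

y = bc(gamma) * (x - bc(mean)) / bc(std) + bc(beta)
x_rec = (y - bc(beta)) / bc(gamma) * bc(std) + bc(mean)
assert torch.allclose(x_rec, x, atol=1e-5)
```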
from .model_zoo import get_model
from .model_store import get_model_file
from .resnet import *
from .cifarresnet import *
from .base import *
from .fcn import *
from .psp import *
from .encnet import *
from .deeplab import *
def get_segmentation_model(name, **kwargs):
from .fcn import get_fcn
models = {
'fcn': get_fcn,
'psp': get_psp,
'atten': get_atten,
'encnet': get_encnet,
'encnetv2': get_encnetv2,
'deeplab': get_deeplab,
}
return models[name.lower()](**kwargs)
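get_segmentation_model dispatches on the lowercase model name and forwards everything else to the per-model constructor (the dict entries rely on the star imports above). A hedged usage sketch; the dataset and backbone values are illustrative:

```python
from encoding.models import get_segmentation_model

model = get_segmentation_model('encnet', dataset='ade20k', backbone='resnet50')
```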
@@ -10,12 +10,11 @@ import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.functional import upsample
from torch.nn.parallel.data_parallel import DataParallel
from torch.nn.parallel.parallel_apply import parallel_apply
from torch.nn.parallel.scatter_gather import scatter
from .. import dilated as resnet
from . import resnet
from ..utils import batch_pix_accuracy, batch_intersection_union
up_kwargs = {'mode': 'bilinear', 'align_corners': True}
@@ -35,6 +34,7 @@ class BaseNet(nn.Module):
self.base_size = base_size
self.crop_size = crop_size
# copying modules from pretrained models
self.backbone = backbone
if backbone == 'resnet50':
self.pretrained = resnet.resnet50(pretrained=True, dilated=dilated,
norm_layer=norm_layer, root=root)
@@ -50,6 +50,20 @@ class BaseNet(nn.Module):
self._up_kwargs = up_kwargs
def base_forward(self, x):
if self.backbone.startswith('wideresnet'):
x = self.pretrained.mod1(x)
x = self.pretrained.pool2(x)
x = self.pretrained.mod2(x)
x = self.pretrained.pool3(x)
x = self.pretrained.mod3(x)
x = self.pretrained.mod4(x)
x = self.pretrained.mod5(x)
c3 = x.clone()
x = self.pretrained.mod6(x)
x = self.pretrained.mod7(x)
x = self.pretrained.bn_out(x)
return None, None, c3, x
else:
x = self.pretrained.conv1(x)
x = self.pretrained.bn1(x)
x = self.pretrained.relu(x)
@@ -124,6 +138,17 @@ class MultiEvalModule(DataParallel):
width = long_size
height = int(1.0 * h * long_size / w + 0.5)
short_size = height
"""
short_size = int(math.ceil(self.base_size * scale))
if h > w:
width = short_size
height = int(1.0 * h * short_size / w)
long_size = height
else:
height = short_size
width = int(1.0 * w * short_size / h)
long_size = width
"""
# resize image to current size
cur_img = resize_image(image, height, width, **self.module._up_kwargs)
if long_size <= crop_size:
@@ -180,7 +205,7 @@ def module_inference(module, image, flip=True):
return output.exp()
def resize_image(img, h, w, **up_kwargs):
return F.upsample(img, (h, w), **up_kwargs)
return F.interpolate(img, (h, w), **up_kwargs)
def pad_image(img, mean, std, crop_size):
b,c,h,w = img.size()
......
##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
## Created by: Hang Zhang
## ECE Department, Rutgers University
## Email: zhang.hang@rutgers.edu
## Copyright (c) 2017
##
## This source code is licensed under the MIT-style license found in the
## LICENSE file in the root directory of this source tree
##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
import torch
import torch.nn as nn
from torch.autograd import Variable
from ..nn import View
__all__ = ['cifar_resnet20']
def conv3x3(in_planes, out_planes, stride=1):
return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False)
class Basicblock(nn.Module):
""" Pre-activation residual block
Identity Mappings in Deep Residual Networks
ref https://arxiv.org/abs/1603.05027
"""
expansion = 1
def __init__(self, inplanes, planes, stride=1, norm_layer=nn.BatchNorm2d):
super(Basicblock, self).__init__()
if inplanes != planes or stride != 1:
self.downsample = True
self.residual_layer = nn.Conv2d(inplanes, planes,
kernel_size=1, stride=stride)
else:
self.downsample = False
conv_block=[]
conv_block+=[norm_layer(inplanes),
nn.ReLU(inplace=True),
conv3x3(inplanes, planes, stride=stride),
norm_layer(planes),
nn.ReLU(inplace=True),
conv3x3(planes, planes)]
self.conv_block = nn.Sequential(*conv_block)
def forward(self, input):
if self.downsample:
residual = self.residual_layer(input)
else:
residual = input
return residual + self.conv_block(input)
class Bottleneck(nn.Module):
""" Pre-activation residual block
Identity Mappings in Deep Residual Networks
ref https://arxiv.org/abs/1603.05027
"""
expansion = 4
def __init__(self, inplanes, planes, stride=1, norm_layer=nn.BatchNorm2d):
super(Bottleneck, self).__init__()
if inplanes != planes * self.expansion or stride != 1:
self.downsample = True
self.residual_layer = nn.Conv2d(inplanes,
planes * self.expansion, kernel_size=1, stride=stride)
else:
self.downsample = False
conv_block = []
conv_block += [norm_layer(inplanes),
nn.ReLU(inplace=True),
nn.Conv2d(inplanes, planes, kernel_size=1,
stride=1, bias=False)]
conv_block += [norm_layer(planes),
nn.ReLU(inplace=True),
nn.Conv2d(planes, planes, kernel_size=3,
stride=stride, padding=1, bias=False)]
conv_block += [norm_layer(planes),
nn.ReLU(inplace=True),
nn.Conv2d(planes, planes * self.expansion,
kernel_size=1, stride=1, bias=False)]
self.conv_block = nn.Sequential(*conv_block)
def forward(self, x):
if self.downsample:
residual = self.residual_layer(x)
else:
residual = x
return residual + self.conv_block(x)
class CIFAR_ResNet(nn.Module):
def __init__(self, block=Basicblock, num_blocks=[2,2,2], width_factor = 1,
num_classes=10, norm_layer=torch.nn.BatchNorm2d):
super(CIFAR_ResNet, self).__init__()
self.expansion = block.expansion
self.inplanes = int(width_factor * 16)
strides = [1, 2, 2]
model = []
# Conv_1
model += [nn.Conv2d(3, self.inplanes, kernel_size=3, padding=1),
norm_layer(self.inplanes),
nn.ReLU(inplace=True)]
# Residual units
model += [self._residual_unit(block, self.inplanes, num_blocks[0],
strides[0], norm_layer=norm_layer)]
for i in range(2):
model += [self._residual_unit(
block, int(2*self.inplanes/self.expansion),
num_blocks[i+1], strides[i+1], norm_layer=norm_layer)]
# Last conv layer
model += [norm_layer(self.inplanes),
nn.ReLU(inplace=True),
nn.AvgPool2d(8),
View(-1, self.inplanes),
nn.Linear(self.inplanes, num_classes)]
self.model = nn.Sequential(*model)
def _residual_unit(self, block, planes, n_blocks, stride, norm_layer):
strides = [stride] + [1]*(n_blocks-1)
layers = []
for i in range(n_blocks):
layers += [block(self.inplanes, planes, strides[i], norm_layer=norm_layer)]
self.inplanes = self.expansion*planes
return nn.Sequential(*layers)
def forward(self, input):
return self.model(input)
def cifar_resnet20(pretrained=False, root='~/.encoding/models', **kwargs):
"""Constructs a CIFAR ResNet-18 model.
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
"""
model = CIFAR_ResNet(Bottleneck, [3, 3, 3], **kwargs)
if pretrained:
model.load_state_dict(torch.load(
get_model_file('cifar_resnet20', root=root)), strict=False)
return model
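A hedged smoke test for the CIFAR ResNet above: a 3x32x32 input passes through three stages with strides 1, 2, 2, leaving the 8x8 map that AvgPool2d(8) expects, so the head produces one logit vector per image:

```python
import torch

model = cifar_resnet20(pretrained=False, num_classes=10)
logits = model(torch.randn(2, 3, 32, 32))
print(logits.shape)  # expected: torch.Size([2, 10])
```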
###########################################################################
# Created by: Hang Zhang
# Email: zhang.hang@rutgers.edu
# Copyright (c) 2017
###########################################################################
from __future__ import division
import os
import numpy as np
import torch
import torch.nn as nn
from torch.nn.functional import interpolate
from .base import BaseNet
from .fcn import FCNHead
class DeepLabV3(BaseNet):
def __init__(self, nclass, backbone, aux=True, se_loss=False, norm_layer=nn.BatchNorm2d, **kwargs):
super(DeepLabV3, self).__init__(nclass, backbone, aux, se_loss, norm_layer=norm_layer, **kwargs)
self.head = DeepLabV3Head(2048, nclass, norm_layer, self._up_kwargs)
if aux:
self.auxlayer = FCNHead(1024, nclass, norm_layer)
def forward(self, x):
_, _, h, w = x.size()
_, _, c3, c4 = self.base_forward(x)
outputs = []
x = self.head(c4)
x = interpolate(x, (h,w), **self._up_kwargs)
outputs.append(x)
if self.aux:
auxout = self.auxlayer(c3)
auxout = interpolate(auxout, (h,w), **self._up_kwargs)
outputs.append(auxout)
return tuple(outputs)
class DeepLabV3Head(nn.Module):
def __init__(self, in_channels, out_channels, norm_layer, up_kwargs, atrous_rates=[12, 24, 36], **kwargs):
super(DeepLabV3Head, self).__init__()
inter_channels = in_channels // 8
self.aspp = ASPP_Module(in_channels, atrous_rates, norm_layer, up_kwargs, **kwargs)
self.block = nn.Sequential(
nn.Conv2d(inter_channels, inter_channels, 3, padding=1, bias=False),
norm_layer(inter_channels),
nn.ReLU(True),
nn.Dropout2d(0.1, False),
nn.Conv2d(inter_channels, out_channels, 1))
def forward(self, x):
x = self.aspp(x)
x = self.block(x)
return x
def ASPPConv(in_channels, out_channels, atrous_rate, norm_layer):
block = nn.Sequential(
nn.Conv2d(in_channels, out_channels, 3, padding=atrous_rate,
dilation=atrous_rate, bias=False),
norm_layer(out_channels),
nn.ReLU(True))
return block
class AsppPooling(nn.Module):
def __init__(self, in_channels, out_channels, norm_layer, up_kwargs):
super(AsppPooling, self).__init__()
self._up_kwargs = up_kwargs
self.gap = nn.Sequential(nn.AdaptiveAvgPool2d(1),
nn.Conv2d(in_channels, out_channels, 1, bias=False),
norm_layer(out_channels),
nn.ReLU(True))
def forward(self, x):
_, _, h, w = x.size()
pool = self.gap(x)
return interpolate(pool, (h,w), **self._up_kwargs)
class ASPP_Module(nn.Module):
def __init__(self, in_channels, atrous_rates, norm_layer, up_kwargs):
super(ASPP_Module, self).__init__()
out_channels = in_channels // 8
rate1, rate2, rate3 = tuple(atrous_rates)
self.b0 = nn.Sequential(
nn.Conv2d(in_channels, out_channels, 1, bias=False),
norm_layer(out_channels),
nn.ReLU(True))
self.b1 = ASPPConv(in_channels, out_channels, rate1, norm_layer)
self.b2 = ASPPConv(in_channels, out_channels, rate2, norm_layer)
self.b3 = ASPPConv(in_channels, out_channels, rate3, norm_layer)
self.b4 = AsppPooling(in_channels, out_channels, norm_layer, up_kwargs)
self.project = nn.Sequential(
nn.Conv2d(5*out_channels, out_channels, 1, bias=False),
norm_layer(out_channels),
nn.ReLU(True),
nn.Dropout2d(0.5, False))
def forward(self, x):
feat0 = self.b0(x)
feat1 = self.b1(x)
feat2 = self.b2(x)
feat3 = self.b3(x)
feat4 = self.b4(x)
y = torch.cat((feat0, feat1, feat2, feat3, feat4), 1)
return self.project(y)
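ASPP runs five parallel branches (a 1x1 conv, three dilated 3x3 convs, and global pooling), each producing in_channels // 8 maps, then projects the concatenation back down to in_channels // 8. A hedged shape walkthrough; the 33x33 input size is just an example:

```python
import torch
import torch.nn as nn

aspp = ASPP_Module(in_channels=2048, atrous_rates=[12, 24, 36],
                   norm_layer=nn.BatchNorm2d,
                   up_kwargs={'mode': 'bilinear', 'align_corners': True})
x = torch.randn(2, 2048, 33, 33)
print(aspp(x).shape)  # expected: torch.Size([2, 256, 33, 33])
```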
def get_deeplab(dataset='pascal_voc', backbone='resnet50', pretrained=False,
root='~/.encoding/models', **kwargs):
acronyms = {
'pascal_voc': 'voc',
'pascal_aug': 'voc',
'ade20k': 'ade',
}
# infer number of classes
from ..datasets import datasets, VOCSegmentation, VOCAugSegmentation, ADE20KSegmentation
model = DeepLabV3(datasets[dataset.lower()].NUM_CLASS, backbone=backbone, root=root, **kwargs)
if pretrained:
from .model_store import get_model_file
model.load_state_dict(torch.load(
get_model_file('deeplab_%s_%s'%(backbone, acronyms[dataset]), root=root)))
return model
def get_deeplab_resnet50_ade(pretrained=False, root='~/.encoding/models', **kwargs):
r"""DeepLabV3 model from the paper `"Context Encoding for Semantic Segmentation"
<https://arxiv.org/pdf/1803.08904.pdf>`_
Parameters
----------
pretrained : bool, default False
Whether to load the pretrained weights for model.
root : str, default '~/.encoding/models'
Location for keeping the model parameters.
Examples
--------
>>> model = get_deeplab_resnet50_ade(pretrained=True)
>>> print(model)
"""
return get_deeplab('ade20k', 'resnet50', pretrained, root=root, **kwargs)
##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
## Created by: Hang Zhang
## ECE Department, Rutgers University
## Email: zhang.hang@rutgers.edu
## Copyright (c) 2017
##
## This source code is licensed under the MIT-style license found in the
## LICENSE file in the root directory of this source tree
##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
import torch
import torch.nn as nn
from ..nn import Encoding, View, Normalize
from . import resnet
__all__ = ['DeepTen', 'get_deepten', 'get_deepten_resnet50_minc']
class DeepTen(nn.Module):
def __init__(self, nclass, backbone):
super(DeepTen, self).__init__()
self.backbone = backbone
# copying modules from pretrained models
if self.backbone == 'resnet50':
self.pretrained = resnet.resnet50(pretrained=True, dilated=False)
elif self.backbone == 'resnet101':
self.pretrained = resnet.resnet101(pretrained=True, dilated=False)
elif self.backbone == 'resnet152':
self.pretrained = resnet.resnet152(pretrained=True, dilated=False)
else:
raise RuntimeError('unknown backbone: {}'.format(self.backbone))
n_codes = 32
self.head = nn.Sequential(
nn.Conv2d(2048, 128, 1),
nn.BatchNorm2d(128),
nn.ReLU(inplace=True),
Encoding(D=128,K=n_codes),
View(-1, 128*n_codes),
Normalize(),
nn.Linear(128*n_codes, nclass),
)
def forward(self, x):
_, _, h, w = x.size()
x = self.pretrained.conv1(x)
x = self.pretrained.bn1(x)
x = self.pretrained.relu(x)
x = self.pretrained.maxpool(x)
x = self.pretrained.layer1(x)
x = self.pretrained.layer2(x)
x = self.pretrained.layer3(x)
x = self.pretrained.layer4(x)
return self.head(x)
def get_deepten(dataset='pascal_voc', backbone='resnet50', pretrained=False,
root='~/.encoding/models', **kwargs):
r"""DeepTen model from the paper `"Deep TEN: Texture Encoding Network"
<https://arxiv.org/pdf/1612.02844v1.pdf>`_
Parameters
----------
dataset : str, default pascal_voc
The dataset that model pretrained on. (pascal_voc, ade20k)
pretrained : bool, default False
Whether to load the pretrained weights for model.
root : str, default '~/.encoding/models'
Location for keeping the model parameters.
Examples
--------
>>> model = get_deepten(dataset='minc', backbone='resnet50', pretrained=False)
>>> print(model)
"""
from ..datasets import datasets, acronyms
model = DeepTen(datasets[dataset.lower()].NUM_CLASS, backbone=backbone, **kwargs)
if pretrained:
from .model_store import get_model_file
model.load_state_dict(torch.load(
get_model_file('deepten_%s_%s'%(backbone, acronyms[dataset]), root=root)))
return model
def get_deepten_resnet50_minc(pretrained=False, root='~/.encoding/models', **kwargs):
r"""DeepTen model from the paper `"Deep TEN: Texture Encoding Network"
<https://arxiv.org/pdf/1612.02844v1.pdf>`_
Parameters
----------
pretrained : bool, default False
Whether to load the pretrained weights for model.
root : str, default '~/.encoding/models'
Location for keeping the model parameters.
Examples
--------
>>> model = get_deepten_resnet50_minc(pretrained=True)
>>> print(model)
"""
return get_deepten(dataset='minc', backbone='resnet50', pretrained=pretrained,
root=root, **kwargs)
@@ -9,9 +9,9 @@ from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
import encoding
from .base import BaseNet
from .fcn import FCNHead
from ..nn import SyncBatchNorm, Encoding, Mean
__all__ = ['EncNet', 'EncModule', 'get_encnet', 'get_encnet_resnet50_pcontext',
'get_encnet_resnet101_pcontext', 'get_encnet_resnet50_ade',
@@ -19,7 +19,7 @@ __all__ = ['EncNet', 'EncModule', 'get_encnet', 'get_encnet_resnet50_pcontext',
class EncNet(BaseNet):
def __init__(self, nclass, backbone, aux=True, se_loss=True, lateral=False,
norm_layer=nn.BatchNorm2d, **kwargs):
norm_layer=SyncBatchNorm, **kwargs):
super(EncNet, self).__init__(nclass, backbone, aux, se_loss,
norm_layer=norm_layer, **kwargs)
self.head = EncHead(2048, self.nclass, se_loss=se_loss,
@@ -33,10 +33,10 @@ class EncNet(BaseNet):
features = self.base_forward(x)
x = list(self.head(*features))
x[0] = F.upsample(x[0], imsize, **self._up_kwargs)
x[0] = F.interpolate(x[0], imsize, **self._up_kwargs)
if self.aux:
auxout = self.auxlayer(features[2])
auxout = F.upsample(auxout, imsize, **self._up_kwargs)
auxout = F.interpolate(auxout, imsize, **self._up_kwargs)
x.append(auxout)
return tuple(x)
@@ -49,10 +49,10 @@ class EncModule(nn.Module):
nn.Conv2d(in_channels, in_channels, 1, bias=False),
norm_layer(in_channels),
nn.ReLU(inplace=True),
encoding.nn.Encoding(D=in_channels, K=ncodes),
encoding.nn.BatchNorm1d(ncodes),
Encoding(D=in_channels, K=ncodes),
norm_layer(ncodes),
nn.ReLU(inplace=True),
encoding.nn.Mean(dim=1))
Mean(dim=1))
self.fc = nn.Sequential(
nn.Linear(in_channels, in_channels),
nn.Sigmoid())
@@ -134,14 +134,9 @@ def get_encnet(dataset='pascal_voc', backbone='resnet50', pretrained=False,
>>> model = get_encnet(dataset='pascal_voc', backbone='resnet50', pretrained=False)
>>> print(model)
"""
acronyms = {
'pascal_voc': 'voc',
'ade20k': 'ade',
'pcontext': 'pcontext',
}
kwargs['lateral'] = True if dataset.lower().startswith('p') else False
# infer number of classes
from ..datasets import datasets
from ..datasets import datasets, acronyms
model = EncNet(datasets[dataset.lower()].NUM_CLASS, backbone=backbone, root=root, **kwargs)
if pretrained:
from .model_store import get_model_file
......