Commit c05c2a59 authored by Hang Zhang

backend

parent 55dbd840
...@@ -18,6 +18,12 @@ This repo is a PyTorch implementation of Encoding Layer as described in the paper
- Dependencies
  * Install PyTorch from source
- Installing package
```bash
bash make.sh
```
* On Linux
```bash
python setup.py install
```
* On OSX
```bash
MACOSX_DEPLOYMENT_TARGET=10.9 CC=clang CXX=clang++ python setup.py install
```
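For reference, a quick post-install sanity check. This is only a sketch: it assumes the package installs under the name `encoding` and that the compiled FFI module is exposed as `encoding._ext.encoding_lib`, the module path passed to `create_extension` in `setup.py` below.

```python
# Minimal import check after installation (hypothetical usage; the module path
# is taken from the create_extension call in setup.py).
import torch                       # the extension resolves symbols from PyTorch's libs
from encoding._ext import encoding_lib

print(encoding_lib)                # should print the loaded FFI extension module
```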
...@@ -11,8 +11,19 @@
import os
import torch
import platform
import subprocess
from torch.utils.ffi import create_extension
# build kernel library
build_all_cmd = ['bash', 'encoding/make.sh']
if subprocess.call(build_all_cmd) != 0:
    sys.exit(1)
sources = ['encoding/src/encoding_lib.cpp']
headers = ['encoding/src/encoding_lib.h']
defines = [('WITH_CUDA', None)]
with_cuda = True
package_base = os.path.dirname(torch.__file__)
this_file = os.path.dirname(os.path.realpath(__file__))
...@@ -20,11 +31,6 @@ include_path = [os.path.join(os.environ['HOME'],'pytorch/torch/lib/THC'),
                os.path.join(package_base,'lib/include/ENCODING'),
                os.path.join(this_file,'encoding/src/')]
sources = ['encoding/src/encoding_lib.cpp']
headers = ['encoding/src/encoding_lib.h']
defines = [('WITH_CUDA', None)]
with_cuda = True
if platform.system() == 'Darwin':
    ENCODING_LIB = os.path.join(package_base, 'lib/libENCODING.dylib')
else:
...@@ -36,9 +42,6 @@ def make_relative_rpath(path):
    else:
        return '-Wl,-rpath,' + path
extra_link_args = []
ffi = create_extension(
    'encoding._ext.encoding_lib',
    package=True,
...
#!/usr/bin/env bash
rm -rf build/ dist/ encoding.egg-info/ encoding/build/ encoding/_ext/
rm -rf build/ dist/ encoding.egg-info/ encoding/build/ encoding/_ext/ __pycache__ encoding/__pycache__
...@@ -11,37 +11,14 @@
#ifndef THC_GENERIC_FILE
#define THC_GENERIC_FILE "generic/encoding_kernel.c"
#else
/*
template <int Dim>
THCDeviceTensor<float, Dim> devicetensor(THCState *state, THCTensor *t) {
    if (!t) {
        return THCDeviceTensor<float, Dim>();
    }
    int inDim = THCTensor_(nDimension)(state, t);
    if (inDim == Dim) {
        return toDeviceTensor<float, Dim>(state, t);
    }
    // View in which the last dimensions are collapsed or expanded as needed
    THAssert(THCTensor_(isContiguous)(state, t));
    int size[Dim];
    for (int i = 0; i < Dim || i < inDim; ++i) {
        if (i < Dim && i < inDim) {
            size[i] = t->size[i];
        } else if (i < Dim) {
            size[i] = 1;
        } else {
            size[Dim - 1] *= t->size[i];
        }
    }
    return THCDeviceTensor<float, Dim>(THCTensor_(data)(state, t), size);
}
*/
__global__ void Encoding_(Aggregate_Forward_kernel) (
    THCDeviceTensor<real, 3> E,
    THCDeviceTensor<real, 3> A,
    THCDeviceTensor<real, 4> R)
/*
* aggregating kernel function
*/
{
    /* declarations of the variables */
    int b, k, d, i, N;
...@@ -61,13 +38,13 @@ __global__ void Encoding_(Aggregate_Forward_kernel) (
    E[b][k][d] = sum;
}
void Encoding_(Aggregate_Forward)(THCState *state, THCTensor *E_,
    THCTensor *A_, THCTensor *R_)
/*
 * aggregating the residuals with assignment weights
 */
{
    /* Check the GPU index and tensor dims */
    THCTensor_(checkGPU)(state, 3, E_, A_, R_);
    if (THCTensor_(nDimension)(state, E_) != 3 ||
        THCTensor_(nDimension)(state, A_) != 3 ||
...@@ -86,4 +63,55 @@ void Encoding_(Aggregate_Forward)(THCState *state, THCTensor *E_, THCTensor *A_,
    THCudaCheck(cudaGetLastError());
}
__global__ void Encoding_(Aggregate_Backward_kernel) (
    THCDeviceTensor<real, 3> G,
    THCDeviceTensor<real, 3> L,
    THCDeviceTensor<real, 4> R)
/*
 * aggregating backward kernel function
 */
{
    /* declarations of the variables */
    int b, k, d, i, D;
    real sum;
    /* Get the index and channels */
    b = blockIdx.z;
    k = blockIdx.x * blockDim.x + threadIdx.x;
    i = blockIdx.y * blockDim.y + threadIdx.y;
    D = L.getSize(2);
    /* boundary check for output */
    if (k >= G.getSize(2) || i >= G.getSize(1)) return;
    /* main operation */
    sum = 0;
    for(d=0; d<D; d++) {
        sum += L[b][k][d].ldg() * R[b][i][k][d].ldg();
    }
    G[b][i][k] = sum;
}

void Encoding_(Aggregate_Backward)(THCState *state, THCTensor *G_,
    THCTensor *L_, THCTensor *R_)
/*
 * aggregate backward to assignment weights
 */
{
    /* Check the GPU index and tensor dims */
    THCTensor_(checkGPU)(state, 3, G_, L_, R_);
    if (THCTensor_(nDimension)(state, G_) != 3 ||
        THCTensor_(nDimension)(state, L_) != 3 ||
        THCTensor_(nDimension)(state, R_) != 4)
        THError("Encoding: incorrect input dims. \n");
    /* Device tensors */
    THCDeviceTensor<real, 3> G = devicetensor<3>(state, G_);
    THCDeviceTensor<real, 3> L = devicetensor<3>(state, L_);
    THCDeviceTensor<real, 4> R = devicetensor<4>(state, R_);
    /* kernel function */
    cudaStream_t stream = THCState_getCurrentStream(state);
    dim3 threads(16, 16);
    dim3 blocks(G.getSize(2)/16+1, G.getSize(1)/16+1,
        G.getSize(0));
    Encoding_(Aggregate_Backward_kernel)<<<blocks, threads, 0, stream>>>(G, L, R);
    THCudaCheck(cudaGetLastError());
}
#endif
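A note on the math these kernels implement: the body of the forward kernel is elided in this hunk, but from the tensor shapes and the new backward kernel the aggregate operation is E[b][k][d] = sum_i A[b][i][k] * R[b][i][k][d], and the gradient with respect to the assignment weights is G[b][i][k] = sum_d gradE[b][k][d] * R[b][i][k][d]. A minimal plain-PyTorch reference, useful for checking the CUDA kernels but not part of this commit (the function names and shapes here are assumptions drawn from the kernel indexing):

```python
import torch

def aggregate_forward_ref(A, R):
    """E[b, k, d] = sum_i A[b, i, k] * R[b, i, k, d]
    A: (B, N, K) assignment weights, R: (B, N, K, D) residuals -> E: (B, K, D)."""
    return (A.unsqueeze(3) * R).sum(1)

def aggregate_backward_A_ref(gradE, R):
    """G[b, i, k] = sum_d gradE[b, k, d] * R[b, i, k, d]
    mirrors Encoding_(Aggregate_Backward_kernel); gradE: (B, K, D) -> G: (B, N, K)."""
    return (gradE.unsqueeze(1) * R).sum(3)
```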
...@@ -12,6 +12,8 @@
#define THC_GENERIC_FILE "generic/encoding_kernel.h"
#else
void Encoding_(Aggregate_Forward)(THCState *state, THCTensor *E_,
    THCTensor *A_, THCTensor *R_);
void Encoding_(Aggregate_Backward)(THCState *state, THCTensor *G_,
    THCTensor *L_, THCTensor *R_);
#endif
...@@ -8,44 +8,10 @@
 * LICENSE file in the root directory of this source tree
 *+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 */
#include <THC/THC.h>
#include "THCDeviceTensor.cuh"
#include "THCDeviceTensorUtils.cuh"
#include "thc_encoding.h" #include "thc_encoding.h"
// this symbol will be resolved automatically from PyTorch libs #include "generic/device_tensor.h"
extern THCState *state; #include "THC/THCGenerateFloatType.h"
#define Encoding_(NAME) TH_CONCAT_4(Encoding_, Real, _, NAME)
#define THCTensor TH_CONCAT_3(TH,CReal,Tensor)
#define THCTensor_(NAME) TH_CONCAT_4(TH,CReal,Tensor_,NAME)
template <int Dim>
THCDeviceTensor<float, Dim> devicetensor(THCState *state, THCudaTensor *t) {
    if (!t) {
        return THCDeviceTensor<float, Dim>();
    }
    int inDim = THCudaTensor_nDimension(state, t);
    if (inDim == Dim) {
        return toDeviceTensor<float, Dim>(state, t);
    }
    // View in which the last dimensions are collapsed or expanded as needed
    THAssert(THCudaTensor_isContiguous(state, t));
    int size[Dim];
    for (int i = 0; i < Dim || i < inDim; ++i) {
        if (i < Dim && i < inDim) {
            size[i] = t->size[i];
        } else if (i < Dim) {
            size[i] = 1;
        } else {
            size[Dim - 1] *= t->size[i];
        }
    }
    return THCDeviceTensor<float, Dim>(THCudaTensor_data(state, t), size);
}
#ifdef __cplusplus
extern "C" {
...
...@@ -22,3 +22,5 @@
int Encoding_Float_aggregate_forward(THCudaTensor *E, THCudaTensor *A,
    THCudaTensor *R);
int Encoding_Float_aggregate_backward(THCudaTensor *G, THCudaTensor *L,
    THCudaTensor *R);
...@@ -12,20 +12,26 @@
#define THC_GENERIC_FILE "generic/encoding_generic.c"
#else
int Encoding_(aggregate_forward)(THCudaTensor *E, THCudaTensor *A,
    THCudaTensor *R)
/*
 * Aggregate operation
 */
{
    if (THCTensor_(nDimension)(state, E) != 3 ||
        THCTensor_(nDimension)(state, A) != 3 ||
        THCTensor_(nDimension)(state, R) != 4)
        perror("Encoding: incorrect input dims. \n");
    Encoding_(Aggregate_Forward)(state, E, A, R);
    /* C function return number of the outputs */
    return 0;
}
int Encoding_(aggregate_backward)(THCudaTensor *G, THCudaTensor *L,
    THCudaTensor *R)
/*
 * Aggregate backward operation (gradient w.r.t. the assignment weights)
 */
{
    Encoding_(Aggregate_Backward)(state, G, L, R);
    /* C function return number of the outputs */
    return 0;
}
#endif
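On the Python side, these two exported entry points (see `encoding_lib.h` above) are what an autograd wrapper would call. Sketched below is a hypothetical `torch.autograd.Function` showing only how the calls could be wired up; the class name, the output allocations, and the absence of a gradient w.r.t. `R` are assumptions, not part of this commit:

```python
import torch
from torch.autograd import Function
from encoding._ext import encoding_lib  # built by setup.py via create_extension

class AggregateF(Function):
    def forward(self, A, R):
        # A: (B, N, K) assignment weights, R: (B, N, K, D) residuals (CUDA float tensors)
        self.save_for_backward(A, R)
        B, N, K = A.size()
        D = R.size(3)
        E = A.new(B, K, D)
        encoding_lib.Encoding_Float_aggregate_forward(E, A, R)
        return E

    def backward(self, gradE):
        A, R = self.saved_tensors
        gradA = A.new().resize_as_(A)
        # this commit only provides the gradient w.r.t. the assignment weights
        encoding_lib.Encoding_Float_aggregate_backward(gradA, gradE.contiguous(), R)
        return gradA, None
```

Since the backward call fills only `gradA`, this sketch assumes the residuals `R` do not require a gradient.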
#!/usr/bin/env bash
cd encoding/
mkdir -p build && cd build
cmake ..
make install
cd ../..
python setup.py install
#!/usr/bin/env bash
cd encoding/
mkdir -p build && cd build
cmake ..
make install
cd ../..
MACOSX_DEPLOYMENT_TARGET=10.9 CC=clang CXX=clang++ python setup.py install