OpenDAS / Pytorch-Encoding · Commits

Commit c05c2a59
authored May 14, 2017 by Hang Zhang
parent 55dbd840

    backend
Showing 10 changed files with 98 additions and 105 deletions:
- README.md (+9 −3)
- build.py (+11 −8)
- clean.sh (+1 −1)
- encoding/kernel/generic/encoding_kernel.c (+57 −29)
- encoding/kernel/generic/encoding_kernel.h (+4 −2)
- encoding/kernel/thc_encoding.cu (+2 −36)
- encoding/src/encoding_lib.h (+2 −0)
- encoding/src/generic/encoding_generic.c (+12 −6)
- make.sh (+0 −10)
- osx_make.sh (+0 −10)
README.md

````diff
@@ -18,6 +18,12 @@ This repo is a PyTorch implementation of Encoding Layer as described in the paper
 - Dependencies
   * Install PyTorch from source
 - Installing package
-  ```bash
-  bash make.sh
-  ```
+  * On Linux
+  ```bash
+  python setup.py install
+  ```
+  * On OSX
+  ```bash
+  MACOSX_DEPLOYMENT_TARGET=10.9 CC=clang CXX=clang++ python setup.py install
+  ```
````
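Once either install command succeeds, importing the compiled FFI module is a quick sanity check. The module path below is taken from build.py in this commit, which names the extension `encoding._ext.encoding_lib`; treat this as an illustrative sketch rather than documented usage:

```python
# Quick post-install sanity check; the module path comes from build.py,
# which names the FFI extension 'encoding._ext.encoding_lib'.
import encoding._ext.encoding_lib as encoding_lib

print(encoding_lib.__file__)  # should point at the freshly built extension
```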
build.py

```diff
@@ -11,8 +11,19 @@
 import os
 import torch
+import platform
+import subprocess
 from torch.utils.ffi import create_extension
 
+# build kernel library
+build_all_cmd = ['bash', 'encoding/make.sh']
+if subprocess.call(build_all_cmd) != 0: sys.exit(1)
+
+sources = ['encoding/src/encoding_lib.cpp']
+headers = ['encoding/src/encoding_lib.h']
+defines = [('WITH_CUDA', None)]
+with_cuda = True
+
 package_base = os.path.dirname(torch.__file__)
 this_file = os.path.dirname(os.path.realpath(__file__))
@@ -20,11 +31,6 @@ include_path = [os.path.join(os.environ['HOME'],'pytorch/torch/lib/THC'),
                 os.path.join(package_base, 'lib/include/ENCODING'),
                 os.path.join(this_file, 'encoding/src/')]
 
-sources = ['encoding/src/encoding_lib.cpp']
-headers = ['encoding/src/encoding_lib.h']
-defines = [('WITH_CUDA', None)]
-with_cuda = True
-
 if platform.system() == 'Darwin':
     ENCODING_LIB = os.path.join(package_base, 'lib/libENCODING.dylib')
 else:
@@ -36,9 +42,6 @@ def make_relative_rpath(path):
     else:
         return '-Wl,-rpath,' + path
 
-extra_link_args = []
-
 ffi = create_extension(
     'encoding._ext.encoding_lib',
     package=True,
```
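Two things are worth noting in these hunks: the new guard calls `sys.exit(1)` without importing `sys` (a latent NameError if the kernel build fails), and `create_extension` only configures the extension. The usual tail of a `torch.utils.ffi` build script, not visible in these hunks, is what actually compiles it; a minimal sketch, assuming the standard pattern:

```python
# Usual tail of a torch.utils.ffi build script (assumed; not visible in
# these hunks): create_extension() only configures, .build() compiles the
# sources into encoding/_ext/encoding_lib.
if __name__ == '__main__':
    ffi.build()
```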
clean.sh

```diff
 #!/usr/bin/env bash
-rm -rf build/ dist/ encoding.egg-info/ encoding/build/ encoding/_ext/
+rm -rf build/ dist/ encoding.egg-info/ encoding/build/ encoding/_ext/ __pycache__ encoding/__pycache__
```
encoding/kernel/generic/encoding_kernel.c

```diff
@@ -11,37 +11,14 @@
 #ifndef THC_GENERIC_FILE
 #define THC_GENERIC_FILE "generic/encoding_kernel.c"
 #else
-/*
-template <int Dim>
-THCDeviceTensor<float, Dim> devicetensor(THCState *state, THCTensor *t) {
-  if (!t) {
-    return THCDeviceTensor<float, Dim>();
-  }
-  int inDim = THCTensor_(nDimension)(state, t);
-  if (inDim == Dim) {
-    return toDeviceTensor<float, Dim>(state, t);
-  }
-  // View in which the last dimensions are collapsed or expanded as needed
-  THAssert(THCTensor_(isContiguous)(state, t));
-  int size[Dim];
-  for (int i = 0; i < Dim || i < inDim; ++i) {
-    if (i < Dim && i < inDim) {
-      size[i] = t->size[i];
-    } else if (i < Dim) {
-      size[i] = 1;
-    } else {
-      size[Dim - 1] *= t->size[i];
-    }
-  }
-  return THCDeviceTensor<float, Dim>(THCTensor_(data)(state, t), size);
-}
-*/
 __global__ void Encoding_(Aggregate_Forward_kernel) (
   THCDeviceTensor<real, 3> E,
   THCDeviceTensor<real, 3> A,
   THCDeviceTensor<real, 4> R)
 /*
  * aggregating kernel function
  */
 {
   /* declarations of the variables */
   int b, k, d, i, N;
```
```diff
@@ -61,13 +38,13 @@ __global__ void Encoding_(Aggregate_Forward_kernel) (
   E[b][k][d] = sum;
 }
 
 void Encoding_(Aggregate_Forward)(THCState *state, THCTensor *E_,
     THCTensor *A_, THCTensor *R_)
 /*
  * aggregating the residuals with assignment weights
  */
 {
-  /* Check the GPU index */
+  /* Check the GPU index and tensor dims */
   THCTensor_(checkGPU)(state, 3, E_, A_, R_);
+  if (THCTensor_(nDimension)(state, E_) != 3 ||
+      THCTensor_(nDimension)(state, A_) != 3 ||
```
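For reference, with A of shape B×N×K (assignment weights) and R of shape B×N×K×D (residuals), the forward kernel computes E[b][k][d] = Σ_i A[b][i][k] · R[b][i][k][d]. A dense-tensor sketch of the same reduction; the shape roles (positions N, codewords K, channels D) are inferred from the kernel indexing, not stated in the diff:

```python
import torch

B, N, K, D = 2, 10, 4, 8         # batch, positions, codewords, channels (assumed)
A = torch.randn(B, N, K)         # assignment weights
R = torch.randn(B, N, K, D)      # residuals
# E[b][k][d] = sum_i A[b][i][k] * R[b][i][k][d], as in Aggregate_Forward_kernel
E = (A.unsqueeze(3) * R).sum(1)  # shape (B, K, D)
```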
```diff
@@ -86,4 +63,55 @@ void Encoding_(Aggregate_Forward)(THCState *state, THCTensor *E_, THCTensor *A_,
   THCudaCheck(cudaGetLastError());
 }
 
+__global__ void Encoding_(Aggregate_Backward_kernel) (
+  THCDeviceTensor<real, 3> G,
+  THCDeviceTensor<real, 3> L,
+  THCDeviceTensor<real, 4> R)
+/*
+ * aggregating backward kernel function
+ */
+{
+  /* declarations of the variables */
+  int b, k, d, i, D;
+  real sum;
+  /* Get the index and channels */
+  b = blockIdx.z;
+  k = blockIdx.x * blockDim.x + threadIdx.x;
+  i = blockIdx.y * blockDim.y + threadIdx.y;
+  D = L.getSize(2);
+  /* boundary check for output */
+  if (k >= G.getSize(2) || i >= G.getSize(1)) return;
+  /* main operation */
+  sum = 0;
+  for (d = 0; d < D; d++) {
+    sum += L[b][k][d].ldg() * R[b][i][k][d].ldg();
+  }
+  G[b][i][k] = sum;
+}
+
+void Encoding_(Aggregate_Backward)(THCState *state, THCTensor *G_,
+    THCTensor *L_, THCTensor *R_)
+/*
+ * aggregate backward to assignment weights
+ */
+{
+  /* Check the GPU index and tensor dims */
+  THCTensor_(checkGPU)(state, 3, G_, L_, R_);
+  if (THCTensor_(nDimension)(state, G_) != 3 ||
+      THCTensor_(nDimension)(state, L_) != 3 ||
+      THCTensor_(nDimension)(state, R_) != 4)
+    THError("Encoding: incorrect input dims. \n");
+  /* Device tensors */
+  THCDeviceTensor<real, 3> G = devicetensor<3>(state, G_);
+  THCDeviceTensor<real, 3> L = devicetensor<3>(state, L_);
+  THCDeviceTensor<real, 4> R = devicetensor<4>(state, R_);
+  /* kernel function */
+  cudaStream_t stream = THCState_getCurrentStream(state);
+  dim3 threads(16, 16);
+  dim3 blocks(G.getSize(2)/16+1, G.getSize(1)/16+1, G.getSize(0));
+  Encoding_(Aggregate_Backward_kernel)<<<blocks, threads, 0, stream>>>(G, L, R);
+  THCudaCheck(cudaGetLastError());
+}
 #endif
```
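The new backward pass is the gradient of that sum with respect to the assignment weights: G[b][i][k] = Σ_d L[b][k][d] · R[b][i][k][d], where L is the incoming gradient of E. Each thread owns one (k, i) pair (16×16 tiles, hence the getSize/16+1 grid sizing) with the batch index on blockIdx.z. The same reduction in dense-tensor form, under the shape assumptions above:

```python
import torch

B, N, K, D = 2, 10, 4, 8
L = torch.randn(B, K, D)         # upstream gradient w.r.t. E
R = torch.randn(B, N, K, D)      # residuals from the forward pass
# G[b][i][k] = sum_d L[b][k][d] * R[b][i][k][d], as in Aggregate_Backward_kernel
G = (L.unsqueeze(1) * R).sum(3)  # shape (B, N, K): gradient w.r.t. A
```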
encoding/kernel/generic/encoding_kernel.h

```diff
@@ -12,6 +12,8 @@
 #define THC_GENERIC_FILE "generic/encoding_kernel.h"
 #else
 
 void Encoding_(Aggregate_Forward)(THCState *state, THCTensor *E_,
     THCTensor *A_, THCTensor *R_);
+void Encoding_(Aggregate_Backward)(THCState *state, THCTensor *G_,
+    THCTensor *L_, THCTensor *R_);
 
 #endif
```
encoding/kernel/thc_encoding.cu

```diff
@@ -8,44 +8,10 @@
  * LICENSE file in the root directory of this source tree
  *+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
  */
 #include <THC/THC.h>
 #include "THCDeviceTensor.cuh"
 #include "THCDeviceTensorUtils.cuh"
 #include "thc_encoding.h"
 
 // this symbol will be resolved automatically from PyTorch libs
 extern THCState *state;
 
 #define Encoding_(NAME) TH_CONCAT_4(Encoding_, Real, _, NAME)
 #define THCTensor TH_CONCAT_3(TH,CReal,Tensor)
 #define THCTensor_(NAME) TH_CONCAT_4(TH,CReal,Tensor_,NAME)
 
-template <int Dim>
-THCDeviceTensor<float, Dim> devicetensor(THCState *state, THCudaTensor *t) {
-  if (!t) {
-    return THCDeviceTensor<float, Dim>();
-  }
-  int inDim = THCudaTensor_nDimension(state, t);
-  if (inDim == Dim) {
-    return toDeviceTensor<float, Dim>(state, t);
-  }
-  // View in which the last dimensions are collapsed or expanded as needed
-  THAssert(THCudaTensor_isContiguous(state, t));
-  int size[Dim];
-  for (int i = 0; i < Dim || i < inDim; ++i) {
-    if (i < Dim && i < inDim) {
-      size[i] = t->size[i];
-    } else if (i < Dim) {
-      size[i] = 1;
-    } else {
-      size[Dim - 1] *= t->size[i];
-    }
-  }
-  return THCDeviceTensor<float, Dim>(THCudaTensor_data(state, t), size);
-}
+#include "generic/device_tensor.h"
+#include "THC/THCGenerateFloatType.h"
 
 #ifdef __cplusplus
 extern "C" {
```
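The removed devicetensor helper (now pulled in via generic/device_tensor.h) builds a fixed-rank view of a contiguous tensor: it pads with size-1 axes when the input has fewer axes than requested, and folds any extra trailing axes into the last kept axis otherwise. A NumPy sketch of that reshaping logic; `as_dim` is a hypothetical name for illustration:

```python
import numpy as np

def as_dim(t, dim):
    # Mirrors the removed devicetensor<Dim> helper: pad with size-1 axes
    # when t has fewer than dim axes, otherwise fold the extra trailing
    # axes into the last kept axis. Requires a contiguous array.
    if t.ndim == dim:
        return t
    assert t.flags['C_CONTIGUOUS']
    size = [t.shape[i] if i < t.ndim else 1 for i in range(dim)]
    for i in range(dim, t.ndim):
        size[dim - 1] *= t.shape[i]
    return t.reshape(size)

x = np.zeros((2, 3, 4, 5))
print(as_dim(x, 3).shape)  # (2, 3, 20): trailing axes collapsed
```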
encoding/src/encoding_lib.h

```diff
@@ -22,3 +22,5 @@
 int Encoding_Float_aggregate_forward(THCudaTensor *E, THCudaTensor *A,
     THCudaTensor *R);
+int Encoding_Float_aggregate_backward(THCudaTensor *G, THCudaTensor *L,
+    THCudaTensor *R);
```
encoding/src/generic/encoding_generic.c

```diff
@@ -12,20 +12,26 @@
 #define THC_GENERIC_FILE "generic/encoding_generic.c"
 #else
 
-int Encoding_Float_aggregate_forward(THCudaTensor *E, THCudaTensor *A,
+int Encoding_(aggregate_forward)(THCudaTensor *E, THCudaTensor *A,
     THCudaTensor *R)
 /*
  * Aggregate operation
  */
 {
+    if (THCTensor_(nDimension)(state, E) != 3 ||
+        THCTensor_(nDimension)(state, A) != 3 ||
+        THCTensor_(nDimension)(state, R) != 4)
+        perror("Encoding: incorrect input dims. \n");
     Encoding_(Aggregate_Forward)(state, E, A, R);
     /* C function return number of the outputs */
     return 0;
 }
+
+int Encoding_(aggregate_backward)(THCudaTensor *E, THCudaTensor *A,
+    THCudaTensor *R)
+/*
+ * Aggregate operation
+ */
+{
+    Encoding_(Aggregate_Backward)(state, E, A, R);
+    /* C function return number of the outputs */
+    return 0;
+}
 #endif
```
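These generic wrappers are what the FFI module ultimately exposes; for float, the `Encoding_(NAME)` macro expands to the names declared in encoding/src/encoding_lib.h. A sketch of calling them directly from Python, assuming the usual torch.utils.ffi calling convention (in practice an autograd.Function wrapper would sit on top of this):

```python
import torch
from encoding._ext import encoding_lib  # module name from build.py

B, N, K, D = 2, 10, 4, 8
A = torch.cuda.FloatTensor(B, N, K).normal_()     # assignment weights
R = torch.cuda.FloatTensor(B, N, K, D).normal_()  # residuals
E = torch.cuda.FloatTensor(B, K, D).zero_()       # output buffer
# Fills E in place; returns 0 ("number of outputs") per the C convention above.
encoding_lib.Encoding_Float_aggregate_forward(E, A, R)
```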
make.sh (deleted, 100644 → 0)

```diff
-#!/usr/bin/env bash
-cd encoding/
-mkdir -p build && cd build
-cmake ..
-make install
-cd ../..
-python setup.py install
```
osx_make.sh (deleted, 100644 → 0)

```diff
-#!/usr/bin/env bash
-cd encoding/
-mkdir -p build && cd build
-cmake ..
-make install
-cd ../..
-MACOSX_DEPLOYMENT_TARGET=10.9 CC=clang CXX=clang++ python setup.py install
```