"vscode:/vscode.git/clone" did not exist on "26c2e0bd35feb7f958924269ccfba6331a1dadbc"
Commit b3091036 authored by rusty1s's avatar rusty1s
Browse files

impl of tensorinfo

parent 9d0fa071
......@@ -37,7 +37,8 @@ def test_scatter_max(str):
assert input.grad.data.tolist() == expected_grad_input
@pytest.mark.parametrize('str', tensor_strs)
# @pytest.mark.parametrize('str', tensor_strs)
@pytest.mark.parametrize('str', ['FloatTensor'])
def test_scatter_cuda_max(str):
input = [[2, 0, 1, 4, 3], [0, 2, 1, 3, 4]]
index = [[4, 5, 4, 2, 3], [0, 0, 2, 2, 1]]
......
// Grid-stride loop: each thread starts at its flat global index and hops by
// the total number of launched threads, so any grid size covers all n items.
#define KERNEL_LOOP(i, n) \
for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; i += blockDim.x * gridDim.x)
const int MAX_DIMS = 25;      // maximum tensor rank TensorInfo can hold
const int NUM_THREADS = 1024; // threads per block for kernel launches
// Number of blocks needed to cover n elements (ceiling division by NUM_THREADS).
inline int GET_BLOCKS(const int n) {
  const int rounded_up = n + NUM_THREADS - 1;
  return rounded_up / NUM_THREADS;
}
// Plain-value view of a tensor: raw data pointer plus per-dimension size and
// stride, with fixed-capacity arrays so the whole struct can be passed by
// value as a CUDA kernel argument.
template <typename T>
struct TensorInfo {
  TensorInfo(T *t, int d, int sz[MAX_DIMS], int st[MAX_DIMS]) {
    data = t;
    dims = d;
    for (int dim = 0; dim < d; dim++) {
      size[dim] = sz[dim];
      stride[dim] = st[dim];
    }
  }
  T *data;             // pointer to the first element
  int dims;            // number of valid entries in size/stride
  int size[MAX_DIMS];  // extent of each dimension
  int stride[MAX_DIMS];// element stride of each dimension
};
#ifndef THC_GENERIC_FILE
#define THC_GENERIC_FILE "generic/common.cu"
#else
// Validate the tensor arguments of a scatter op:
//  * output and input (2 real tensors) must live on the same GPU,
//  * index (1 long tensor) must be on that GPU as well,
//  * output's rank must fit the fixed-size TensorInfo arrays (MAX_DIMS).
void thc_(check)(THCState *state, THCTensor *output, THCudaLongTensor *index, THCTensor *input) {
THCAssertSameGPU(THCTensor_(checkGPU)(state, 2, output, input));
THCAssertSameGPU(THCudaLongTensor_checkGPU(state, 1, index));
THArgCheck(THCTensor_(nDimension)(state, output) <= MAX_DIMS, 1, "Tensor too large or too many dimensions");
}
// Build a kernel-passable TensorInfo<real> snapshot (data pointer, rank,
// per-dimension sizes and strides) from a THCTensor.
TensorInfo<real> thc_(getTensorInfo)(THCState *state, THCTensor *tensor) {
  real *data = THCTensor_(data)(state, tensor);
  const int nDims = THCTensor_(nDimension)(state, tensor);
  int size[MAX_DIMS];
  int stride[MAX_DIMS];
  for (int d = 0; d < nDims; d++) {
    size[d] = THCTensor_(size)(state, tensor, d);
    stride[d] = THCTensor_(stride)(state, tensor, d);
  }
  return TensorInfo<real>(data, nDims, size, stride);
}
#endif
......@@ -2,42 +2,40 @@
#define THC_GENERIC_FILE "generic/kernel.cu"
#else
// Validate the tensor arguments of a scatter op (same contract as thc_(check)
// in generic/common.cu): output/input/index on the same GPU, output rank
// bounded by MAX_DIMS.
void check(THCState *state, THCTensor *output, THCudaLongTensor *index, THCTensor *input) {
  // Counts must match the number of tensors passed: two real tensors here,
  // one long tensor below (the original had the 1 and 2 swapped).
  THCAssertSameGPU(THCTensor_(checkGPU)(state, 2, output, input));
  THCAssertSameGPU(THCudaLongTensor_checkGPU(state, 1, index));
  THArgCheck(THCTensor_(nDimension)(state, output) <= MAX_DIMS, 1, "Tensor too large or too many dimensions");
}
// Scatter-multiply along `dim`. Placeholder: arguments are validated but the
// kernel is not implemented yet (debug printf only).
void scatter_(mul)(THCState *state, int dim, THCTensor *output, THCudaLongTensor *index, THCTensor *input) {
  // Single validation call; the original also called check() and computed an
  // unused element count / block dim (leftovers from an earlier revision).
  thc_(check)(state, output, index, input);
  printf("mul");
}
// Scatter-divide along `dim`. Placeholder: arguments are validated but the
// kernel is not implemented yet (debug printf only).
void scatter_(div)(THCState *state, int dim, THCTensor *output, THCudaLongTensor *index, THCTensor *input) {
  // Single validation call; the redundant check() from the old revision is gone.
  thc_(check)(state, output, index, input);
  printf("div");
}
// Scatter-mean along `dim`; `num_output` will hold the per-slot counts.
// Placeholder: arguments are validated but the kernel is not implemented yet.
void scatter_(mean)(THCState *state, int dim, THCTensor *output, THCudaLongTensor *index, THCTensor *input, THCTensor *num_output) {
  // Single validation call; the redundant check() from the old revision is gone.
  thc_(check)(state, output, index, input);
  printf("mean");
}
// Scatter-max along `dim`: writes maxima into `output` and the argmax indices
// into `arg_output`. Launches one thread per index element; maxKernel uses a
// grid-stride loop, so GET_BLOCKS/NUM_THREADS need not divide n evenly.
void scatter_(max)(THCState *state, int dim, THCTensor *output, THCudaLongTensor *index, THCTensor *input, THCudaLongTensor *arg_output) {
  // Single validation call; the duplicate check() and stray printf("max")
  // from the old revision are removed.
  thc_(check)(state, output, index, input);
  const int n = THCudaLongTensor_nElement(state, index);
  TensorInfo<real> outputInfo = thc_(getTensorInfo)(state, output);
  TensorInfo<int64_t> indexInfo = thc_getTensorInfo_Long(state, index);
  TensorInfo<real> inputInfo = thc_(getTensorInfo)(state, input);
  TensorInfo<int64_t> argOutputInfo = thc_getTensorInfo_Long(state, arg_output);
  // Dims = -1 selects the fully-dynamic indexing path of maxKernel.
  maxKernel<real, -1><<<GET_BLOCKS(n), NUM_THREADS, 0, THCState_getCurrentStream(state)>>>(outputInfo, indexInfo, inputInfo, argOutputInfo, dim, n);
  // Kernel launches fail silently otherwise; surface bad-config errors now.
  THCudaCheck(cudaGetLastError());
}
// Scatter-min along `dim`. Placeholder: arguments are validated but the
// kernel is not implemented yet (debug printf only).
void scatter_(min)(THCState *state, int dim, THCTensor *output, THCudaLongTensor *index, THCTensor *input, THCudaLongTensor *arg_output) {
  // Single validation call; the redundant check() from the old revision is gone.
  thc_(check)(state, output, index, input);
  printf("min");
}
// Backward pass of the index/gather step. Placeholder: arguments are
// validated but the kernel is not implemented yet (debug printf only).
void index_backward(THCState *state, int dim, THCTensor *output, THCudaLongTensor *index, THCTensor *grad, THCudaLongTensor *arg_grad) {
  // Single validation call; the redundant check() from the old revision is gone.
  thc_(check)(state, output, index, grad);
  printf("index_backward");
}
......
#include <THC/THC.h>
#include "kernel.h"
#include "common.cuh"
// Name-mangling helpers: map the generic names used in generic/*.cu onto
// per-type symbols when THCGenerateAllTypes.h re-includes those files.
#define scatter_(NAME) TH_CONCAT_4(scatter_, NAME, _kernel_, Real)
#define index_backward TH_CONCAT_2(index_backward_kernel_, Real)
#define check TH_CONCAT_2(check_kernel_, Real)
#define thc_(NAME) TH_CONCAT_4(thc_, NAME, _, Real)
// NOTE: MAX_DIMS and NUM_THREADS are intentionally NOT defined here.
// common.cuh already provides them as `const int` (25 / 1024); the old macro
// NUM_THREADS = 32 * 16 (= 512, and unparenthesized) silently disagreed with
// GET_BLOCKS, which divides by the 1024-thread constant.
#include "generic/common.cu"
#include "THCGenerateAllTypes.h"
// Scatter-max kernel: one logical thread per index element, walking a
// grid-stride loop over the n entries. Body is an unimplemented stub in this
// commit — it only iterates; the max/argmax writes are still TODO.
// Dims is a compile-time rank specialization (-1 presumably means "dynamic
// rank" — confirm once the body is implemented).
template <typename Real, int Dims>
__global__ void maxKernel(TensorInfo<Real> output, TensorInfo<int64_t> index, TensorInfo<Real> input, TensorInfo<int64_t> arg_output, const int dim, const int n) {
KERNEL_LOOP(i, n) {
}
}
#include "generic/kernel.cu"
#include "THCGenerateAllTypes.h"
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment