"torchvision/git@developer.sourcefind.cn:OpenDAS/vision.git" did not exist on "74b6a750c7b748d03a3c80f29ebf17ca98dcab8b"
encoding_kernel.c 8.44 KB
Newer Older
Hang Zhang's avatar
init  
Hang Zhang committed
1
2
3
4
5
6
7
8
9
10
11
/*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 * Created by: Hang Zhang
 * ECE Department, Rutgers University
 * Email: zhang.hang@rutgers.edu
 * Copyright (c) 2017
 *
 * This source code is licensed under the MIT-style license found in the
 * LICENSE file in the root directory of this source tree 
 *+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 */
#ifndef THC_GENERIC_FILE
#define THC_GENERIC_FILE "generic/encoding_kernel.c"
#else
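/*
 * Like other THC "generic" files, this file is textually included once per
 * scalar type, with `real` and `THCTensor` expanding to the type-specific
 * variants (e.g. float / THCudaTensor). `Encoding_(NAME)` is presumably a
 * name-mangling macro defined in this project's headers, analogous to
 * `THCTensor_(NAME)`.
 */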

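/*
 * Based on the operator names (AggOp, reduce_agg) and the encoding-layer
 * formulation, the kernel below presumably aggregates the residuals with the
 * assignment weights over the N input descriptors:
 *     E[b][k][d] = sum_i A[b][i][k] * (X[b][i][d] - C[k][d])
 * with one thread block per output element (b, k, d).
 */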
__global__ void Encoding_(Aggregate_Forward_kernel) (
    THCDeviceTensor<real, 3> E,
    THCDeviceTensor<real, 3> A,
    THCDeviceTensor<real, 3> X,
    THCDeviceTensor<real, 2> C)
/*
 * aggregating forward kernel function
 */
{
    /* declarations of the variables */
    int b, k, d, N;
    /* Get the index and channels */ 
    b = blockIdx.z;
    d = blockIdx.x;
    k = blockIdx.y;
    N = X.getSize(1);

    /* main operation */
    Encoding_(AggOp) g(A,X,C);
    E[b][k][d] = Encoding_(reduce_agg)(g,b,k,d,N);
}

void Encoding_(Aggregate_Forward)(THCState *state, THCTensor *E_, 
    THCTensor *A_, THCTensor *X_, THCTensor *C_)
/*
 * aggregating forward the residuals with assignment weights
 */
{
    /* Check the GPU index and tensor dims*/
    THCTensor_(checkGPU)(state, 4, E_, A_, X_, C_);
    if (THCTensor_(nDimension)(state, E_) != 3 ||
        THCTensor_(nDimension)(state, A_) != 3 ||
        THCTensor_(nDimension)(state, X_) != 3 ||
        THCTensor_(nDimension)(state, C_) != 2)
        THError("Encoding: incorrect input dims. \n");
    /* Device tensors */
    THCDeviceTensor<real, 3> E = devicetensor<3>(state, E_);
    THCDeviceTensor<real, 3> A = devicetensor<3>(state, A_);
    THCDeviceTensor<real, 3> X = devicetensor<3>(state, X_);
    THCDeviceTensor<real, 2> C = devicetensor<2>(state, C_);
    /* kernel function */
    cudaStream_t stream = THCState_getCurrentStream(state);
    // one thread block per output element (d, k, b): grid = (D, K, B)
    dim3 blocks(C.getSize(1), C.getSize(0), X.getSize(0));
    // each block reduces over the N input descriptors
    dim3 threads(getNumThreads(X.getSize(1)));
    Encoding_(Aggregate_Forward_kernel)<<<blocks, threads, 0, stream>>>
        (E, A, X, C);
    THCudaCheck(cudaGetLastError());
}

/*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
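/*
 * Backward of the aggregation w.r.t. the assignment weights. By the chain
 * rule on the formula above, this should reduce to
 *     GA[b][i][k] = sum_d GE[b][k][d] * (X[b][i][d] - C[k][d])
 * with one thread block per (b, i, k), reducing over the D channels.
 */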
__global__ void Encoding_(Aggregate_Backward_kernel) (
    THCDeviceTensor<real, 3> GA,
    THCDeviceTensor<real, 3> GE,
    THCDeviceTensor<real, 3> A,
    THCDeviceTensor<real, 3> X,
    THCDeviceTensor<real, 2> C)
/*
 * aggregating backward kernel function:
 * computes GA (dl/dA) from GE (dl/dE), given X and C
 */
{
    /* declarations of the variables */
    int b, k, i, D;
    /* Get the index and channels */ 
    b = blockIdx.z;
    i = blockIdx.y;
    k = blockIdx.x;
    D = GE.getSize(2);
    /* main operation */
    Encoding_(AggBackOp) g(GE,X,C);
    GA[b][i][k] = Encoding_(reduce_aggback)(g,b,i,k,D);
}

void Encoding_(Aggregate_Backward)(THCState *state, THCTensor *GA_, 
     THCTensor *GE_, THCTensor *A_, THCTensor *X_, THCTensor *C_)
/*
 * aggregate backward to the assignment weights:
 * computes GA (dl/dA) from GE (dl/dE), given X and C
 */
{
    /* Check the GPU index and tensor dims*/
    THCTensor_(checkGPU)(state, 5, GA_, GE_, A_, X_, C_);
    if (THCTensor_(nDimension)(state, GA_) != 3 ||
        THCTensor_(nDimension)(state, GE_)  != 3 ||
        THCTensor_(nDimension)(state, A_)  != 3 ||
        THCTensor_(nDimension)(state, X_)  != 3 ||
        THCTensor_(nDimension)(state, C_)  != 2)
    THError("Encoding: incorrect input dims. \n");
    /* Device tensors */
    THCDeviceTensor<real, 3> GA = devicetensor<3>(state, GA_);
    THCDeviceTensor<real, 3> GE = devicetensor<3>(state, GE_);
    THCDeviceTensor<real, 3> A = devicetensor<3>(state, A_);
    THCDeviceTensor<real, 3> X = devicetensor<3>(state, X_);
    THCDeviceTensor<real, 2> C = devicetensor<2>(state, C_);
    /* kernel function */
    cudaStream_t stream = THCState_getCurrentStream(state);
    // one thread block per output element (k, i, b): grid = (K, N, B)
    dim3 blocks(C.getSize(0), X.getSize(1), X.getSize(0));
    // each block reduces over the D feature channels
    dim3 threads(getNumThreads(C.getSize(1)));
    Encoding_(Aggregate_Backward_kernel)<<<blocks, threads, 0, stream>>>
        (GA, GE, A, X, C);
    THCudaCheck(cudaGetLastError());
}

/*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
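/*
 * Scaled squared-L2 distance between each descriptor and each codeword,
 * presumably
 *     SL[b][i][k] = S[k] * sum_d (X[b][i][d] - C[k][d])^2
 * with one thread block per (b, i, k), reducing over the D channels.
 */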
__global__ void Encoding_(ScaledL2_Forward_kernel) (
    THCDeviceTensor<real, 3> SL,
    THCDeviceTensor<real, 3> X,
    THCDeviceTensor<real, 2> C,
    THCDeviceTensor<real, 1> S)
/*
 * scaled squared-L2 distance forward kernel function
 */
{
    /* declarations of the variables */
    int b, k, i, D;
    /* Get the index and channels */ 
    b = blockIdx.z;
    k = blockIdx.x;
    i = blockIdx.y;
    D = X.getSize(2);
    /* main operation */
    Encoding_(L2Op) g(X,C);
    SL[b][i][k] = S[k] * Encoding_(reduce_sl2)(g,b,i,k,D);
}

void Encoding_(ScaledL2_Forward)(
    THCState *state, THCTensor *SL_,  THCTensor *X_,
    THCTensor *C_,  THCTensor *S_)
/*
 * scaled squared-L2 distance forward host function
 */
{
    /* Check the GPU index and tensor dims*/
    THCTensor_(checkGPU)(state, 4, SL_, X_, C_, S_); 
    if (THCTensor_(nDimension)(state, SL_) != 3 ||
        THCTensor_(nDimension)(state, X_) != 3 ||
        THCTensor_(nDimension)(state, C_) != 2 ||
        THCTensor_(nDimension)(state, S_) != 1)
    THError("Encoding: incorrect input dims. \n");
    /* Device tensors */
    THCDeviceTensor<real, 3> SL = devicetensor<3>(state, SL_);
    THCDeviceTensor<real, 3> X  = devicetensor<3>(state, X_);
    THCDeviceTensor<real, 2> C  = devicetensor<2>(state, C_);
    THCDeviceTensor<real, 1> S  = devicetensor<1>(state, S_);
    /* kernel function */
    cudaStream_t stream = THCState_getCurrentStream(state);
    dim3 blocks(C.getSize(0), X.getSize(1), X.getSize(0));
    dim3 threads(getNumThreads(C.getSize(1)));
    Encoding_(ScaledL2_Forward_kernel)<<<blocks, threads, 0, stream>>>
        (SL, X, C, S);
    THCudaCheck(cudaGetLastError());
}

/*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
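/*
 * Backward of the scaled-L2 term w.r.t. the inputs X. By the chain rule this
 * should be
 *     GX[b][i][d] = sum_k GSL[b][i][k] * 2 * S[k] * (X[b][i][d] - C[k][d])
 * with one thread block per (b, i, d), reducing over the K codewords.
 */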
__global__ void Encoding_(ScaledL2X_Backward_kernel) (
    THCDeviceTensor<real, 3> GSL,
    THCDeviceTensor<real, 3> GX,
    THCDeviceTensor<real, 3> X,
    THCDeviceTensor<real, 2> C,
    THCDeviceTensor<real, 1> S)
/*
 * scaled-L2 backward kernel function w.r.t. the input X
 */
{
    /* declarations of the variables */
    int b, d, i, K;
    /* Get the index and channels */ 
    b = blockIdx.z;
    d = blockIdx.x;
    i = blockIdx.y;
    K = C.getSize(0);
    /* main operation */
    Encoding_(L2XBackOp) g(GSL,X,C,S);
    GX[b][i][d] = Encoding_(reduce_sl2xback)(g,b,i,d,K);
}

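/*
 * Backward of the scaled-L2 term w.r.t. the codewords C. By the chain rule
 * this should be
 *     GC[k][d] = sum_b sum_i GSL[b][i][k] * 2 * S[k] * (C[k][d] - X[b][i][d])
 * with one thread block per (k, d), reducing over the batch and the N
 * descriptors.
 */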
__global__ void Encoding_(ScaledL2C_Backward_kernel) (
    THCDeviceTensor<real, 3> GSL,
    THCDeviceTensor<real, 2> GC,
    THCDeviceTensor<real, 3> X,
    THCDeviceTensor<real, 2> C,
    THCDeviceTensor<real, 1> S)
/*
 * scaled-L2 backward kernel function w.r.t. the codewords C
 */
{
    /* declarations of the variables */
    int k, d, B, N;
    /* Get the index and channels */ 
    d = blockIdx.x;
    k = blockIdx.y;
    B = X.getSize(0);
    N = X.getSize(1);
    /* main operation */
    Encoding_(L2CBackOp) g(GSL,X,C,S);
    GC[k][d] = Encoding_(reduce_sl2cback)(g,k,d,B,N);
}

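/*
 * Note: the routine below only produces gradients w.r.t. X and C; no
 * gradient w.r.t. the smoothing factors S is computed here.
 */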
void Encoding_(ScaledL2_Backward)(
    THCState *state, THCTensor *GSL_, THCTensor *GX_, THCTensor *GC_,
    THCTensor *X_, THCTensor *C_, THCTensor *S_)
/*
 * scaled-L2 backward host function: launches the two kernels above to
 * compute GX and GC
 */
{
    /* Check the GPU index and tensor dims*/
    THCTensor_(checkGPU)(state, 6, GSL_, GX_, GC_, X_, C_, S_); 
    if (THCTensor_(nDimension)(state, GSL_) != 3 ||
        THCTensor_(nDimension)(state, GX_)  != 3 ||
        THCTensor_(nDimension)(state, GC_)  != 2 ||
        THCTensor_(nDimension)(state, X_)   != 3 ||
        THCTensor_(nDimension)(state, C_)   != 2 ||
        THCTensor_(nDimension)(state, S_)   != 1)
    THError("Encoding: incorrect input dims. \n");
    /* Device tensors */
    THCDeviceTensor<real, 3> GSL = devicetensor<3>(state, GSL_);
    THCDeviceTensor<real, 3> GX = devicetensor<3>(state, GX_);
    THCDeviceTensor<real, 2> GC = devicetensor<2>(state, GC_);
    THCDeviceTensor<real, 3> X  = devicetensor<3>(state, X_);
    THCDeviceTensor<real, 2> C  = devicetensor<2>(state, C_);
    THCDeviceTensor<real, 1> S = devicetensor<1>(state, S_);
    /* kernel function */
    cudaStream_t stream = THCState_getCurrentStream(state);
    dim3 blocks(X.getSize(2), X.getSize(1), X.getSize(0));
    dim3 threads(getNumThreads(C.getSize(0)));
    Encoding_(ScaledL2X_Backward_kernel)<<<blocks, threads, 0, stream>>>
        (GSL, GX, X, C, S);
    THCudaCheck(cudaGetLastError());
    dim3 blocks2(C.getSize(1), C.getSize(0));
    dim3 threads2(getNumThreads(X.getSize(1)));
    Encoding_(ScaledL2C_Backward_kernel)<<<blocks2, threads2, 0, stream>>>
        (GSL, GC, X, C, S);
    THCudaCheck(cudaGetLastError());
}

#endif