Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
SparseConvNet
Commits
1df7b845
Commit
1df7b845
authored
May 14, 2018
by
Benjamin Thomas Graham
Browse files
3d segmantation
parent
f2e3800b
Changes
204
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
754 additions
and
15 deletions
+754
-15
sparseconvnet/SCN/generic/GPU/BatchwiseMultiplicativeDropout.h
...econvnet/SCN/generic/GPU/BatchwiseMultiplicativeDropout.h
+0
-0
sparseconvnet/SCN/generic/GPU/Convolution.cu
sparseconvnet/SCN/generic/GPU/Convolution.cu
+313
-0
sparseconvnet/SCN/generic/GPU/Convolution.h
sparseconvnet/SCN/generic/GPU/Convolution.h
+0
-0
sparseconvnet/SCN/generic/GPU/Deconvolution.cu
sparseconvnet/SCN/generic/GPU/Deconvolution.cu
+2
-2
sparseconvnet/SCN/generic/GPU/Deconvolution.h
sparseconvnet/SCN/generic/GPU/Deconvolution.h
+0
-0
sparseconvnet/SCN/generic/GPU/IOLayers.cu
sparseconvnet/SCN/generic/GPU/IOLayers.cu
+250
-0
sparseconvnet/SCN/generic/GPU/IOLayers.h
sparseconvnet/SCN/generic/GPU/IOLayers.h
+3
-3
sparseconvnet/SCN/generic/GPU/LeakyReLU.cu
sparseconvnet/SCN/generic/GPU/LeakyReLU.cu
+0
-0
sparseconvnet/SCN/generic/GPU/LeakyReLU.h
sparseconvnet/SCN/generic/GPU/LeakyReLU.h
+0
-0
sparseconvnet/SCN/generic/GPU/MaxPooling.cu
sparseconvnet/SCN/generic/GPU/MaxPooling.cu
+47
-3
sparseconvnet/SCN/generic/GPU/MaxPooling.h
sparseconvnet/SCN/generic/GPU/MaxPooling.h
+0
-0
sparseconvnet/SCN/generic/GPU/NetworkInNetwork.cu
sparseconvnet/SCN/generic/GPU/NetworkInNetwork.cu
+0
-0
sparseconvnet/SCN/generic/GPU/RuleBookIterator.h
sparseconvnet/SCN/generic/GPU/RuleBookIterator.h
+6
-4
sparseconvnet/SCN/generic/GPU/SparseToDense.cu
sparseconvnet/SCN/generic/GPU/SparseToDense.cu
+2
-3
sparseconvnet/SCN/generic/GPU/SparseToDense.h
sparseconvnet/SCN/generic/GPU/SparseToDense.h
+0
-0
sparseconvnet/SCN/generic/GPU/THGenerateCudaFloatTypes.h
sparseconvnet/SCN/generic/GPU/THGenerateCudaFloatTypes.h
+0
-0
sparseconvnet/SCN/generic/GPU/THGenerateDimCudaFloatTypes.h
sparseconvnet/SCN/generic/GPU/THGenerateDimCudaFloatTypes.h
+0
-0
sparseconvnet/SCN/generic/GPU/UnPooling.cu
sparseconvnet/SCN/generic/GPU/UnPooling.cu
+58
-0
sparseconvnet/SCN/generic/GPU/UnPooling.h
sparseconvnet/SCN/generic/GPU/UnPooling.h
+73
-0
sparseconvnet/SCN/generic/Geometry/ActivePoolingRules.h
sparseconvnet/SCN/generic/Geometry/ActivePoolingRules.h
+0
-0
No files found.
PyTorch/
sparseconvnet/SCN/generic/GPU/BatchwiseMultiplicativeDropout.h
→
sparseconvnet/SCN/generic/GPU/BatchwiseMultiplicativeDropout.h
View file @
1df7b845
File moved
PyTorch/
sparseconvnet/SCN/generic/GPU/Convolution.cu
→
sparseconvnet/SCN/generic/GPU/Convolution.cu
View file @
1df7b845
...
...
@@ -16,7 +16,7 @@ extern "C" double scn_DR_(Convolution_updateOutput)(
THLongTensor
*
inputSize
,
THLongTensor
*
outputSize
,
THLongTensor
*
filterSize
,
THLongTensor
*
filterStride
,
void
**
m
,
THCTensor
*
input_features
,
THCTensor
*
output_features
,
THCTensor
*
weight
,
THCTensor
*
bias
,
long
filterVolume
,
THCITensor
*
rulesBuffer
)
{
long
filterVolume
)
{
SCN_INITIALIZE_AND_REFERENCE
(
Metadata
<
Dimension
>
,
m
)
auto
_rules
=
_m
.
getRuleBook
(
inputSize
,
outputSize
,
filterSize
,
filterStride
,
true
);
...
...
@@ -38,9 +38,9 @@ extern "C" double scn_DR_(Convolution_updateOutput)(
for
(
uInt
i
=
0
;
i
<
op
;
i
+=
32
)
{
uInt
blockDim
=
min
(
32L
,
op
-
i
);
uInt
gridDim
=
min
(
4096
,
nActive
);
Convolution_fp_bias
<<
<
gridDim
,
blockDim
,
0
,
THCState_getCurrentStream
(
state
)
>>>
(
oF
+
i
,
b
+
i
,
op
,
op
,
nActive
);
Convolution_fp_bias
<<<
gridDim
,
blockDim
,
0
,
THCState_getCurrentStream
(
state
)
>>>
(
oF
+
i
,
b
+
i
,
op
,
op
,
nActive
);
}
}
uInt
c
=
ip
*
op
;
...
...
@@ -57,7 +57,7 @@ extern "C" void scn_DR_(Convolution_backward)(
THLongTensor
*
filterStride
,
void
**
m
,
THCTensor
*
input_features
,
THCTensor
*
d_input_features
,
THCTensor
*
d_output_features
,
THCTensor
*
weight
,
THCTensor
*
d_weight
,
THCTensor
*
d_bias
,
long
filterVolume
,
THCITensor
*
rulesBuffer
)
{
long
filterVolume
)
{
SCN_INITIALIZE_AND_REFERENCE
(
Metadata
<
Dimension
>
,
m
)
auto
_rules
=
_m
.
getRuleBook
(
inputSize
,
outputSize
,
filterSize
,
filterStride
,
true
);
...
...
@@ -90,7 +90,7 @@ extern "C" void scn_DR_(Convolution_backward)(
extern
"C"
double
scn_DR_
(
SubmanifoldConvolution_updateOutput
)(
THLongTensor
*
inputSize
,
THLongTensor
*
filterSize
,
void
**
m
,
THCTensor
*
input_features
,
THCTensor
*
output_features
,
THCTensor
*
weight
,
THCTensor
*
bias
,
long
filterVolume
,
THCITensor
*
rulesBuffer
)
{
THCTensor
*
bias
,
long
filterVolume
)
{
SCN_INITIALIZE_AND_REFERENCE
(
Metadata
<
Dimension
>
,
m
)
auto
_rules
=
_m
.
getSubmanifoldRuleBook
(
inputSize
,
filterSize
,
true
);
uInt
nActive
=
_m
.
getNActive
(
inputSize
);
...
...
@@ -111,9 +111,9 @@ extern "C" double scn_DR_(SubmanifoldConvolution_updateOutput)(
for
(
uInt
i
=
0
;
i
<
op
;
i
+=
32
)
{
uInt
blockDim
=
min
(
32L
,
op
-
i
);
uInt
gridDim
=
min
(
4096
,
nActive
);
Convolution_fp_bias
<<
<
gridDim
,
blockDim
,
0
,
THCState_getCurrentStream
(
state
)
>>>
(
oF
+
i
,
b
+
i
,
op
,
op
,
nActive
);
Convolution_fp_bias
<<<
gridDim
,
blockDim
,
0
,
THCState_getCurrentStream
(
state
)
>>>
(
oF
+
i
,
b
+
i
,
op
,
op
,
nActive
);
}
}
uInt
c
=
ip
*
op
;
...
...
@@ -129,7 +129,7 @@ extern "C" void scn_DR_(SubmanifoldConvolution_backward)(
THLongTensor
*
inputSize
,
THLongTensor
*
filterSize
,
void
**
m
,
THCTensor
*
input_features
,
THCTensor
*
d_input_features
,
THCTensor
*
d_output_features
,
THCTensor
*
weight
,
THCTensor
*
d_weight
,
THCTensor
*
d_bias
,
long
filterVolume
,
THCITensor
*
rulesBuffer
)
{
THCTensor
*
d_bias
,
long
filterVolume
)
{
SCN_INITIALIZE_AND_REFERENCE
(
Metadata
<
Dimension
>
,
m
)
auto
_rules
=
_m
.
getSubmanifoldRuleBook
(
inputSize
,
filterSize
,
true
);
uInt
nActive
=
_m
.
getNActive
(
inputSize
);
...
...
@@ -158,4 +158,156 @@ extern "C" void scn_DR_(SubmanifoldConvolution_backward)(
}
}
extern
"C"
double
scn_DR_
(
FullConvolution_updateOutput
)(
THLongTensor
*
inputSize
,
THLongTensor
*
outputSize
,
THLongTensor
*
filterSize
,
THLongTensor
*
filterStride
,
void
**
mIn
,
void
**
mOut
,
THCTensor
*
input_features
,
THCTensor
*
output_features
,
THCTensor
*
weight
,
THCTensor
*
bias
,
long
filterVolume
,
THCITensor
*
rulesBuffer
)
{
SCN_INITIALIZE_AND_REFERENCE
(
Metadata
<
Dimension
>
,
mIn
)
SCN_INITIALIZE_AND_REFERENCE
(
Metadata
<
Dimension
>
,
mOut
)
auto
_rules
=
_mIn
.
getFullConvolutionRuleBook
(
inputSize
,
outputSize
,
filterSize
,
filterStride
,
_mOut
);
uInt
nActive
=
_mOut
.
getNActive
(
outputSize
);
THCTensor_
(
resize2d
)(
state
,
output_features
,
nActive
,
weight
->
size
[
1
]);
if
(
not
bias
)
THCTensor_
(
zero
)(
state
,
output_features
);
double
flops
=
0
;
if
(
nActive
)
{
auto
iF
=
THCTensor_
(
data
)(
state
,
input_features
);
auto
oF
=
THCTensor_
(
data
)(
state
,
output_features
);
auto
ip
=
input_features
->
size
[
1
];
auto
op
=
output_features
->
size
[
1
];
auto
w
=
THCTensor_
(
data
)(
state
,
weight
);
if
(
bias
)
{
auto
b
=
THCTensor_
(
data
)(
state
,
bias
);
for
(
uInt
i
=
0
;
i
<
op
;
i
+=
32
)
{
uInt
blockDim
=
min
(
32L
,
op
-
i
);
uInt
gridDim
=
min
(
4096
,
nActive
);
Convolution_fp_bias
<<<
gridDim
,
blockDim
,
0
,
THCState_getCurrentStream
(
state
)
>>>
(
oF
+
i
,
b
+
i
,
op
,
op
,
nActive
);
}
}
uInt
c
=
ip
*
op
;
RULEBOOKITERATOR
(
dConvolution_forward2
<
real
>
(
iF
,
oF
,
w
,
rbB
,
nHotB
,
ip
,
ip
,
op
,
op
,
THCState_getCurrentStream
(
state
));
,
w
+=
c
;
flops
+=
nHotB
*
c
;)
}
return
flops
;
}
extern
"C"
void
scn_DR_
(
FullConvolution_backward
)(
THLongTensor
*
inputSize
,
THLongTensor
*
outputSize
,
THLongTensor
*
filterSize
,
THLongTensor
*
filterStride
,
void
**
mIn
,
void
**
mOut
,
THCTensor
*
input_features
,
THCTensor
*
d_input_features
,
THCTensor
*
d_output_features
,
THCTensor
*
weight
,
THCTensor
*
d_weight
,
THCTensor
*
d_bias
,
long
filterVolume
)
{
SCN_INITIALIZE_AND_REFERENCE
(
Metadata
<
Dimension
>
,
mIn
)
SCN_INITIALIZE_AND_REFERENCE
(
Metadata
<
Dimension
>
,
mOut
)
auto
_rules
=
_mIn
.
getFullConvolutionRuleBook
(
inputSize
,
outputSize
,
filterSize
,
filterStride
,
_mOut
);
uInt
nActive
=
_mOut
.
getNActive
(
outputSize
);
THCTensor_
(
resizeAs
)(
state
,
d_input_features
,
input_features
);
THCTensor_
(
zero
)(
state
,
d_input_features
);
if
(
nActive
)
{
auto
iF
=
THCTensor_
(
data
)(
state
,
input_features
);
auto
diF
=
THCTensor_
(
data
)(
state
,
d_input_features
);
auto
doF
=
THCTensor_
(
data
)(
state
,
d_output_features
);
auto
ip
=
input_features
->
size
[
1
];
auto
op
=
d_output_features
->
size
[
1
];
auto
w
=
THCTensor_
(
data
)(
state
,
weight
);
auto
dw
=
THCTensor_
(
data
)(
state
,
d_weight
);
uInt
c
=
ip
*
op
;
RULEBOOKITERATOR
(
dConvolution_backward_dW2
<
real
>
(
iF
,
diF
,
doF
,
w
,
dw
,
rbB
,
nHotB
,
ip
,
ip
,
op
,
op
,
THCState_getCurrentStream
(
state
));
,
w
+=
c
;
dw
+=
c
;)
if
(
d_bias
)
{
auto
db
=
THCTensor_
(
data
)(
state
,
d_bias
);
Convolution_bp_bias
(
doF
,
db
,
op
,
op
,
nActive
,
THCState_getCurrentStream
(
state
));
}
}
}
extern
"C"
double
scn_DR_
(
RandomizedStrideConvolution_updateOutput
)(
THLongTensor
*
inputSize
,
THLongTensor
*
outputSize
,
THLongTensor
*
filterSize
,
THLongTensor
*
filterStride
,
void
**
m
,
THCTensor
*
input_features
,
THCTensor
*
output_features
,
THCTensor
*
weight
,
THCTensor
*
bias
,
long
filterVolume
,
THCITensor
*
rulesBuffer
)
{
SCN_INITIALIZE_AND_REFERENCE
(
Metadata
<
Dimension
>
,
m
)
auto
_rules
=
_m
.
getRandomizedStrideRuleBook
(
inputSize
,
outputSize
,
filterSize
,
filterStride
,
true
);
uInt
nActive
=
_m
.
getNActive
(
outputSize
);
THCTensor_
(
resize2d
)(
state
,
output_features
,
nActive
,
weight
->
size
[
1
]);
if
(
not
bias
)
THCTensor_
(
zero
)(
state
,
output_features
);
double
flops
=
0
;
if
(
nActive
)
{
auto
iF
=
THCTensor_
(
data
)(
state
,
input_features
);
auto
oF
=
THCTensor_
(
data
)(
state
,
output_features
);
auto
ip
=
input_features
->
size
[
1
];
auto
op
=
output_features
->
size
[
1
];
auto
w
=
THCTensor_
(
data
)(
state
,
weight
);
if
(
bias
)
{
auto
b
=
THCTensor_
(
data
)(
state
,
bias
);
for
(
uInt
i
=
0
;
i
<
op
;
i
+=
32
)
{
uInt
blockDim
=
min
(
32L
,
op
-
i
);
uInt
gridDim
=
min
(
4096
,
nActive
);
Convolution_fp_bias
<<<
gridDim
,
blockDim
,
0
,
THCState_getCurrentStream
(
state
)
>>>
(
oF
+
i
,
b
+
i
,
op
,
op
,
nActive
);
}
}
uInt
c
=
ip
*
op
;
RULEBOOKITERATOR
(
dConvolution_forward2
<
real
>
(
iF
,
oF
,
w
,
rbB
,
nHotB
,
ip
,
ip
,
op
,
op
,
THCState_getCurrentStream
(
state
));
,
w
+=
c
;
flops
+=
nHotB
*
c
;)
}
return
flops
;
}
extern
"C"
void
scn_DR_
(
RandomizedStrideConvolution_backward
)(
THLongTensor
*
inputSize
,
THLongTensor
*
outputSize
,
THLongTensor
*
filterSize
,
THLongTensor
*
filterStride
,
void
**
m
,
THCTensor
*
input_features
,
THCTensor
*
d_input_features
,
THCTensor
*
d_output_features
,
THCTensor
*
weight
,
THCTensor
*
d_weight
,
THCTensor
*
d_bias
,
long
filterVolume
)
{
SCN_INITIALIZE_AND_REFERENCE
(
Metadata
<
Dimension
>
,
m
)
auto
_rules
=
_m
.
getRandomizedStrideRuleBook
(
inputSize
,
outputSize
,
filterSize
,
filterStride
,
true
);
uInt
nActive
=
_m
.
getNActive
(
outputSize
);
THCTensor_
(
resizeAs
)(
state
,
d_input_features
,
input_features
);
THCTensor_
(
zero
)(
state
,
d_input_features
);
if
(
nActive
)
{
auto
iF
=
THCTensor_
(
data
)(
state
,
input_features
);
auto
diF
=
THCTensor_
(
data
)(
state
,
d_input_features
);
auto
doF
=
THCTensor_
(
data
)(
state
,
d_output_features
);
auto
ip
=
input_features
->
size
[
1
];
auto
op
=
d_output_features
->
size
[
1
];
auto
w
=
THCTensor_
(
data
)(
state
,
weight
);
auto
dw
=
THCTensor_
(
data
)(
state
,
d_weight
);
uInt
c
=
ip
*
op
;
RULEBOOKITERATOR
(
dConvolution_backward_dW2
<
real
>
(
iF
,
diF
,
doF
,
w
,
dw
,
rbB
,
nHotB
,
ip
,
ip
,
op
,
op
,
THCState_getCurrentStream
(
state
));
,
w
+=
c
;
dw
+=
c
;)
if
(
d_bias
)
{
auto
db
=
THCTensor_
(
data
)(
state
,
d_bias
);
Convolution_bp_bias
(
doF
,
db
,
op
,
op
,
nActive
,
THCState_getCurrentStream
(
state
));
}
}
}
#endif
PyTorch/
sparseconvnet/SCN/generic/GPU/Convolution.h
→
sparseconvnet/SCN/generic/GPU/Convolution.h
View file @
1df7b845
File moved
PyTorch/
sparseconvnet/SCN/generic/GPU/Deconvolution.cu
→
sparseconvnet/SCN/generic/GPU/Deconvolution.cu
View file @
1df7b845
...
...
@@ -16,7 +16,7 @@ extern "C" double scn_DR_(Deconvolution_updateOutput)(
THLongTensor
*
inputSize
,
THLongTensor
*
outputSize
,
THLongTensor
*
filterSize
,
THLongTensor
*
filterStride
,
void
**
m
,
THCTensor
*
input_features
,
THCTensor
*
output_features
,
THCTensor
*
weight
,
THCTensor
*
bias
,
long
filterVolume
,
THCITensor
*
rulesBuffer
)
{
long
filterVolume
)
{
SCN_INITIALIZE_AND_REFERENCE
(
Metadata
<
Dimension
>
,
m
)
auto
_rules
=
_m
.
getRuleBook
(
outputSize
,
inputSize
,
filterSize
,
filterStride
,
true
);
...
...
@@ -55,7 +55,7 @@ extern "C" void scn_DR_(Deconvolution_backward)(
THLongTensor
*
filterStride
,
void
**
m
,
THCTensor
*
input_features
,
THCTensor
*
d_input_features
,
THCTensor
*
d_output_features
,
THCTensor
*
weight
,
THCTensor
*
d_weight
,
THCTensor
*
d_bias
,
long
filterVolume
,
THCITensor
*
rulesBuffer
)
{
long
filterVolume
)
{
SCN_INITIALIZE_AND_REFERENCE
(
Metadata
<
Dimension
>
,
m
)
auto
_rules
=
_m
.
getRuleBook
(
outputSize
,
inputSize
,
filterSize
,
filterStride
,
true
);
...
...
PyTorch/
sparseconvnet/SCN/generic/GPU/Deconvolution.h
→
sparseconvnet/SCN/generic/GPU/Deconvolution.h
View file @
1df7b845
File moved
PyTorch/
sparseconvnet/SCN/generic/GPU/I
nput
Layer.cu
→
sparseconvnet/SCN/generic/GPU/I
O
Layer
s
.cu
View file @
1df7b845
...
...
@@ -5,61 +5,128 @@
// LICENSE file in the root directory of this source tree.
#ifndef TH_GENERIC_FILE_
#define TH_GENERIC_FILE_ "generic/GPU/I
nput
Layer.cu"
#define TH_GENERIC_FILE_ "generic/GPU/I
O
Layer
s
.cu"
#else
#include "I
nput
Layer.h"
#include "I
O
Layer
s
.h"
extern
"C"
void
scn_DR_
(
InputLayer_updateOutput
)(
void
**
m
,
THLongTensor
*
spatialSize
,
THLongTensor
*
input_coords
,
THCTensor
*
input_features
,
THCTensor
*
output_features
,
long
batchSize
,
long
mode
,
THCITensor
*
rulesBuffer
)
{
long
mode
)
{
SCN_INITIALIZE_AND_REFERENCE
(
Metadata
<
Dimension
>
,
m
)
_m
.
inputLayer
(
spatialSize
,
input_coords
,
batchSize
,
mode
);
uInt
nPlanes
=
input_features
->
size
[
1
];
THCTensor_
(
resize2d
)(
state
,
output_features
,
*
_m
.
inputNActive
,
nPlanes
);
THCTensor_
(
zero
)(
state
,
output_features
);
auto
&
rules
=
_m
.
inputLayerRuleBook
;
uInt
maxActive
=
rules
[
0
][
1
];
uInt
nRows
=
rules
[
0
][
3
];
THCITensor_resize1d
(
state
,
rulesBuffer
,
sizeof
(
uInt
)
*
rules
[
1
].
size
());
if
(
mode
==
0
)
{
THCTensor_
(
resizeAs
)(
state
,
output_features
,
input_features
);
THCTensor_
(
copy
)(
state
,
output_features
,
input_features
);
}
else
{
THCTensor_
(
resize2d
)(
state
,
output_features
,
*
_m
.
inputNActive
,
nPlanes
);
THCTensor_
(
zero
)(
state
,
output_features
);
auto
rulesBuffer
=
THCITensor_
(
new
)(
state
);
THCITensor_
(
resize1d
)(
state
,
rulesBuffer
,
sizeof
(
uInt
)
*
rules
[
1
].
size
());
auto
iF
=
THCTensor_
(
data
)(
state
,
input_features
);
auto
oF
=
THCTensor_
(
data
)(
state
,
output_features
);
auto
rb
=
(
uInt
*
)
THCITensor_data
(
state
,
rulesBuffer
);
auto
rb
=
(
uInt
*
)
THCITensor_
(
data
)
(
state
,
rulesBuffer
);
cudaMemcpy
(
rb
,
&
rules
[
1
][
0
],
sizeof
(
uInt
)
*
rules
[
1
].
size
(),
cudaMemcpyHostToDevice
);
InputLayer_fp
<
real
><<<
std
::
min
(
nRows
,
32768U
),
std
::
min
(
nPlanes
,
32U
),
0
,
THCState_getCurrentStream
(
state
)
>>>
(
iF
,
oF
,
nRows
,
maxActive
,
nPlanes
,
rb
,
mode
==
4
);
THCITensor_
(
free
)(
state
,
rulesBuffer
);
}
}
extern
"C"
void
scn_DR_
(
InputLayer_updateGradInput
)(
void
**
m
,
THCTensor
*
d_input_features
,
THCTensor
*
d_output_features
,
THCITensor
*
rulesBuffer
)
{
THCTensor
*
d_output_features
)
{
SCN_INITIALIZE_AND_REFERENCE
(
Metadata
<
Dimension
>
,
m
)
auto
&
rules
=
_m
.
inputLayerRuleBook
;
uInt
nPlanes
=
d_output_features
->
size
[
1
];
THCTensor_
(
resize2d
)(
state
,
d_input_features
,
rules
[
0
][
2
],
nPlanes
);
THCTensor_
(
zero
)(
state
,
d_input_features
);
auto
mode
=
rules
[
0
][
0
];
uInt
maxActive
=
rules
[
0
][
1
];
uInt
nRows
=
rules
[
0
][
3
];
THCITensor_resize1d
(
state
,
rulesBuffer
,
sizeof
(
uInt
)
*
rules
[
1
].
size
());
if
(
mode
==
0
)
{
THCTensor_
(
resizeAs
)(
state
,
d_input_features
,
d_output_features
);
THCTensor_
(
copy
)(
state
,
d_input_features
,
d_output_features
);
}
else
{
THCTensor_
(
resize2d
)(
state
,
d_input_features
,
rules
[
0
][
2
],
nPlanes
);
THCTensor_
(
zero
)(
state
,
d_input_features
);
auto
rulesBuffer
=
THCITensor_
(
new
)(
state
);
THCITensor_
(
resize1d
)(
state
,
rulesBuffer
,
sizeof
(
uInt
)
*
rules
[
1
].
size
());
auto
diF
=
THCTensor_
(
data
)(
state
,
d_input_features
);
auto
doF
=
THCTensor_
(
data
)(
state
,
d_output_features
);
auto
rb
=
(
uInt
*
)
THCITensor_data
(
state
,
rulesBuffer
);
auto
rb
=
(
uInt
*
)
THCITensor_
(
data
)
(
state
,
rulesBuffer
);
cudaMemcpy
(
rb
,
&
rules
[
1
][
0
],
sizeof
(
uInt
)
*
rules
[
1
].
size
(),
cudaMemcpyHostToDevice
);
InputLayer_bp
<
real
><<<
std
::
min
(
nRows
,
32768U
),
std
::
min
(
nPlanes
,
32U
),
0
,
THCState_getCurrentStream
(
state
)
>>>
(
diF
,
doF
,
nRows
,
maxActive
,
nPlanes
,
rb
,
mode
==
4
);
THCITensor_
(
free
)(
state
,
rulesBuffer
);
}
}
extern
"C"
void
scn_DR_
(
OutputLayer_updateOutput
)(
void
**
m
,
THCTensor
*
input_features
,
THCTensor
*
output_features
)
{
SCN_INITIALIZE_AND_REFERENCE
(
Metadata
<
Dimension
>
,
m
)
auto
&
rules
=
_m
.
inputLayerRuleBook
;
uInt
nPlanes
=
input_features
->
size
[
1
];
auto
mode
=
rules
[
0
][
0
];
auto
maxActive
=
rules
[
0
][
1
];
auto
nRows
=
rules
[
0
][
3
];
if
(
mode
==
0
)
{
THCTensor_
(
resizeAs
)(
state
,
output_features
,
input_features
);
THCTensor_
(
copy
)(
state
,
output_features
,
input_features
);
}
else
{
THCTensor_
(
resize2d
)(
state
,
output_features
,
rules
[
0
][
2
],
nPlanes
);
THCTensor_
(
zero
)(
state
,
output_features
);
auto
rulesBuffer
=
THCITensor_
(
new
)(
state
);
THCITensor_
(
resize1d
)(
state
,
rulesBuffer
,
sizeof
(
uInt
)
*
rules
[
1
].
size
());
auto
iF
=
THCTensor_
(
data
)(
state
,
input_features
);
auto
oF
=
THCTensor_
(
data
)(
state
,
output_features
);
auto
rb
=
(
uInt
*
)
THCITensor_
(
data
)(
state
,
rulesBuffer
);
cudaMemcpy
(
rb
,
&
rules
[
1
][
0
],
sizeof
(
uInt
)
*
rules
[
1
].
size
(),
cudaMemcpyHostToDevice
);
InputLayer_bp
<
real
><<<
std
::
min
(
nRows
,
32768U
),
std
::
min
(
nPlanes
,
32U
),
0
,
THCState_getCurrentStream
(
state
)
>>>
(
oF
,
iF
,
nRows
,
maxActive
,
nPlanes
,
rb
,
false
);
THCITensor_
(
free
)(
state
,
rulesBuffer
);
}
}
extern
"C"
void
scn_DR_
(
OutputLayer_updateGradInput
)(
void
**
m
,
THCTensor
*
d_input_features
,
THCTensor
*
d_output_features
)
{
SCN_INITIALIZE_AND_REFERENCE
(
Metadata
<
Dimension
>
,
m
)
auto
&
rules
=
_m
.
inputLayerRuleBook
;
uInt
nPlanes
=
d_output_features
->
size
[
1
];
auto
mode
=
rules
[
0
][
0
];
auto
maxActive
=
rules
[
0
][
1
];
auto
nRows
=
rules
[
0
][
3
];
if
(
mode
==
0
)
{
THCTensor_
(
resizeAs
)(
state
,
d_input_features
,
d_output_features
);
THCTensor_
(
copy
)(
state
,
d_input_features
,
d_output_features
);
}
else
{
THCTensor_
(
resize2d
)(
state
,
d_input_features
,
nRows
,
nPlanes
);
THCTensor_
(
zero
)(
state
,
d_input_features
);
auto
rulesBuffer
=
THCITensor_
(
new
)(
state
);
THCITensor_
(
resize1d
)(
state
,
rulesBuffer
,
sizeof
(
uInt
)
*
rules
[
1
].
size
());
auto
diF
=
THCTensor_
(
data
)(
state
,
d_input_features
);
auto
doF
=
THCTensor_
(
data
)(
state
,
d_output_features
);
auto
rb
=
(
uInt
*
)
THCITensor_
(
data
)(
state
,
rulesBuffer
);
cudaMemcpy
(
rb
,
&
rules
[
1
][
0
],
sizeof
(
uInt
)
*
rules
[
1
].
size
(),
cudaMemcpyHostToDevice
);
InputLayer_fp
<
real
><<<
std
::
min
(
nRows
,
32768U
),
std
::
min
(
nPlanes
,
32U
),
0
,
THCState_getCurrentStream
(
state
)
>>>
(
doF
,
diF
,
nRows
,
maxActive
,
nPlanes
,
rb
,
false
);
THCITensor_
(
free
)(
state
,
rulesBuffer
);
}
}
extern
"C"
void
scn_DR_
(
BLInputLayer_updateOutput
)(
void
**
m
,
THLongTensor
*
spatialSize
,
THLongTensor
*
input_coords
,
THCTensor
*
input_features
,
THCTensor
*
output_features
,
long
mode
,
THCITensor
*
rulesBuffer
)
{
THCTensor
*
input_features
,
THCTensor
*
output_features
,
long
mode
)
{
SCN_INITIALIZE_AND_REFERENCE
(
Metadata
<
Dimension
>
,
m
)
_m
.
blLayer
(
spatialSize
,
input_coords
,
mode
);
uInt
nPlanes
=
input_features
->
size
[
2
];
...
...
@@ -74,21 +141,22 @@ extern "C" void scn_DR_(BLInputLayer_updateOutput)(
THCTensor_
(
copy
)(
state
,
output_features
,
input_features
);
THCTensor_
(
resize2d
)(
state
,
output_features
,
*
_m
.
inputNActive
,
nPlanes
);
}
else
{
THCITensor_resize1d
(
state
,
rulesBuffer
,
sizeof
(
uInt
)
*
rules
[
1
].
size
());
auto
rulesBuffer
=
THCITensor_
(
new
)(
state
);
THCITensor_
(
resize1d
)(
state
,
rulesBuffer
,
sizeof
(
uInt
)
*
rules
[
1
].
size
());
auto
iF
=
THCTensor_
(
data
)(
state
,
input_features
);
auto
oF
=
THCTensor_
(
data
)(
state
,
output_features
);
auto
rb
=
(
uInt
*
)
THCITensor_data
(
state
,
rulesBuffer
);
auto
rb
=
(
uInt
*
)
THCITensor_
(
data
)
(
state
,
rulesBuffer
);
cudaMemcpy
(
rb
,
&
rules
[
1
][
0
],
sizeof
(
uInt
)
*
rules
[
1
].
size
(),
cudaMemcpyHostToDevice
);
InputLayer_fp
<
real
><<<
std
::
min
(
nRows
,
32768U
),
std
::
min
(
nPlanes
,
32U
),
0
,
THCState_getCurrentStream
(
state
)
>>>
(
iF
,
oF
,
nRows
,
maxActive
,
nPlanes
,
rb
,
mode
==
4
);
THCITensor_
(
free
)(
state
,
rulesBuffer
);
}
}
extern
"C"
void
scn_DR_
(
BLInputLayer_updateGradInput
)(
void
**
m
,
THCTensor
*
d_input_features
,
THCTensor
*
d_output_features
,
THCITensor
*
rulesBuffer
)
{
THCTensor
*
d_output_features
)
{
SCN_INITIALIZE_AND_REFERENCE
(
Metadata
<
Dimension
>
,
m
)
auto
&
rules
=
_m
.
blLayerRuleBook
;
uInt
nPlanes
=
d_output_features
->
size
[
1
];
...
...
@@ -99,76 +167,84 @@ scn_DR_(BLInputLayer_updateGradInput)(void **m, THCTensor *d_input_features,
if
(
mode
==
0
)
{
THCTensor_
(
resizeAs
)(
state
,
d_input_features
,
d_output_features
);
THCTensor_
(
copy
)(
state
,
d_input_features
,
d_output_features
);
THCTensor_
(
resize3d
)(
state
,
d_input_features
,
rules
[
0
][
2
],
rules
[
0
][
3
],
nPlanes
);
THCTensor_
(
resize3d
)(
state
,
d_input_features
,
rules
[
0
][
2
],
rules
[
0
][
3
],
nPlanes
);
}
else
{
THCTensor_
(
resize3d
)(
state
,
d_input_features
,
rules
[
0
][
2
],
rules
[
0
][
3
],
nPlanes
);
THCTensor_
(
resize3d
)(
state
,
d_input_features
,
rules
[
0
][
2
],
rules
[
0
][
3
],
nPlanes
);
THCTensor_
(
zero
)(
state
,
d_input_features
);
THCITensor_resize1d
(
state
,
rulesBuffer
,
sizeof
(
uInt
)
*
rules
[
1
].
size
());
auto
rulesBuffer
=
THCITensor_
(
new
)(
state
);
THCITensor_
(
resize1d
)(
state
,
rulesBuffer
,
sizeof
(
uInt
)
*
rules
[
1
].
size
());
auto
diF
=
THCTensor_
(
data
)(
state
,
d_input_features
);
auto
doF
=
THCTensor_
(
data
)(
state
,
d_output_features
);
auto
rb
=
(
uInt
*
)
THCITensor_data
(
state
,
rulesBuffer
);
auto
rb
=
(
uInt
*
)
THCITensor_
(
data
)
(
state
,
rulesBuffer
);
cudaMemcpy
(
rb
,
&
rules
[
1
][
0
],
sizeof
(
uInt
)
*
rules
[
1
].
size
(),
cudaMemcpyHostToDevice
);
InputLayer_bp
<
real
><<<
std
::
min
(
nRows
,
32768U
),
std
::
min
(
nPlanes
,
32U
),
0
,
THCState_getCurrentStream
(
state
)
>>>
(
diF
,
doF
,
nRows
,
maxActive
,
nPlanes
,
rb
,
mode
==
4
);
THCITensor_
(
free
)(
state
,
rulesBuffer
);
}
}
extern
"C"
void
scn_DR_
(
BLOutputLayer_updateOutput
)(
void
**
m
,
THCTensor
*
input_features
,
THCTensor
*
output_features
,
THCITensor
*
rulesBuffer
)
{
extern
"C"
void
scn_DR_
(
BLOutputLayer_updateOutput
)(
void
**
m
,
THCTensor
*
input_features
,
THCTensor
*
output_features
)
{
SCN_INITIALIZE_AND_REFERENCE
(
Metadata
<
Dimension
>
,
m
)
auto
&
rules
=
_m
.
blLayerRuleBook
;
uInt
nPlanes
=
input_features
->
size
[
1
];
auto
mode
=
rules
[
0
][
0
];
uInt
maxActive
=
rules
[
0
][
1
];
uInt
nRows
=
rules
[
0
][
4
];
if
(
mode
==
0
)
{
if
(
mode
==
0
)
{
THCTensor_
(
resizeAs
)(
state
,
output_features
,
input_features
);
THCTensor_
(
copy
)(
state
,
output_features
,
input_features
);
THCTensor_
(
resize3d
)(
state
,
output_features
,
rules
[
0
][
2
],
rules
[
0
][
3
],
nPlanes
);
THCTensor_
(
resize3d
)(
state
,
output_features
,
rules
[
0
][
2
],
rules
[
0
][
3
],
nPlanes
);
}
else
{
THCTensor_
(
resize3d
)(
state
,
output_features
,
rules
[
0
][
2
],
rules
[
0
][
3
],
nPlanes
);
THCTensor_
(
resize3d
)(
state
,
output_features
,
rules
[
0
][
2
],
rules
[
0
][
3
],
nPlanes
);
THCTensor_
(
zero
)(
state
,
output_features
);
THCITensor_resize1d
(
state
,
rulesBuffer
,
sizeof
(
uInt
)
*
rules
[
1
].
size
());
auto
rulesBuffer
=
THCITensor_
(
new
)(
state
);
THCITensor_
(
resize1d
)(
state
,
rulesBuffer
,
sizeof
(
uInt
)
*
rules
[
1
].
size
());
auto
iF
=
THCTensor_
(
data
)(
state
,
input_features
);
auto
oF
=
THCTensor_
(
data
)(
state
,
output_features
);
auto
rb
=
(
uInt
*
)
THCITensor_data
(
state
,
rulesBuffer
);
auto
rb
=
(
uInt
*
)
THCITensor_
(
data
)
(
state
,
rulesBuffer
);
cudaMemcpy
(
rb
,
&
rules
[
1
][
0
],
sizeof
(
uInt
)
*
rules
[
1
].
size
(),
cudaMemcpyHostToDevice
);
InputLayer_bp
<
real
><<<
std
::
min
(
nRows
,
32768U
),
std
::
min
(
nPlanes
,
32U
),
0
,
THCState_getCurrentStream
(
state
)
>>>
(
oF
,
iF
,
nRows
,
maxActive
,
nPlanes
,
rb
,
false
);
THCITensor_
(
free
)(
state
,
rulesBuffer
);
}
}
extern
"C"
void
scn_DR_
(
BLOutputLayer_updateGradInput
)(
void
**
m
,
THCTensor
*
d_input_features
,
THCTensor
*
d_output_features
,
THCITensor
*
rulesBuffer
)
{
THCTensor
*
d_output_features
)
{
SCN_INITIALIZE_AND_REFERENCE
(
Metadata
<
Dimension
>
,
m
)
auto
&
rules
=
_m
.
blLayerRuleBook
;
uInt
nPlanes
=
d_output_features
->
size
[
2
];
uInt
mode
=
rules
[
0
][
0
];
uInt
maxActive
=
rules
[
0
][
1
];
uInt
nRows
=
rules
[
0
][
4
];
if
(
mode
==
0
)
{
if
(
mode
==
0
)
{
THCTensor_
(
resizeAs
)(
state
,
d_input_features
,
d_output_features
);
THCTensor_
(
copy
)(
state
,
d_input_features
,
d_output_features
);
THCTensor_
(
resize2d
)(
state
,
d_input_features
,
nRows
,
nPlanes
);
}
else
{
THCTensor_
(
resize2d
)(
state
,
d_input_features
,
nRows
,
nPlanes
);
THCTensor_
(
zero
)(
state
,
d_input_features
);
THCITensor_resize1d
(
state
,
rulesBuffer
,
sizeof
(
uInt
)
*
rules
[
1
].
size
());
auto
rulesBuffer
=
THCITensor_
(
new
)(
state
);
THCITensor_
(
resize1d
)(
state
,
rulesBuffer
,
sizeof
(
uInt
)
*
rules
[
1
].
size
());
auto
diF
=
THCTensor_
(
data
)(
state
,
d_input_features
);
auto
doF
=
THCTensor_
(
data
)(
state
,
d_output_features
);
auto
rb
=
(
uInt
*
)
THCITensor_data
(
state
,
rulesBuffer
);
auto
rb
=
(
uInt
*
)
THCITensor_
(
data
)
(
state
,
rulesBuffer
);
cudaMemcpy
(
rb
,
&
rules
[
1
][
0
],
sizeof
(
uInt
)
*
rules
[
1
].
size
(),
cudaMemcpyHostToDevice
);
InputLayer_fp
<
real
><<<
std
::
min
(
nRows
,
32768U
),
std
::
min
(
nPlanes
,
32U
),
0
,
THCState_getCurrentStream
(
state
)
>>>
(
doF
,
diF
,
nRows
,
maxActive
,
nPlanes
,
rb
,
false
);
THCITensor_
(
free
)(
state
,
rulesBuffer
);
}
}
#endif
PyTorch/
sparseconvnet/SCN/generic/GPU/I
nput
Layer.h
→
sparseconvnet/SCN/generic/GPU/I
O
Layer
s
.h
View file @
1df7b845
...
...
@@ -4,8 +4,8 @@
// This source code is licensed under the license found in the
// LICENSE file in the root directory of this source tree.
#ifndef GPU_I
NPUT
LAYER_H
#define GPU_I
NPUT
LAYER_H
#ifndef GPU_I
O
LAYER
S
_H
#define GPU_I
O
LAYER
S
_H
template
<
typename
T
>
__global__
void
InputLayer_fp
(
T
*
input_features
,
T
*
output_features
,
...
...
@@ -40,4 +40,4 @@ __global__ void InputLayer_bp(T *d_input_features, T *d_output_features,
}
}
}
#endif
/* GPU_I
NPUT
LAYER_H */
#endif
/* GPU_I
O
LAYER
S
_H */
PyTorch/
sparseconvnet/SCN/generic/GPU/LeakyReLU.cu
→
sparseconvnet/SCN/generic/GPU/LeakyReLU.cu
View file @
1df7b845
File moved
PyTorch/
sparseconvnet/SCN/generic/GPU/LeakyReLU.h
→
sparseconvnet/SCN/generic/GPU/LeakyReLU.h
View file @
1df7b845
File moved
PyTorch/
sparseconvnet/SCN/generic/GPU/MaxPooling.cu
→
sparseconvnet/SCN/generic/GPU/MaxPooling.cu
View file @
1df7b845
...
...
@@ -13,7 +13,7 @@
extern
"C"
void
scn_DR_
(
MaxPooling_updateOutput
)(
THLongTensor
*
inputSize
,
THLongTensor
*
outputSize
,
THLongTensor
*
poolSize
,
THLongTensor
*
poolStride
,
void
**
m
,
THCTensor
*
input_features
,
THCTensor
*
output_features
,
long
nFeaturesToDrop
,
THCITensor
*
rulesBuffer
)
{
THCTensor
*
output_features
,
long
nFeaturesToDrop
)
{
SCN_INITIALIZE_AND_REFERENCE
(
Metadata
<
Dimension
>
,
m
)
uInt
nPlanes
=
input_features
->
size
[
1
]
-
nFeaturesToDrop
;
...
...
@@ -35,8 +35,7 @@ extern "C" void scn_DR_(MaxPooling_updateGradInput)(
THLongTensor
*
inputSize
,
THLongTensor
*
outputSize
,
THLongTensor
*
poolSize
,
THLongTensor
*
poolStride
,
void
**
m
,
THCTensor
*
input_features
,
THCTensor
*
d_input_features
,
THCTensor
*
output_features
,
THCTensor
*
d_output_features
,
long
nFeaturesToDrop
,
THCITensor
*
rulesBuffer
)
{
THCTensor
*
d_output_features
,
long
nFeaturesToDrop
)
{
SCN_INITIALIZE_AND_REFERENCE
(
Metadata
<
Dimension
>
,
m
)
uInt
nPlanes
=
input_features
->
size
[
1
]
-
nFeaturesToDrop
;
...
...
@@ -56,4 +55,49 @@ extern "C" void scn_DR_(MaxPooling_updateGradInput)(
d_output_features
->
size
[
1
],
rbB
,
nHotB
);
,
)
}
extern
"C"
void
scn_DR_
(
RandomizedStrideMaxPooling_updateOutput
)(
THLongTensor
*
inputSize
,
THLongTensor
*
outputSize
,
THLongTensor
*
poolSize
,
THLongTensor
*
poolStride
,
void
**
m
,
THCTensor
*
input_features
,
THCTensor
*
output_features
,
long
nFeaturesToDrop
)
{
SCN_INITIALIZE_AND_REFERENCE
(
Metadata
<
Dimension
>
,
m
)
uInt
nPlanes
=
input_features
->
size
[
1
]
-
nFeaturesToDrop
;
auto
_rules
=
_m
.
getRandomizedStrideRuleBook
(
inputSize
,
outputSize
,
poolSize
,
poolStride
,
true
);
uInt
nActive
=
_m
.
getNActive
(
outputSize
);
THCTensor_
(
resize2d
)(
state
,
output_features
,
nActive
,
nPlanes
);
THCTensor_
(
zero
)(
state
,
output_features
);
auto
iF
=
THCTensor_
(
data
)(
state
,
input_features
)
+
nFeaturesToDrop
;
auto
oF
=
THCTensor_
(
data
)(
state
,
output_features
);
RULEBOOKITERATOR
(
MaxPooling_ForwardPass
<
real
>
(
THCState_getCurrentStream
(
state
),
iF
,
oF
,
nPlanes
,
input_features
->
size
[
1
],
output_features
->
size
[
1
],
rbB
,
nHotB
);
,
)
}
extern
"C"
void
scn_DR_
(
RandomizedStrideMaxPooling_updateGradInput
)(
THLongTensor
*
inputSize
,
THLongTensor
*
outputSize
,
THLongTensor
*
poolSize
,
THLongTensor
*
poolStride
,
void
**
m
,
THCTensor
*
input_features
,
THCTensor
*
d_input_features
,
THCTensor
*
output_features
,
THCTensor
*
d_output_features
,
long
nFeaturesToDrop
)
{
SCN_INITIALIZE_AND_REFERENCE
(
Metadata
<
Dimension
>
,
m
)
uInt
nPlanes
=
input_features
->
size
[
1
]
-
nFeaturesToDrop
;
auto
_rules
=
_m
.
getRandomizedStrideRuleBook
(
inputSize
,
outputSize
,
poolSize
,
poolStride
,
true
);
uInt
nActive
=
_m
.
getNActive
(
outputSize
);
THCTensor_
(
resizeAs
)(
state
,
d_input_features
,
input_features
);
THCTensor_
(
zero
)(
state
,
d_input_features
);
auto
iF
=
THCTensor_
(
data
)(
state
,
input_features
);
auto
oF
=
THCTensor_
(
data
)(
state
,
output_features
);
auto
diF
=
THCTensor_
(
data
)(
state
,
d_input_features
);
auto
doF
=
THCTensor_
(
data
)(
state
,
d_output_features
);
RULEBOOKITERATOR
(
MaxPooling_BackwardPass
<
real
>
(
THCState_getCurrentStream
(
state
),
iF
,
diF
,
oF
,
doF
,
nPlanes
,
input_features
->
size
[
1
],
d_output_features
->
size
[
1
],
rbB
,
nHotB
);
,
)
}
#endif
PyTorch/
sparseconvnet/SCN/generic/GPU/MaxPooling.h
→
sparseconvnet/SCN/generic/GPU/MaxPooling.h
View file @
1df7b845
File moved
PyTorch/
sparseconvnet/SCN/generic/GPU/NetworkInNetwork.cu
→
sparseconvnet/SCN/generic/GPU/NetworkInNetwork.cu
View file @
1df7b845
File moved
PyTorch/
sparseconvnet/SCN/generic/GPU/RuleBookIterator.h
→
sparseconvnet/SCN/generic/GPU/RuleBookIterator.h
View file @
1df7b845
...
...
@@ -14,9 +14,10 @@
#define RULEBOOKITERATOR(X, Y) \
uInt ms = ruleBookMaxSize(_rules); \
if (THCITensor_nElement(state, rulesBuffer) < ms) \
THCITensor_resize1d(state, rulesBuffer, ms); \
uInt *rbB = (uInt *)THCITensor_data(state, rulesBuffer); \
auto rulesBuffer = THCITensor_(new)(state); \
if (THCITensor_(nElement)(state, rulesBuffer) < ms) \
THCITensor_(resize1d)(state, rulesBuffer, ms); \
uInt *rbB = (uInt *)THCITensor_(data)(state, rulesBuffer); \
for (int k = 0; k < _rules.size(); ++k) { \
auto &r = _rules[k]; \
uInt nHotB = r.size() / 2; \
...
...
@@ -28,6 +29,7 @@
X \
} \
Y \
}
} \
THCITensor_(free)(state, rulesBuffer);
#endif
/* GPU_RULEBOOKITERATOR_H */
PyTorch/
sparseconvnet/SCN/generic/GPU/SparseToDense.cu
→
sparseconvnet/SCN/generic/GPU/SparseToDense.cu
View file @
1df7b845
...
...
@@ -11,7 +11,7 @@
extern
"C"
void
scn_DR_
(
SparseToDense_updateOutput
)(
THLongTensor
*
inputSize
,
void
**
m
,
THCTensor
*
input_features
,
THCTensor
*
output_features
,
THCITensor
*
rulesBuffer
,
long
nPlanes
)
{
THCTensor
*
output_features
,
long
nPlanes
)
{
SCN_INITIALIZE_AND_REFERENCE
(
Metadata
<
Dimension
>
,
m
)
{
...
...
@@ -36,8 +36,7 @@ extern "C" void scn_DR_(SparseToDense_updateOutput)(
}
extern
"C"
void
scn_DR_
(
SparseToDense_updateGradInput
)(
THLongTensor
*
inputSize
,
void
**
m
,
THCTensor
*
input_features
,
THCTensor
*
d_input_features
,
THCTensor
*
d_output_features
,
THCITensor
*
rulesBuffer
)
{
THCTensor
*
d_input_features
,
THCTensor
*
d_output_features
)
{
SCN_INITIALIZE_AND_REFERENCE
(
Metadata
<
Dimension
>
,
m
)
THCTensor_
(
resizeAs
)(
state
,
d_input_features
,
input_features
);
...
...
PyTorch/
sparseconvnet/SCN/generic/GPU/SparseToDense.h
→
sparseconvnet/SCN/generic/GPU/SparseToDense.h
View file @
1df7b845
File moved
PyTorch/
sparseconvnet/SCN/generic/GPU/THGenerateCudaFloatTypes.h
→
sparseconvnet/SCN/generic/GPU/THGenerateCudaFloatTypes.h
View file @
1df7b845
File moved
PyTorch/
sparseconvnet/SCN/generic/GPU/THGenerateDimCudaFloatTypes.h
→
sparseconvnet/SCN/generic/GPU/THGenerateDimCudaFloatTypes.h
View file @
1df7b845
File moved
sparseconvnet/SCN/generic/GPU/UnPooling.cu
0 → 100644
View file @
1df7b845
// Copyright 2016-present, Facebook, Inc.
// All rights reserved.
//
// This source code is licensed under the license found in the
// LICENSE file in the root directory of this source tree.
#ifndef TH_GENERIC_FILE_
#define TH_GENERIC_FILE_ "generic/GPU/UnPooling.cu"
#else
#include "RuleBookIterator.h"
#include "UnPooling.h"
// Forward pass for unpooling: replicates each input row's features to the
// output rows listed in the rule book. Note the rule book is built with
// outputSize/inputSize swapped relative to pooling, reusing the pooling rules
// in reverse. The first nFeaturesToDrop feature planes are skipped.
extern "C" void scn_DR_(UnPooling_updateOutput)(
    THLongTensor *inputSize, THLongTensor *outputSize, THLongTensor *poolSize,
    THLongTensor *poolStride, void **m, THCTensor *input_features,
    THCTensor *output_features, long nFeaturesToDrop) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
  uInt nPlanes = input_features->size[1] - nFeaturesToDrop;
  // _rules / state / rbB / nHotB are names required by RULEBOOKITERATOR and
  // SCN_INITIALIZE_AND_REFERENCE -- do not rename.
  auto _rules =
      _m.getRuleBook(outputSize, inputSize, poolSize, poolStride, true);
  uInt nActive = _m.getNActive(outputSize);
  // Output holds nActive rows of the retained planes; zeroed because the
  // forward kernel accumulates (+=) into it.
  THCTensor_(resize2d)(state, output_features, nActive,
                       input_features->size[1] - nFeaturesToDrop);
  THCTensor_(zero)(state, output_features);
  // Skip the dropped leading planes in the input.
  auto iF = THCTensor_(data)(state, input_features) + nFeaturesToDrop;
  auto oF = THCTensor_(data)(state, output_features);
  RULEBOOKITERATOR(UnPooling_ForwardPass<real>(
                       THCState_getCurrentStream(state), iF, oF, nPlanes,
                       input_features->size[1], output_features->size[1], rbB,
                       nHotB, _rules.size());
                   , )
}
// Backward pass for unpooling: sums output gradients back into the input
// gradient rows according to the (swapped) pooling rule book. The first
// nFeaturesToDrop feature planes are skipped.
extern "C" void scn_DR_(UnPooling_updateGradInput)(
    THLongTensor *inputSize, THLongTensor *outputSize, THLongTensor *poolSize,
    THLongTensor *poolStride, void **m, THCTensor *input_features,
    THCTensor *d_input_features, THCTensor *d_output_features,
    long nFeaturesToDrop) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
  uInt nPlanes = input_features->size[1] - nFeaturesToDrop;
  // Same rule book as the forward pass (outputSize/inputSize swapped).
  // _rules / state / rbB / nHotB are names required by RULEBOOKITERATOR and
  // SCN_INITIALIZE_AND_REFERENCE -- do not rename.
  auto _rules =
      _m.getRuleBook(outputSize, inputSize, poolSize, poolStride, true);
  // NOTE(review): nActive is unused below; getNActive presumably also
  // populates metadata state as a side effect -- TODO confirm.
  uInt nActive = _m.getNActive(outputSize);
  // Zeroed gradient buffer; the backward kernel accumulates (+=) into it.
  THCTensor_(resizeAs)(state, d_input_features, input_features);
  THCTensor_(zero)(state, d_input_features);
  // Skip the dropped leading planes in the input gradient.
  auto diF = THCTensor_(data)(state, d_input_features) + nFeaturesToDrop;
  auto doF = THCTensor_(data)(state, d_output_features);
  RULEBOOKITERATOR(UnPooling_BackwardPass<real>(
                       THCState_getCurrentStream(state), diF, doF, nPlanes,
                       input_features->size[1], d_output_features->size[1],
                       rbB, nHotB, _rules.size());
                   , )
}
#endif
sparseconvnet/SCN/generic/GPU/UnPooling.h
0 → 100644
View file @
1df7b845
// Copyright 2016-present, Facebook, Inc.
// All rights reserved.
//
// This source code is licensed under the license found in the
// LICENSE file in the root directory of this source tree.
#ifndef GPU_UNPOOLING_H
#define GPU_UNPOOLING_H
// NTX must be >=2 so r is filled properly
// Unpooling forward kernel: output_features[out_row] += input_features[in_row]
// for each (out_row, in_row) pair in `rules` (pairs stored flat, 2 uInts per
// rule). Expects a 2-D block of NTX x NTY threads; blockDim.y rows of the rule
// book are staged per tile in shared memory. Grid-stride over rule rows.
template <typename T, uInt NTX, uInt NTY>
__global__ void UnPooling_fp(T *input_features, T *output_features,
                             uInt nPlanes, uInt input_stride,
                             uInt output_stride, uInt *rules, uInt nHot) {
  __shared__ uInt r[NTY * 2];
  for (uInt n = blockIdx.x * NTY; n < nHot; n += gridDim.x * NTY) {
    {
      // Cooperatively stage up to NTY rule pairs for this tile.
      uInt i = threadIdx.x + NTX * threadIdx.y;
      // Only 2 * (nHot - n) entries are valid in the final, partial tile.
      // BUG FIX: the guard previously read `2 * (n - nHot)`; since the loop
      // guarantees n < nHot, that unsigned subtraction wrapped around,
      // the guard was always true, and `rules` was read out of bounds.
      if (i < NTY * 2 and i < 2 * (nHot - n))
        r[i] = rules[2 * n + i];
    }
    __syncthreads(); // shared-memory rules visible to all rows of the tile
    if (n + threadIdx.y < nHot) {
      // r[2*y+1] selects the input row, r[2*y] the output row.
      uInt i = r[2 * threadIdx.y + 1] * input_stride;
      uInt o = r[2 * threadIdx.y] * output_stride;
      for (uInt plane = threadIdx.x; plane < nPlanes; plane += NTX)
        output_features[o + plane] += input_features[i + plane];
    }
    __syncthreads(); // protect r before the next tile overwrites it
  }
}
// Host launcher for UnPooling_fp: fixed 32-block grid of 32x32 threads on the
// caller's stream. Grid-striding inside the kernel covers all nHot rules.
// NOTE(review): filterVolume is accepted but unused here; kept for signature
// parity with the callers.
template <typename T>
void UnPooling_ForwardPass(cudaStream_t stream, T *input_features,
                           T *output_features, uInt nPlanes, uInt input_stride,
                           uInt output_stride, uInt *rules, uInt nHot,
                           uInt filterVolume) {
  UnPooling_fp<T, 32, 32><<<32, dim3(32, 32), 0, stream>>>(
      input_features, output_features, nPlanes, input_stride, output_stride,
      rules, nHot);
}
// Unpooling backward kernel: d_input_features[in_row] +=
// d_output_features[out_row] for each (out_row, in_row) pair in `rules`
// (pairs stored flat, 2 uInts per rule). Expects a 2-D block of NTX x NTY
// threads; NTY rule pairs are staged per tile in shared memory. NTX must be
// >=2 so r is filled properly. Grid-stride over rule rows.
template <typename T, uInt NTX, uInt NTY>
__global__ void UnPooling_bp(T *d_input_features, T *d_output_features,
                             uInt nPlanes, uInt input_stride,
                             uInt output_stride, uInt *rules, uInt nHot) {
  __shared__ uInt r[NTY * 2];
  for (uInt n = blockIdx.x * NTY; n < nHot; n += gridDim.x * NTY) {
    {
      // Cooperatively stage up to NTY rule pairs for this tile.
      uInt i = threadIdx.x + NTX * threadIdx.y;
      // Only 2 * (nHot - n) entries are valid in the final, partial tile.
      // BUG FIX: the guard previously read `2 * (n - nHot)`; since the loop
      // guarantees n < nHot, that unsigned subtraction wrapped around,
      // the guard was always true, and `rules` was read out of bounds.
      if (i < NTY * 2 and i < 2 * (nHot - n))
        r[i] = rules[2 * n + i];
    }
    __syncthreads(); // shared-memory rules visible to all rows of the tile
    if (n + threadIdx.y < nHot) {
      // r[2*y+1] selects the input row, r[2*y] the output row.
      uInt i = r[2 * threadIdx.y + 1] * input_stride;
      uInt o = r[2 * threadIdx.y] * output_stride;
      for (uInt plane = threadIdx.x; plane < nPlanes; plane += NTX)
        d_input_features[i + plane] += d_output_features[o + plane];
    }
    __syncthreads(); // protect r before the next tile overwrites it
  }
}
// Host launcher for UnPooling_bp: fixed 32-block grid of 32x32 threads on the
// caller's stream. Grid-striding inside the kernel covers all nHot rules.
// NOTE(review): filterVolume is accepted but unused here; kept for signature
// parity with the callers.
template <typename T>
void UnPooling_BackwardPass(cudaStream_t stream, T *d_input_features,
                            T *d_output_features, uInt nPlanes,
                            uInt input_stride, uInt output_stride, uInt *rules,
                            uInt nHot, uInt filterVolume) {
  UnPooling_bp<T, 32, 32><<<32, dim3(32, 32), 0, stream>>>(
      d_input_features, d_output_features, nPlanes, input_stride,
      output_stride, rules, nHot);
}
#endif
/* GPU_UNPOOLING_H */
PyTorch/
sparseconvnet/SCN/generic/Geometry/ActivePoolingRules.h
→
sparseconvnet/SCN/generic/Geometry/ActivePoolingRules.h
View file @
1df7b845
File moved
Prev
1
…
4
5
6
7
8
9
10
11
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment