Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
SparseConvNet
Commits
2ad7baf8
Commit
2ad7baf8
authored
Oct 21, 2019
by
Benjamin Thomas Graham
Browse files
detach; batch_size; data_ptr
parent
16e4df34
Changes
33
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
197 additions
and
197 deletions
+197
-197
sparseconvnet/SCN/CPU/ActivePooling.cpp
sparseconvnet/SCN/CPU/ActivePooling.cpp
+4
-4
sparseconvnet/SCN/CPU/AffineReluTrivialConvolution.cpp
sparseconvnet/SCN/CPU/AffineReluTrivialConvolution.cpp
+8
-8
sparseconvnet/SCN/CPU/AveragePooling.cpp
sparseconvnet/SCN/CPU/AveragePooling.cpp
+10
-10
sparseconvnet/SCN/CPU/BatchNormalization.cpp
sparseconvnet/SCN/CPU/BatchNormalization.cpp
+7
-7
sparseconvnet/SCN/CPU/BatchwiseMultiplicativeDropout.cpp
sparseconvnet/SCN/CPU/BatchwiseMultiplicativeDropout.cpp
+7
-7
sparseconvnet/SCN/CPU/Convolution.cpp
sparseconvnet/SCN/CPU/Convolution.cpp
+4
-4
sparseconvnet/SCN/CPU/IOLayers.cpp
sparseconvnet/SCN/CPU/IOLayers.cpp
+16
-16
sparseconvnet/SCN/CPU/LeakyReLU.cpp
sparseconvnet/SCN/CPU/LeakyReLU.cpp
+5
-5
sparseconvnet/SCN/CPU/MaxPooling.cpp
sparseconvnet/SCN/CPU/MaxPooling.cpp
+12
-12
sparseconvnet/SCN/CPU/SparseToDense.cpp
sparseconvnet/SCN/CPU/SparseToDense.cpp
+7
-7
sparseconvnet/SCN/CPU/UnPooling.cpp
sparseconvnet/SCN/CPU/UnPooling.cpp
+4
-4
sparseconvnet/SCN/CUDA/ActivePooling.cpp
sparseconvnet/SCN/CUDA/ActivePooling.cpp
+4
-4
sparseconvnet/SCN/CUDA/ActivePooling.cu
sparseconvnet/SCN/CUDA/ActivePooling.cu
+2
-2
sparseconvnet/SCN/CUDA/AffineReluTrivialConvolution.cpp
sparseconvnet/SCN/CUDA/AffineReluTrivialConvolution.cpp
+6
-6
sparseconvnet/SCN/CUDA/AveragePooling.cpp
sparseconvnet/SCN/CUDA/AveragePooling.cpp
+7
-7
sparseconvnet/SCN/CUDA/BatchNormalization.cpp
sparseconvnet/SCN/CUDA/BatchNormalization.cpp
+7
-7
sparseconvnet/SCN/CUDA/BatchwiseMultiplicativeDropout.cpp
sparseconvnet/SCN/CUDA/BatchwiseMultiplicativeDropout.cpp
+3
-3
sparseconvnet/SCN/CUDA/Convolution.cpp
sparseconvnet/SCN/CUDA/Convolution.cpp
+50
-50
sparseconvnet/SCN/CUDA/Deconvolution.cpp
sparseconvnet/SCN/CUDA/Deconvolution.cpp
+10
-10
sparseconvnet/SCN/CUDA/IOLayers.cpp
sparseconvnet/SCN/CUDA/IOLayers.cpp
+24
-24
No files found.
sparseconvnet/SCN/CPU/ActivePooling.cpp
View file @
2ad7baf8
...
@@ -55,8 +55,8 @@ void cpu_ActivePooling_updateOutput(
...
@@ -55,8 +55,8 @@ void cpu_ActivePooling_updateOutput(
output_features
.
resize_
({
batchSize
,
nPlanes
});
output_features
.
resize_
({
batchSize
,
nPlanes
});
output_features
.
zero_
();
output_features
.
zero_
();
ActivePooling_ForwardPass
<
T
>
(
input_features
.
data
<
T
>
(),
ActivePooling_ForwardPass
<
T
>
(
input_features
.
data
_ptr
<
T
>
(),
output_features
.
data
<
T
>
(),
batchSize
,
maxActive
,
output_features
.
data
_ptr
<
T
>
(),
batchSize
,
maxActive
,
nPlanes
,
_rules
,
average
);
nPlanes
,
_rules
,
average
);
}
}
...
@@ -74,7 +74,7 @@ void cpu_ActivePooling_updateGradInput(
...
@@ -74,7 +74,7 @@ void cpu_ActivePooling_updateGradInput(
d_input_features
.
resize_as_
(
input_features
);
d_input_features
.
resize_as_
(
input_features
);
d_input_features
.
zero_
();
d_input_features
.
zero_
();
ActivePooling_BackwardPass
<
T
>
(
d_input_features
.
data
<
T
>
(),
ActivePooling_BackwardPass
<
T
>
(
d_input_features
.
data
_ptr
<
T
>
(),
d_output_features
.
data
<
T
>
(),
batchSize
,
d_output_features
.
data
_ptr
<
T
>
(),
batchSize
,
maxActive
,
nPlanes
,
_rules
,
average
);
maxActive
,
nPlanes
,
_rules
,
average
);
}
}
sparseconvnet/SCN/CPU/AffineReluTrivialConvolution.cpp
View file @
2ad7baf8
...
@@ -74,9 +74,9 @@ double cpu_AffineReluTrivialConvolution_updateOutput(
...
@@ -74,9 +74,9 @@ double cpu_AffineReluTrivialConvolution_updateOutput(
/*float*/
at
::
Tensor
&
affineBias
,
/*float*/
at
::
Tensor
&
convWeight
)
{
/*float*/
at
::
Tensor
&
affineBias
,
/*float*/
at
::
Tensor
&
convWeight
)
{
output_features
.
resize_
({
input_features
.
size
(
0
),
convWeight
.
size
(
1
)});
output_features
.
resize_
({
input_features
.
size
(
0
),
convWeight
.
size
(
1
)});
AffineReluTrivialConvolution_ForwardPass
(
AffineReluTrivialConvolution_ForwardPass
(
input_features
.
data
<
T
>
(),
convWeight
.
size
(
0
),
input_features
.
stride
(
0
),
input_features
.
data
_ptr
<
T
>
(),
convWeight
.
size
(
0
),
input_features
.
stride
(
0
),
output_features
.
data
<
T
>
(),
convWeight
.
size
(
1
),
output_features
.
stride
(
0
),
output_features
.
data
_ptr
<
T
>
(),
convWeight
.
size
(
1
),
output_features
.
stride
(
0
),
affineWeight
.
data
<
T
>
(),
affineBias
.
data
<
T
>
(),
convWeight
.
data
<
T
>
(),
affineWeight
.
data
_ptr
<
T
>
(),
affineBias
.
data
_ptr
<
T
>
(),
convWeight
.
data
_ptr
<
T
>
(),
input_features
.
size
(
0
));
input_features
.
size
(
0
));
return
input_features
.
size
(
0
)
*
input_features
.
size
(
1
)
*
return
input_features
.
size
(
0
)
*
input_features
.
size
(
1
)
*
output_features
.
size
(
1
);
output_features
.
size
(
1
);
...
@@ -94,10 +94,10 @@ void cpu_AffineReluTrivialConvolution_backward(
...
@@ -94,10 +94,10 @@ void cpu_AffineReluTrivialConvolution_backward(
d_input_features
.
resize_as_
(
input_features
);
d_input_features
.
resize_as_
(
input_features
);
AffineReluTrivialConvolution_BackwardPass
(
AffineReluTrivialConvolution_BackwardPass
(
input_features
.
data
<
T
>
(),
d_input_features
.
data
<
T
>
(),
convWeight
.
size
(
0
),
input_features
.
data
_ptr
<
T
>
(),
d_input_features
.
data
_ptr
<
T
>
(),
convWeight
.
size
(
0
),
input_features
.
stride
(
0
),
d_output_features
.
data
<
T
>
(),
convWeight
.
size
(
1
),
input_features
.
stride
(
0
),
d_output_features
.
data
_ptr
<
T
>
(),
convWeight
.
size
(
1
),
d_output_features
.
stride
(
0
),
affineWeight
.
data
<
T
>
(),
d_output_features
.
stride
(
0
),
affineWeight
.
data
_ptr
<
T
>
(),
d_affineWeight
.
data
<
T
>
(),
affineBias
.
data
<
T
>
(),
d_affineBias
.
data
<
T
>
(),
d_affineWeight
.
data
_ptr
<
T
>
(),
affineBias
.
data
_ptr
<
T
>
(),
d_affineBias
.
data
_ptr
<
T
>
(),
convWeight
.
data
<
T
>
(),
d_convWeight
.
data
<
T
>
(),
input_features
.
size
(
0
),
convWeight
.
data
_ptr
<
T
>
(),
d_convWeight
.
data
_ptr
<
T
>
(),
input_features
.
size
(
0
),
additiveGrad
);
additiveGrad
);
}
}
sparseconvnet/SCN/CPU/AveragePooling.cpp
View file @
2ad7baf8
...
@@ -49,8 +49,8 @@ void cpu_AveragePooling_updateOutput(
...
@@ -49,8 +49,8 @@ void cpu_AveragePooling_updateOutput(
output_features
.
resize_
({
nActive
,
input_features
.
size
(
1
)
-
nFeaturesToDrop
});
output_features
.
resize_
({
nActive
,
input_features
.
size
(
1
)
-
nFeaturesToDrop
});
output_features
.
zero_
();
output_features
.
zero_
();
auto
iF
=
input_features
.
data
<
T
>
()
+
nFeaturesToDrop
;
auto
iF
=
input_features
.
data
_ptr
<
T
>
()
+
nFeaturesToDrop
;
auto
oF
=
output_features
.
data
<
T
>
();
auto
oF
=
output_features
.
data
_ptr
<
T
>
();
for
(
const
auto
&
r
:
_rules
)
{
for
(
const
auto
&
r
:
_rules
)
{
Int
nHot
=
r
.
size
()
/
2
;
Int
nHot
=
r
.
size
()
/
2
;
...
@@ -74,8 +74,8 @@ void cpu_AveragePooling_updateGradInput(
...
@@ -74,8 +74,8 @@ void cpu_AveragePooling_updateGradInput(
d_input_features
.
resize_as_
(
input_features
);
d_input_features
.
resize_as_
(
input_features
);
d_input_features
.
zero_
();
d_input_features
.
zero_
();
auto
diF
=
d_input_features
.
data
<
T
>
()
+
nFeaturesToDrop
;
auto
diF
=
d_input_features
.
data
_ptr
<
T
>
()
+
nFeaturesToDrop
;
auto
doF
=
d_output_features
.
data
<
T
>
();
auto
doF
=
d_output_features
.
data
_ptr
<
T
>
();
for
(
const
auto
&
r
:
_rules
)
{
for
(
const
auto
&
r
:
_rules
)
{
Int
nHot
=
r
.
size
()
/
2
;
Int
nHot
=
r
.
size
()
/
2
;
...
@@ -90,9 +90,9 @@ void cpu_CopyFeaturesHelper_updateOutput(at::Tensor &rules, at::Tensor &context,
...
@@ -90,9 +90,9 @@ void cpu_CopyFeaturesHelper_updateOutput(at::Tensor &rules, at::Tensor &context,
at
::
Tensor
&
Context
)
{
at
::
Tensor
&
Context
)
{
Int
nHot
=
rules
.
size
(
0
)
/
2
;
Int
nHot
=
rules
.
size
(
0
)
/
2
;
Int
nPlanes
=
context
.
size
(
1
);
Int
nPlanes
=
context
.
size
(
1
);
auto
iF
=
context
.
data
<
T
>
();
auto
iF
=
context
.
data
_ptr
<
T
>
();
auto
oF
=
Context
.
data
<
T
>
();
auto
oF
=
Context
.
data
_ptr
<
T
>
();
auto
r
=
rules
.
data
<
Int
>
();
auto
r
=
rules
.
data
_ptr
<
Int
>
();
Int
outSite
;
Int
outSite
;
#pragma omp parallel for private(outSite)
#pragma omp parallel for private(outSite)
for
(
outSite
=
0
;
outSite
<
nHot
;
outSite
++
)
{
for
(
outSite
=
0
;
outSite
<
nHot
;
outSite
++
)
{
...
@@ -107,9 +107,9 @@ void cpu_CopyFeaturesHelper_updateGradInput(at::Tensor &rules,
...
@@ -107,9 +107,9 @@ void cpu_CopyFeaturesHelper_updateGradInput(at::Tensor &rules,
at
::
Tensor
&
dContext
)
{
at
::
Tensor
&
dContext
)
{
Int
nHot
=
rules
.
size
(
0
)
/
2
;
Int
nHot
=
rules
.
size
(
0
)
/
2
;
Int
nPlanes
=
dcontext
.
size
(
1
);
Int
nPlanes
=
dcontext
.
size
(
1
);
auto
iF
=
dcontext
.
data
<
T
>
();
auto
iF
=
dcontext
.
data
_ptr
<
T
>
();
auto
oF
=
dContext
.
data
<
T
>
();
auto
oF
=
dContext
.
data
_ptr
<
T
>
();
auto
r
=
rules
.
data
<
Int
>
();
auto
r
=
rules
.
data
_ptr
<
Int
>
();
Int
outSite
;
Int
outSite
;
#pragma omp parallel for private(outSite)
#pragma omp parallel for private(outSite)
for
(
outSite
=
0
;
outSite
<
nHot
;
outSite
++
)
{
for
(
outSite
=
0
;
outSite
<
nHot
;
outSite
++
)
{
...
...
sparseconvnet/SCN/CPU/BatchNormalization.cpp
View file @
2ad7baf8
...
@@ -121,9 +121,9 @@ void cpu_BatchNormalization_updateOutput(
...
@@ -121,9 +121,9 @@ void cpu_BatchNormalization_updateOutput(
auto
input_stride
=
input_features
.
stride
(
0
);
auto
input_stride
=
input_features
.
stride
(
0
);
auto
output_stride
=
output_features
.
stride
(
0
);
auto
output_stride
=
output_features
.
stride
(
0
);
BatchNormalization_ForwardPass
<
T
>
(
BatchNormalization_ForwardPass
<
T
>
(
input_features
.
data
<
T
>
(),
output_features
.
data
<
T
>
(),
nPlanes
,
input_features
.
data
_ptr
<
T
>
(),
output_features
.
data
_ptr
<
T
>
(),
nPlanes
,
input_stride
,
output_stride
,
nActive
,
saveMean
.
data
<
T
>
(),
input_stride
,
output_stride
,
nActive
,
saveMean
.
data
_ptr
<
T
>
(),
saveInvStd
.
data
<
T
>
(),
runningMean
.
data
<
T
>
(),
runningVar
.
data
<
T
>
(),
saveInvStd
.
data
_ptr
<
T
>
(),
runningMean
.
data
_ptr
<
T
>
(),
runningVar
.
data
_ptr
<
T
>
(),
OptionalTensorData
<
T
>
(
weight
),
OptionalTensorData
<
T
>
(
bias
),
eps
,
OptionalTensorData
<
T
>
(
weight
),
OptionalTensorData
<
T
>
(
bias
),
eps
,
momentum
,
train
,
leakiness
);
momentum
,
train
,
leakiness
);
}
}
...
@@ -147,10 +147,10 @@ void cpu_BatchNormalization_backward(
...
@@ -147,10 +147,10 @@ void cpu_BatchNormalization_backward(
auto
input_stride
=
input_features
.
stride
(
0
);
auto
input_stride
=
input_features
.
stride
(
0
);
auto
output_stride
=
output_features
.
stride
(
0
);
auto
output_stride
=
output_features
.
stride
(
0
);
BatchNormalization_BackwardPass
<
T
>
(
BatchNormalization_BackwardPass
<
T
>
(
input_features
.
data
<
T
>
(),
d_input_features
.
data
<
T
>
(),
input_features
.
data
_ptr
<
T
>
(),
d_input_features
.
data
_ptr
<
T
>
(),
output_features
.
data
<
T
>
(),
d_output_features
.
data
<
T
>
(),
nPlanes
,
output_features
.
data
_ptr
<
T
>
(),
d_output_features
.
data
_ptr
<
T
>
(),
nPlanes
,
input_stride
,
output_stride
,
nActive
,
saveMean
.
data
<
T
>
(),
input_stride
,
output_stride
,
nActive
,
saveMean
.
data
_ptr
<
T
>
(),
saveInvStd
.
data
<
T
>
(),
runningMean
.
data
<
T
>
(),
runningVar
.
data
<
T
>
(),
saveInvStd
.
data
_ptr
<
T
>
(),
runningMean
.
data
_ptr
<
T
>
(),
runningVar
.
data
_ptr
<
T
>
(),
OptionalTensorData
<
T
>
(
weight
),
OptionalTensorData
<
T
>
(
bias
),
OptionalTensorData
<
T
>
(
weight
),
OptionalTensorData
<
T
>
(
bias
),
OptionalTensorData
<
T
>
(
d_weight
),
OptionalTensorData
<
T
>
(
d_bias
),
OptionalTensorData
<
T
>
(
d_weight
),
OptionalTensorData
<
T
>
(
d_bias
),
leakiness
);
leakiness
);
...
...
sparseconvnet/SCN/CPU/BatchwiseMultiplicativeDropout.cpp
View file @
2ad7baf8
...
@@ -11,9 +11,9 @@ void cpu_BatchwiseMultiplicativeDropout_updateOutput(
...
@@ -11,9 +11,9 @@ void cpu_BatchwiseMultiplicativeDropout_updateOutput(
output_features
.
resize_as_
(
input_features
);
output_features
.
resize_as_
(
input_features
);
auto
nActive
=
input_features
.
size
(
0
);
auto
nActive
=
input_features
.
size
(
0
);
auto
nPlanes
=
input_features
.
size
(
1
);
auto
nPlanes
=
input_features
.
size
(
1
);
auto
iF
=
input_features
.
data
<
T
>
();
auto
iF
=
input_features
.
data
_ptr
<
T
>
();
auto
oF
=
output_features
.
data
<
T
>
();
auto
oF
=
output_features
.
data
_ptr
<
T
>
();
auto
nz
=
noise
.
data
<
T
>
();
auto
nz
=
noise
.
data
_ptr
<
T
>
();
for
(
Int
row
=
0
;
row
<
nActive
;
row
++
)
for
(
Int
row
=
0
;
row
<
nActive
;
row
++
)
for
(
Int
plane
=
0
,
o
=
row
*
nPlanes
,
i
=
row
*
nPlanes
;
plane
<
nPlanes
;
for
(
Int
plane
=
0
,
o
=
row
*
nPlanes
,
i
=
row
*
nPlanes
;
plane
<
nPlanes
;
plane
++
,
o
++
,
i
++
)
plane
++
,
o
++
,
i
++
)
...
@@ -28,10 +28,10 @@ void cpu_BatchwiseMultiplicativeDropout_updateGradInput(
...
@@ -28,10 +28,10 @@ void cpu_BatchwiseMultiplicativeDropout_updateGradInput(
d_input_features
.
resize_as_
(
d_output_features
);
d_input_features
.
resize_as_
(
d_output_features
);
auto
nActive
=
input_features
.
size
(
0
);
auto
nActive
=
input_features
.
size
(
0
);
auto
nPlanes
=
input_features
.
size
(
1
);
auto
nPlanes
=
input_features
.
size
(
1
);
auto
iF
=
input_features
.
data
<
T
>
();
auto
iF
=
input_features
.
data
_ptr
<
T
>
();
auto
diF
=
d_input_features
.
data
<
T
>
();
auto
diF
=
d_input_features
.
data
_ptr
<
T
>
();
auto
doF
=
d_output_features
.
data
<
T
>
();
auto
doF
=
d_output_features
.
data
_ptr
<
T
>
();
auto
nz
=
noise
.
data
<
T
>
();
auto
nz
=
noise
.
data
_ptr
<
T
>
();
for
(
Int
row
=
0
;
row
<
nActive
;
row
++
)
for
(
Int
row
=
0
;
row
<
nActive
;
row
++
)
for
(
Int
plane
=
0
,
o
=
row
*
nPlanes
,
i
=
row
*
nPlanes
;
plane
<
nPlanes
;
for
(
Int
plane
=
0
,
o
=
row
*
nPlanes
,
i
=
row
*
nPlanes
;
plane
<
nPlanes
;
plane
++
,
o
++
,
i
++
)
plane
++
,
o
++
,
i
++
)
...
...
sparseconvnet/SCN/CPU/Convolution.cpp
View file @
2ad7baf8
...
@@ -10,8 +10,8 @@ at::Tensor rule_index_select(at::Tensor &src, Int nRules, const Int *rules,
...
@@ -10,8 +10,8 @@ at::Tensor rule_index_select(at::Tensor &src, Int nRules, const Int *rules,
Int
groups
)
{
Int
groups
)
{
auto
planes
=
src
.
size
(
1
)
/
groups
;
auto
planes
=
src
.
size
(
1
)
/
groups
;
auto
target
=
at
::
empty
({
groups
,
nRules
,
planes
},
src
.
options
());
auto
target
=
at
::
empty
({
groups
,
nRules
,
planes
},
src
.
options
());
auto
s_ptr
=
src
.
data
<
T
>
();
auto
s_ptr
=
src
.
data
_ptr
<
T
>
();
auto
t_ptr
=
target
.
data
<
T
>
();
auto
t_ptr
=
target
.
data
_ptr
<
T
>
();
#pragma omp parallel for
#pragma omp parallel for
for
(
Int
i
=
0
;
i
<
nRules
;
++
i
)
{
for
(
Int
i
=
0
;
i
<
nRules
;
++
i
)
{
for
(
Int
g
=
0
;
g
<
groups
;
++
g
)
{
for
(
Int
g
=
0
;
g
<
groups
;
++
g
)
{
...
@@ -29,8 +29,8 @@ template <typename T>
...
@@ -29,8 +29,8 @@ template <typename T>
void
rule_index_add_
(
at
::
Tensor
&
target
,
at
::
Tensor
&
src
,
Int
nRules
,
void
rule_index_add_
(
at
::
Tensor
&
target
,
at
::
Tensor
&
src
,
Int
nRules
,
const
Int
*
rules
,
Int
groups
)
{
const
Int
*
rules
,
Int
groups
)
{
auto
planes
=
target
.
size
(
1
)
/
groups
;
auto
planes
=
target
.
size
(
1
)
/
groups
;
auto
s_ptr
=
src
.
data
<
T
>
();
auto
s_ptr
=
src
.
data
_ptr
<
T
>
();
auto
t_ptr
=
target
.
data
<
T
>
();
auto
t_ptr
=
target
.
data
_ptr
<
T
>
();
#pragma omp parallel for
#pragma omp parallel for
for
(
Int
i
=
0
;
i
<
nRules
;
++
i
)
{
for
(
Int
i
=
0
;
i
<
nRules
;
++
i
)
{
for
(
Int
g
=
0
;
g
<
groups
;
++
g
)
{
for
(
Int
g
=
0
;
g
<
groups
;
++
g
)
{
...
...
sparseconvnet/SCN/CPU/IOLayers.cpp
View file @
2ad7baf8
...
@@ -65,8 +65,8 @@ void cpu_InputLayer_updateOutput(Metadata<Dimension> &m,
...
@@ -65,8 +65,8 @@ void cpu_InputLayer_updateOutput(Metadata<Dimension> &m,
}
else
{
}
else
{
output_features
.
resize_
({
*
m
.
inputNActive
,
nPlanes
});
output_features
.
resize_
({
*
m
.
inputNActive
,
nPlanes
});
output_features
.
zero_
();
output_features
.
zero_
();
InputLayer_ForwardPass
<
T
>
(
input_features
.
data
<
T
>
(),
InputLayer_ForwardPass
<
T
>
(
input_features
.
data
_ptr
<
T
>
(),
output_features
.
data
<
T
>
(),
nRows
,
maxActive
,
output_features
.
data
_ptr
<
T
>
(),
nRows
,
maxActive
,
nPlanes
,
&
rules
[
1
][
0
],
mode
==
4
);
nPlanes
,
&
rules
[
1
][
0
],
mode
==
4
);
}
}
}
}
...
@@ -86,8 +86,8 @@ void cpu_InputLayer_updateGradInput(Metadata<Dimension> &m,
...
@@ -86,8 +86,8 @@ void cpu_InputLayer_updateGradInput(Metadata<Dimension> &m,
}
else
{
}
else
{
d_input_features
.
resize_
({
rules
[
0
][
2
],
nPlanes
});
d_input_features
.
resize_
({
rules
[
0
][
2
],
nPlanes
});
d_input_features
.
zero_
();
d_input_features
.
zero_
();
InputLayer_BackwardPass
<
T
>
(
d_input_features
.
data
<
T
>
(),
InputLayer_BackwardPass
<
T
>
(
d_input_features
.
data
_ptr
<
T
>
(),
d_output_features
.
data
<
T
>
(),
nRows
,
maxActive
,
d_output_features
.
data
_ptr
<
T
>
(),
nRows
,
maxActive
,
nPlanes
,
&
rules
[
1
][
0
],
mode
==
4
);
nPlanes
,
&
rules
[
1
][
0
],
mode
==
4
);
}
}
}
}
...
@@ -108,8 +108,8 @@ void cpu_OutputLayer_updateOutput(Metadata<Dimension> &m,
...
@@ -108,8 +108,8 @@ void cpu_OutputLayer_updateOutput(Metadata<Dimension> &m,
}
else
{
}
else
{
output_features
.
resize_
({
rules
[
0
][
2
],
nPlanes
});
output_features
.
resize_
({
rules
[
0
][
2
],
nPlanes
});
output_features
.
zero_
();
output_features
.
zero_
();
InputLayer_BackwardPass
<
T
>
(
output_features
.
data
<
T
>
(),
InputLayer_BackwardPass
<
T
>
(
output_features
.
data
_ptr
<
T
>
(),
input_features
.
data
<
T
>
(),
nRows
,
maxActive
,
input_features
.
data
_ptr
<
T
>
(),
nRows
,
maxActive
,
nPlanes
,
&
rules
[
1
][
0
],
false
);
nPlanes
,
&
rules
[
1
][
0
],
false
);
}
}
}
}
...
@@ -129,8 +129,8 @@ void cpu_OutputLayer_updateGradInput(Metadata<Dimension> &m,
...
@@ -129,8 +129,8 @@ void cpu_OutputLayer_updateGradInput(Metadata<Dimension> &m,
}
else
{
}
else
{
d_input_features
.
resize_
({
nRows
,
nPlanes
});
d_input_features
.
resize_
({
nRows
,
nPlanes
});
d_input_features
.
zero_
();
d_input_features
.
zero_
();
InputLayer_ForwardPass
<
T
>
(
d_output_features
.
data
<
T
>
(),
InputLayer_ForwardPass
<
T
>
(
d_output_features
.
data
_ptr
<
T
>
(),
d_input_features
.
data
<
T
>
(),
nRows
,
maxActive
,
d_input_features
.
data
_ptr
<
T
>
(),
nRows
,
maxActive
,
nPlanes
,
&
rules
[
1
][
0
],
false
);
nPlanes
,
&
rules
[
1
][
0
],
false
);
}
}
}
}
...
@@ -155,8 +155,8 @@ void cpu_BLInputLayer_updateOutput(Metadata<Dimension> &m,
...
@@ -155,8 +155,8 @@ void cpu_BLInputLayer_updateOutput(Metadata<Dimension> &m,
}
else
{
}
else
{
output_features
.
resize_
({
*
m
.
inputNActive
,
nPlanes
});
output_features
.
resize_
({
*
m
.
inputNActive
,
nPlanes
});
output_features
.
zero_
();
output_features
.
zero_
();
InputLayer_ForwardPass
<
T
>
(
input_features
.
data
<
T
>
(),
InputLayer_ForwardPass
<
T
>
(
input_features
.
data
_ptr
<
T
>
(),
output_features
.
data
<
T
>
(),
nRows
,
maxActive
,
output_features
.
data
_ptr
<
T
>
(),
nRows
,
maxActive
,
nPlanes
,
&
rules
[
1
][
0
],
mode
==
4
);
nPlanes
,
&
rules
[
1
][
0
],
mode
==
4
);
}
}
}
}
...
@@ -178,8 +178,8 @@ void cpu_BLInputLayer_updateGradInput(Metadata<Dimension> &m,
...
@@ -178,8 +178,8 @@ void cpu_BLInputLayer_updateGradInput(Metadata<Dimension> &m,
}
else
{
}
else
{
d_input_features
.
resize_
({
rules
[
0
][
2
],
rules
[
0
][
3
],
nPlanes
});
d_input_features
.
resize_
({
rules
[
0
][
2
],
rules
[
0
][
3
],
nPlanes
});
d_input_features
.
zero_
();
d_input_features
.
zero_
();
InputLayer_BackwardPass
<
T
>
(
d_input_features
.
data
<
T
>
(),
InputLayer_BackwardPass
<
T
>
(
d_input_features
.
data
_ptr
<
T
>
(),
d_output_features
.
data
<
T
>
(),
nRows
,
maxActive
,
d_output_features
.
data
_ptr
<
T
>
(),
nRows
,
maxActive
,
nPlanes
,
&
rules
[
1
][
0
],
mode
==
4
);
nPlanes
,
&
rules
[
1
][
0
],
mode
==
4
);
}
}
}
}
...
@@ -201,8 +201,8 @@ void cpu_BLOutputLayer_updateOutput(Metadata<Dimension> &m,
...
@@ -201,8 +201,8 @@ void cpu_BLOutputLayer_updateOutput(Metadata<Dimension> &m,
}
else
{
}
else
{
output_features
.
resize_
({
rules
[
0
][
2
],
rules
[
0
][
3
],
nPlanes
});
output_features
.
resize_
({
rules
[
0
][
2
],
rules
[
0
][
3
],
nPlanes
});
output_features
.
zero_
();
output_features
.
zero_
();
InputLayer_BackwardPass
<
T
>
(
output_features
.
data
<
T
>
(),
InputLayer_BackwardPass
<
T
>
(
output_features
.
data
_ptr
<
T
>
(),
input_features
.
data
<
T
>
(),
nRows
,
maxActive
,
input_features
.
data
_ptr
<
T
>
(),
nRows
,
maxActive
,
nPlanes
,
&
rules
[
1
][
0
],
false
);
nPlanes
,
&
rules
[
1
][
0
],
false
);
}
}
}
}
...
@@ -224,8 +224,8 @@ void cpu_BLOutputLayer_updateGradInput(
...
@@ -224,8 +224,8 @@ void cpu_BLOutputLayer_updateGradInput(
}
else
{
}
else
{
d_input_features
.
resize_
({
nRows
,
nPlanes
});
d_input_features
.
resize_
({
nRows
,
nPlanes
});
d_input_features
.
zero_
();
d_input_features
.
zero_
();
InputLayer_ForwardPass
<
T
>
(
d_output_features
.
data
<
T
>
(),
InputLayer_ForwardPass
<
T
>
(
d_output_features
.
data
_ptr
<
T
>
(),
d_input_features
.
data
<
T
>
(),
nRows
,
maxActive
,
d_input_features
.
data
_ptr
<
T
>
(),
nRows
,
maxActive
,
nPlanes
,
&
rules
[
1
][
0
],
false
);
nPlanes
,
&
rules
[
1
][
0
],
false
);
}
}
}
}
sparseconvnet/SCN/CPU/LeakyReLU.cpp
View file @
2ad7baf8
...
@@ -9,8 +9,8 @@ void cpu_LeakyReLU_updateOutput(/*float*/ at::Tensor &input_features,
...
@@ -9,8 +9,8 @@ void cpu_LeakyReLU_updateOutput(/*float*/ at::Tensor &input_features,
/*float*/
at
::
Tensor
&
output_features
,
/*float*/
at
::
Tensor
&
output_features
,
T
alpha
)
{
T
alpha
)
{
output_features
.
resize_as_
(
input_features
);
output_features
.
resize_as_
(
input_features
);
auto
iF
=
input_features
.
data
<
T
>
();
auto
iF
=
input_features
.
data
_ptr
<
T
>
();
auto
oF
=
output_features
.
data
<
T
>
();
auto
oF
=
output_features
.
data
_ptr
<
T
>
();
auto
n
=
input_features
.
numel
();
auto
n
=
input_features
.
numel
();
for
(
Int
i
=
0
;
i
<
n
;
i
++
)
{
for
(
Int
i
=
0
;
i
<
n
;
i
++
)
{
...
@@ -25,9 +25,9 @@ void cpu_LeakyReLU_updateGradInput(/*float*/ at::Tensor &input_features,
...
@@ -25,9 +25,9 @@ void cpu_LeakyReLU_updateGradInput(/*float*/ at::Tensor &input_features,
/*float*/
at
::
Tensor
&
d_output_features
,
/*float*/
at
::
Tensor
&
d_output_features
,
T
alpha
)
{
T
alpha
)
{
d_input_features
.
resize_as_
(
d_output_features
);
d_input_features
.
resize_as_
(
d_output_features
);
auto
iF
=
input_features
.
data
<
T
>
();
auto
iF
=
input_features
.
data
_ptr
<
T
>
();
auto
diF
=
d_input_features
.
data
<
T
>
();
auto
diF
=
d_input_features
.
data
_ptr
<
T
>
();
auto
doF
=
d_output_features
.
data
<
T
>
();
auto
doF
=
d_output_features
.
data
_ptr
<
T
>
();
auto
n
=
d_input_features
.
numel
();
auto
n
=
d_input_features
.
numel
();
for
(
Int
i
=
0
;
i
<
n
;
i
++
)
{
for
(
Int
i
=
0
;
i
<
n
;
i
++
)
{
...
...
sparseconvnet/SCN/CPU/MaxPooling.cpp
View file @
2ad7baf8
...
@@ -49,8 +49,8 @@ void cpu_MaxPooling_updateOutput(
...
@@ -49,8 +49,8 @@ void cpu_MaxPooling_updateOutput(
output_features
.
resize_
({
nActive
,
input_features
.
size
(
1
)
-
nFeaturesToDrop
});
output_features
.
resize_
({
nActive
,
input_features
.
size
(
1
)
-
nFeaturesToDrop
});
output_features
.
zero_
();
output_features
.
zero_
();
auto
iF
=
input_features
.
data
<
T
>
()
+
nFeaturesToDrop
;
auto
iF
=
input_features
.
data
_ptr
<
T
>
()
+
nFeaturesToDrop
;
auto
oF
=
output_features
.
data
<
T
>
();
auto
oF
=
output_features
.
data
_ptr
<
T
>
();
for
(
auto
&
r
:
_rules
)
{
for
(
auto
&
r
:
_rules
)
{
Int
nHot
=
r
.
size
()
/
2
;
Int
nHot
=
r
.
size
()
/
2
;
...
@@ -74,10 +74,10 @@ void cpu_MaxPooling_updateGradInput(
...
@@ -74,10 +74,10 @@ void cpu_MaxPooling_updateGradInput(
d_input_features
.
resize_as_
(
input_features
);
d_input_features
.
resize_as_
(
input_features
);
d_input_features
.
zero_
();
d_input_features
.
zero_
();
auto
iF
=
input_features
.
data
<
T
>
();
auto
iF
=
input_features
.
data
_ptr
<
T
>
();
auto
oF
=
output_features
.
data
<
T
>
();
auto
oF
=
output_features
.
data
_ptr
<
T
>
();
auto
diF
=
d_input_features
.
data
<
T
>
();
auto
diF
=
d_input_features
.
data
_ptr
<
T
>
();
auto
doF
=
d_output_features
.
data
<
T
>
();
auto
doF
=
d_output_features
.
data
_ptr
<
T
>
();
for
(
auto
&
r
:
_rules
)
{
for
(
auto
&
r
:
_rules
)
{
Int
nHot
=
r
.
size
()
/
2
;
Int
nHot
=
r
.
size
()
/
2
;
...
@@ -101,8 +101,8 @@ void cpu_RandomizedStrideMaxPooling_updateOutput(
...
@@ -101,8 +101,8 @@ void cpu_RandomizedStrideMaxPooling_updateOutput(
output_features
.
resize_
({
nActive
,
input_features
.
size
(
1
)
-
nFeaturesToDrop
});
output_features
.
resize_
({
nActive
,
input_features
.
size
(
1
)
-
nFeaturesToDrop
});
output_features
.
zero_
();
output_features
.
zero_
();
auto
iF
=
input_features
.
data
<
T
>
()
+
nFeaturesToDrop
;
auto
iF
=
input_features
.
data
_ptr
<
T
>
()
+
nFeaturesToDrop
;
auto
oF
=
output_features
.
data
<
T
>
();
auto
oF
=
output_features
.
data
_ptr
<
T
>
();
for
(
auto
&
r
:
_rules
)
{
for
(
auto
&
r
:
_rules
)
{
Int
nHot
=
r
.
size
()
/
2
;
Int
nHot
=
r
.
size
()
/
2
;
...
@@ -126,10 +126,10 @@ void cpu_RandomizedStrideMaxPooling_updateGradInput(
...
@@ -126,10 +126,10 @@ void cpu_RandomizedStrideMaxPooling_updateGradInput(
d_input_features
.
resize_as_
(
input_features
);
d_input_features
.
resize_as_
(
input_features
);
d_input_features
.
zero_
();
d_input_features
.
zero_
();
auto
iF
=
input_features
.
data
<
T
>
();
auto
iF
=
input_features
.
data
_ptr
<
T
>
();
auto
oF
=
output_features
.
data
<
T
>
();
auto
oF
=
output_features
.
data
_ptr
<
T
>
();
auto
diF
=
d_input_features
.
data
<
T
>
();
auto
diF
=
d_input_features
.
data
_ptr
<
T
>
();
auto
doF
=
d_output_features
.
data
<
T
>
();
auto
doF
=
d_output_features
.
data
_ptr
<
T
>
();
for
(
auto
&
r
:
_rules
)
{
for
(
auto
&
r
:
_rules
)
{
Int
nHot
=
r
.
size
()
/
2
;
Int
nHot
=
r
.
size
()
/
2
;
...
...
sparseconvnet/SCN/CPU/SparseToDense.cpp
View file @
2ad7baf8
...
@@ -42,7 +42,7 @@ void cpu_SparseToDense_updateOutput(
...
@@ -42,7 +42,7 @@ void cpu_SparseToDense_updateOutput(
std
::
array
<
long
,
Dimension
+
2
>
sz
;
std
::
array
<
long
,
Dimension
+
2
>
sz
;
sz
[
0
]
=
m
.
grids
.
begin
()
->
second
.
size
();
// batch size
sz
[
0
]
=
m
.
grids
.
begin
()
->
second
.
size
();
// batch size
sz
[
1
]
=
nPlanes
;
sz
[
1
]
=
nPlanes
;
long
*
in_sz
=
inputSize
.
data
<
long
>
();
long
*
in_sz
=
inputSize
.
data
_ptr
<
long
>
();
for
(
Int
i
=
0
;
i
<
Dimension
;
++
i
)
for
(
Int
i
=
0
;
i
<
Dimension
;
++
i
)
sz
[
i
+
2
]
=
in_sz
[
i
];
sz
[
i
+
2
]
=
in_sz
[
i
];
output_features
.
resize_
(
sz
);
output_features
.
resize_
(
sz
);
...
@@ -51,9 +51,9 @@ void cpu_SparseToDense_updateOutput(
...
@@ -51,9 +51,9 @@ void cpu_SparseToDense_updateOutput(
if
(
input_features
.
ndimension
()
==
2
)
{
if
(
input_features
.
ndimension
()
==
2
)
{
const
auto
&
_rules
=
m
.
getSparseToDenseRuleBook
(
inputSize
,
true
);
const
auto
&
_rules
=
m
.
getSparseToDenseRuleBook
(
inputSize
,
true
);
Int
_nPlanes
=
input_features
.
size
(
1
);
Int
_nPlanes
=
input_features
.
size
(
1
);
auto
iF
=
input_features
.
data
<
T
>
();
auto
iF
=
input_features
.
data
_ptr
<
T
>
();
auto
oF
=
output_features
.
data
<
T
>
();
auto
oF
=
output_features
.
data
_ptr
<
T
>
();
long
spatialVolume
=
inputSize
.
prod
().
data
<
long
>
()[
0
];
long
spatialVolume
=
inputSize
.
prod
().
data
_ptr
<
long
>
()[
0
];
for
(
auto
&
r
:
_rules
)
{
for
(
auto
&
r
:
_rules
)
{
Int
nHot
=
r
.
size
()
/
2
;
Int
nHot
=
r
.
size
()
/
2
;
SparseToDense_ForwardPass
<
T
>
(
iF
,
oF
,
_nPlanes
,
spatialVolume
,
&
r
[
0
],
SparseToDense_ForwardPass
<
T
>
(
iF
,
oF
,
_nPlanes
,
spatialVolume
,
&
r
[
0
],
...
@@ -73,10 +73,10 @@ void cpu_SparseToDense_updateGradInput(
...
@@ -73,10 +73,10 @@ void cpu_SparseToDense_updateGradInput(
d_input_features
.
zero_
();
d_input_features
.
zero_
();
if
(
input_features
.
ndimension
()
==
2
)
{
if
(
input_features
.
ndimension
()
==
2
)
{
const
auto
&
_rules
=
m
.
getSparseToDenseRuleBook
(
inputSize
,
true
);
const
auto
&
_rules
=
m
.
getSparseToDenseRuleBook
(
inputSize
,
true
);
long
spatialVolume
=
inputSize
.
prod
().
data
<
long
>
()[
0
];
long
spatialVolume
=
inputSize
.
prod
().
data
_ptr
<
long
>
()[
0
];
Int
_nPlanes
=
d_input_features
.
size
(
1
);
Int
_nPlanes
=
d_input_features
.
size
(
1
);
auto
diF
=
d_input_features
.
data
<
T
>
();
auto
diF
=
d_input_features
.
data
_ptr
<
T
>
();
auto
doF
=
d_output_features
.
data
<
T
>
();
auto
doF
=
d_output_features
.
data
_ptr
<
T
>
();
for
(
auto
&
r
:
_rules
)
{
for
(
auto
&
r
:
_rules
)
{
Int
nHot
=
r
.
size
()
/
2
;
Int
nHot
=
r
.
size
()
/
2
;
SparseToDense_BackwardPass
<
T
>
(
diF
,
doF
,
_nPlanes
,
spatialVolume
,
&
r
[
0
],
SparseToDense_BackwardPass
<
T
>
(
diF
,
doF
,
_nPlanes
,
spatialVolume
,
&
r
[
0
],
...
...
sparseconvnet/SCN/CPU/UnPooling.cpp
View file @
2ad7baf8
...
@@ -46,8 +46,8 @@ void cpu_UnPooling_updateOutput(
...
@@ -46,8 +46,8 @@ void cpu_UnPooling_updateOutput(
output_features
.
resize_
({
nActive
,
input_features
.
size
(
1
)
-
nFeaturesToDrop
});
output_features
.
resize_
({
nActive
,
input_features
.
size
(
1
)
-
nFeaturesToDrop
});
output_features
.
zero_
();
output_features
.
zero_
();
auto
iF
=
input_features
.
data
<
T
>
()
+
nFeaturesToDrop
;
auto
iF
=
input_features
.
data
_ptr
<
T
>
()
+
nFeaturesToDrop
;
auto
oF
=
output_features
.
data
<
T
>
();
auto
oF
=
output_features
.
data
_ptr
<
T
>
();
for
(
auto
&
r
:
_rules
)
{
for
(
auto
&
r
:
_rules
)
{
Int
nHot
=
r
.
size
()
/
2
;
Int
nHot
=
r
.
size
()
/
2
;
...
@@ -67,8 +67,8 @@ void cpu_UnPooling_updateGradInput(
...
@@ -67,8 +67,8 @@ void cpu_UnPooling_updateGradInput(
const
auto
&
_rules
=
const
auto
&
_rules
=
m
.
getRuleBook
(
outputSize
,
inputSize
,
poolSize
,
poolStride
,
true
);
m
.
getRuleBook
(
outputSize
,
inputSize
,
poolSize
,
poolStride
,
true
);
auto
diF
=
d_input_features
.
data
<
T
>
()
+
nFeaturesToDrop
;
auto
diF
=
d_input_features
.
data
_ptr
<
T
>
()
+
nFeaturesToDrop
;
auto
doF
=
d_output_features
.
data
<
T
>
();
auto
doF
=
d_output_features
.
data
_ptr
<
T
>
();
for
(
auto
&
r
:
_rules
)
{
for
(
auto
&
r
:
_rules
)
{
Int
nHot
=
r
.
size
()
/
2
;
Int
nHot
=
r
.
size
()
/
2
;
...
...
sparseconvnet/SCN/CUDA/ActivePooling.cpp
View file @
2ad7baf8
...
@@ -27,8 +27,8 @@ void cuda_ActivePooling_updateOutput(
...
@@ -27,8 +27,8 @@ void cuda_ActivePooling_updateOutput(
output_features
.
resize_
({
batchSize
,
nPlanes
});
output_features
.
resize_
({
batchSize
,
nPlanes
});
output_features
.
zero_
();
output_features
.
zero_
();
auto
iF
=
input_features
.
data
<
T
>
();
auto
iF
=
input_features
.
data
_ptr
<
T
>
();
auto
oF
=
output_features
.
data
<
T
>
();
auto
oF
=
output_features
.
data
_ptr
<
T
>
();
ActivePooling_ForwardPass
<
T
>
(
iF
,
oF
,
batchSize
,
maxActive
,
nPlanes
,
ActivePooling_ForwardPass
<
T
>
(
iF
,
oF
,
batchSize
,
maxActive
,
nPlanes
,
&
_rules
[
0
][
0
],
average
);
&
_rules
[
0
][
0
],
average
);
}
}
...
@@ -46,8 +46,8 @@ void cuda_ActivePooling_updateGradInput(
...
@@ -46,8 +46,8 @@ void cuda_ActivePooling_updateGradInput(
d_input_features
.
resize_as_
(
input_features
);
d_input_features
.
resize_as_
(
input_features
);
d_input_features
.
zero_
();
d_input_features
.
zero_
();
auto
diF
=
d_input_features
.
data
<
T
>
();
auto
diF
=
d_input_features
.
data
_ptr
<
T
>
();
auto
doF
=
d_output_features
.
data
<
T
>
();
auto
doF
=
d_output_features
.
data
_ptr
<
T
>
();
ActivePooling_BackwardPass
<
T
>
(
diF
,
doF
,
batchSize
,
maxActive
,
nPlanes
,
ActivePooling_BackwardPass
<
T
>
(
diF
,
doF
,
batchSize
,
maxActive
,
nPlanes
,
&
_rules
[
0
][
0
],
average
);
&
_rules
[
0
][
0
],
average
);
...
...
sparseconvnet/SCN/CUDA/ActivePooling.cu
View file @
2ad7baf8
...
@@ -24,7 +24,7 @@ void ActivePooling_ForwardPass(T *input_features, T *output_features,
...
@@ -24,7 +24,7 @@ void ActivePooling_ForwardPass(T *input_features, T *output_features,
const
Int
*
rules
,
bool
average
)
{
const
Int
*
rules
,
bool
average
)
{
auto
rulesBuffer
=
at
::
empty
({
1
<<
22
},
at
::
CUDA
(
at_kINT
));
auto
rulesBuffer
=
at
::
empty
({
1
<<
22
},
at
::
CUDA
(
at_kINT
));
Int
*
rb
=
rulesBuffer
.
data
<
Int
>
();
Int
*
rb
=
rulesBuffer
.
data
_ptr
<
Int
>
();
Int
rowBatchSize
=
std
::
min
((
Int
)
32768
,
(
1
<<
22
)
/
(
maxActive
+
1
));
Int
rowBatchSize
=
std
::
min
((
Int
)
32768
,
(
1
<<
22
)
/
(
maxActive
+
1
));
assert
(
rowBatchSize
>
0
);
assert
(
rowBatchSize
>
0
);
Int
kernelBlockDim
=
std
::
min
(
nPlanes
,
(
Int
)
32
);
Int
kernelBlockDim
=
std
::
min
(
nPlanes
,
(
Int
)
32
);
...
@@ -59,7 +59,7 @@ void ActivePooling_BackwardPass(T *d_input_features, T *d_output_features,
...
@@ -59,7 +59,7 @@ void ActivePooling_BackwardPass(T *d_input_features, T *d_output_features,
Int
batchSize
,
Int
maxActive
,
Int
nPlanes
,
Int
batchSize
,
Int
maxActive
,
Int
nPlanes
,
const
Int
*
rules
,
bool
average
)
{
const
Int
*
rules
,
bool
average
)
{
auto
rulesBuffer
=
at
::
empty
({
1
<<
22
},
at
::
CUDA
(
at_kINT
));
auto
rulesBuffer
=
at
::
empty
({
1
<<
22
},
at
::
CUDA
(
at_kINT
));
Int
*
rb
=
rulesBuffer
.
data
<
Int
>
();
Int
*
rb
=
rulesBuffer
.
data
_ptr
<
Int
>
();
Int
rowBatchSize
=
std
::
min
((
Int
)
32768
,
(
1
<<
22
)
/
(
maxActive
+
1
));
Int
rowBatchSize
=
std
::
min
((
Int
)
32768
,
(
1
<<
22
)
/
(
maxActive
+
1
));
assert
(
rowBatchSize
>
0
);
assert
(
rowBatchSize
>
0
);
Int
kernelBlockDim
=
std
::
min
(
nPlanes
,
(
Int
)
32
);
Int
kernelBlockDim
=
std
::
min
(
nPlanes
,
(
Int
)
32
);
...
...
sparseconvnet/SCN/CUDA/AffineReluTrivialConvolution.cpp
View file @
2ad7baf8
...
@@ -32,8 +32,8 @@ double cuda_AffineReluTrivialConvolution_updateOutput(
...
@@ -32,8 +32,8 @@ double cuda_AffineReluTrivialConvolution_updateOutput(
output_features
.
resize_
({
input_features
.
size
(
0
),
convWeight
.
size
(
1
)});
output_features
.
resize_
({
input_features
.
size
(
0
),
convWeight
.
size
(
1
)});
dAffineReluTrivialConvolution_forward
<
T
>
(
dAffineReluTrivialConvolution_forward
<
T
>
(
input_features
.
data
<
T
>
(),
output_features
.
data
<
T
>
(),
input_features
.
data
_ptr
<
T
>
(),
output_features
.
data
_ptr
<
T
>
(),
affineWeight
.
data
<
T
>
(),
affineBias
.
data
<
T
>
(),
convWeight
.
data
<
T
>
(),
affineWeight
.
data
_ptr
<
T
>
(),
affineBias
.
data
_ptr
<
T
>
(),
convWeight
.
data
_ptr
<
T
>
(),
convWeight
.
size
(
0
),
input_features
.
stride
(
0
),
convWeight
.
size
(
1
),
convWeight
.
size
(
0
),
input_features
.
stride
(
0
),
convWeight
.
size
(
1
),
output_features
.
size
(
1
),
input_features
.
size
(
0
));
output_features
.
size
(
1
),
input_features
.
size
(
0
));
return
input_features
.
size
(
0
)
*
input_features
.
size
(
1
)
*
return
input_features
.
size
(
0
)
*
input_features
.
size
(
1
)
*
...
@@ -54,10 +54,10 @@ void cuda_AffineReluTrivialConvolution_backward(
...
@@ -54,10 +54,10 @@ void cuda_AffineReluTrivialConvolution_backward(
d_input_features
.
resize_as_
(
input_features
);
d_input_features
.
resize_as_
(
input_features
);
dAffineReluTrivialConvolution_backward_dW
<
T
>
(
dAffineReluTrivialConvolution_backward_dW
<
T
>
(
input_features
.
data
<
T
>
(),
d_input_features
.
data
<
T
>
(),
input_features
.
data
_ptr
<
T
>
(),
d_input_features
.
data
_ptr
<
T
>
(),
d_output_features
.
data
<
T
>
(),
affineWeight
.
data
<
T
>
(),
d_output_features
.
data
_ptr
<
T
>
(),
affineWeight
.
data
_ptr
<
T
>
(),
d_affineWeight
.
data
<
T
>
(),
affineBias
.
data
<
T
>
(),
d_affineBias
.
data
<
T
>
(),
d_affineWeight
.
data
_ptr
<
T
>
(),
affineBias
.
data
_ptr
<
T
>
(),
d_affineBias
.
data
_ptr
<
T
>
(),
convWeight
.
data
<
T
>
(),
d_convWeight
.
data
<
T
>
(),
convWeight
.
size
(
0
),
convWeight
.
data
_ptr
<
T
>
(),
d_convWeight
.
data
_ptr
<
T
>
(),
convWeight
.
size
(
0
),
input_features
.
stride
(
0
),
convWeight
.
size
(
1
),
d_output_features
.
stride
(
0
),
input_features
.
stride
(
0
),
convWeight
.
size
(
1
),
d_output_features
.
stride
(
0
),
input_features
.
size
(
0
),
additiveGrad
);
input_features
.
size
(
0
),
additiveGrad
);
}
}
sparseconvnet/SCN/CUDA/AveragePooling.cpp
View file @
2ad7baf8
...
@@ -31,8 +31,8 @@ void cuda_AveragePooling_updateOutput(
...
@@ -31,8 +31,8 @@ void cuda_AveragePooling_updateOutput(
output_features
.
resize_
({
nActive
,
input_features
.
size
(
1
)
-
nFeaturesToDrop
});
output_features
.
resize_
({
nActive
,
input_features
.
size
(
1
)
-
nFeaturesToDrop
});
output_features
.
zero_
();
output_features
.
zero_
();
auto
iF
=
input_features
.
data
<
T
>
()
+
nFeaturesToDrop
;
auto
iF
=
input_features
.
data
_ptr
<
T
>
()
+
nFeaturesToDrop
;
auto
oF
=
output_features
.
data
<
T
>
();
auto
oF
=
output_features
.
data
_ptr
<
T
>
();
cuda_AveragePooling_ForwardPass
<
T
>
(
iF
,
oF
,
nPlanes
,
input_features
.
size
(
1
),
cuda_AveragePooling_ForwardPass
<
T
>
(
iF
,
oF
,
nPlanes
,
input_features
.
size
(
1
),
output_features
.
size
(
1
),
_rules
,
output_features
.
size
(
1
),
_rules
,
_rules
.
size
());
_rules
.
size
());
...
@@ -53,8 +53,8 @@ void cuda_AveragePooling_updateGradInput(
...
@@ -53,8 +53,8 @@ void cuda_AveragePooling_updateGradInput(
d_input_features
.
resize_as_
(
input_features
);
d_input_features
.
resize_as_
(
input_features
);
d_input_features
.
zero_
();
d_input_features
.
zero_
();
auto
diF
=
d_input_features
.
data
<
T
>
()
+
nFeaturesToDrop
;
auto
diF
=
d_input_features
.
data
_ptr
<
T
>
()
+
nFeaturesToDrop
;
auto
doF
=
d_output_features
.
data
<
T
>
();
auto
doF
=
d_output_features
.
data
_ptr
<
T
>
();
cuda_AveragePooling_BackwardPass
<
T
>
(
diF
,
doF
,
nPlanes
,
input_features
.
size
(
1
),
cuda_AveragePooling_BackwardPass
<
T
>
(
diF
,
doF
,
nPlanes
,
input_features
.
size
(
1
),
d_output_features
.
size
(
1
),
_rules
,
d_output_features
.
size
(
1
),
_rules
,
_rules
.
size
());
_rules
.
size
());
...
@@ -75,8 +75,8 @@ void cuda_CopyFeaturesHelper_updateOutput(at::Tensor &rules, at::Tensor &context
...
@@ -75,8 +75,8 @@ void cuda_CopyFeaturesHelper_updateOutput(at::Tensor &rules, at::Tensor &context
Int
nPlanes
=
context
.
size
(
1
);
Int
nPlanes
=
context
.
size
(
1
);
Int
nHot
=
rules
.
size
(
0
)
/
2
;
Int
nHot
=
rules
.
size
(
0
)
/
2
;
cuda_CopyFeaturesHelper_ForwardPass
<
T
>
(
context
.
data
<
T
>
(),
Context
.
data
<
T
>
(),
cuda_CopyFeaturesHelper_ForwardPass
<
T
>
(
context
.
data
_ptr
<
T
>
(),
Context
.
data
_ptr
<
T
>
(),
rules
.
data
<
Int
>
(),
nPlanes
,
nHot
);
rules
.
data
_ptr
<
Int
>
(),
nPlanes
,
nHot
);
}
}
template
<
typename
T
>
template
<
typename
T
>
...
@@ -87,5 +87,5 @@ void cuda_CopyFeaturesHelper_updateGradInput(at::Tensor &rules,
...
@@ -87,5 +87,5 @@ void cuda_CopyFeaturesHelper_updateGradInput(at::Tensor &rules,
Int
nPlanes
=
dcontext
.
size
(
1
);
Int
nPlanes
=
dcontext
.
size
(
1
);
Int
nHot
=
rules
.
size
(
0
)
/
2
;
Int
nHot
=
rules
.
size
(
0
)
/
2
;
cuda_CopyFeaturesHelper_BackwardPass
<
T
>
(
cuda_CopyFeaturesHelper_BackwardPass
<
T
>
(
dcontext
.
data
<
T
>
(),
dContext
.
data
<
T
>
(),
rules
.
data
<
Int
>
(),
nPlanes
,
nHot
);
dcontext
.
data
_ptr
<
T
>
(),
dContext
.
data
_ptr
<
T
>
(),
rules
.
data
_ptr
<
Int
>
(),
nPlanes
,
nHot
);
}
}
sparseconvnet/SCN/CUDA/BatchNormalization.cpp
View file @
2ad7baf8
...
@@ -33,9 +33,9 @@ void cuda_BatchNormalization_updateOutput(
...
@@ -33,9 +33,9 @@ void cuda_BatchNormalization_updateOutput(
auto
nPlanes
=
input_features
.
size
(
1
);
auto
nPlanes
=
input_features
.
size
(
1
);
auto
input_stride
=
input_features
.
stride
(
0
);
auto
input_stride
=
input_features
.
stride
(
0
);
auto
output_stride
=
output_features
.
stride
(
0
);
auto
output_stride
=
output_features
.
stride
(
0
);
bn_f
(
input_features
.
data
<
T
>
(),
output_features
.
data
<
T
>
(),
nPlanes
,
bn_f
(
input_features
.
data
_ptr
<
T
>
(),
output_features
.
data
_ptr
<
T
>
(),
nPlanes
,
input_stride
,
output_stride
,
nActive
,
saveMean
.
data
<
T
>
(),
input_stride
,
output_stride
,
nActive
,
saveMean
.
data
_ptr
<
T
>
(),
saveInvStd
.
data
<
T
>
(),
runningMean
.
data
<
T
>
(),
runningVar
.
data
<
T
>
(),
saveInvStd
.
data
_ptr
<
T
>
(),
runningMean
.
data
_ptr
<
T
>
(),
runningVar
.
data
_ptr
<
T
>
(),
OptionalTensorData
<
T
>
(
weight
),
OptionalTensorData
<
T
>
(
bias
),
eps
,
OptionalTensorData
<
T
>
(
weight
),
OptionalTensorData
<
T
>
(
bias
),
eps
,
momentum
,
train
,
leakiness
);
momentum
,
train
,
leakiness
);
}
}
...
@@ -60,10 +60,10 @@ void cuda_BatchNormalization_backward(
...
@@ -60,10 +60,10 @@ void cuda_BatchNormalization_backward(
auto
nPlanes
=
input_features
.
size
(
1
);
auto
nPlanes
=
input_features
.
size
(
1
);
auto
input_stride
=
input_features
.
stride
(
0
);
auto
input_stride
=
input_features
.
stride
(
0
);
auto
output_stride
=
output_features
.
stride
(
0
);
auto
output_stride
=
output_features
.
stride
(
0
);
bn_b
(
input_features
.
data
<
T
>
(),
d_input_features
.
data
<
T
>
(),
bn_b
(
input_features
.
data
_ptr
<
T
>
(),
d_input_features
.
data
_ptr
<
T
>
(),
output_features
.
data
<
T
>
(),
d_output_features
.
data
<
T
>
(),
nPlanes
,
output_features
.
data
_ptr
<
T
>
(),
d_output_features
.
data
_ptr
<
T
>
(),
nPlanes
,
input_stride
,
output_stride
,
nActive
,
saveMean
.
data
<
T
>
(),
input_stride
,
output_stride
,
nActive
,
saveMean
.
data
_ptr
<
T
>
(),
saveInvStd
.
data
<
T
>
(),
runningMean
.
data
<
T
>
(),
runningVar
.
data
<
T
>
(),
saveInvStd
.
data
_ptr
<
T
>
(),
runningMean
.
data
_ptr
<
T
>
(),
runningVar
.
data
_ptr
<
T
>
(),
OptionalTensorData
<
T
>
(
weight
),
OptionalTensorData
<
T
>
(
bias
),
OptionalTensorData
<
T
>
(
weight
),
OptionalTensorData
<
T
>
(
bias
),
OptionalTensorData
<
T
>
(
d_weight
),
OptionalTensorData
<
T
>
(
d_bias
),
OptionalTensorData
<
T
>
(
d_weight
),
OptionalTensorData
<
T
>
(
d_bias
),
leakiness
);
leakiness
);
...
...
sparseconvnet/SCN/CUDA/BatchwiseMultiplicativeDropout.cpp
View file @
2ad7baf8
...
@@ -19,7 +19,7 @@ void cuda_BatchwiseMultiplicativeDropout_updateOutput(
...
@@ -19,7 +19,7 @@ void cuda_BatchwiseMultiplicativeDropout_updateOutput(
output_features
.
resize_as_
(
input_features
);
output_features
.
resize_as_
(
input_features
);
auto
nActive
=
input_features
.
size
(
0
);
auto
nActive
=
input_features
.
size
(
0
);
auto
nPlanes
=
input_features
.
size
(
1
);
auto
nPlanes
=
input_features
.
size
(
1
);
bmd_f
(
input_features
.
data
<
T
>
(),
output_features
.
data
<
T
>
(),
noise
.
data
<
T
>
(),
bmd_f
(
input_features
.
data
_ptr
<
T
>
(),
output_features
.
data
_ptr
<
T
>
(),
noise
.
data
_ptr
<
T
>
(),
nActive
,
nPlanes
,
alpha
);
nActive
,
nPlanes
,
alpha
);
}
}
...
@@ -32,6 +32,6 @@ void cuda_BatchwiseMultiplicativeDropout_updateGradInput(
...
@@ -32,6 +32,6 @@ void cuda_BatchwiseMultiplicativeDropout_updateGradInput(
d_input_features
.
resize_as_
(
d_output_features
);
d_input_features
.
resize_as_
(
d_output_features
);
auto
nActive
=
input_features
.
size
(
0
);
auto
nActive
=
input_features
.
size
(
0
);
auto
nPlanes
=
input_features
.
size
(
1
);
auto
nPlanes
=
input_features
.
size
(
1
);
bmd_b
(
input_features
.
data
<
T
>
(),
d_input_features
.
data
<
T
>
(),
bmd_b
(
input_features
.
data
_ptr
<
T
>
(),
d_input_features
.
data
_ptr
<
T
>
(),
d_output_features
.
data
<
T
>
(),
noise
.
data
<
T
>
(),
nActive
,
nPlanes
,
alpha
);
d_output_features
.
data
_ptr
<
T
>
(),
noise
.
data
_ptr
<
T
>
(),
nActive
,
nPlanes
,
alpha
);
}
}
sparseconvnet/SCN/CUDA/Convolution.cpp
View file @
2ad7baf8
...
@@ -38,12 +38,12 @@ double cuda_Convolution_updateOutput(
...
@@ -38,12 +38,12 @@ double cuda_Convolution_updateOutput(
output_features
.
resize_
({
nActiveOut
,
op
*
nGroups
});
output_features
.
resize_
({
nActiveOut
,
op
*
nGroups
});
if
(
nActiveOut
)
{
if
(
nActiveOut
)
{
auto
iF
=
input_features
.
data
<
T
>
();
auto
iF
=
input_features
.
data
_ptr
<
T
>
();
auto
oF
=
output_features
.
data
<
T
>
();
auto
oF
=
output_features
.
data
_ptr
<
T
>
();
auto
w
=
weight
.
data
<
T
>
();
auto
w
=
weight
.
data
_ptr
<
T
>
();
if
(
bias
.
numel
())
if
(
bias
.
numel
())
Convolution_fp_bias
(
oF
,
bias
.
data
<
T
>
(),
op
,
nActiveOut
);
Convolution_fp_bias
(
oF
,
bias
.
data
_ptr
<
T
>
(),
op
,
nActiveOut
);
else
else
output_features
.
zero_
();
output_features
.
zero_
();
...
@@ -76,17 +76,17 @@ void cuda_Convolution_backward(
...
@@ -76,17 +76,17 @@ void cuda_Convolution_backward(
d_input_features
.
zero_
();
d_input_features
.
zero_
();
if
(
nActiveOut
)
{
if
(
nActiveOut
)
{
auto
iF
=
input_features
.
data
<
T
>
();
auto
iF
=
input_features
.
data
_ptr
<
T
>
();
auto
diF
=
d_input_features
.
data
<
T
>
();
auto
diF
=
d_input_features
.
data
_ptr
<
T
>
();
auto
doF
=
d_output_features
.
data
<
T
>
();
auto
doF
=
d_output_features
.
data
_ptr
<
T
>
();
auto
w
=
weight
.
data
<
T
>
();
auto
w
=
weight
.
data
_ptr
<
T
>
();
auto
dw
=
d_weight
.
data
<
T
>
();
auto
dw
=
d_weight
.
data
_ptr
<
T
>
();
dConvolution_backward_dW2
<
T
>
(
iF
,
diF
,
doF
,
w
,
dw
,
_rules
,
ip
,
ip
*
nGroups
,
dConvolution_backward_dW2
<
T
>
(
iF
,
diF
,
doF
,
w
,
dw
,
_rules
,
ip
,
ip
*
nGroups
,
op
,
op
*
nGroups
,
nGroups
);
op
,
op
*
nGroups
,
nGroups
);
if
(
d_bias
.
numel
())
{
if
(
d_bias
.
numel
())
{
auto
db
=
d_bias
.
data
<
T
>
();
auto
db
=
d_bias
.
data
_ptr
<
T
>
();
Convolution_bp_bias
(
doF
,
db
,
op
,
nActiveOut
);
Convolution_bp_bias
(
doF
,
db
,
op
,
nActiveOut
);
}
}
}
}
...
@@ -108,12 +108,12 @@ double cuda_SubmanifoldConvolution_updateOutput(
...
@@ -108,12 +108,12 @@ double cuda_SubmanifoldConvolution_updateOutput(
output_features
.
resize_
({
nActive
,
op
*
nGroups
});
output_features
.
resize_
({
nActive
,
op
*
nGroups
});
if
(
nActive
)
{
if
(
nActive
)
{
auto
iF
=
input_features
.
data
<
T
>
();
auto
iF
=
input_features
.
data
_ptr
<
T
>
();
auto
oF
=
output_features
.
data
<
T
>
();
auto
oF
=
output_features
.
data
_ptr
<
T
>
();
auto
w
=
weight
.
data
<
T
>
();
auto
w
=
weight
.
data
_ptr
<
T
>
();
if
(
bias
.
numel
())
if
(
bias
.
numel
())
Convolution_fp_bias
(
oF
,
bias
.
data
<
T
>
(),
op
,
nActive
);
Convolution_fp_bias
(
oF
,
bias
.
data
_ptr
<
T
>
(),
op
,
nActive
);
else
else
output_features
.
zero_
();
output_features
.
zero_
();
...
@@ -143,17 +143,17 @@ void cuda_SubmanifoldConvolution_backward(
...
@@ -143,17 +143,17 @@ void cuda_SubmanifoldConvolution_backward(
d_input_features
.
zero_
();
d_input_features
.
zero_
();
if
(
nActive
)
{
if
(
nActive
)
{
auto
iF
=
input_features
.
data
<
T
>
();
auto
iF
=
input_features
.
data
_ptr
<
T
>
();
auto
diF
=
d_input_features
.
data
<
T
>
();
auto
diF
=
d_input_features
.
data
_ptr
<
T
>
();
auto
doF
=
d_output_features
.
data
<
T
>
();
auto
doF
=
d_output_features
.
data
_ptr
<
T
>
();
auto
w
=
weight
.
data
<
T
>
();
auto
w
=
weight
.
data
_ptr
<
T
>
();
auto
dw
=
d_weight
.
data
<
T
>
();
auto
dw
=
d_weight
.
data
_ptr
<
T
>
();
dConvolution_backward_dW2
<
T
>
(
iF
,
diF
,
doF
,
w
,
dw
,
_rules
,
ip
,
ip
*
nGroups
,
dConvolution_backward_dW2
<
T
>
(
iF
,
diF
,
doF
,
w
,
dw
,
_rules
,
ip
,
ip
*
nGroups
,
op
,
op
*
nGroups
,
nGroups
);
op
,
op
*
nGroups
,
nGroups
);
if
(
d_bias
.
numel
())
{
if
(
d_bias
.
numel
())
{
auto
db
=
d_bias
.
data
<
T
>
();
auto
db
=
d_bias
.
data
_ptr
<
T
>
();
Convolution_bp_bias
(
doF
,
db
,
op
,
nActive
);
Convolution_bp_bias
(
doF
,
db
,
op
,
nActive
);
}
}
}
}
...
@@ -174,12 +174,12 @@ double cuda_PermutohedralSubmanifoldConvolution_updateOutput(
...
@@ -174,12 +174,12 @@ double cuda_PermutohedralSubmanifoldConvolution_updateOutput(
output_features
.
resize_
({
nActive
,
op
*
nGroups
});
output_features
.
resize_
({
nActive
,
op
*
nGroups
});
if
(
nActive
)
{
if
(
nActive
)
{
auto
iF
=
input_features
.
data
<
T
>
();
auto
iF
=
input_features
.
data
_ptr
<
T
>
();
auto
oF
=
output_features
.
data
<
T
>
();
auto
oF
=
output_features
.
data
_ptr
<
T
>
();
auto
w
=
weight
.
data
<
T
>
();
auto
w
=
weight
.
data
_ptr
<
T
>
();
if
(
bias
.
numel
())
if
(
bias
.
numel
())
Convolution_fp_bias
(
oF
,
bias
.
data
<
T
>
(),
op
,
nActive
);
Convolution_fp_bias
(
oF
,
bias
.
data
_ptr
<
T
>
(),
op
,
nActive
);
else
else
output_features
.
zero_
();
output_features
.
zero_
();
...
@@ -208,17 +208,17 @@ void cuda_PermutohedralSubmanifoldConvolution_backward(
...
@@ -208,17 +208,17 @@ void cuda_PermutohedralSubmanifoldConvolution_backward(
d_input_features
.
zero_
();
d_input_features
.
zero_
();
if
(
nActive
)
{
if
(
nActive
)
{
auto
iF
=
input_features
.
data
<
T
>
();
auto
iF
=
input_features
.
data
_ptr
<
T
>
();
auto
diF
=
d_input_features
.
data
<
T
>
();
auto
diF
=
d_input_features
.
data
_ptr
<
T
>
();
auto
doF
=
d_output_features
.
data
<
T
>
();
auto
doF
=
d_output_features
.
data
_ptr
<
T
>
();
auto
w
=
weight
.
data
<
T
>
();
auto
w
=
weight
.
data
_ptr
<
T
>
();
auto
dw
=
d_weight
.
data
<
T
>
();
auto
dw
=
d_weight
.
data
_ptr
<
T
>
();
dConvolution_backward_dW2
<
T
>
(
iF
,
diF
,
doF
,
w
,
dw
,
_rules
,
ip
,
ip
*
nGroups
,
dConvolution_backward_dW2
<
T
>
(
iF
,
diF
,
doF
,
w
,
dw
,
_rules
,
ip
,
ip
*
nGroups
,
op
,
op
*
nGroups
,
nGroups
);
op
,
op
*
nGroups
,
nGroups
);
if
(
d_bias
.
numel
())
{
if
(
d_bias
.
numel
())
{
auto
db
=
d_bias
.
data
<
T
>
();
auto
db
=
d_bias
.
data
_ptr
<
T
>
();
Convolution_bp_bias
(
doF
,
db
,
op
,
nActive
);
Convolution_bp_bias
(
doF
,
db
,
op
,
nActive
);
}
}
}
}
...
@@ -243,12 +243,12 @@ double cuda_FullConvolution_updateOutput(
...
@@ -243,12 +243,12 @@ double cuda_FullConvolution_updateOutput(
output_features
.
resize_
({
nActiveOut
,
op
*
nGroups
});
output_features
.
resize_
({
nActiveOut
,
op
*
nGroups
});
if
(
nActiveOut
)
{
if
(
nActiveOut
)
{
auto
iF
=
input_features
.
data
<
T
>
();
auto
iF
=
input_features
.
data
_ptr
<
T
>
();
auto
oF
=
output_features
.
data
<
T
>
();
auto
oF
=
output_features
.
data
_ptr
<
T
>
();
auto
w
=
weight
.
data
<
T
>
();
auto
w
=
weight
.
data
_ptr
<
T
>
();
if
(
bias
.
numel
())
if
(
bias
.
numel
())
Convolution_fp_bias
(
oF
,
bias
.
data
<
T
>
(),
op
,
nActiveOut
);
Convolution_fp_bias
(
oF
,
bias
.
data
_ptr
<
T
>
(),
op
,
nActiveOut
);
else
else
output_features
.
zero_
();
output_features
.
zero_
();
...
@@ -282,17 +282,17 @@ void cuda_FullConvolution_backward(
...
@@ -282,17 +282,17 @@ void cuda_FullConvolution_backward(
d_input_features
.
zero_
();
d_input_features
.
zero_
();
if
(
nActiveOut
)
{
if
(
nActiveOut
)
{
auto
iF
=
input_features
.
data
<
T
>
();
auto
iF
=
input_features
.
data
_ptr
<
T
>
();
auto
diF
=
d_input_features
.
data
<
T
>
();
auto
diF
=
d_input_features
.
data
_ptr
<
T
>
();
auto
doF
=
d_output_features
.
data
<
T
>
();
auto
doF
=
d_output_features
.
data
_ptr
<
T
>
();
auto
w
=
weight
.
data
<
T
>
();
auto
w
=
weight
.
data
_ptr
<
T
>
();
auto
dw
=
d_weight
.
data
<
T
>
();
auto
dw
=
d_weight
.
data
_ptr
<
T
>
();
dConvolution_backward_dW2
<
T
>
(
iF
,
diF
,
doF
,
w
,
dw
,
_rules
,
ip
,
ip
*
nGroups
,
dConvolution_backward_dW2
<
T
>
(
iF
,
diF
,
doF
,
w
,
dw
,
_rules
,
ip
,
ip
*
nGroups
,
op
,
op
*
nGroups
,
nGroups
);
op
,
op
*
nGroups
,
nGroups
);
if
(
d_bias
.
numel
())
{
if
(
d_bias
.
numel
())
{
auto
db
=
d_bias
.
data
<
T
>
();
auto
db
=
d_bias
.
data
_ptr
<
T
>
();
Convolution_bp_bias
(
doF
,
db
,
op
,
nActiveOut
);
Convolution_bp_bias
(
doF
,
db
,
op
,
nActiveOut
);
}
}
}
}
...
@@ -315,12 +315,12 @@ double cuda_RandomizedStrideConvolution_updateOutput(
...
@@ -315,12 +315,12 @@ double cuda_RandomizedStrideConvolution_updateOutput(
output_features
.
resize_
({
nActiveOut
,
op
*
nGroups
});
output_features
.
resize_
({
nActiveOut
,
op
*
nGroups
});
if
(
nActiveOut
)
{
if
(
nActiveOut
)
{
auto
iF
=
input_features
.
data
<
T
>
();
auto
iF
=
input_features
.
data
_ptr
<
T
>
();
auto
oF
=
output_features
.
data
<
T
>
();
auto
oF
=
output_features
.
data
_ptr
<
T
>
();
auto
w
=
weight
.
data
<
T
>
();
auto
w
=
weight
.
data
_ptr
<
T
>
();
if
(
bias
.
numel
())
if
(
bias
.
numel
())
Convolution_fp_bias
(
oF
,
bias
.
data
<
T
>
(),
op
,
nActiveOut
);
Convolution_fp_bias
(
oF
,
bias
.
data
_ptr
<
T
>
(),
op
,
nActiveOut
);
else
else
output_features
.
zero_
();
output_features
.
zero_
();
...
@@ -353,17 +353,17 @@ void cuda_RandomizedStrideConvolution_backward(
...
@@ -353,17 +353,17 @@ void cuda_RandomizedStrideConvolution_backward(
d_input_features
.
zero_
();
d_input_features
.
zero_
();
if
(
nActiveOut
)
{
if
(
nActiveOut
)
{
auto
iF
=
input_features
.
data
<
T
>
();
auto
iF
=
input_features
.
data
_ptr
<
T
>
();
auto
diF
=
d_input_features
.
data
<
T
>
();
auto
diF
=
d_input_features
.
data
_ptr
<
T
>
();
auto
doF
=
d_output_features
.
data
<
T
>
();
auto
doF
=
d_output_features
.
data
_ptr
<
T
>
();
auto
w
=
weight
.
data
<
T
>
();
auto
w
=
weight
.
data
_ptr
<
T
>
();
auto
dw
=
d_weight
.
data
<
T
>
();
auto
dw
=
d_weight
.
data
_ptr
<
T
>
();
dConvolution_backward_dW2
<
T
>
(
iF
,
diF
,
doF
,
w
,
dw
,
_rules
,
ip
,
ip
*
nGroups
,
dConvolution_backward_dW2
<
T
>
(
iF
,
diF
,
doF
,
w
,
dw
,
_rules
,
ip
,
ip
*
nGroups
,
op
,
op
*
nGroups
,
nGroups
);
op
,
op
*
nGroups
,
nGroups
);
if
(
d_bias
.
numel
())
{
if
(
d_bias
.
numel
())
{
auto
db
=
d_bias
.
data
<
T
>
();
auto
db
=
d_bias
.
data
_ptr
<
T
>
();
Convolution_bp_bias
(
doF
,
db
,
op
,
nActiveOut
);
Convolution_bp_bias
(
doF
,
db
,
op
,
nActiveOut
);
}
}
}
}
...
...
sparseconvnet/SCN/CUDA/Deconvolution.cpp
View file @
2ad7baf8
...
@@ -35,12 +35,12 @@ double cuda_Deconvolution_updateOutput(
...
@@ -35,12 +35,12 @@ double cuda_Deconvolution_updateOutput(
output_features
.
resize_
({
nActiveOut
,
op
*
nGroups
});
output_features
.
resize_
({
nActiveOut
,
op
*
nGroups
});
if
(
nActiveOut
)
{
if
(
nActiveOut
)
{
auto
iF
=
input_features
.
data
<
T
>
();
auto
iF
=
input_features
.
data
_ptr
<
T
>
();
auto
oF
=
output_features
.
data
<
T
>
();
auto
oF
=
output_features
.
data
_ptr
<
T
>
();
auto
w
=
weight
.
data
<
T
>
();
auto
w
=
weight
.
data
_ptr
<
T
>
();
if
(
bias
.
numel
())
if
(
bias
.
numel
())
Convolution_fp_bias
(
oF
,
bias
.
data
<
T
>
(),
op
,
nActiveOut
);
Convolution_fp_bias
(
oF
,
bias
.
data
_ptr
<
T
>
(),
op
,
nActiveOut
);
else
else
output_features
.
zero_
();
output_features
.
zero_
();
...
@@ -73,16 +73,16 @@ void cuda_Deconvolution_backward(
...
@@ -73,16 +73,16 @@ void cuda_Deconvolution_backward(
d_input_features
.
zero_
();
d_input_features
.
zero_
();
if
(
nActiveOut
)
{
if
(
nActiveOut
)
{
auto
iF
=
input_features
.
data
<
T
>
();
auto
iF
=
input_features
.
data
_ptr
<
T
>
();
auto
diF
=
d_input_features
.
data
<
T
>
();
auto
diF
=
d_input_features
.
data
_ptr
<
T
>
();
auto
doF
=
d_output_features
.
data
<
T
>
();
auto
doF
=
d_output_features
.
data
_ptr
<
T
>
();
auto
w
=
weight
.
data
<
T
>
();
auto
w
=
weight
.
data
_ptr
<
T
>
();
auto
dw
=
d_weight
.
data
<
T
>
();
auto
dw
=
d_weight
.
data
_ptr
<
T
>
();
dDeconvolution_backward_dW2
<
T
>
(
iF
,
diF
,
doF
,
w
,
dw
,
_rules
,
ip
,
dDeconvolution_backward_dW2
<
T
>
(
iF
,
diF
,
doF
,
w
,
dw
,
_rules
,
ip
,
ip
*
nGroups
,
op
,
op
*
nGroups
,
nGroups
);
ip
*
nGroups
,
op
,
op
*
nGroups
,
nGroups
);
if
(
d_bias
.
numel
())
{
if
(
d_bias
.
numel
())
{
auto
db
=
d_bias
.
data
<
T
>
();
auto
db
=
d_bias
.
data
_ptr
<
T
>
();
Convolution_bp_bias
(
doF
,
db
,
op
,
nActiveOut
);
Convolution_bp_bias
(
doF
,
db
,
op
,
nActiveOut
);
}
}
}
}
...
...
sparseconvnet/SCN/CUDA/IOLayers.cpp
View file @
2ad7baf8
...
@@ -34,9 +34,9 @@ void cuda_InputLayer_updateOutput(Metadata<Dimension> &m,
...
@@ -34,9 +34,9 @@ void cuda_InputLayer_updateOutput(Metadata<Dimension> &m,
output_features
.
resize_
({
*
m
.
inputNActive
,
nPlanes
});
output_features
.
resize_
({
*
m
.
inputNActive
,
nPlanes
});
output_features
.
zero_
();
output_features
.
zero_
();
auto
rulesBuffer
=
at
::
empty
({(
int
)
rules
[
1
].
size
()},
at
::
CUDA
(
at_kINT
));
auto
rulesBuffer
=
at
::
empty
({(
int
)
rules
[
1
].
size
()},
at
::
CUDA
(
at_kINT
));
auto
iF
=
input_features
.
data
<
T
>
();
auto
iF
=
input_features
.
data
_ptr
<
T
>
();
auto
oF
=
output_features
.
data
<
T
>
();
auto
oF
=
output_features
.
data
_ptr
<
T
>
();
Int
*
rb
=
rulesBuffer
.
data
<
Int
>
();
Int
*
rb
=
rulesBuffer
.
data
_ptr
<
Int
>
();
InputLayer_fp
<
T
>
(
iF
,
oF
,
nRows
,
maxActive
,
nPlanes
,
&
rules
[
1
][
0
],
rb
,
InputLayer_fp
<
T
>
(
iF
,
oF
,
nRows
,
maxActive
,
nPlanes
,
&
rules
[
1
][
0
],
rb
,
mode
==
4
);
mode
==
4
);
}
}
...
@@ -59,9 +59,9 @@ void cuda_InputLayer_updateGradInput(
...
@@ -59,9 +59,9 @@ void cuda_InputLayer_updateGradInput(
d_input_features
.
resize_
({
rules
[
0
][
2
],
nPlanes
});
d_input_features
.
resize_
({
rules
[
0
][
2
],
nPlanes
});
d_input_features
.
zero_
();
d_input_features
.
zero_
();
auto
rulesBuffer
=
at
::
empty
({(
int
)
rules
[
1
].
size
()},
at
::
CUDA
(
at_kINT
));
auto
rulesBuffer
=
at
::
empty
({(
int
)
rules
[
1
].
size
()},
at
::
CUDA
(
at_kINT
));
auto
diF
=
d_input_features
.
data
<
T
>
();
auto
diF
=
d_input_features
.
data
_ptr
<
T
>
();
auto
doF
=
d_output_features
.
data
<
T
>
();
auto
doF
=
d_output_features
.
data
_ptr
<
T
>
();
Int
*
rb
=
rulesBuffer
.
data
<
Int
>
();
Int
*
rb
=
rulesBuffer
.
data
_ptr
<
Int
>
();
InputLayer_bp
(
diF
,
doF
,
nRows
,
maxActive
,
nPlanes
,
&
rules
[
1
][
0
],
rb
,
InputLayer_bp
(
diF
,
doF
,
nRows
,
maxActive
,
nPlanes
,
&
rules
[
1
][
0
],
rb
,
mode
==
4
);
mode
==
4
);
}
}
...
@@ -84,9 +84,9 @@ void cuda_OutputLayer_updateOutput(Metadata<Dimension> &m,
...
@@ -84,9 +84,9 @@ void cuda_OutputLayer_updateOutput(Metadata<Dimension> &m,
output_features
.
resize_
({
rules
[
0
][
2
],
nPlanes
});
output_features
.
resize_
({
rules
[
0
][
2
],
nPlanes
});
output_features
.
zero_
();
output_features
.
zero_
();
auto
rulesBuffer
=
at
::
empty
({(
int
)
rules
[
1
].
size
()},
at
::
CUDA
(
at_kINT
));
auto
rulesBuffer
=
at
::
empty
({(
int
)
rules
[
1
].
size
()},
at
::
CUDA
(
at_kINT
));
auto
iF
=
input_features
.
data
<
T
>
();
auto
iF
=
input_features
.
data
_ptr
<
T
>
();
auto
oF
=
output_features
.
data
<
T
>
();
auto
oF
=
output_features
.
data
_ptr
<
T
>
();
Int
*
rb
=
rulesBuffer
.
data
<
Int
>
();
Int
*
rb
=
rulesBuffer
.
data
_ptr
<
Int
>
();
InputLayer_bp
(
oF
,
iF
,
nRows
,
maxActive
,
nPlanes
,
&
rules
[
1
][
0
],
rb
,
false
);
InputLayer_bp
(
oF
,
iF
,
nRows
,
maxActive
,
nPlanes
,
&
rules
[
1
][
0
],
rb
,
false
);
}
}
}
}
...
@@ -108,9 +108,9 @@ void cuda_OutputLayer_updateGradInput(
...
@@ -108,9 +108,9 @@ void cuda_OutputLayer_updateGradInput(
d_input_features
.
resize_
({
nRows
,
nPlanes
});
d_input_features
.
resize_
({
nRows
,
nPlanes
});
d_input_features
.
zero_
();
d_input_features
.
zero_
();
auto
rulesBuffer
=
at
::
empty
({(
int
)
rules
[
1
].
size
()},
at
::
CUDA
(
at_kINT
));
auto
rulesBuffer
=
at
::
empty
({(
int
)
rules
[
1
].
size
()},
at
::
CUDA
(
at_kINT
));
auto
diF
=
d_input_features
.
data
<
T
>
();
auto
diF
=
d_input_features
.
data
_ptr
<
T
>
();
auto
doF
=
d_output_features
.
data
<
T
>
();
auto
doF
=
d_output_features
.
data
_ptr
<
T
>
();
Int
*
rb
=
rulesBuffer
.
data
<
Int
>
();
Int
*
rb
=
rulesBuffer
.
data
_ptr
<
Int
>
();
InputLayer_fp
<
T
>
(
doF
,
diF
,
nRows
,
maxActive
,
nPlanes
,
&
rules
[
1
][
0
],
rb
,
InputLayer_fp
<
T
>
(
doF
,
diF
,
nRows
,
maxActive
,
nPlanes
,
&
rules
[
1
][
0
],
rb
,
false
);
false
);
}
}
...
@@ -138,9 +138,9 @@ void cuda_BLInputLayer_updateOutput(Metadata<Dimension> &m,
...
@@ -138,9 +138,9 @@ void cuda_BLInputLayer_updateOutput(Metadata<Dimension> &m,
output_features
.
resize_
({
*
m
.
inputNActive
,
nPlanes
});
output_features
.
resize_
({
*
m
.
inputNActive
,
nPlanes
});
}
else
{
}
else
{
auto
rulesBuffer
=
at
::
empty
({(
int
)
rules
[
1
].
size
()},
at
::
CUDA
(
at_kINT
));
auto
rulesBuffer
=
at
::
empty
({(
int
)
rules
[
1
].
size
()},
at
::
CUDA
(
at_kINT
));
auto
iF
=
input_features
.
data
<
T
>
();
auto
iF
=
input_features
.
data
_ptr
<
T
>
();
auto
oF
=
output_features
.
data
<
T
>
();
auto
oF
=
output_features
.
data
_ptr
<
T
>
();
Int
*
rb
=
rulesBuffer
.
data
<
Int
>
();
Int
*
rb
=
rulesBuffer
.
data
_ptr
<
Int
>
();
InputLayer_fp
<
T
>
(
iF
,
oF
,
nRows
,
maxActive
,
nPlanes
,
&
rules
[
1
][
0
],
rb
,
InputLayer_fp
<
T
>
(
iF
,
oF
,
nRows
,
maxActive
,
nPlanes
,
&
rules
[
1
][
0
],
rb
,
mode
==
4
);
mode
==
4
);
}
}
...
@@ -165,9 +165,9 @@ void cuda_BLInputLayer_updateGradInput(
...
@@ -165,9 +165,9 @@ void cuda_BLInputLayer_updateGradInput(
d_input_features
.
resize_
({
rules
[
0
][
2
],
rules
[
0
][
3
],
nPlanes
});
d_input_features
.
resize_
({
rules
[
0
][
2
],
rules
[
0
][
3
],
nPlanes
});
d_input_features
.
zero_
();
d_input_features
.
zero_
();
auto
rulesBuffer
=
at
::
empty
({(
int
)
rules
[
1
].
size
()},
at
::
CUDA
(
at_kINT
));
auto
rulesBuffer
=
at
::
empty
({(
int
)
rules
[
1
].
size
()},
at
::
CUDA
(
at_kINT
));
auto
diF
=
d_input_features
.
data
<
T
>
();
auto
diF
=
d_input_features
.
data
_ptr
<
T
>
();
auto
doF
=
d_output_features
.
data
<
T
>
();
auto
doF
=
d_output_features
.
data
_ptr
<
T
>
();
Int
*
rb
=
rulesBuffer
.
data
<
Int
>
();
Int
*
rb
=
rulesBuffer
.
data
_ptr
<
Int
>
();
InputLayer_bp
(
diF
,
doF
,
nRows
,
maxActive
,
nPlanes
,
&
rules
[
1
][
0
],
rb
,
InputLayer_bp
(
diF
,
doF
,
nRows
,
maxActive
,
nPlanes
,
&
rules
[
1
][
0
],
rb
,
mode
==
4
);
mode
==
4
);
}
}
...
@@ -192,9 +192,9 @@ void cuda_BLOutputLayer_updateOutput(
...
@@ -192,9 +192,9 @@ void cuda_BLOutputLayer_updateOutput(
output_features
.
resize_
({
rules
[
0
][
2
],
rules
[
0
][
3
],
nPlanes
});
output_features
.
resize_
({
rules
[
0
][
2
],
rules
[
0
][
3
],
nPlanes
});
output_features
.
zero_
();
output_features
.
zero_
();
auto
rulesBuffer
=
at
::
empty
({(
int
)
rules
[
1
].
size
()},
at
::
CUDA
(
at_kINT
));
auto
rulesBuffer
=
at
::
empty
({(
int
)
rules
[
1
].
size
()},
at
::
CUDA
(
at_kINT
));
auto
iF
=
input_features
.
data
<
T
>
();
auto
iF
=
input_features
.
data
_ptr
<
T
>
();
auto
oF
=
output_features
.
data
<
T
>
();
auto
oF
=
output_features
.
data
_ptr
<
T
>
();
Int
*
rb
=
rulesBuffer
.
data
<
Int
>
();
Int
*
rb
=
rulesBuffer
.
data
_ptr
<
Int
>
();
InputLayer_bp
(
oF
,
iF
,
nRows
,
maxActive
,
nPlanes
,
&
rules
[
1
][
0
],
rb
,
false
);
InputLayer_bp
(
oF
,
iF
,
nRows
,
maxActive
,
nPlanes
,
&
rules
[
1
][
0
],
rb
,
false
);
}
}
}
}
...
@@ -217,9 +217,9 @@ void cuda_BLOutputLayer_updateGradInput(
...
@@ -217,9 +217,9 @@ void cuda_BLOutputLayer_updateGradInput(
d_input_features
.
resize_
({
nRows
,
nPlanes
});
d_input_features
.
resize_
({
nRows
,
nPlanes
});
d_input_features
.
zero_
();
d_input_features
.
zero_
();
auto
rulesBuffer
=
at
::
empty
({(
int
)
rules
[
1
].
size
()},
at
::
CUDA
(
at_kINT
));
auto
rulesBuffer
=
at
::
empty
({(
int
)
rules
[
1
].
size
()},
at
::
CUDA
(
at_kINT
));
auto
diF
=
d_input_features
.
data
<
T
>
();
auto
diF
=
d_input_features
.
data
_ptr
<
T
>
();
auto
doF
=
d_output_features
.
data
<
T
>
();
auto
doF
=
d_output_features
.
data
_ptr
<
T
>
();
Int
*
rb
=
rulesBuffer
.
data
<
Int
>
();
Int
*
rb
=
rulesBuffer
.
data
_ptr
<
Int
>
();
InputLayer_fp
<
T
>
(
doF
,
diF
,
nRows
,
maxActive
,
nPlanes
,
&
rules
[
1
][
0
],
rb
,
InputLayer_fp
<
T
>
(
doF
,
diF
,
nRows
,
maxActive
,
nPlanes
,
&
rules
[
1
][
0
],
rb
,
false
);
false
);
}
}
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment