Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
SparseConvNet
Commits
3ff930b7
Commit
3ff930b7
authored
Sep 14, 2017
by
Ed Ng
Committed by
GitHub
Sep 14, 2017
Browse files
Merge branch 'master' into batchwise_dropout
parents
df96d0c0
c8e8db32
Changes
39
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
544 additions
and
562 deletions
+544
-562
PyTorch/setup.py
PyTorch/setup.py
+1
-0
PyTorch/sparseconvnet/SCN/generic/CPU/BatchNormalization.cpp
PyTorch/sparseconvnet/SCN/generic/CPU/BatchNormalization.cpp
+41
-33
PyTorch/sparseconvnet/SCN/generic/CPU/Convolution.cpp
PyTorch/sparseconvnet/SCN/generic/CPU/Convolution.cpp
+53
-47
PyTorch/sparseconvnet/SCN/generic/CPU/SparseToDense.cpp
PyTorch/sparseconvnet/SCN/generic/CPU/SparseToDense.cpp
+33
-34
PyTorch/sparseconvnet/SCN/generic/CPU/SparseToDense.h
PyTorch/sparseconvnet/SCN/generic/CPU/SparseToDense.h
+17
-18
PyTorch/sparseconvnet/SCN/generic/GPU/AffineReluTrivialConvolution.cu
...seconvnet/SCN/generic/GPU/AffineReluTrivialConvolution.cu
+0
-1
PyTorch/sparseconvnet/SCN/generic/GPU/AffineReluTrivialConvolution.h
...rseconvnet/SCN/generic/GPU/AffineReluTrivialConvolution.h
+67
-156
PyTorch/sparseconvnet/SCN/generic/GPU/BatchNormalization.cu
PyTorch/sparseconvnet/SCN/generic/GPU/BatchNormalization.cu
+24
-22
PyTorch/sparseconvnet/SCN/generic/GPU/Convolution.cu
PyTorch/sparseconvnet/SCN/generic/GPU/Convolution.cu
+78
-70
PyTorch/sparseconvnet/SCN/generic/GPU/Convolution.h
PyTorch/sparseconvnet/SCN/generic/GPU/Convolution.h
+20
-9
PyTorch/sparseconvnet/SCN/generic/GPU/Deconvolution.h
PyTorch/sparseconvnet/SCN/generic/GPU/Deconvolution.h
+20
-9
PyTorch/sparseconvnet/SCN/generic/GPU/SparseToDense.cu
PyTorch/sparseconvnet/SCN/generic/GPU/SparseToDense.cu
+31
-27
PyTorch/sparseconvnet/SCN/generic/GPU/SparseToDense.h
PyTorch/sparseconvnet/SCN/generic/GPU/SparseToDense.h
+19
-18
PyTorch/sparseconvnet/SCN/generic/GPU/THGenerateCudaFloatTypes.h
.../sparseconvnet/SCN/generic/GPU/THGenerateCudaFloatTypes.h
+18
-0
PyTorch/sparseconvnet/SCN/generic/Geometry/ConvolutionRules.h
...rch/sparseconvnet/SCN/generic/Geometry/ConvolutionRules.h
+28
-34
PyTorch/sparseconvnet/SCN/generic/Geometry/Metadata.cpp
PyTorch/sparseconvnet/SCN/generic/Geometry/Metadata.cpp
+4
-5
PyTorch/sparseconvnet/SCN/generic/Geometry/Metadata.h
PyTorch/sparseconvnet/SCN/generic/Geometry/Metadata.h
+12
-1
PyTorch/sparseconvnet/SCN/generic/Geometry/ValidConvolutionRules.h
...parseconvnet/SCN/generic/Geometry/ValidConvolutionRules.h
+5
-7
PyTorch/sparseconvnet/SCN/header_cpu.h
PyTorch/sparseconvnet/SCN/header_cpu.h
+62
-61
PyTorch/sparseconvnet/SCN/header_gpu.h
PyTorch/sparseconvnet/SCN/header_gpu.h
+11
-10
No files found.
PyTorch/setup.py
View file @
3ff930b7
...
@@ -35,6 +35,7 @@ if torch.cuda.is_available():
...
@@ -35,6 +35,7 @@ if torch.cuda.is_available():
'sparseconvnet/SCN/header_cpu.h'
,
'sparseconvnet/SCN/header_cpu.h'
,
'sparseconvnet/SCN/header_gpu.h'
],
'sparseconvnet/SCN/header_gpu.h'
],
sources
=
[],
sources
=
[],
include_dirs
=
[
os
.
path
.
expandvars
(
'$CUDA_HOME'
)
+
'/include'
],
extra_objects
=
[
extra_objects
=
[
this_dir
+
this_dir
+
'/sparseconvnet/SCN/init.cu.o'
],
'/sparseconvnet/SCN/init.cu.o'
],
...
...
PyTorch/sparseconvnet/SCN/generic/CPU/BatchNormalization.cpp
View file @
3ff930b7
...
@@ -14,18 +14,20 @@ extern "C" void scn_R_(BatchNormalization_updateOutput)(
...
@@ -14,18 +14,20 @@ extern "C" void scn_R_(BatchNormalization_updateOutput)(
THTensor
*
saveInvStd
,
THTensor
*
runningMean
,
THTensor
*
runningVar
,
THTensor
*
saveInvStd
,
THTensor
*
runningMean
,
THTensor
*
runningVar
,
THTensor
*
weight
,
THTensor
*
bias
,
real
eps
,
real
momentum
,
bool
train
,
THTensor
*
weight
,
THTensor
*
bias
,
real
eps
,
real
momentum
,
bool
train
,
real
leakiness
)
{
real
leakiness
)
{
THTensor_
(
resizeAs
)(
output_features
,
input_features
);
THTensor_
(
resizeAs
)(
output_features
,
input_features
);
if
(
input_features
->
nDimension
==
2
)
{
auto
nActive
=
input_features
->
size
[
0
];
auto
nActive
=
input_features
->
size
[
0
];
auto
nPlanes
=
input_features
->
size
[
1
];
auto
nPlanes
=
input_features
->
size
[
1
];
auto
input_stride
=
input_features
->
stride
[
0
];
auto
input_stride
=
input_features
->
stride
[
0
];
auto
output_stride
=
output_features
->
stride
[
0
];
auto
output_stride
=
output_features
->
stride
[
0
];
BatchNormalization_ForwardPass
<
real
>
(
BatchNormalization_ForwardPass
<
real
>
(
THTensor_
(
data
)(
input_features
),
THTensor_
(
data
)(
output_features
),
THTensor_
(
data
)(
input_features
),
THTensor_
(
data
)(
output_features
),
nPlanes
,
input_stride
,
output_stride
,
nActive
,
THTensor_
(
data
)(
saveMean
),
nPlanes
,
input_stride
,
output_stride
,
nActive
,
THTensor_
(
data
)(
saveInvStd
),
THTensor_
(
data
)(
runningMean
),
THTensor_
(
data
)(
saveMean
),
THTensor_
(
data
)(
saveInvStd
),
THTensor_
(
data
)(
runningVar
),
THOptionalTensorData
(
weight
),
THTensor_
(
data
)(
runningMean
),
THTensor_
(
data
)(
runningVar
),
THOptionalTensorData
(
bias
),
eps
,
momentum
,
train
,
leakiness
);
THOptionalTensorData
(
weight
),
THOptionalTensorData
(
bias
),
eps
,
momentum
,
train
,
leakiness
);
}
}
}
extern
"C"
void
scn_R_
(
BatchNormalizationInTensor_updateOutput
)(
extern
"C"
void
scn_R_
(
BatchNormalizationInTensor_updateOutput
)(
...
@@ -34,6 +36,7 @@ extern "C" void scn_R_(BatchNormalizationInTensor_updateOutput)(
...
@@ -34,6 +36,7 @@ extern "C" void scn_R_(BatchNormalizationInTensor_updateOutput)(
THTensor
*
weight
,
THTensor
*
bias
,
real
eps
,
real
momentum
,
bool
train
,
THTensor
*
weight
,
THTensor
*
bias
,
real
eps
,
real
momentum
,
bool
train
,
real
leakiness
)
{
real
leakiness
)
{
if
(
input_features
->
nDimension
==
2
)
{
auto
nActive
=
input_features
->
size
[
0
];
auto
nActive
=
input_features
->
size
[
0
];
auto
nPlanes
=
input_features
->
size
[
1
];
auto
nPlanes
=
input_features
->
size
[
1
];
auto
input_stride
=
input_features
->
stride
[
0
];
auto
input_stride
=
input_features
->
stride
[
0
];
...
@@ -41,10 +44,12 @@ extern "C" void scn_R_(BatchNormalizationInTensor_updateOutput)(
...
@@ -41,10 +44,12 @@ extern "C" void scn_R_(BatchNormalizationInTensor_updateOutput)(
BatchNormalization_ForwardPass
<
real
>
(
BatchNormalization_ForwardPass
<
real
>
(
THTensor_
(
data
)(
input_features
),
THTensor_
(
data
)(
output_features
),
THTensor_
(
data
)(
input_features
),
THTensor_
(
data
)(
output_features
),
nPlanes
,
input_stride
,
output_stride
,
nActive
,
THTensor_
(
data
)(
saveMean
),
nPlanes
,
input_stride
,
output_stride
,
nActive
,
THTensor_
(
data
)(
saveInvStd
),
THTensor_
(
data
)(
runningMean
),
THTensor_
(
data
)(
saveMean
),
THTensor_
(
data
)(
saveInvStd
),
THTensor_
(
data
)(
runningVar
),
THOptionalTensorData
(
weight
),
THTensor_
(
data
)(
runningMean
),
THTensor_
(
data
)(
runningVar
),
THOptionalTensorData
(
bias
),
eps
,
momentum
,
train
,
leakiness
);
THOptionalTensorData
(
weight
),
THOptionalTensorData
(
bias
),
eps
,
momentum
,
train
,
leakiness
);
}
}
}
extern
"C"
void
scn_R_
(
BatchNormalization_backward
)(
extern
"C"
void
scn_R_
(
BatchNormalization_backward
)(
...
@@ -55,6 +60,7 @@ extern "C" void scn_R_(BatchNormalization_backward)(
...
@@ -55,6 +60,7 @@ extern "C" void scn_R_(BatchNormalization_backward)(
real
leakiness
)
{
real
leakiness
)
{
THTensor_
(
resizeAs
)(
d_input_features
,
input_features
);
THTensor_
(
resizeAs
)(
d_input_features
,
input_features
);
if
(
input_features
->
nDimension
==
2
)
{
auto
nActive
=
input_features
->
size
[
0
];
auto
nActive
=
input_features
->
size
[
0
];
auto
nPlanes
=
input_features
->
size
[
1
];
auto
nPlanes
=
input_features
->
size
[
1
];
auto
input_stride
=
input_features
->
stride
[
0
];
auto
input_stride
=
input_features
->
stride
[
0
];
...
@@ -62,10 +68,12 @@ extern "C" void scn_R_(BatchNormalization_backward)(
...
@@ -62,10 +68,12 @@ extern "C" void scn_R_(BatchNormalization_backward)(
BatchNormalization_BackwardPass
<
real
>
(
BatchNormalization_BackwardPass
<
real
>
(
THTensor_
(
data
)(
input_features
),
THTensor_
(
data
)(
d_input_features
),
THTensor_
(
data
)(
input_features
),
THTensor_
(
data
)(
d_input_features
),
THTensor_
(
data
)(
output_features
),
THTensor_
(
data
)(
d_output_features
),
THTensor_
(
data
)(
output_features
),
THTensor_
(
data
)(
d_output_features
),
nPlanes
,
input_stride
,
output_stride
,
nActive
,
THTensor_
(
data
)(
saveMean
),
nPlanes
,
input_stride
,
output_stride
,
nActive
,
THTensor_
(
data
)(
saveInvStd
),
THTensor_
(
data
)(
runningMean
),
THTensor_
(
data
)(
saveMean
),
THTensor_
(
data
)(
saveInvStd
),
THTensor_
(
data
)(
runningVar
),
THOptionalTensorData
(
weight
),
THTensor_
(
data
)(
runningMean
),
THTensor_
(
data
)(
runningVar
),
THOptionalTensorData
(
bias
),
THOptionalTensorData
(
d_weight
),
THOptionalTensorData
(
weight
),
THOptionalTensorData
(
bias
),
THOptionalTensorData
(
d_bias
),
leakiness
);
THOptionalTensorData
(
d_weight
),
THOptionalTensorData
(
d_bias
),
leakiness
);
}
}
}
#endif
#endif
PyTorch/sparseconvnet/SCN/generic/CPU/Convolution.cpp
View file @
3ff930b7
...
@@ -23,6 +23,8 @@ extern "C" double scn_DR_(Convolution_updateOutput)(
...
@@ -23,6 +23,8 @@ extern "C" double scn_DR_(Convolution_updateOutput)(
if
(
not
bias
)
if
(
not
bias
)
THTensor_
(
zero
)(
output_features
);
THTensor_
(
zero
)(
output_features
);
double
flops
=
0
;
if
(
nActive
)
{
auto
iF
=
THTensor_
(
data
)(
input_features
);
auto
iF
=
THTensor_
(
data
)(
input_features
);
auto
oF
=
THTensor_
(
data
)(
output_features
);
auto
oF
=
THTensor_
(
data
)(
output_features
);
auto
ip
=
input_features
->
size
[
1
];
auto
ip
=
input_features
->
size
[
1
];
...
@@ -31,9 +33,9 @@ extern "C" double scn_DR_(Convolution_updateOutput)(
...
@@ -31,9 +33,9 @@ extern "C" double scn_DR_(Convolution_updateOutput)(
auto
b
=
THOptionalTensorData
(
bias
);
auto
b
=
THOptionalTensorData
(
bias
);
Convolution_ForwardPass
(
iF
,
ip
,
ip
,
oF
,
op
,
op
,
w
,
b
,
_rules
,
nActive
,
Convolution_ForwardPass
(
iF
,
ip
,
ip
,
oF
,
op
,
op
,
w
,
b
,
_rules
,
nActive
,
THBlas_
(
gemm
));
THBlas_
(
gemm
));
double
flops
=
0
;
for
(
auto
&
r
:
_rules
)
for
(
auto
&
r
:
_rules
)
flops
+=
r
.
size
()
/
2
*
ip
*
op
;
flops
+=
r
.
size
()
/
2
*
ip
*
op
;
}
return
flops
;
return
flops
;
}
}
...
@@ -51,6 +53,7 @@ extern "C" void scn_DR_(Convolution_backward)(
...
@@ -51,6 +53,7 @@ extern "C" void scn_DR_(Convolution_backward)(
THTensor_
(
resizeAs
)(
d_input_features
,
input_features
);
THTensor_
(
resizeAs
)(
d_input_features
,
input_features
);
THTensor_
(
zero
)(
d_input_features
);
THTensor_
(
zero
)(
d_input_features
);
if
(
nActive
)
{
auto
iF
=
THTensor_
(
data
)(
input_features
);
auto
iF
=
THTensor_
(
data
)(
input_features
);
auto
diF
=
THTensor_
(
data
)(
d_input_features
);
auto
diF
=
THTensor_
(
data
)(
d_input_features
);
auto
doF
=
THTensor_
(
data
)(
d_output_features
);
auto
doF
=
THTensor_
(
data
)(
d_output_features
);
...
@@ -62,6 +65,7 @@ extern "C" void scn_DR_(Convolution_backward)(
...
@@ -62,6 +65,7 @@ extern "C" void scn_DR_(Convolution_backward)(
Convolution_BackwardPass
(
iF
,
diF
,
ip
,
ip
,
doF
,
op
,
op
,
w
,
dw
,
db
,
_rules
,
Convolution_BackwardPass
(
iF
,
diF
,
ip
,
ip
,
doF
,
op
,
op
,
w
,
dw
,
db
,
_rules
,
nActive
,
THBlas_
(
gemm
));
nActive
,
THBlas_
(
gemm
));
}
}
}
extern
"C"
double
scn_DR_
(
ValidConvolution_updateOutput
)(
extern
"C"
double
scn_DR_
(
ValidConvolution_updateOutput
)(
...
@@ -71,11 +75,13 @@ extern "C" double scn_DR_(ValidConvolution_updateOutput)(
...
@@ -71,11 +75,13 @@ extern "C" double scn_DR_(ValidConvolution_updateOutput)(
SCN_INITIALIZE_AND_REFERENCE
(
Metadata
<
Dimension
>
,
m
)
SCN_INITIALIZE_AND_REFERENCE
(
Metadata
<
Dimension
>
,
m
)
auto
_rules
=
_m
.
getValidRuleBook
(
inputSize
,
filterSize
,
true
);
auto
_rules
=
_m
.
getValidRuleBook
(
inputSize
,
filterSize
,
true
);
uInt
nActive
=
input_features
->
size
[
0
]
;
uInt
nActive
=
_m
.
getNActive
(
inputSize
)
;
THTensor_
(
resize2d
)(
output_features
,
nActive
,
weight
->
size
[
1
]);
THTensor_
(
resize2d
)(
output_features
,
nActive
,
weight
->
size
[
1
]);
if
(
not
bias
)
if
(
not
bias
)
THTensor_
(
zero
)(
output_features
);
THTensor_
(
zero
)(
output_features
);
double
flops
=
0
;
if
(
nActive
)
{
auto
iF
=
THTensor_
(
data
)(
input_features
);
auto
iF
=
THTensor_
(
data
)(
input_features
);
auto
oF
=
THTensor_
(
data
)(
output_features
);
auto
oF
=
THTensor_
(
data
)(
output_features
);
auto
ip
=
input_features
->
size
[
1
];
auto
ip
=
input_features
->
size
[
1
];
...
@@ -85,10 +91,9 @@ extern "C" double scn_DR_(ValidConvolution_updateOutput)(
...
@@ -85,10 +91,9 @@ extern "C" double scn_DR_(ValidConvolution_updateOutput)(
Convolution_ForwardPass
(
iF
,
ip
,
ip
,
oF
,
op
,
op
,
w
,
b
,
_rules
,
nActive
,
Convolution_ForwardPass
(
iF
,
ip
,
ip
,
oF
,
op
,
op
,
w
,
b
,
_rules
,
nActive
,
THBlas_
(
gemm
));
THBlas_
(
gemm
));
double
flops
=
0
;
for
(
auto
&
r
:
_rules
)
for
(
auto
&
r
:
_rules
)
flops
+=
r
.
size
()
/
2
*
ip
*
op
;
flops
+=
r
.
size
()
/
2
*
ip
*
op
;
}
return
flops
;
return
flops
;
}
}
...
@@ -100,10 +105,11 @@ extern "C" void scn_DR_(ValidConvolution_backward)(
...
@@ -100,10 +105,11 @@ extern "C" void scn_DR_(ValidConvolution_backward)(
SCN_INITIALIZE_AND_REFERENCE
(
Metadata
<
Dimension
>
,
m
)
SCN_INITIALIZE_AND_REFERENCE
(
Metadata
<
Dimension
>
,
m
)
auto
_rules
=
_m
.
getValidRuleBook
(
inputSize
,
filterSize
,
true
);
auto
_rules
=
_m
.
getValidRuleBook
(
inputSize
,
filterSize
,
true
);
uInt
nActive
=
input_features
->
size
[
0
]
;
uInt
nActive
=
_m
.
getNActive
(
inputSize
)
;
THTensor_
(
resizeAs
)(
d_input_features
,
input_features
);
THTensor_
(
resizeAs
)(
d_input_features
,
input_features
);
THTensor_
(
zero
)(
d_input_features
);
THTensor_
(
zero
)(
d_input_features
);
if
(
nActive
)
{
auto
iF
=
THTensor_
(
data
)(
input_features
);
auto
iF
=
THTensor_
(
data
)(
input_features
);
auto
diF
=
THTensor_
(
data
)(
d_input_features
);
auto
diF
=
THTensor_
(
data
)(
d_input_features
);
auto
doF
=
THTensor_
(
data
)(
d_output_features
);
auto
doF
=
THTensor_
(
data
)(
d_output_features
);
...
@@ -115,6 +121,6 @@ extern "C" void scn_DR_(ValidConvolution_backward)(
...
@@ -115,6 +121,6 @@ extern "C" void scn_DR_(ValidConvolution_backward)(
Convolution_BackwardPass
(
iF
,
diF
,
ip
,
ip
,
doF
,
op
,
op
,
w
,
dw
,
db
,
_rules
,
Convolution_BackwardPass
(
iF
,
diF
,
ip
,
ip
,
doF
,
op
,
op
,
w
,
dw
,
db
,
_rules
,
nActive
,
THBlas_
(
gemm
));
nActive
,
THBlas_
(
gemm
));
}
}
}
#endif
#endif
PyTorch/sparseconvnet/SCN/generic/CPU/SparseToDense.cpp
View file @
3ff930b7
...
@@ -9,34 +9,32 @@
...
@@ -9,34 +9,32 @@
#else
#else
#include "SparseToDense.h"
#include "SparseToDense.h"
extern
"C"
void
scn_DR_
(
SparseToDense_updateOutput
)(
THLongTensor
*
inputSize
,
extern
"C"
void
scn_DR_
(
SparseToDense_updateOutput
)(
void
**
m
,
THLongTensor
*
inputSize
,
void
**
m
,
THTensor
*
input_features
,
THTensor
*
in
put_features
,
THTensor
*
out
put_features
,
void
*
rulesBuffer
,
long
nPlanes
)
{
THTensor
*
output_features
,
void
*
rulesBuffer
)
{
SCN_INITIALIZE_AND_REFERENCE
(
Metadata
<
Dimension
>
,
m
)
SCN_INITIALIZE_AND_REFERENCE
(
Metadata
<
Dimension
>
,
m
)
{
{
long
sz
[
Dimension
+
2
];
long
sz
[
Dimension
+
2
];
sz
[
0
]
=
_m
.
inputSGs
->
size
();
sz
[
0
]
=
_m
.
grids
.
begin
()
->
second
.
size
();
sz
[
1
]
=
input_features
->
size
[
1
];
sz
[
1
]
=
nPlanes
;
// input_features->size[1];
for
(
int
i
=
0
;
i
<
Dimension
;
i
++
)
{
std
::
memcpy
(
sz
+
2
,
THLongTensor_data
(
inputSize
),
sizeof
(
long
)
*
Dimension
);
auto
x
=
THLongTensor_data
(
inputSize
)[
i
];
sz
[
i
+
2
]
=
x
;
}
THTensor_
(
resizeNd
)(
output_features
,
Dimension
+
2
,
sz
,
NULL
);
THTensor_
(
resizeNd
)(
output_features
,
Dimension
+
2
,
sz
,
NULL
);
THTensor_
(
zero
)(
output_features
);
THTensor_
(
zero
)(
output_features
);
}
}
if
(
input_features
->
nDimension
==
2
)
{
auto
_rules
=
_m
.
getSparseToDenseRuleBook
(
inputSize
,
true
);
auto
_rules
=
_m
.
getSparseToDenseRuleBook
(
inputSize
,
true
);
auto
spatialVolume
=
_rules
.
size
();
uInt
nPlanes
=
input_features
->
size
[
1
];
uInt
nPlanes
=
input_features
->
size
[
1
];
auto
iF
=
THTensor_
(
data
)(
input_features
);
auto
iF
=
THTensor_
(
data
)(
input_features
);
auto
oF
=
THTensor_
(
data
)(
output_features
);
auto
oF
=
THTensor_
(
data
)(
output_features
);
long
spatialVolume
=
THLongTensor_prodall
(
inputSize
);
for
(
auto
&
r
:
_rules
)
{
for
(
auto
&
r
:
_rules
)
{
uInt
nHot
=
r
.
size
()
/
2
;
uInt
nHot
=
r
.
size
()
/
2
;
SparseToDense_ForwardPass
<
real
>
(
iF
,
oF
,
nPlanes
,
spatialVolume
,
&
r
[
0
],
SparseToDense_ForwardPass
<
real
>
(
iF
,
oF
,
nPlanes
,
spatialVolume
,
&
r
[
0
],
nHot
);
nHot
);
oF
++
;
oF
+=
nPlanes
*
spatialVolume
;
}
}
}
}
}
extern
"C"
void
scn_DR_
(
SparseToDense_updateGradInput
)(
extern
"C"
void
scn_DR_
(
SparseToDense_updateGradInput
)(
...
@@ -44,12 +42,12 @@ extern "C" void scn_DR_(SparseToDense_updateGradInput)(
...
@@ -44,12 +42,12 @@ extern "C" void scn_DR_(SparseToDense_updateGradInput)(
THTensor
*
d_input_features
,
THTensor
*
d_output_features
,
THTensor
*
d_input_features
,
THTensor
*
d_output_features
,
void
*
rulesBuffer
)
{
void
*
rulesBuffer
)
{
SCN_INITIALIZE_AND_REFERENCE
(
Metadata
<
Dimension
>
,
m
)
THTensor_
(
resizeAs
)(
d_input_features
,
input_features
);
THTensor_
(
resizeAs
)(
d_input_features
,
input_features
);
THTensor_
(
zero
)(
d_input_features
);
THTensor_
(
zero
)(
d_input_features
);
SCN_INITIALIZE_AND_REFERENCE
(
Metadata
<
Dimension
>
,
m
)
auto
_rules
=
_m
.
getSparseToDenseRuleBook
(
inputSize
,
true
);
auto
_rules
=
_m
.
getSparseToDenseRuleBook
(
inputSize
,
true
);
auto
spatialVolume
=
_rules
.
size
();
if
(
input_features
->
nDimension
==
2
)
{
long
spatialVolume
=
THLongTensor_prodall
(
inputSize
);
uInt
nPlanes
=
d_input_features
->
size
[
1
];
uInt
nPlanes
=
d_input_features
->
size
[
1
];
auto
diF
=
THTensor_
(
data
)(
d_input_features
);
auto
diF
=
THTensor_
(
data
)(
d_input_features
);
auto
doF
=
THTensor_
(
data
)(
d_output_features
);
auto
doF
=
THTensor_
(
data
)(
d_output_features
);
...
@@ -58,7 +56,8 @@ extern "C" void scn_DR_(SparseToDense_updateGradInput)(
...
@@ -58,7 +56,8 @@ extern "C" void scn_DR_(SparseToDense_updateGradInput)(
uInt
nHot
=
r
.
size
()
/
2
;
uInt
nHot
=
r
.
size
()
/
2
;
SparseToDense_BackwardPass
<
real
>
(
diF
,
doF
,
nPlanes
,
spatialVolume
,
&
r
[
0
],
SparseToDense_BackwardPass
<
real
>
(
diF
,
doF
,
nPlanes
,
spatialVolume
,
&
r
[
0
],
nHot
);
nHot
);
doF
++
;
doF
+=
nPlanes
*
spatialVolume
;
}
}
}
}
}
#endif
#endif
PyTorch/sparseconvnet/SCN/generic/CPU/SparseToDense.h
View file @
3ff930b7
...
@@ -10,27 +10,26 @@
...
@@ -10,27 +10,26 @@
template
<
typename
T
>
template
<
typename
T
>
void
SparseToDense_ForwardPass
(
T
*
input_features
,
T
*
output_features
,
void
SparseToDense_ForwardPass
(
T
*
input_features
,
T
*
output_features
,
uInt
nPlanes
,
uInt
spatialVolume
,
uInt
nPlanes
,
uInt
spatialVolume
,
uInt
*
rules
,
uInt
*
rules
,
int
nHot
)
{
int
nHot
)
{
for
(
uInt
outSite
=
0
;
outSite
<
nHot
;
outSite
++
)
{
for
(
uInt
outSite
=
0
;
outSite
<
nHot
;
outSite
++
)
{
T
*
i
=
&
input_features
[
rules
[
2
*
outSite
]
*
nPlanes
]
;
T
*
i
=
input_features
+
rules
[
2
*
outSite
]
*
nPlanes
;
uInt
sample
=
rules
[
2
*
outSite
+
1
];
T
*
o
=
output_features
+
rules
[
2
*
outSite
+
1
];
for
(
uInt
plane
=
0
;
plane
<
nPlanes
;
plane
++
)
for
(
uInt
plane
=
0
;
plane
<
nPlanes
;
plane
++
)
output_features
[(
sample
*
nPlanes
+
plane
)
*
spatialVolume
]
=
i
[
plane
];
o
[
plane
*
spatialVolume
]
=
i
[
plane
];
}
}
}
}
template
<
typename
T
>
template
<
typename
T
>
void
SparseToDense_BackwardPass
(
T
*
d_input_features
,
T
*
d_output_features
,
void
SparseToDense_BackwardPass
(
T
*
d_input_features
,
T
*
d_output_features
,
uInt
nPlanes
,
uInt
spatialVolume
,
uInt
nPlanes
,
uInt
spatialVolume
,
uInt
*
rules
,
uInt
*
rules
,
int
nHot
)
{
int
nHot
)
{
for
(
uInt
outSite
=
0
;
outSite
<
nHot
;
outSite
++
)
{
for
(
uInt
outSite
=
0
;
outSite
<
nHot
;
outSite
++
)
{
T
*
di
=
&
d_input_features
[
rules
[
2
*
outSite
]
*
nPlanes
]
;
T
*
d
_
i
=
d_input_features
+
rules
[
2
*
outSite
]
*
nPlanes
;
uInt
sample
=
rules
[
2
*
outSite
+
1
];
auto
d_o
=
d_output_features
+
rules
[
2
*
outSite
+
1
];
for
(
uInt
plane
=
0
;
plane
<
nPlanes
;
plane
++
)
for
(
uInt
plane
=
0
;
plane
<
nPlanes
;
plane
++
)
di
[
plane
]
=
d_output_features
[(
sample
*
nPlanes
+
plane
)
*
spatialVolume
];
d_i
[
plane
]
=
d_o
[
plane
*
spatialVolume
];
}
}
}
}
#endif
/* CPU_SPARSETODENSE_H */
#endif
/* CPU_SPARSETODENSE_H */
PyTorch/sparseconvnet/SCN/generic/GPU/AffineReluTrivialConvolution.cu
View file @
3ff930b7
...
@@ -10,7 +10,6 @@
...
@@ -10,7 +10,6 @@
#include "AffineReluTrivialConvolution.h"
#include "AffineReluTrivialConvolution.h"
#include <algorithm>
#include <algorithm>
#include <iostream>
extern
"C"
void
scn_R_
(
AffineReluTrivialConvolution_updateOutput
)(
extern
"C"
void
scn_R_
(
AffineReluTrivialConvolution_updateOutput
)(
THCTensor
*
input_features
,
THCTensor
*
output_features
,
THCTensor
*
input_features
,
THCTensor
*
output_features
,
...
...
PyTorch/sparseconvnet/SCN/generic/GPU/AffineReluTrivialConvolution.h
View file @
3ff930b7
...
@@ -155,6 +155,27 @@ __global__ void dAffineReluTrivialConvolution_forwardB(
...
@@ -155,6 +155,27 @@ __global__ void dAffineReluTrivialConvolution_forwardB(
}
}
}
}
#define FOO(T, K, V) \
{ \
if (input_nPlanes % K == 0 and output_nPlanes % K == 0) { \
uInt o = (nActive / K) * K; \
if (o > 0) \
dAffineReluTrivialConvolution_forwardA<T, K, V> << < \
dim3(std::min(o / K, (uInt)512), output_nPlanes / K), \
dim3(K, K / V), 0, THCState_getCurrentStream(state)>>> \
(inFeatures, outFeatures, affineWeight, affineBias, convWeight, \
input_nPlanes, input_stride, output_nPlanes, output_stride, o); \
if (nActive > o) \
dAffineReluTrivialConvolution_forwardB<T, K, V> << < \
dim3(1, output_nPlanes / K), dim3(K, K / V), 0, \
THCState_getCurrentStream(state)>>> \
(inFeatures + o * input_stride, outFeatures + o * output_stride, \
affineWeight, affineBias, convWeight, input_nPlanes, \
input_stride, output_nPlanes, output_stride, nActive - o); \
return; \
} \
}
template
<
typename
T
>
template
<
typename
T
>
void
dAffineReluTrivialConvolution_forward
(
T
*
inFeatures
,
T
*
outFeatures
,
void
dAffineReluTrivialConvolution_forward
(
T
*
inFeatures
,
T
*
outFeatures
,
T
*
affineWeight
,
T
*
affineBias
,
T
*
affineWeight
,
T
*
affineBias
,
...
@@ -162,92 +183,25 @@ void dAffineReluTrivialConvolution_forward(T *inFeatures, T *outFeatures,
...
@@ -162,92 +183,25 @@ void dAffineReluTrivialConvolution_forward(T *inFeatures, T *outFeatures,
uInt
input_stride
,
uInt
input_stride
,
uInt
output_nPlanes
,
uInt
output_nPlanes
,
uInt
output_stride
,
uInt
nActive
)
{
uInt
output_stride
,
uInt
nActive
)
{
{
const
uInt
K
=
64
;
FOO
(
T
,
64
,
16
)
const
uInt
V
=
16
;
FOO
(
T
,
32
,
8
)
if
(
input_nPlanes
%
K
==
0
and
output_nPlanes
%
K
==
0
)
{
FOO
(
T
,
16
,
4
)
uInt
o
=
(
nActive
/
K
)
*
K
;
FOO
(
T
,
8
,
2
)
if
(
o
>
0
)
dAffineReluTrivialConvolution_forwardA
<
T
,
K
,
V
><<<
dim3
(
std
::
min
(
o
/
K
,
(
uInt
)
512
),
output_nPlanes
/
K
),
dim3
(
K
,
K
/
V
),
0
,
THCState_getCurrentStream
(
state
)
>>>
(
inFeatures
,
outFeatures
,
affineWeight
,
affineBias
,
convWeight
,
input_nPlanes
,
input_stride
,
output_nPlanes
,
output_stride
,
o
);
if
(
nActive
>
o
)
dAffineReluTrivialConvolution_forwardB
<
T
,
K
,
V
><<<
dim3
(
1
,
output_nPlanes
/
K
),
dim3
(
K
,
K
/
V
),
0
,
THCState_getCurrentStream
(
state
)
>>>
(
inFeatures
+
o
*
input_stride
,
outFeatures
+
o
*
output_stride
,
affineWeight
,
affineBias
,
convWeight
,
input_nPlanes
,
input_stride
,
output_nPlanes
,
output_stride
,
nActive
-
o
);
return
;
}
}
{
const
uInt
K
=
32
;
const
uInt
V
=
4
;
if
(
input_nPlanes
%
K
==
0
and
output_nPlanes
%
K
==
0
)
{
uInt
o
=
(
nActive
/
K
)
*
K
;
if
(
o
>
0
)
dAffineReluTrivialConvolution_forwardA
<
T
,
K
,
V
><<<
dim3
(
std
::
min
(
o
/
K
,
(
uInt
)
512
),
output_nPlanes
/
K
),
dim3
(
K
,
K
/
V
),
0
,
THCState_getCurrentStream
(
state
)
>>>
(
inFeatures
,
outFeatures
,
affineWeight
,
affineBias
,
convWeight
,
input_nPlanes
,
input_stride
,
output_nPlanes
,
output_stride
,
o
);
if
(
nActive
>
o
)
dAffineReluTrivialConvolution_forwardB
<
T
,
K
,
V
><<<
dim3
(
1
,
output_nPlanes
/
K
),
dim3
(
K
,
K
/
V
),
0
,
THCState_getCurrentStream
(
state
)
>>>
(
inFeatures
+
o
*
input_stride
,
outFeatures
+
o
*
output_stride
,
affineWeight
,
affineBias
,
convWeight
,
input_nPlanes
,
input_stride
,
output_nPlanes
,
output_stride
,
nActive
-
o
);
return
;
}
}
{
const
uInt
K
=
16
;
const
uInt
V
=
4
;
if
(
input_nPlanes
%
K
==
0
and
output_nPlanes
%
K
==
0
)
{
uInt
o
=
(
nActive
/
K
)
*
K
;
if
(
o
>
0
)
dAffineReluTrivialConvolution_forwardA
<
T
,
K
,
V
><<<
dim3
(
std
::
min
(
o
/
K
,
(
uInt
)
512
),
output_nPlanes
/
K
),
dim3
(
K
,
K
/
V
),
0
,
THCState_getCurrentStream
(
state
)
>>>
(
inFeatures
,
outFeatures
,
affineWeight
,
affineBias
,
convWeight
,
input_nPlanes
,
input_stride
,
output_nPlanes
,
output_stride
,
o
);
if
(
nActive
>
o
)
dAffineReluTrivialConvolution_forwardB
<
T
,
K
,
V
><<<
dim3
(
1
,
output_nPlanes
/
K
),
dim3
(
K
,
K
/
V
),
0
,
THCState_getCurrentStream
(
state
)
>>>
(
inFeatures
+
o
*
input_stride
,
outFeatures
+
o
*
output_stride
,
affineWeight
,
affineBias
,
convWeight
,
input_nPlanes
,
input_stride
,
output_nPlanes
,
output_stride
,
nActive
-
o
);
return
;
}
}
{
const
uInt
K
=
8
;
const
uInt
V
=
2
;
if
(
input_nPlanes
%
K
==
0
and
output_nPlanes
%
K
==
0
)
{
uInt
o
=
(
nActive
/
K
)
*
K
;
if
(
o
>
0
)
dAffineReluTrivialConvolution_forwardA
<
T
,
K
,
V
><<<
dim3
(
std
::
min
(
o
/
K
,
(
uInt
)
512
),
output_nPlanes
/
K
),
dim3
(
K
,
K
/
V
),
0
,
THCState_getCurrentStream
(
state
)
>>>
(
inFeatures
,
outFeatures
,
affineWeight
,
affineBias
,
convWeight
,
input_nPlanes
,
input_stride
,
output_nPlanes
,
output_stride
,
o
);
if
(
nActive
>
o
)
dAffineReluTrivialConvolution_forwardB
<
T
,
K
,
V
><<<
dim3
(
1
,
output_nPlanes
/
K
),
dim3
(
K
,
K
/
V
),
0
,
THCState_getCurrentStream
(
state
)
>>>
(
inFeatures
+
o
*
input_stride
,
outFeatures
+
o
*
output_stride
,
affineWeight
,
affineBias
,
convWeight
,
input_nPlanes
,
input_stride
,
output_nPlanes
,
output_stride
,
nActive
-
o
);
return
;
}
}
assert
(
false
);
assert
(
false
);
}
}
template
<
>
void
dAffineReluTrivialConvolution_forward
<
double
>
(
double
*
inFeatures
,
double
*
outFeatures
,
double
*
affineWeight
,
double
*
affineBias
,
double
*
convWeight
,
uInt
input_nPlanes
,
uInt
input_stride
,
uInt
output_nPlanes
,
uInt
output_stride
,
uInt
nActive
)
{
FOO
(
double
,
32
,
8
)
FOO
(
double
,
16
,
4
)
FOO
(
double
,
8
,
2
)
assert
(
false
);
}
#undef FOO
// dOutput x W^T -> dInput and
// dOutput x W^T -> dInput and
// Input^T x dOutput -> dW
// Input^T x dOutput -> dW
...
@@ -449,84 +403,41 @@ __global__ void dAffineReluTrivialConvolution_backward_dW_B(
...
@@ -449,84 +403,41 @@ __global__ void dAffineReluTrivialConvolution_backward_dW_B(
atomicAdd
(
&
dAffineBias
[
tx
],
dAB
);
atomicAdd
(
&
dAffineBias
[
tx
],
dAB
);
}
}
#define FOO(T, K, V) \
{ \
if (input_nPlanes % K == 0 and output_nPlanes % K == 0) { \
uInt o = (nActive / K) * K; \
if (o > 0) \
dAffineReluTrivialConvolution_backward_dW_A<T, K, V> << < \
dim3(std::min(o / K, (uInt)512), input_nPlanes / K), \
dim3(K, K / V), 0, THCState_getCurrentStream(state)>>> \
(inFeatures, dInFeatures, dOutFeatures, affineWeight, \
dAffineWeight, affineBias, dAffineBias, convWeight, dConvWeight, \
input_nPlanes, input_stride, output_nPlanes, output_stride, o, \
additiveGrad); \
if (nActive > o) \
dAffineReluTrivialConvolution_backward_dW_B<T, K, V> << < \
dim3(1, input_nPlanes / K), dim3(K, K / V), 0, \
THCState_getCurrentStream(state)>>> \
(inFeatures + o * input_stride, dInFeatures + o * input_stride, \
dOutFeatures + o * output_stride, affineWeight, dAffineWeight, \
affineBias, dAffineBias, convWeight, dConvWeight, input_nPlanes, \
input_stride, output_nPlanes, output_stride, nActive - o, \
additiveGrad); \
return; \
} \
}
template
<
typename
T
>
template
<
typename
T
>
void
dAffineReluTrivialConvolution_backward_dW
(
void
dAffineReluTrivialConvolution_backward_dW
(
T
*
inFeatures
,
T
*
dInFeatures
,
T
*
dOutFeatures
,
T
*
affineWeight
,
T
*
inFeatures
,
T
*
dInFeatures
,
T
*
dOutFeatures
,
T
*
affineWeight
,
T
*
dAffineWeight
,
T
*
affineBias
,
T
*
dAffineBias
,
T
*
convWeight
,
T
*
dAffineWeight
,
T
*
affineBias
,
T
*
dAffineBias
,
T
*
convWeight
,
T
*
dConvWeight
,
uInt
input_nPlanes
,
uInt
input_stride
,
uInt
output_nPlanes
,
T
*
dConvWeight
,
uInt
input_nPlanes
,
uInt
input_stride
,
uInt
output_nPlanes
,
uInt
output_stride
,
uInt
nActive
,
bool
additiveGrad
)
{
uInt
output_stride
,
uInt
nActive
,
bool
additiveGrad
)
{
{
FOO
(
T
,
32
,
8
)
const
uInt
K
=
32
;
FOO
(
T
,
16
,
4
)
const
uInt
V
=
8
;
FOO
(
T
,
8
,
2
)
if
(
input_nPlanes
%
K
==
0
and
output_nPlanes
%
K
==
0
)
{
uInt
o
=
(
nActive
/
K
)
*
K
;
if
(
o
>
0
)
dAffineReluTrivialConvolution_backward_dW_A
<
T
,
K
,
V
><<<
dim3
(
std
::
min
(
o
/
K
,
(
uInt
)
512
),
input_nPlanes
/
K
),
dim3
(
K
,
K
/
V
),
0
,
THCState_getCurrentStream
(
state
)
>>>
(
inFeatures
,
dInFeatures
,
dOutFeatures
,
affineWeight
,
dAffineWeight
,
affineBias
,
dAffineBias
,
convWeight
,
dConvWeight
,
input_nPlanes
,
input_stride
,
output_nPlanes
,
output_stride
,
o
,
additiveGrad
);
if
(
nActive
>
o
)
dAffineReluTrivialConvolution_backward_dW_B
<
T
,
K
,
V
><<<
dim3
(
1
,
input_nPlanes
/
K
),
dim3
(
K
,
K
/
V
),
0
,
THCState_getCurrentStream
(
state
)
>>>
(
inFeatures
+
o
*
input_stride
,
dInFeatures
+
o
*
input_stride
,
dOutFeatures
+
o
*
output_stride
,
affineWeight
,
dAffineWeight
,
affineBias
,
dAffineBias
,
convWeight
,
dConvWeight
,
input_nPlanes
,
input_stride
,
output_nPlanes
,
output_stride
,
nActive
-
o
,
additiveGrad
);
return
;
}
}
{
const
uInt
K
=
16
;
const
uInt
V
=
4
;
if
(
input_nPlanes
%
K
==
0
and
output_nPlanes
%
K
==
0
)
{
uInt
o
=
(
nActive
/
K
)
*
K
;
if
(
o
>
0
)
dAffineReluTrivialConvolution_backward_dW_A
<
T
,
K
,
V
><<<
dim3
(
std
::
min
(
o
/
K
,
(
uInt
)
512
),
input_nPlanes
/
K
),
dim3
(
K
,
K
/
V
),
0
,
THCState_getCurrentStream
(
state
)
>>>
(
inFeatures
,
dInFeatures
,
dOutFeatures
,
affineWeight
,
dAffineWeight
,
affineBias
,
dAffineBias
,
convWeight
,
dConvWeight
,
input_nPlanes
,
input_stride
,
output_nPlanes
,
output_stride
,
o
,
additiveGrad
);
if
(
nActive
>
o
)
dAffineReluTrivialConvolution_backward_dW_B
<
T
,
K
,
V
><<<
dim3
(
1
,
input_nPlanes
/
K
),
dim3
(
K
,
K
/
V
),
0
,
THCState_getCurrentStream
(
state
)
>>>
(
inFeatures
+
o
*
input_stride
,
dInFeatures
+
o
*
input_stride
,
dOutFeatures
+
o
*
output_stride
,
affineWeight
,
dAffineWeight
,
affineBias
,
dAffineBias
,
convWeight
,
dConvWeight
,
input_nPlanes
,
input_stride
,
output_nPlanes
,
output_stride
,
nActive
-
o
,
additiveGrad
);
return
;
}
}
{
const
uInt
K
=
8
;
const
uInt
V
=
2
;
if
(
input_nPlanes
%
K
==
0
and
output_nPlanes
%
K
==
0
)
{
uInt
o
=
(
nActive
/
K
)
*
K
;
if
(
o
>
0
)
dAffineReluTrivialConvolution_backward_dW_A
<
T
,
K
,
V
><<<
dim3
(
std
::
min
(
o
/
K
,
(
uInt
)
512
),
input_nPlanes
/
K
),
dim3
(
K
,
K
/
V
),
0
,
THCState_getCurrentStream
(
state
)
>>>
(
inFeatures
,
dInFeatures
,
dOutFeatures
,
affineWeight
,
dAffineWeight
,
affineBias
,
dAffineBias
,
convWeight
,
dConvWeight
,
input_nPlanes
,
input_stride
,
output_nPlanes
,
output_stride
,
o
,
additiveGrad
);
if
(
nActive
>
o
)
dAffineReluTrivialConvolution_backward_dW_B
<
T
,
K
,
V
><<<
dim3
(
1
,
input_nPlanes
/
K
),
dim3
(
K
,
K
/
V
),
0
,
THCState_getCurrentStream
(
state
)
>>>
(
inFeatures
+
o
*
input_stride
,
dInFeatures
+
o
*
input_stride
,
dOutFeatures
+
o
*
output_stride
,
affineWeight
,
dAffineWeight
,
affineBias
,
dAffineBias
,
convWeight
,
dConvWeight
,
input_nPlanes
,
input_stride
,
output_nPlanes
,
output_stride
,
nActive
-
o
,
additiveGrad
);
return
;
}
}
}
}
#undef FOO
#endif
#endif
PyTorch/sparseconvnet/SCN/generic/GPU/BatchNormalization.cu
View file @
3ff930b7
...
@@ -30,13 +30,14 @@ extern "C" void scn_R_(BatchNormalization_updateOutput)(
...
@@ -30,13 +30,14 @@ extern "C" void scn_R_(BatchNormalization_updateOutput)(
real
leakiness
)
{
real
leakiness
)
{
THCTensor_
(
resizeAs
)(
state
,
output_features
,
input_features
);
THCTensor_
(
resizeAs
)(
state
,
output_features
,
input_features
);
if
(
input_features
->
nDimension
==
2
)
{
auto
nActive
=
input_features
->
size
[
0
];
auto
nActive
=
input_features
->
size
[
0
];
auto
nPlanes
=
input_features
->
size
[
1
];
auto
nPlanes
=
input_features
->
size
[
1
];
auto
input_stride
=
input_features
->
stride
[
0
];
auto
input_stride
=
input_features
->
stride
[
0
];
auto
output_stride
=
output_features
->
stride
[
0
];
auto
output_stride
=
output_features
->
stride
[
0
];
BN_F_MACRO
(
16
)
BN_F_MACRO
(
16
)
else
BN_F_MACRO
(
12
)
else
BN_F_MACRO
(
8
)
else
BN_F_MACRO
(
4
)
else
BN_F_MACRO
(
1
)
else
BN_F_MACRO
(
12
)
else
BN_F_MACRO
(
8
)
else
BN_F_MACRO
(
4
)
else
BN_F_MACRO
(
1
)
}
}
}
extern
"C"
void
scn_R_
(
BatchNormalizationInTensor_updateOutput
)(
extern
"C"
void
scn_R_
(
BatchNormalizationInTensor_updateOutput
)(
...
@@ -44,14 +45,14 @@ extern "C" void scn_R_(BatchNormalizationInTensor_updateOutput)(
...
@@ -44,14 +45,14 @@ extern "C" void scn_R_(BatchNormalizationInTensor_updateOutput)(
THCTensor
*
saveInvStd
,
THCTensor
*
runningMean
,
THCTensor
*
runningVar
,
THCTensor
*
saveInvStd
,
THCTensor
*
runningMean
,
THCTensor
*
runningVar
,
THCTensor
*
weight
,
THCTensor
*
bias
,
real
eps
,
real
momentum
,
bool
train
,
THCTensor
*
weight
,
THCTensor
*
bias
,
real
eps
,
real
momentum
,
bool
train
,
real
leakiness
)
{
real
leakiness
)
{
if
(
input_features
->
nDimension
==
2
)
{
auto
nActive
=
input_features
->
size
[
0
];
auto
nActive
=
input_features
->
size
[
0
];
auto
nPlanes
=
input_features
->
size
[
1
];
auto
nPlanes
=
input_features
->
size
[
1
];
auto
input_stride
=
input_features
->
stride
[
0
];
auto
input_stride
=
input_features
->
stride
[
0
];
auto
output_stride
=
output_features
->
stride
[
0
];
auto
output_stride
=
output_features
->
stride
[
0
];
BN_F_MACRO
(
16
)
BN_F_MACRO
(
16
)
else
BN_F_MACRO
(
12
)
else
BN_F_MACRO
(
8
)
else
BN_F_MACRO
(
4
)
else
BN_F_MACRO
(
1
)
else
BN_F_MACRO
(
12
)
else
BN_F_MACRO
(
8
)
else
BN_F_MACRO
(
4
)
else
BN_F_MACRO
(
1
)
}
}
}
#undef BN_F_MACRO
#undef BN_F_MACRO
...
@@ -81,12 +82,13 @@ extern "C" void scn_R_(BatchNormalization_backward)(
...
@@ -81,12 +82,13 @@ extern "C" void scn_R_(BatchNormalization_backward)(
THCTensor
*
d_weight
,
THCTensor
*
d_bias
,
real
leakiness
)
{
THCTensor
*
d_weight
,
THCTensor
*
d_bias
,
real
leakiness
)
{
THCTensor_
(
resizeAs
)(
state
,
d_input_features
,
d_output_features
);
THCTensor_
(
resizeAs
)(
state
,
d_input_features
,
d_output_features
);
if
(
input_features
->
nDimension
==
2
)
{
auto
nActive
=
input_features
->
size
[
0
];
auto
nActive
=
input_features
->
size
[
0
];
auto
nPlanes
=
input_features
->
size
[
1
];
auto
nPlanes
=
input_features
->
size
[
1
];
auto
input_stride
=
input_features
->
stride
[
0
];
auto
input_stride
=
input_features
->
stride
[
0
];
auto
output_stride
=
output_features
->
stride
[
0
];
auto
output_stride
=
output_features
->
stride
[
0
];
BN_B_MACRO
(
16
)
BN_B_MACRO
(
16
)
else
BN_B_MACRO
(
12
)
else
BN_B_MACRO
(
8
)
else
BN_B_MACRO
(
4
)
else
BN_B_MACRO
(
1
)
else
BN_B_MACRO
(
12
)
else
BN_B_MACRO
(
8
)
else
BN_B_MACRO
(
4
)
else
BN_B_MACRO
(
1
)
}
}
}
#endif
#endif
PyTorch/sparseconvnet/SCN/generic/GPU/Convolution.cu
View file @
3ff930b7
...
@@ -25,12 +25,13 @@ extern "C" double scn_DR_(Convolution_updateOutput)(
...
@@ -25,12 +25,13 @@ extern "C" double scn_DR_(Convolution_updateOutput)(
if
(
not
bias
)
if
(
not
bias
)
THCTensor_
(
zero
)(
state
,
output_features
);
THCTensor_
(
zero
)(
state
,
output_features
);
double
flops
=
0
;
if
(
nActive
)
{
auto
iF
=
THCTensor_
(
data
)(
state
,
input_features
);
auto
iF
=
THCTensor_
(
data
)(
state
,
input_features
);
auto
oF
=
THCTensor_
(
data
)(
state
,
output_features
);
auto
oF
=
THCTensor_
(
data
)(
state
,
output_features
);
auto
ip
=
input_features
->
size
[
1
];
auto
ip
=
input_features
->
size
[
1
];
auto
op
=
output_features
->
size
[
1
];
auto
op
=
output_features
->
size
[
1
];
auto
w
=
THCTensor_
(
data
)(
state
,
weight
);
auto
w
=
THCTensor_
(
data
)(
state
,
weight
);
double
flops
=
0
;
if
(
bias
)
{
if
(
bias
)
{
auto
b
=
THCTensor_
(
data
)(
state
,
bias
);
auto
b
=
THCTensor_
(
data
)(
state
,
bias
);
...
@@ -47,6 +48,7 @@ extern "C" double scn_DR_(Convolution_updateOutput)(
...
@@ -47,6 +48,7 @@ extern "C" double scn_DR_(Convolution_updateOutput)(
dConvolution_forward2
<
real
>
(
iF
,
oF
,
w
,
rbB
,
nHotB
,
ip
,
ip
,
op
,
op
,
dConvolution_forward2
<
real
>
(
iF
,
oF
,
w
,
rbB
,
nHotB
,
ip
,
ip
,
op
,
op
,
THCState_getCurrentStream
(
state
));
THCState_getCurrentStream
(
state
));
,
w
+=
c
;
flops
+=
nHotB
*
c
;)
,
w
+=
c
;
flops
+=
nHotB
*
c
;)
}
return
flops
;
return
flops
;
}
}
...
@@ -63,6 +65,7 @@ extern "C" void scn_DR_(Convolution_backward)(
...
@@ -63,6 +65,7 @@ extern "C" void scn_DR_(Convolution_backward)(
THCTensor_
(
resizeAs
)(
state
,
d_input_features
,
input_features
);
THCTensor_
(
resizeAs
)(
state
,
d_input_features
,
input_features
);
THCTensor_
(
zero
)(
state
,
d_input_features
);
THCTensor_
(
zero
)(
state
,
d_input_features
);
if
(
nActive
)
{
auto
iF
=
THCTensor_
(
data
)(
state
,
input_features
);
auto
iF
=
THCTensor_
(
data
)(
state
,
input_features
);
auto
diF
=
THCTensor_
(
data
)(
state
,
d_input_features
);
auto
diF
=
THCTensor_
(
data
)(
state
,
d_input_features
);
auto
doF
=
THCTensor_
(
data
)(
state
,
d_output_features
);
auto
doF
=
THCTensor_
(
data
)(
state
,
d_output_features
);
...
@@ -71,9 +74,9 @@ extern "C" void scn_DR_(Convolution_backward)(
...
@@ -71,9 +74,9 @@ extern "C" void scn_DR_(Convolution_backward)(
auto
w
=
THCTensor_
(
data
)(
state
,
weight
);
auto
w
=
THCTensor_
(
data
)(
state
,
weight
);
auto
dw
=
THCTensor_
(
data
)(
state
,
d_weight
);
auto
dw
=
THCTensor_
(
data
)(
state
,
d_weight
);
uInt
c
=
ip
*
op
;
uInt
c
=
ip
*
op
;
RULEBOOKITERATOR
(
RULEBOOKITERATOR
(
dConvolution_backward_dW2
<
real
>
(
dConvolution_backward_dW2
<
real
>
(
iF
,
diF
,
doF
,
w
,
dw
,
rbB
,
nHotB
,
ip
,
ip
,
iF
,
diF
,
doF
,
w
,
dw
,
rbB
,
nHotB
,
ip
,
ip
,
op
,
op
,
op
,
op
,
THCState_getCurrentStream
(
state
));
THCState_getCurrentStream
(
state
));
,
w
+=
c
;
dw
+=
c
;)
,
w
+=
c
;
dw
+=
c
;)
if
(
d_bias
)
{
if
(
d_bias
)
{
...
@@ -81,6 +84,7 @@ extern "C" void scn_DR_(Convolution_backward)(
...
@@ -81,6 +84,7 @@ extern "C" void scn_DR_(Convolution_backward)(
Convolution_bp_bias
(
doF
,
db
,
op
,
op
,
nActive
,
Convolution_bp_bias
(
doF
,
db
,
op
,
op
,
nActive
,
THCState_getCurrentStream
(
state
));
THCState_getCurrentStream
(
state
));
}
}
}
}
}
extern
"C"
double
scn_DR_
(
ValidConvolution_updateOutput
)(
extern
"C"
double
scn_DR_
(
ValidConvolution_updateOutput
)(
...
@@ -89,17 +93,18 @@ extern "C" double scn_DR_(ValidConvolution_updateOutput)(
...
@@ -89,17 +93,18 @@ extern "C" double scn_DR_(ValidConvolution_updateOutput)(
THCTensor
*
bias
,
long
filterVolume
,
THCITensor
*
rulesBuffer
)
{
THCTensor
*
bias
,
long
filterVolume
,
THCITensor
*
rulesBuffer
)
{
SCN_INITIALIZE_AND_REFERENCE
(
Metadata
<
Dimension
>
,
m
)
SCN_INITIALIZE_AND_REFERENCE
(
Metadata
<
Dimension
>
,
m
)
auto
_rules
=
_m
.
getValidRuleBook
(
inputSize
,
filterSize
,
true
);
auto
_rules
=
_m
.
getValidRuleBook
(
inputSize
,
filterSize
,
true
);
uInt
nActive
=
input_features
->
size
[
0
]
;
uInt
nActive
=
_m
.
getNActive
(
inputSize
)
;
THCTensor_
(
resize2d
)(
state
,
output_features
,
nActive
,
weight
->
size
[
1
]);
THCTensor_
(
resize2d
)(
state
,
output_features
,
nActive
,
weight
->
size
[
1
]);
if
(
not
bias
)
if
(
not
bias
)
THCTensor_
(
zero
)(
state
,
output_features
);
THCTensor_
(
zero
)(
state
,
output_features
);
double
flops
=
0
;
if
(
nActive
)
{
auto
iF
=
THCTensor_
(
data
)(
state
,
input_features
);
auto
iF
=
THCTensor_
(
data
)(
state
,
input_features
);
auto
oF
=
THCTensor_
(
data
)(
state
,
output_features
);
auto
oF
=
THCTensor_
(
data
)(
state
,
output_features
);
auto
ip
=
input_features
->
size
[
1
];
auto
ip
=
input_features
->
size
[
1
];
auto
op
=
output_features
->
size
[
1
];
auto
op
=
output_features
->
size
[
1
];
auto
w
=
THCTensor_
(
data
)(
state
,
weight
);
auto
w
=
THCTensor_
(
data
)(
state
,
weight
);
double
flops
=
0
;
if
(
bias
)
{
if
(
bias
)
{
auto
b
=
THCTensor_
(
data
)(
state
,
bias
);
auto
b
=
THCTensor_
(
data
)(
state
,
bias
);
...
@@ -116,6 +121,7 @@ extern "C" double scn_DR_(ValidConvolution_updateOutput)(
...
@@ -116,6 +121,7 @@ extern "C" double scn_DR_(ValidConvolution_updateOutput)(
dConvolution_forward2
<
real
>
(
iF
,
oF
,
w
,
rbB
,
nHotB
,
ip
,
ip
,
op
,
op
,
dConvolution_forward2
<
real
>
(
iF
,
oF
,
w
,
rbB
,
nHotB
,
ip
,
ip
,
op
,
op
,
THCState_getCurrentStream
(
state
));
THCState_getCurrentStream
(
state
));
,
w
+=
c
;
flops
+=
nHotB
*
c
;)
,
w
+=
c
;
flops
+=
nHotB
*
c
;)
}
return
flops
;
return
flops
;
}
}
...
@@ -126,10 +132,11 @@ extern "C" void scn_DR_(ValidConvolution_backward)(
...
@@ -126,10 +132,11 @@ extern "C" void scn_DR_(ValidConvolution_backward)(
THCTensor
*
d_bias
,
long
filterVolume
,
THCITensor
*
rulesBuffer
)
{
THCTensor
*
d_bias
,
long
filterVolume
,
THCITensor
*
rulesBuffer
)
{
SCN_INITIALIZE_AND_REFERENCE
(
Metadata
<
Dimension
>
,
m
)
SCN_INITIALIZE_AND_REFERENCE
(
Metadata
<
Dimension
>
,
m
)
auto
_rules
=
_m
.
getValidRuleBook
(
inputSize
,
filterSize
,
true
);
auto
_rules
=
_m
.
getValidRuleBook
(
inputSize
,
filterSize
,
true
);
uInt
nActive
=
input_features
->
size
[
0
]
;
uInt
nActive
=
_m
.
getNActive
(
inputSize
)
;
THCTensor_
(
resizeAs
)(
state
,
d_input_features
,
input_features
);
THCTensor_
(
resizeAs
)(
state
,
d_input_features
,
input_features
);
THCTensor_
(
zero
)(
state
,
d_input_features
);
THCTensor_
(
zero
)(
state
,
d_input_features
);
if
(
nActive
)
{
auto
iF
=
THCTensor_
(
data
)(
state
,
input_features
);
auto
iF
=
THCTensor_
(
data
)(
state
,
input_features
);
auto
diF
=
THCTensor_
(
data
)(
state
,
d_input_features
);
auto
diF
=
THCTensor_
(
data
)(
state
,
d_input_features
);
auto
doF
=
THCTensor_
(
data
)(
state
,
d_output_features
);
auto
doF
=
THCTensor_
(
data
)(
state
,
d_output_features
);
...
@@ -138,9 +145,9 @@ extern "C" void scn_DR_(ValidConvolution_backward)(
...
@@ -138,9 +145,9 @@ extern "C" void scn_DR_(ValidConvolution_backward)(
auto
w
=
THCTensor_
(
data
)(
state
,
weight
);
auto
w
=
THCTensor_
(
data
)(
state
,
weight
);
auto
dw
=
THCTensor_
(
data
)(
state
,
d_weight
);
auto
dw
=
THCTensor_
(
data
)(
state
,
d_weight
);
uInt
c
=
ip
*
op
;
uInt
c
=
ip
*
op
;
RULEBOOKITERATOR
(
RULEBOOKITERATOR
(
dConvolution_backward_dW2
<
real
>
(
dConvolution_backward_dW2
<
real
>
(
iF
,
diF
,
doF
,
w
,
dw
,
rbB
,
nHotB
,
ip
,
ip
,
iF
,
diF
,
doF
,
w
,
dw
,
rbB
,
nHotB
,
ip
,
ip
,
op
,
op
,
op
,
op
,
THCState_getCurrentStream
(
state
));
THCState_getCurrentStream
(
state
));
,
w
+=
c
;
dw
+=
c
;)
,
w
+=
c
;
dw
+=
c
;)
if
(
d_bias
)
{
if
(
d_bias
)
{
...
@@ -148,6 +155,7 @@ extern "C" void scn_DR_(ValidConvolution_backward)(
...
@@ -148,6 +155,7 @@ extern "C" void scn_DR_(ValidConvolution_backward)(
Convolution_bp_bias
(
doF
,
db
,
op
,
op
,
nActive
,
Convolution_bp_bias
(
doF
,
db
,
op
,
op
,
nActive
,
THCState_getCurrentStream
(
state
));
THCState_getCurrentStream
(
state
));
}
}
}
}
}
#endif
#endif
PyTorch/sparseconvnet/SCN/generic/GPU/Convolution.h
View file @
3ff930b7
...
@@ -184,7 +184,7 @@ dConvolution_KMxKN_forwardB(T *inFeatures, T *outFeatures, T *w, uInt *rules,
...
@@ -184,7 +184,7 @@ dConvolution_KMxKN_forwardB(T *inFeatures, T *outFeatures, T *w, uInt *rules,
}
}
}
}
#define FOO(K, V)
\
#define FOO(
T,
K, V) \
{ \
{ \
if (input_nPlanes % K == 0 and output_nPlanes % K == 0) { \
if (input_nPlanes % K == 0 and output_nPlanes % K == 0) { \
uInt o = (nHot / K) * K; \
uInt o = (nHot / K) * K; \
...
@@ -208,10 +208,21 @@ void dConvolution_forward(T *inFeatures, T *outFeatures, T *w, uInt *rules,
...
@@ -208,10 +208,21 @@ void dConvolution_forward(T *inFeatures, T *outFeatures, T *w, uInt *rules,
uInt
nHot
,
uInt
input_nPlanes
,
uInt
input_stride
,
uInt
nHot
,
uInt
input_nPlanes
,
uInt
input_stride
,
uInt
output_nPlanes
,
uInt
output_stride
,
uInt
output_nPlanes
,
uInt
output_stride
,
cudaStream_t
stream
)
{
cudaStream_t
stream
)
{
FOO
(
64
,
16
)
FOO
(
T
,
64
,
16
)
FOO
(
32
,
8
)
FOO
(
T
,
32
,
8
)
FOO
(
16
,
4
)
FOO
(
T
,
16
,
4
)
FOO
(
8
,
2
)
FOO
(
T
,
8
,
2
)
assert
(
false
);
}
template
<
>
void
dConvolution_forward
<
double
>
(
double
*
inFeatures
,
double
*
outFeatures
,
double
*
w
,
uInt
*
rules
,
uInt
nHot
,
uInt
input_nPlanes
,
uInt
input_stride
,
uInt
output_nPlanes
,
uInt
output_stride
,
cudaStream_t
stream
)
{
FOO
(
double
,
32
,
8
)
FOO
(
double
,
16
,
4
)
FOO
(
double
,
8
,
2
)
assert
(
false
);
assert
(
false
);
}
}
#undef FOO
#undef FOO
...
@@ -378,7 +389,7 @@ dConvolution_KMxKN_backward_dW_B(T *inFeatures, T *dInFeatures, T *dOutFeatures,
...
@@ -378,7 +389,7 @@ dConvolution_KMxKN_backward_dW_B(T *inFeatures, T *dInFeatures, T *dOutFeatures,
}
}
}
}
#define FOO(K, V)
\
#define FOO(
T,
K, V) \
{ \
{ \
if (input_nPlanes % K == 0 and output_nPlanes % K == 0) { \
if (input_nPlanes % K == 0 and output_nPlanes % K == 0) { \
uInt o = (nHot / K) * K; \
uInt o = (nHot / K) * K; \
...
@@ -404,9 +415,9 @@ void dConvolution_backward_dW(T *inFeatures, T *dInFeatures, T *dOutFeatures,
...
@@ -404,9 +415,9 @@ void dConvolution_backward_dW(T *inFeatures, T *dInFeatures, T *dOutFeatures,
uInt
input_nPlanes
,
uInt
input_stride
,
uInt
input_nPlanes
,
uInt
input_stride
,
uInt
output_nPlanes
,
uInt
output_stride
,
uInt
output_nPlanes
,
uInt
output_stride
,
cudaStream_t
stream
)
{
cudaStream_t
stream
)
{
FOO
(
32
,
8
)
FOO
(
T
,
32
,
8
)
FOO
(
16
,
4
)
FOO
(
T
,
16
,
4
)
FOO
(
8
,
2
)
FOO
(
T
,
8
,
2
)
assert
(
false
);
assert
(
false
);
}
}
#undef FOO
#undef FOO
...
...
PyTorch/sparseconvnet/SCN/generic/GPU/Deconvolution.h
View file @
3ff930b7
...
@@ -153,7 +153,7 @@ dDeconvolution_KMxKN_forwardB(T *inFeatures, T *outFeatures, T *w, uInt *rules,
...
@@ -153,7 +153,7 @@ dDeconvolution_KMxKN_forwardB(T *inFeatures, T *outFeatures, T *w, uInt *rules,
}
}
}
}
#define FOO(K, V)
\
#define FOO(
T,
K, V) \
{ \
{ \
if (input_nPlanes % K == 0 and output_nPlanes % K == 0) { \
if (input_nPlanes % K == 0 and output_nPlanes % K == 0) { \
uInt o = (nHot / K) * K; \
uInt o = (nHot / K) * K; \
...
@@ -177,10 +177,21 @@ void dDeconvolution_forward(T *inFeatures, T *outFeatures, T *w, uInt *rules,
...
@@ -177,10 +177,21 @@ void dDeconvolution_forward(T *inFeatures, T *outFeatures, T *w, uInt *rules,
uInt
nHot
,
uInt
input_nPlanes
,
uInt
input_stride
,
uInt
nHot
,
uInt
input_nPlanes
,
uInt
input_stride
,
uInt
output_nPlanes
,
uInt
output_stride
,
uInt
output_nPlanes
,
uInt
output_stride
,
cudaStream_t
stream
)
{
cudaStream_t
stream
)
{
FOO
(
64
,
16
)
FOO
(
T
,
64
,
16
)
FOO
(
32
,
8
)
FOO
(
T
,
32
,
8
)
FOO
(
16
,
4
)
FOO
(
T
,
16
,
4
)
FOO
(
8
,
2
)
FOO
(
T
,
8
,
2
)
assert
(
false
);
}
template
<
>
void
dDeconvolution_forward
<
double
>
(
double
*
inFeatures
,
double
*
outFeatures
,
double
*
w
,
uInt
*
rules
,
uInt
nHot
,
uInt
input_nPlanes
,
uInt
input_stride
,
uInt
output_nPlanes
,
uInt
output_stride
,
cudaStream_t
stream
)
{
FOO
(
double
,
32
,
8
)
FOO
(
double
,
16
,
4
)
FOO
(
double
,
8
,
2
)
assert
(
false
);
assert
(
false
);
}
}
#undef FOO
#undef FOO
...
@@ -345,7 +356,7 @@ __global__ void dDeconvolution_KMxKN_backward_dW_B(
...
@@ -345,7 +356,7 @@ __global__ void dDeconvolution_KMxKN_backward_dW_B(
}
}
}
}
#define FOO(K, V)
\
#define FOO(
T,
K, V) \
{ \
{ \
if (input_nPlanes % K == 0 and output_nPlanes % K == 0) { \
if (input_nPlanes % K == 0 and output_nPlanes % K == 0) { \
uInt o = (nHot / K) * K; \
uInt o = (nHot / K) * K; \
...
@@ -371,9 +382,9 @@ void dDeconvolution_backward_dW(T *inFeatures, T *dInFeatures, T *dOutFeatures,
...
@@ -371,9 +382,9 @@ void dDeconvolution_backward_dW(T *inFeatures, T *dInFeatures, T *dOutFeatures,
uInt
input_nPlanes
,
uInt
input_stride
,
uInt
input_nPlanes
,
uInt
input_stride
,
uInt
output_nPlanes
,
uInt
output_stride
,
uInt
output_nPlanes
,
uInt
output_stride
,
cudaStream_t
stream
)
{
cudaStream_t
stream
)
{
FOO
(
32
,
8
)
FOO
(
T
,
32
,
8
)
FOO
(
16
,
4
)
FOO
(
T
,
16
,
4
)
FOO
(
8
,
2
)
FOO
(
T
,
8
,
2
)
assert
(
false
);
assert
(
false
);
}
}
#undef FOO
#undef FOO
...
...
PyTorch/sparseconvnet/SCN/generic/GPU/SparseToDense.cu
View file @
3ff930b7
...
@@ -9,50 +9,54 @@
...
@@ -9,50 +9,54 @@
#else
#else
#include "SparseToDense.h"
#include "SparseToDense.h"
extern
"C"
void
scn_DR_
(
SparseToDense_updateOutput
)(
THLongTensor
*
inputSize
,
extern
"C"
void
scn_DR_
(
SparseToDense_updateOutput
)(
void
**
m
,
THLongTensor
*
inputSize
,
void
**
m
,
THCTensor
*
input_features
,
THCTensor
*
input_features
,
THCTensor
*
output_features
,
THCITensor
*
rulesBuffer
,
long
nPlanes
)
{
THCTensor
*
output_features
,
THCITensor
*
rulesBuffer
)
{
SCN_INITIALIZE_AND_REFERENCE
(
Metadata
<
Dimension
>
,
m
)
{
SCN_INITIALIZE_AND_REFERENCE
(
Metadata
<
Dimension
>
,
m
)
long
spatialVolume
=
1
;
{
long
sz
[
Dimension
+
2
];
long
sz
[
Dimension
+
2
];
sz
[
0
]
=
_m
.
inputSGs
->
size
();
sz
[
0
]
=
_m
.
grids
.
begin
()
->
second
.
size
();
sz
[
1
]
=
input_features
->
size
[
1
];
sz
[
1
]
=
nPlanes
;
//
input_features->size[1];
for
(
int
i
=
0
;
i
<
Dimension
;
i
++
)
{
for
(
int
i
=
0
;
i
<
Dimension
;
i
++
)
{
auto
x
=
THLongTensor_data
(
inputSize
)[
i
];
auto
x
=
THLongTensor_data
(
inputSize
)[
i
];
sz
[
i
+
2
]
=
x
;
sz
[
i
+
2
]
=
x
;
spatialVolume
*=
x
;
}
}
THCTensor_
(
resizeNd
)(
state
,
output_features
,
Dimension
+
2
,
sz
,
NULL
);
THCTensor_
(
resizeNd
)(
state
,
output_features
,
Dimension
+
2
,
sz
,
NULL
);
THCTensor_
(
zero
)(
state
,
output_features
);
THCTensor_
(
zero
)(
state
,
output_features
);
}
}
if
(
input_features
->
nDimension
==
2
)
{
auto
_rules
=
_m
.
getSparseToDenseRuleBook
(
inputSize
,
true
);
auto
_rules
=
_m
.
getSparseToDenseRuleBook
(
inputSize
,
true
);
auto
spatialVolume
=
_rules
.
size
();
uInt
nPlanes
=
input_features
->
size
[
1
];
uInt
nPlanes
=
input_features
->
size
[
1
];
auto
iF
=
THCTensor_
(
data
)(
state
,
input_features
);
auto
iF
=
THCTensor_
(
data
)(
state
,
input_features
);
auto
oF
=
THCTensor_
(
data
)(
state
,
output_features
);
auto
oF
=
THCTensor_
(
data
)(
state
,
output_features
);
RULEBOOKITERATOR
(
RULEBOOKITERATOR
(
SparseToDense_ForwardPass
<
real
>
(
THCState_getCurrentStream
(
state
),
iF
,
oF
,
SparseToDense_ForwardPass
<
real
>
(
THCState_getCurrentStream
(
state
),
iF
,
nPlanes
,
spatialVolume
,
rbB
,
nHotB
);
oF
,
nPlanes
,
spatialVolume
,
rbB
,
nHotB
);
,
oF
++
;)
// todo check ++ or +=spatialVolume????zzz
,
oF
+=
nPlanes
*
spatialVolume
;)
}
}
}
extern
"C"
void
scn_DR_
(
SparseToDense_updateGradInput
)(
extern
"C"
void
scn_DR_
(
SparseToDense_updateGradInput
)(
THLongTensor
*
inputSize
,
void
**
m
,
THCTensor
*
input_features
,
THLongTensor
*
inputSize
,
void
**
m
,
THCTensor
*
input_features
,
THCTensor
*
d_input_features
,
THCTensor
*
d_output_features
,
THCTensor
*
d_input_features
,
THCTensor
*
d_output_features
,
THCITensor
*
rulesBuffer
)
{
THCITensor
*
rulesBuffer
)
{
SCN_INITIALIZE_AND_REFERENCE
(
Metadata
<
Dimension
>
,
m
)
THCTensor_
(
resizeAs
)(
state
,
d_input_features
,
input_features
);
THCTensor_
(
resizeAs
)(
state
,
d_input_features
,
input_features
);
THCTensor_
(
zero
)(
state
,
d_input_features
);
THCTensor_
(
zero
)(
state
,
d_input_features
);
SCN_INITIALIZE_AND_REFERENCE
(
Metadata
<
Dimension
>
,
m
)
if
(
input_features
->
n
Dimension
==
2
)
{
auto
_rules
=
_m
.
getSparseToDenseRuleBook
(
inputSize
,
true
);
auto
_rules
=
_m
.
getSparseToDenseRuleBook
(
inputSize
,
true
);
auto
spatialVolume
=
_rules
.
s
ize
(
);
long
spatialVolume
=
THLongTensor_prodall
(
inputS
ize
);
uInt
nPlanes
=
d_input_features
->
size
[
1
];
uInt
nPlanes
=
d_input_features
->
size
[
1
];
auto
diF
=
THCTensor_
(
data
)(
state
,
d_input_features
);
auto
diF
=
THCTensor_
(
data
)(
state
,
d_input_features
);
auto
doF
=
THCTensor_
(
data
)(
state
,
d_output_features
);
auto
doF
=
THCTensor_
(
data
)(
state
,
d_output_features
);
RULEBOOKITERATOR
(
RULEBOOKITERATOR
(
SparseToDense_BackwardPass
<
real
>
(
SparseToDense_BackwardPass
<
real
>
(
THCState_getCurrentStream
(
state
),
diF
,
THCState_getCurrentStream
(
state
),
diF
,
doF
,
nPlanes
,
doF
,
nPlanes
,
spatialVolume
,
rbB
,
nHotB
);
spatialVolume
,
rbB
,
nHotB
);
,
doF
++
;)
,
doF
+=
nPlanes
*
spatialVolume
;)
}
}
}
#endif
#endif
PyTorch/sparseconvnet/SCN/generic/GPU/SparseToDense.h
View file @
3ff930b7
...
@@ -12,7 +12,8 @@
...
@@ -12,7 +12,8 @@
// NTX must be >=2 so r is filled properly
// NTX must be >=2 so r is filled properly
template
<
typename
T
,
uInt
NTX
,
uInt
NTY
>
template
<
typename
T
,
uInt
NTX
,
uInt
NTY
>
__global__
void
SparseToDense_fp
(
T
*
input_features
,
T
*
output_features
,
__global__
void
SparseToDense_fp
(
T
*
input_features
,
T
*
output_features
,
uInt
nPlanes
,
uInt
spatialVolume
,
uInt
*
rules
,
uInt
nHot
)
{
uInt
nPlanes
,
uInt
spatialVolume
,
uInt
*
rules
,
uInt
nHot
)
{
__shared__
uInt
r
[
NTY
*
2
];
__shared__
uInt
r
[
NTY
*
2
];
for
(
uInt
n
=
blockIdx
.
x
*
NTY
;
n
<
nHot
;
n
+=
gridDim
.
x
*
NTY
)
{
for
(
uInt
n
=
blockIdx
.
x
*
NTY
;
n
<
nHot
;
n
+=
gridDim
.
x
*
NTY
)
{
{
{
...
@@ -22,10 +23,10 @@ __global__ void SparseToDense_fp(T *input_features, T *output_features,
...
@@ -22,10 +23,10 @@ __global__ void SparseToDense_fp(T *input_features, T *output_features,
}
}
__syncthreads
();
__syncthreads
();
if
(
n
+
threadIdx
.
y
<
nHot
)
{
if
(
n
+
threadIdx
.
y
<
nHot
)
{
T
*
i
=
&
input_features
[
r
[
2
*
threadIdx
.
y
]
*
nPlanes
]
;
T
*
i
=
input_features
+
r
[
2
*
threadIdx
.
y
]
*
nPlanes
;
T
*
o
=
&
output_features
[
r
[
2
*
threadIdx
.
y
+
1
]
*
spatialVolume
*
nPlanes
];
T
*
o
=
output_features
+
r
[
2
*
threadIdx
.
y
+
1
];
for
(
uInt
plane
=
threadIdx
.
x
;
plane
<
nPlanes
;
plane
+=
NTX
)
for
(
uInt
plane
=
threadIdx
.
x
;
plane
<
nPlanes
;
plane
+=
NTX
)
o
[
plane
*
spatialVolume
]
=
i
[
plane
];
o
[
plane
*
spatialVolume
]
=
i
[
plane
];
}
}
__syncthreads
();
__syncthreads
();
}
}
...
@@ -34,15 +35,15 @@ __global__ void SparseToDense_fp(T *input_features, T *output_features,
...
@@ -34,15 +35,15 @@ __global__ void SparseToDense_fp(T *input_features, T *output_features,
template
<
typename
T
>
template
<
typename
T
>
void
SparseToDense_ForwardPass
(
cudaStream_t
stream
,
T
*
input_features
,
void
SparseToDense_ForwardPass
(
cudaStream_t
stream
,
T
*
input_features
,
T
*
output_features
,
uInt
nPlanes
,
T
*
output_features
,
uInt
nPlanes
,
uInt
spatialVolume
,
uInt
spatialVolume
,
uInt
*
rules
,
uInt
nHot
)
{
uInt
*
rules
,
uInt
nHot
)
{
SparseToDense_fp
<
T
,
32
,
32
>
<<
<
32
,
dim3
(
32
,
32
),
0
,
stream
>>>
SparseToDense_fp
<
T
,
32
,
32
><<<
32
,
dim3
(
32
,
32
),
0
,
stream
>>>
(
(
input_features
,
output_features
,
nPlanes
,
spatialVolume
,
rules
,
nHot
);
input_features
,
output_features
,
nPlanes
,
spatialVolume
,
rules
,
nHot
);
}
}
// NTX must be >=2 so r is filled properly
// NTX must be >=2 so r is filled properly
template
<
typename
T
,
uInt
NTX
,
uInt
NTY
>
template
<
typename
T
,
uInt
NTX
,
uInt
NTY
>
__global__
void
SparseToDense_bp
(
T
*
d_input_features
,
T
*
d_output_features
,
__global__
void
SparseToDense_bp
(
T
*
d_input_features
,
T
*
d_output_features
,
uInt
nPlanes
,
uInt
spatialVolume
,
uInt
*
rules
,
uInt
nHot
)
{
uInt
nPlanes
,
uInt
spatialVolume
,
uInt
*
rules
,
uInt
nHot
)
{
__shared__
uInt
r
[
NTY
*
2
];
__shared__
uInt
r
[
NTY
*
2
];
for
(
uInt
n
=
blockIdx
.
x
*
NTY
;
n
<
nHot
;
n
+=
gridDim
.
x
*
NTY
)
{
for
(
uInt
n
=
blockIdx
.
x
*
NTY
;
n
<
nHot
;
n
+=
gridDim
.
x
*
NTY
)
{
{
{
...
@@ -52,10 +53,10 @@ __global__ void SparseToDense_bp(T *d_input_features, T *d_output_features,
...
@@ -52,10 +53,10 @@ __global__ void SparseToDense_bp(T *d_input_features, T *d_output_features,
}
}
__syncthreads
();
__syncthreads
();
if
(
n
+
threadIdx
.
y
<
nHot
)
{
if
(
n
+
threadIdx
.
y
<
nHot
)
{
T
*
i
=
&
d_input_features
[
r
[
2
*
threadIdx
.
y
]
*
nPlanes
]
;
T
*
d_
i
=
d_input_features
+
r
[
2
*
threadIdx
.
y
]
*
nPlanes
;
T
*
o
=
&
d_output_features
[
r
[
2
*
threadIdx
.
y
+
1
]
*
spatialVolume
*
nPlanes
];
T
*
d_
o
=
d_output_features
+
r
[
2
*
threadIdx
.
y
+
1
];
for
(
uInt
plane
=
threadIdx
.
x
;
plane
<
nPlanes
;
plane
+=
NTX
)
for
(
uInt
plane
=
threadIdx
.
x
;
plane
<
nPlanes
;
plane
+=
NTX
)
i
[
plane
]
=
o
[
plane
*
spatialVolume
];
d_
i
[
plane
]
=
d_
o
[
plane
*
spatialVolume
];
}
}
__syncthreads
();
__syncthreads
();
}
}
...
@@ -64,9 +65,9 @@ __global__ void SparseToDense_bp(T *d_input_features, T *d_output_features,
...
@@ -64,9 +65,9 @@ __global__ void SparseToDense_bp(T *d_input_features, T *d_output_features,
template
<
typename
T
>
template
<
typename
T
>
void
SparseToDense_BackwardPass
(
cudaStream_t
stream
,
T
*
d_input_features
,
void
SparseToDense_BackwardPass
(
cudaStream_t
stream
,
T
*
d_input_features
,
T
*
d_output_features
,
uInt
nPlanes
,
T
*
d_output_features
,
uInt
nPlanes
,
uInt
spatialVolume
,
uInt
spatialVolume
,
uInt
*
rules
,
uInt
nHot
)
{
uInt
*
rules
,
uInt
nHot
)
{
SparseToDense_bp
<
T
,
32
,
32
>
<<
<
32
,
dim3
(
32
,
32
),
0
,
stream
>>>
SparseToDense_bp
<
T
,
32
,
32
><<<
32
,
dim3
(
32
,
32
),
0
,
stream
>>>
(
(
d_input_features
,
d_output_features
,
nPlanes
,
spatialVolume
,
rules
,
d_input_features
,
d_output_features
,
nPlanes
,
spatialVolume
,
rules
,
nHot
);
nHot
);
}
}
#endif
/* GPU_SPARSETODENSE_H */
#endif
/* GPU_SPARSETODENSE_H */
PyTorch/sparseconvnet/SCN/generic/GPU/THGenerateCudaFloatTypes.h
View file @
3ff930b7
...
@@ -27,4 +27,22 @@
...
@@ -27,4 +27,22 @@
#undef TH_REAL_IS_FLOAT
#undef TH_REAL_IS_FLOAT
#undef THBLAS_GEMM
#undef THBLAS_GEMM
// double
// #define real double
// #define accreal double
// #define Real Double
// #define CReal CudaDouble
// #define TH_REAL_IS_DOUBLE
// #define THBLAS_GEMM THCudaBlas_Dgemm
// #line 1 TH_GENERIC_FILE
// #include TH_GENERIC_FILE
// #undef accreal
// #undef real
// #undef Real
// #undef CReal
// #undef TH_REAL_IS_DOUBLE
// #undef THBLAS_GEMM
#undef TH_GENERIC_FILE
#undef TH_GENERIC_FILE
PyTorch/sparseconvnet/SCN/generic/Geometry/ConvolutionRules.h
View file @
3ff930b7
...
@@ -103,20 +103,26 @@ uInt Convolution_InputSgsToRulesAndOutputSgs_OMP(
...
@@ -103,20 +103,26 @@ uInt Convolution_InputSgsToRulesAndOutputSgs_OMP(
return
output_nActive
;
return
output_nActive
;
}
}
// for each site in filterVolume, list of (inputFeatureNumber,batchIdx) pairs
// for each active site, list of (inputFeatureNumber,batchIdx, spatialOffset)
// triples
template
<
uInt
dimension
>
template
<
uInt
dimension
>
void
SparseToDense_InputSgsToRulesAndOutputSgs
(
void
SparseToDense_InputSgsToRulesAndOutputSgs
(
SparseGrids
<
dimension
>
&
input_SGs
,
RuleBook
&
rules
,
long
*
spatialSize
)
{
SparseGrids
<
dimension
>
&
input_SGs
,
RuleBook
&
rules
,
long
*
spatialSize
)
{
uInt
batchSize
=
input_SGs
.
size
();
uInt
batchSize
=
input_SGs
.
size
();
SparseGrids
<
dimension
>
output_SGs
(
batchSize
);
std
::
vector
<
long
>
ones
(
dimension
,
1
);
rules
.
clear
();
rules
.
clear
();
for
(
uInt
i
=
0
;
i
<
batchSize
;
i
++
)
{
rules
.
resize
(
batchSize
);
auto
&
iSG
=
input_SGs
[
i
];
Point
<
dimension
>
lb
,
ub
;
auto
&
oSG
=
output_SGs
[
i
];
for
(
int
i
=
0
;
i
<
dimension
;
++
i
)
{
oSG
.
ctr
=
i
;
// batchIdx
lb
[
i
]
=
0
;
Convolution_InputSgToRulesAndOutputSg
<
dimension
>
(
ub
[
i
]
=
spatialSize
[
i
]
-
1
;
iSG
,
oSG
,
rules
,
spatialSize
,
&
ones
[
0
],
spatialSize
,
&
ones
[
0
]);
}
auto
region
=
RectangularRegion
<
dimension
>
(
lb
,
ub
);
for
(
uInt
batchIdx
=
0
;
batchIdx
<
batchSize
;
batchIdx
++
)
{
auto
&
iSG
=
input_SGs
[
batchIdx
];
for
(
auto
const
&
inIter
:
iSG
.
mp
)
{
rules
[
batchIdx
].
push_back
(
inIter
.
second
+
iSG
.
ctr
);
rules
[
batchIdx
].
push_back
(
region
.
offset
(
inIter
.
first
));
}
}
}
}
}
...
@@ -124,33 +130,21 @@ template <uInt dimension>
...
@@ -124,33 +130,21 @@ template <uInt dimension>
void
SparseToDense_InputSgsToRulesAndOutputSgs_OMP
(
void
SparseToDense_InputSgsToRulesAndOutputSgs_OMP
(
SparseGrids
<
dimension
>
&
input_SGs
,
RuleBook
&
rules
,
long
*
spatialSize
)
{
SparseGrids
<
dimension
>
&
input_SGs
,
RuleBook
&
rules
,
long
*
spatialSize
)
{
uInt
batchSize
=
input_SGs
.
size
();
uInt
batchSize
=
input_SGs
.
size
();
SparseGrids
<
dimension
>
output_SGs
(
batchSize
);
std
::
vector
<
long
>
ones
(
dimension
,
1
);
rules
.
clear
();
rules
.
clear
();
rules
.
resize
(
volume
<
dimension
>
(
spatialSize
));
rules
.
resize
(
batchSize
);
std
::
vector
<
RuleBook
>
rbs
(
batchSize
);
Point
<
dimension
>
lb
,
ub
;
{
for
(
int
i
=
0
;
i
<
dimension
;
++
i
)
{
uInt
i
;
lb
[
i
]
=
0
;
#pragma omp parallel for private(i)
ub
[
i
]
=
spatialSize
[
i
]
-
1
;
for
(
i
=
0
;
i
<
batchSize
;
i
++
)
{
}
output_SGs
[
i
].
ctr
=
i
;
// batchIdx
auto
region
=
RectangularRegion
<
dimension
>
(
lb
,
ub
);
Convolution_InputSgToRulesAndOutputSg
<
dimension
>
(
uInt
batchIdx
;
input_SGs
[
i
],
output_SGs
[
i
],
rbs
[
i
],
spatialSize
,
&
ones
[
0
],
#pragma omp parallel for private(batchIdx)
spatialSize
,
&
ones
[
0
]);
for
(
batchIdx
=
0
;
batchIdx
<
batchSize
;
batchIdx
++
)
{
}
auto
&
iSG
=
input_SGs
[
batchIdx
];
}
for
(
auto
const
&
inIter
:
iSG
.
mp
)
{
{
rules
[
batchIdx
].
push_back
(
inIter
.
second
+
iSG
.
ctr
);
uInt
i
;
rules
[
batchIdx
].
push_back
(
region
.
offset
(
inIter
.
first
));
#pragma omp parallel for private(i)
for
(
i
=
0
;
i
<
rules
.
size
();
i
++
)
{
auto
&
R
=
rules
[
i
];
for
(
uInt
j
=
0
;
j
<
batchSize
;
j
++
)
{
auto
&
r
=
rbs
[
j
][
i
];
for
(
uInt
k
=
0
;
k
<
r
.
size
();)
{
R
.
push_back
(
r
[
k
++
]);
R
.
push_back
(
r
[
k
++
]);
}
}
}
}
}
}
}
}
...
...
PyTorch/sparseconvnet/SCN/generic/Geometry/Metadata.cpp
View file @
3ff930b7
...
@@ -125,16 +125,15 @@ extern "C" void scn_D_(getSpatialLocations)(void **m, THLongTensor *spatialSize,
...
@@ -125,16 +125,15 @@ extern "C" void scn_D_(getSpatialLocations)(void **m, THLongTensor *spatialSize,
}
}
extern
"C"
void
extern
"C"
void
scn_D_
(
createMetadataForDenseToSparse
)(
void
**
m
,
THLongTensor
*
spatialSize_
,
scn_D_
(
createMetadataForDenseToSparse
)(
void
**
m
,
THLongTensor
*
spatialSize_
,
THLongTensor
*
pad_
,
THLongTensor
*
nz_
,
THLongTensor
*
nz_
,
long
batchSize
)
{
long
batchSize
)
{
SCN_INITIALIZE_AND_REFERENCE
(
Metadata
<
Dimension
>
,
m
)
SCN_INITIALIZE_AND_REFERENCE
(
Metadata
<
Dimension
>
,
m
)
_m
.
clear
();
_m
.
setInputSpatialSize
(
spatialSize_
);
_m
.
setInputSpatialSize
(
spatialSize_
);
_m
.
inputSGs
->
resize
(
batchSize
);
_m
.
inputSGs
->
resize
(
batchSize
);
auto
&
nActive
=
*
_m
.
inputNActive
;
auto
&
nActive
=
*
_m
.
inputNActive
;
nActive
=
nz_
->
size
[
0
];
nActive
=
nz_
->
size
[
0
];
auto
nz
=
THLongTensor_data
(
nz_
);
auto
nz
=
THLongTensor_data
(
nz_
);
auto
pad
=
THLongTensor_data
(
pad_
);
auto
spatialSize
=
THLongTensor_data
(
spatialSize_
);
auto
spatialSize
=
THLongTensor_data
(
spatialSize_
);
std
::
vector
<
uInt
>
br
(
batchSize
+
1
);
std
::
vector
<
uInt
>
br
(
batchSize
+
1
);
...
@@ -157,8 +156,7 @@ scn_D_(createMetadataForDenseToSparse)(void **m, THLongTensor *spatialSize_,
...
@@ -157,8 +156,7 @@ scn_D_(createMetadataForDenseToSparse)(void **m, THLongTensor *spatialSize_,
for
(
uInt
i
=
br
[
b
];
i
<
br
[
b
+
1
];
i
++
)
{
for
(
uInt
i
=
br
[
b
];
i
<
br
[
b
+
1
];
i
++
)
{
Point
<
Dimension
>
x
;
Point
<
Dimension
>
x
;
for
(
uInt
j
=
0
;
j
<
Dimension
;
j
++
)
{
for
(
uInt
j
=
0
;
j
<
Dimension
;
j
++
)
{
x
[
j
]
=
nz
[
i
*
(
Dimension
+
1
)
+
j
+
1
]
+
x
[
j
]
=
nz
[
i
*
(
Dimension
+
1
)
+
j
+
1
];
// 0-indexed
pad
[
b
*
Dimension
+
j
];
// 0-indexed
}
}
sg
.
mp
[
x
]
=
i
;
sg
.
mp
[
x
]
=
i
;
}
}
...
@@ -281,6 +279,7 @@ extern "C" void scn_D_(generateRuleBooks2s2)(void **m) {
...
@@ -281,6 +279,7 @@ extern "C" void scn_D_(generateRuleBooks2s2)(void **m) {
p2
[
i
]
=
p3
[
i
]
=
inS
[
i
]
=
outS
[
i
];
p2
[
i
]
=
p3
[
i
]
=
inS
[
i
]
=
outS
[
i
];
}
}
}
}
extern
"C"
void
scn_D_
(
freeMetadata
)(
void
**
m
)
{
extern
"C"
void
scn_D_
(
freeMetadata
)(
void
**
m
)
{
SCN_DELETE
(
Metadata
<
Dimension
>
,
m
)
SCN_DELETE
(
Metadata
<
Dimension
>
,
m
)
}
}
...
...
PyTorch/sparseconvnet/SCN/generic/Geometry/Metadata.h
View file @
3ff930b7
...
@@ -11,7 +11,6 @@
...
@@ -11,7 +11,6 @@
#include "ActivePoolingRules.h"
#include "ActivePoolingRules.h"
#include "ConvolutionRules.h"
#include "ConvolutionRules.h"
#include "ValidConvolutionRules.h"
#include "ValidConvolutionRules.h"
#include <iostream>
#include <tuple>
#include <tuple>
#include <unordered_map>
#include <unordered_map>
...
@@ -40,6 +39,18 @@ public:
...
@@ -40,6 +39,18 @@ public:
uInt
*
inputNActive
;
uInt
*
inputNActive
;
Metadata
()
{}
Metadata
()
{}
void
clear
()
{
nActive
.
clear
();
grids
.
clear
();
activePoolingRuleBooks
.
clear
();
validRuleBooks
.
clear
();
ruleBooks
.
clear
();
sparseToDenseRuleBooks
.
clear
();
inputSGs
=
nullptr
;
inputSG
=
nullptr
;
inputNActive
=
nullptr
;
}
void
setInputSpatialSize
(
THLongTensor
*
spatialSize
)
{
void
setInputSpatialSize
(
THLongTensor
*
spatialSize
)
{
inputSpatialSize
=
LongTensorToPoint
<
dimension
>
(
spatialSize
);
inputSpatialSize
=
LongTensorToPoint
<
dimension
>
(
spatialSize
);
inputSGs
=
&
grids
[
inputSpatialSize
];
inputSGs
=
&
grids
[
inputSpatialSize
];
...
...
PyTorch/sparseconvnet/SCN/generic/Geometry/ValidConvolutionRules.h
View file @
3ff930b7
...
@@ -6,8 +6,6 @@
...
@@ -6,8 +6,6 @@
#ifndef VALIDCONVOLUTIONRULES_H
#ifndef VALIDCONVOLUTIONRULES_H
#define VALIDCONVOLUTIONRULES_H
#define VALIDCONVOLUTIONRULES_H
#include<iostream>
// Full input region for an output point
// Full input region for an output point
template
<
uInt
dimension
>
template
<
uInt
dimension
>
...
@@ -26,8 +24,8 @@ InputRegionCalculator_Valid(const Point<dimension> &output, long *size) {
...
@@ -26,8 +24,8 @@ InputRegionCalculator_Valid(const Point<dimension> &output, long *size) {
// rules is used to carry out the "lowering" whilst carrying out the convolution
// rules is used to carry out the "lowering" whilst carrying out the convolution
template
<
uInt
dimension
>
template
<
uInt
dimension
>
double
ValidConvolution_SgToRules
(
SparseGrid
<
dimension
>
&
grid
,
double
ValidConvolution_SgToRules
(
SparseGrid
<
dimension
>
&
grid
,
RuleBook
&
rules
,
RuleBook
&
rules
,
long
*
size
)
{
long
*
size
)
{
uInt
sd
=
volume
<
dimension
>
(
size
);
uInt
sd
=
volume
<
dimension
>
(
size
);
double
countActiveInputs
=
0
;
double
countActiveInputs
=
0
;
for
(
auto
const
&
outputIter
:
grid
.
mp
)
{
for
(
auto
const
&
outputIter
:
grid
.
mp
)
{
...
@@ -48,8 +46,8 @@ double ValidConvolution_SgToRules(SparseGrid<dimension> &grid,
...
@@ -48,8 +46,8 @@ double ValidConvolution_SgToRules(SparseGrid<dimension> &grid,
}
}
template
<
uInt
dimension
>
template
<
uInt
dimension
>
uInt
ValidConvolution_SgsToRules
(
SparseGrids
<
dimension
>
&
SGs
,
uInt
ValidConvolution_SgsToRules
(
SparseGrids
<
dimension
>
&
SGs
,
RuleBook
&
rules
,
RuleBook
&
rules
,
long
*
size
)
{
long
*
size
)
{
uInt
sd
=
volume
<
dimension
>
(
size
);
uInt
sd
=
volume
<
dimension
>
(
size
);
uInt
countActiveInputs
=
0
;
uInt
countActiveInputs
=
0
;
rules
.
clear
();
rules
.
clear
();
...
...
PyTorch/sparseconvnet/SCN/header_cpu.h
View file @
3ff930b7
...
@@ -8,13 +8,12 @@ long scn_readPtr(void **ptr);
...
@@ -8,13 +8,12 @@ long scn_readPtr(void **ptr);
void
scn_writePtr
(
long
p
,
void
**
ptr
);
void
scn_writePtr
(
long
p
,
void
**
ptr
);
double
scn_ruleBookBits
(
void
);
double
scn_ruleBookBits
(
void
);
void
scn_2_drawCurve
(
void
**
m
,
THFloatTensor
*
features
,
THFloatTensor
*
stroke
);
void
scn_2_drawCurve
(
void
**
m
,
THFloatTensor
*
features
,
THFloatTensor
*
stroke
);
double
scn_1_addSampleFromThresholdedTensor
(
double
scn_1_addSampleFromThresholdedTensor
(
void
**
m
,
THFloatTensor
*
features_
,
THFloatTensor
*
tensor_
,
void
**
m
,
THFloatTensor
*
features_
,
THFloatTensor
*
tensor_
,
THLongTensor
*
offset_
,
THLongTensor
*
spatialSize_
,
float
threshold
);
THLongTensor
*
offset_
,
THLongTensor
*
spatialSize_
,
float
threshold
);
void
scn_1_batchAddSample
(
void
**
m
);
void
scn_1_batchAddSample
(
void
**
m
);
void
scn_1_createMetadataForDenseToSparse
(
void
scn_1_createMetadataForDenseToSparse
(
void
**
m
,
THLongTensor
*
spatialSize_
,
THLongTensor
*
pad
,
THLongTensor
*
nz
,
void
**
m
,
THLongTensor
*
spatialSize_
,
THLongTensor
*
nz
,
long
batchSize
);
long
batchSize
);
void
scn_1_freeMetadata
(
void
**
metadata
);
void
scn_1_freeMetadata
(
void
**
metadata
);
void
scn_1_generateRuleBooks3s2
(
void
**
m
);
void
scn_1_generateRuleBooks3s2
(
void
**
m
);
void
scn_1_generateRuleBooks2s2
(
void
**
m
);
void
scn_1_generateRuleBooks2s2
(
void
**
m
);
...
@@ -25,13 +24,13 @@ void scn_1_setInputSpatialLocations(void **m, THFloatTensor *features,
...
@@ -25,13 +24,13 @@ void scn_1_setInputSpatialLocations(void **m, THFloatTensor *features,
THLongTensor
*
locations
,
THFloatTensor
*
vecs
,
_Bool
overwrite
);
THLongTensor
*
locations
,
THFloatTensor
*
vecs
,
_Bool
overwrite
);
void
scn_1_getSpatialLocations
(
void
**
m
,
THLongTensor
*
spatialSize
,
void
scn_1_getSpatialLocations
(
void
**
m
,
THLongTensor
*
spatialSize
,
THLongTensor
*
locations
);
THLongTensor
*
locations
);
double
scn_2_addSampleFromThresholdedTensor
(
double
scn_2_addSampleFromThresholdedTensor
(
void
**
m
,
THFloatTensor
*
features_
,
THFloatTensor
*
tensor_
,
void
**
m
,
THFloatTensor
*
features_
,
THFloatTensor
*
tensor_
,
THLongTensor
*
offset_
,
THLongTensor
*
spatialSize_
,
float
threshold
);
THLongTensor
*
offset_
,
THLongTensor
*
spatialSize_
,
float
threshold
);
void
scn_2_batchAddSample
(
void
**
m
);
void
scn_2_batchAddSample
(
void
**
m
);
void
scn_2_createMetadataForDenseToSparse
(
void
scn_2_createMetadataForDenseToSparse
(
void
**
m
,
THLongTensor
*
spatialSize_
,
THLongTensor
*
pad
,
THLongTensor
*
nz
,
void
**
m
,
THLongTensor
*
spatialSize_
,
THLongTensor
*
nz
,
long
batchSize
);
long
batchSize
);
void
scn_2_freeMetadata
(
void
**
metadata
);
void
scn_2_freeMetadata
(
void
**
metadata
);
void
scn_2_generateRuleBooks3s2
(
void
**
m
);
void
scn_2_generateRuleBooks3s2
(
void
**
m
);
void
scn_2_generateRuleBooks2s2
(
void
**
m
);
void
scn_2_generateRuleBooks2s2
(
void
**
m
);
...
@@ -42,13 +41,13 @@ void scn_2_setInputSpatialLocations(void **m, THFloatTensor *features,
...
@@ -42,13 +41,13 @@ void scn_2_setInputSpatialLocations(void **m, THFloatTensor *features,
THLongTensor
*
locations
,
THFloatTensor
*
vecs
,
_Bool
overwrite
);
THLongTensor
*
locations
,
THFloatTensor
*
vecs
,
_Bool
overwrite
);
void
scn_2_getSpatialLocations
(
void
**
m
,
THLongTensor
*
spatialSize
,
void
scn_2_getSpatialLocations
(
void
**
m
,
THLongTensor
*
spatialSize
,
THLongTensor
*
locations
);
THLongTensor
*
locations
);
double
scn_3_addSampleFromThresholdedTensor
(
double
scn_3_addSampleFromThresholdedTensor
(
void
**
m
,
THFloatTensor
*
features_
,
THFloatTensor
*
tensor_
,
void
**
m
,
THFloatTensor
*
features_
,
THFloatTensor
*
tensor_
,
THLongTensor
*
offset_
,
THLongTensor
*
spatialSize_
,
float
threshold
);
THLongTensor
*
offset_
,
THLongTensor
*
spatialSize_
,
float
threshold
);
void
scn_3_batchAddSample
(
void
**
m
);
void
scn_3_batchAddSample
(
void
**
m
);
void
scn_3_createMetadataForDenseToSparse
(
void
scn_3_createMetadataForDenseToSparse
(
void
**
m
,
THLongTensor
*
spatialSize_
,
THLongTensor
*
pad
,
THLongTensor
*
nz
,
void
**
m
,
THLongTensor
*
spatialSize_
,
THLongTensor
*
nz
,
long
batchSize
);
long
batchSize
);
void
scn_3_freeMetadata
(
void
**
metadata
);
void
scn_3_freeMetadata
(
void
**
metadata
);
void
scn_3_generateRuleBooks3s2
(
void
**
m
);
void
scn_3_generateRuleBooks3s2
(
void
**
m
);
void
scn_3_generateRuleBooks2s2
(
void
**
m
);
void
scn_3_generateRuleBooks2s2
(
void
**
m
);
...
@@ -59,13 +58,13 @@ void scn_3_setInputSpatialLocations(void **m, THFloatTensor *features,
...
@@ -59,13 +58,13 @@ void scn_3_setInputSpatialLocations(void **m, THFloatTensor *features,
THLongTensor
*
locations
,
THFloatTensor
*
vecs
,
_Bool
overwrite
);
THLongTensor
*
locations
,
THFloatTensor
*
vecs
,
_Bool
overwrite
);
void
scn_3_getSpatialLocations
(
void
**
m
,
THLongTensor
*
spatialSize
,
void
scn_3_getSpatialLocations
(
void
**
m
,
THLongTensor
*
spatialSize
,
THLongTensor
*
locations
);
THLongTensor
*
locations
);
double
scn_4_addSampleFromThresholdedTensor
(
double
scn_4_addSampleFromThresholdedTensor
(
void
**
m
,
THFloatTensor
*
features_
,
THFloatTensor
*
tensor_
,
void
**
m
,
THFloatTensor
*
features_
,
THFloatTensor
*
tensor_
,
THLongTensor
*
offset_
,
THLongTensor
*
spatialSize_
,
float
threshold
);
THLongTensor
*
offset_
,
THLongTensor
*
spatialSize_
,
float
threshold
);
void
scn_4_batchAddSample
(
void
**
m
);
void
scn_4_batchAddSample
(
void
**
m
);
void
scn_4_createMetadataForDenseToSparse
(
void
scn_4_createMetadataForDenseToSparse
(
void
**
m
,
THLongTensor
*
spatialSize_
,
THLongTensor
*
pad
,
THLongTensor
*
nz
,
void
**
m
,
THLongTensor
*
spatialSize_
,
THLongTensor
*
nz
,
long
batchSize
);
long
batchSize
);
void
scn_4_freeMetadata
(
void
**
metadata
);
void
scn_4_freeMetadata
(
void
**
metadata
);
void
scn_4_generateRuleBooks3s2
(
void
**
m
);
void
scn_4_generateRuleBooks3s2
(
void
**
m
);
void
scn_4_generateRuleBooks2s2
(
void
**
m
);
void
scn_4_generateRuleBooks2s2
(
void
**
m
);
...
@@ -76,13 +75,13 @@ void scn_4_setInputSpatialLocations(void **m, THFloatTensor *features,
...
@@ -76,13 +75,13 @@ void scn_4_setInputSpatialLocations(void **m, THFloatTensor *features,
THLongTensor
*
locations
,
THFloatTensor
*
vecs
,
_Bool
overwrite
);
THLongTensor
*
locations
,
THFloatTensor
*
vecs
,
_Bool
overwrite
);
void
scn_4_getSpatialLocations
(
void
**
m
,
THLongTensor
*
spatialSize
,
void
scn_4_getSpatialLocations
(
void
**
m
,
THLongTensor
*
spatialSize
,
THLongTensor
*
locations
);
THLongTensor
*
locations
);
double
scn_5_addSampleFromThresholdedTensor
(
double
scn_5_addSampleFromThresholdedTensor
(
void
**
m
,
THFloatTensor
*
features_
,
THFloatTensor
*
tensor_
,
void
**
m
,
THFloatTensor
*
features_
,
THFloatTensor
*
tensor_
,
THLongTensor
*
offset_
,
THLongTensor
*
spatialSize_
,
float
threshold
);
THLongTensor
*
offset_
,
THLongTensor
*
spatialSize_
,
float
threshold
);
void
scn_5_batchAddSample
(
void
**
m
);
void
scn_5_batchAddSample
(
void
**
m
);
void
scn_5_createMetadataForDenseToSparse
(
void
scn_5_createMetadataForDenseToSparse
(
void
**
m
,
THLongTensor
*
spatialSize_
,
THLongTensor
*
pad
,
THLongTensor
*
nz
,
void
**
m
,
THLongTensor
*
spatialSize_
,
THLongTensor
*
nz
,
long
batchSize
);
long
batchSize
);
void
scn_5_freeMetadata
(
void
**
metadata
);
void
scn_5_freeMetadata
(
void
**
metadata
);
void
scn_5_generateRuleBooks3s2
(
void
**
m
);
void
scn_5_generateRuleBooks3s2
(
void
**
m
);
void
scn_5_generateRuleBooks2s2
(
void
**
m
);
void
scn_5_generateRuleBooks2s2
(
void
**
m
);
...
@@ -93,13 +92,13 @@ void scn_5_setInputSpatialLocations(void **m, THFloatTensor *features,
...
@@ -93,13 +92,13 @@ void scn_5_setInputSpatialLocations(void **m, THFloatTensor *features,
THLongTensor
*
locations
,
THFloatTensor
*
vecs
,
_Bool
overwrite
);
THLongTensor
*
locations
,
THFloatTensor
*
vecs
,
_Bool
overwrite
);
void
scn_5_getSpatialLocations
(
void
**
m
,
THLongTensor
*
spatialSize
,
void
scn_5_getSpatialLocations
(
void
**
m
,
THLongTensor
*
spatialSize
,
THLongTensor
*
locations
);
THLongTensor
*
locations
);
double
scn_6_addSampleFromThresholdedTensor
(
double
scn_6_addSampleFromThresholdedTensor
(
void
**
m
,
THFloatTensor
*
features_
,
THFloatTensor
*
tensor_
,
void
**
m
,
THFloatTensor
*
features_
,
THFloatTensor
*
tensor_
,
THLongTensor
*
offset_
,
THLongTensor
*
spatialSize_
,
float
threshold
);
THLongTensor
*
offset_
,
THLongTensor
*
spatialSize_
,
float
threshold
);
void
scn_6_batchAddSample
(
void
**
m
);
void
scn_6_batchAddSample
(
void
**
m
);
void
scn_6_createMetadataForDenseToSparse
(
void
scn_6_createMetadataForDenseToSparse
(
void
**
m
,
THLongTensor
*
spatialSize_
,
THLongTensor
*
pad
,
THLongTensor
*
nz
,
void
**
m
,
THLongTensor
*
spatialSize_
,
THLongTensor
*
nz
,
long
batchSize
);
long
batchSize
);
void
scn_6_freeMetadata
(
void
**
metadata
);
void
scn_6_freeMetadata
(
void
**
metadata
);
void
scn_6_generateRuleBooks3s2
(
void
**
m
);
void
scn_6_generateRuleBooks3s2
(
void
**
m
);
void
scn_6_generateRuleBooks2s2
(
void
**
m
);
void
scn_6_generateRuleBooks2s2
(
void
**
m
);
...
@@ -110,13 +109,13 @@ void scn_6_setInputSpatialLocations(void **m, THFloatTensor *features,
...
@@ -110,13 +109,13 @@ void scn_6_setInputSpatialLocations(void **m, THFloatTensor *features,
THLongTensor
*
locations
,
THFloatTensor
*
vecs
,
_Bool
overwrite
);
THLongTensor
*
locations
,
THFloatTensor
*
vecs
,
_Bool
overwrite
);
void
scn_6_getSpatialLocations
(
void
**
m
,
THLongTensor
*
spatialSize
,
void
scn_6_getSpatialLocations
(
void
**
m
,
THLongTensor
*
spatialSize
,
THLongTensor
*
locations
);
THLongTensor
*
locations
);
double
scn_7_addSampleFromThresholdedTensor
(
double
scn_7_addSampleFromThresholdedTensor
(
void
**
m
,
THFloatTensor
*
features_
,
THFloatTensor
*
tensor_
,
void
**
m
,
THFloatTensor
*
features_
,
THFloatTensor
*
tensor_
,
THLongTensor
*
offset_
,
THLongTensor
*
spatialSize_
,
float
threshold
);
THLongTensor
*
offset_
,
THLongTensor
*
spatialSize_
,
float
threshold
);
void
scn_7_batchAddSample
(
void
**
m
);
void
scn_7_batchAddSample
(
void
**
m
);
void
scn_7_createMetadataForDenseToSparse
(
void
scn_7_createMetadataForDenseToSparse
(
void
**
m
,
THLongTensor
*
spatialSize_
,
THLongTensor
*
pad
,
THLongTensor
*
nz
,
void
**
m
,
THLongTensor
*
spatialSize_
,
THLongTensor
*
nz
,
long
batchSize
);
long
batchSize
);
void
scn_7_freeMetadata
(
void
**
metadata
);
void
scn_7_freeMetadata
(
void
**
metadata
);
void
scn_7_generateRuleBooks3s2
(
void
**
m
);
void
scn_7_generateRuleBooks3s2
(
void
**
m
);
void
scn_7_generateRuleBooks2s2
(
void
**
m
);
void
scn_7_generateRuleBooks2s2
(
void
**
m
);
...
@@ -127,13 +126,13 @@ void scn_7_setInputSpatialLocations(void **m, THFloatTensor *features,
...
@@ -127,13 +126,13 @@ void scn_7_setInputSpatialLocations(void **m, THFloatTensor *features,
THLongTensor
*
locations
,
THFloatTensor
*
vecs
,
_Bool
overwrite
);
THLongTensor
*
locations
,
THFloatTensor
*
vecs
,
_Bool
overwrite
);
void
scn_7_getSpatialLocations
(
void
**
m
,
THLongTensor
*
spatialSize
,
void
scn_7_getSpatialLocations
(
void
**
m
,
THLongTensor
*
spatialSize
,
THLongTensor
*
locations
);
THLongTensor
*
locations
);
double
scn_8_addSampleFromThresholdedTensor
(
double
scn_8_addSampleFromThresholdedTensor
(
void
**
m
,
THFloatTensor
*
features_
,
THFloatTensor
*
tensor_
,
void
**
m
,
THFloatTensor
*
features_
,
THFloatTensor
*
tensor_
,
THLongTensor
*
offset_
,
THLongTensor
*
spatialSize_
,
float
threshold
);
THLongTensor
*
offset_
,
THLongTensor
*
spatialSize_
,
float
threshold
);
void
scn_8_batchAddSample
(
void
**
m
);
void
scn_8_batchAddSample
(
void
**
m
);
void
scn_8_createMetadataForDenseToSparse
(
void
scn_8_createMetadataForDenseToSparse
(
void
**
m
,
THLongTensor
*
spatialSize_
,
THLongTensor
*
pad
,
THLongTensor
*
nz
,
void
**
m
,
THLongTensor
*
spatialSize_
,
THLongTensor
*
nz
,
long
batchSize
);
long
batchSize
);
void
scn_8_freeMetadata
(
void
**
metadata
);
void
scn_8_freeMetadata
(
void
**
metadata
);
void
scn_8_generateRuleBooks3s2
(
void
**
m
);
void
scn_8_generateRuleBooks3s2
(
void
**
m
);
void
scn_8_generateRuleBooks2s2
(
void
**
m
);
void
scn_8_generateRuleBooks2s2
(
void
**
m
);
...
@@ -144,13 +143,13 @@ void scn_8_setInputSpatialLocations(void **m, THFloatTensor *features,
...
@@ -144,13 +143,13 @@ void scn_8_setInputSpatialLocations(void **m, THFloatTensor *features,
THLongTensor
*
locations
,
THFloatTensor
*
vecs
,
_Bool
overwrite
);
THLongTensor
*
locations
,
THFloatTensor
*
vecs
,
_Bool
overwrite
);
void
scn_8_getSpatialLocations
(
void
**
m
,
THLongTensor
*
spatialSize
,
void
scn_8_getSpatialLocations
(
void
**
m
,
THLongTensor
*
spatialSize
,
THLongTensor
*
locations
);
THLongTensor
*
locations
);
double
scn_9_addSampleFromThresholdedTensor
(
double
scn_9_addSampleFromThresholdedTensor
(
void
**
m
,
THFloatTensor
*
features_
,
THFloatTensor
*
tensor_
,
void
**
m
,
THFloatTensor
*
features_
,
THFloatTensor
*
tensor_
,
THLongTensor
*
offset_
,
THLongTensor
*
spatialSize_
,
float
threshold
);
THLongTensor
*
offset_
,
THLongTensor
*
spatialSize_
,
float
threshold
);
void
scn_9_batchAddSample
(
void
**
m
);
void
scn_9_batchAddSample
(
void
**
m
);
void
scn_9_createMetadataForDenseToSparse
(
void
scn_9_createMetadataForDenseToSparse
(
void
**
m
,
THLongTensor
*
spatialSize_
,
THLongTensor
*
pad
,
THLongTensor
*
nz
,
void
**
m
,
THLongTensor
*
spatialSize_
,
THLongTensor
*
nz
,
long
batchSize
);
long
batchSize
);
void
scn_9_freeMetadata
(
void
**
metadata
);
void
scn_9_freeMetadata
(
void
**
metadata
);
void
scn_9_generateRuleBooks3s2
(
void
**
m
);
void
scn_9_generateRuleBooks3s2
(
void
**
m
);
void
scn_9_generateRuleBooks2s2
(
void
**
m
);
void
scn_9_generateRuleBooks2s2
(
void
**
m
);
...
@@ -161,13 +160,13 @@ void scn_9_setInputSpatialLocations(void **m, THFloatTensor *features,
...
@@ -161,13 +160,13 @@ void scn_9_setInputSpatialLocations(void **m, THFloatTensor *features,
THLongTensor
*
locations
,
THFloatTensor
*
vecs
,
_Bool
overwrite
);
THLongTensor
*
locations
,
THFloatTensor
*
vecs
,
_Bool
overwrite
);
void
scn_9_getSpatialLocations
(
void
**
m
,
THLongTensor
*
spatialSize
,
void
scn_9_getSpatialLocations
(
void
**
m
,
THLongTensor
*
spatialSize
,
THLongTensor
*
locations
);
THLongTensor
*
locations
);
double
scn_10_addSampleFromThresholdedTensor
(
double
scn_10_addSampleFromThresholdedTensor
(
void
**
m
,
THFloatTensor
*
features_
,
THFloatTensor
*
tensor_
,
void
**
m
,
THFloatTensor
*
features_
,
THFloatTensor
*
tensor_
,
THLongTensor
*
offset_
,
THLongTensor
*
spatialSize_
,
float
threshold
);
THLongTensor
*
offset_
,
THLongTensor
*
spatialSize_
,
float
threshold
);
void
scn_10_batchAddSample
(
void
**
m
);
void
scn_10_batchAddSample
(
void
**
m
);
void
scn_10_createMetadataForDenseToSparse
(
void
scn_10_createMetadataForDenseToSparse
(
void
**
m
,
THLongTensor
*
spatialSize_
,
THLongTensor
*
pad
,
THLongTensor
*
nz
,
void
**
m
,
THLongTensor
*
spatialSize_
,
THLongTensor
*
nz
,
long
batchSize
);
long
batchSize
);
void
scn_10_freeMetadata
(
void
**
metadata
);
void
scn_10_freeMetadata
(
void
**
metadata
);
void
scn_10_generateRuleBooks3s2
(
void
**
m
);
void
scn_10_generateRuleBooks3s2
(
void
**
m
);
void
scn_10_generateRuleBooks2s2
(
void
**
m
);
void
scn_10_generateRuleBooks2s2
(
void
**
m
);
...
@@ -178,7 +177,8 @@ void scn_10_setInputSpatialLocations(void **m, THFloatTensor *features,
...
@@ -178,7 +177,8 @@ void scn_10_setInputSpatialLocations(void **m, THFloatTensor *features,
THLongTensor
*
locations
,
THFloatTensor
*
vecs
,
_Bool
overwrite
);
THLongTensor
*
locations
,
THFloatTensor
*
vecs
,
_Bool
overwrite
);
void
scn_10_getSpatialLocations
(
void
**
m
,
THLongTensor
*
spatialSize
,
void
scn_10_getSpatialLocations
(
void
**
m
,
THLongTensor
*
spatialSize
,
THLongTensor
*
locations
);
THLongTensor
*
locations
);
void
scn_cpu_float_AffineReluTrivialConvolution_updateOutput
(
void
scn_cpu_float_AffineReluTrivialConvolution_updateOutput
(
THFloatTensor
*
input_features
,
THFloatTensor
*
output_features
,
THFloatTensor
*
input_features
,
THFloatTensor
*
output_features
,
THFloatTensor
*
affineWeight
,
THFloatTensor
*
affineBias
,
THFloatTensor
*
convWeight
);
THFloatTensor
*
affineWeight
,
THFloatTensor
*
affineBias
,
THFloatTensor
*
convWeight
);
void
scn_cpu_float_AffineReluTrivialConvolution_backward
(
void
scn_cpu_float_AffineReluTrivialConvolution_backward
(
...
@@ -348,7 +348,7 @@ void scn_cpu_float1MaxPooling_updateGradInput(
...
@@ -348,7 +348,7 @@ void scn_cpu_float1MaxPooling_updateGradInput(
// SparseToDense
// SparseToDense
void
scn_cpu_float1SparseToDense_updateOutput
(
void
scn_cpu_float1SparseToDense_updateOutput
(
THLongTensor
*
inputSize
,
void
**
m
,
THFloatTensor
*
input_features
,
THLongTensor
*
inputSize
,
void
**
m
,
THFloatTensor
*
input_features
,
THFloatTensor
*
output_features
,
void
*
rulesBuffer
);
THFloatTensor
*
output_features
,
void
*
rulesBuffer
,
long
nPlanes
);
void
scn_cpu_float1SparseToDense_updateGradInput
(
void
scn_cpu_float1SparseToDense_updateGradInput
(
THLongTensor
*
inputSize
,
void
**
m
,
THFloatTensor
*
input_features
,
THLongTensor
*
inputSize
,
void
**
m
,
THFloatTensor
*
input_features
,
THFloatTensor
*
d_input_features
,
THFloatTensor
*
d_output_features
,
THFloatTensor
*
d_input_features
,
THFloatTensor
*
d_output_features
,
...
@@ -425,7 +425,7 @@ void scn_cpu_float2MaxPooling_updateGradInput(
...
@@ -425,7 +425,7 @@ void scn_cpu_float2MaxPooling_updateGradInput(
// SparseToDense
// SparseToDense
void
scn_cpu_float2SparseToDense_updateOutput
(
void
scn_cpu_float2SparseToDense_updateOutput
(
THLongTensor
*
inputSize
,
void
**
m
,
THFloatTensor
*
input_features
,
THLongTensor
*
inputSize
,
void
**
m
,
THFloatTensor
*
input_features
,
THFloatTensor
*
output_features
,
void
*
rulesBuffer
);
THFloatTensor
*
output_features
,
void
*
rulesBuffer
,
long
nPlanes
);
void
scn_cpu_float2SparseToDense_updateGradInput
(
void
scn_cpu_float2SparseToDense_updateGradInput
(
THLongTensor
*
inputSize
,
void
**
m
,
THFloatTensor
*
input_features
,
THLongTensor
*
inputSize
,
void
**
m
,
THFloatTensor
*
input_features
,
THFloatTensor
*
d_input_features
,
THFloatTensor
*
d_output_features
,
THFloatTensor
*
d_input_features
,
THFloatTensor
*
d_output_features
,
...
@@ -502,7 +502,7 @@ void scn_cpu_float3MaxPooling_updateGradInput(
...
@@ -502,7 +502,7 @@ void scn_cpu_float3MaxPooling_updateGradInput(
// SparseToDense
// SparseToDense
void
scn_cpu_float3SparseToDense_updateOutput
(
void
scn_cpu_float3SparseToDense_updateOutput
(
THLongTensor
*
inputSize
,
void
**
m
,
THFloatTensor
*
input_features
,
THLongTensor
*
inputSize
,
void
**
m
,
THFloatTensor
*
input_features
,
THFloatTensor
*
output_features
,
void
*
rulesBuffer
);
THFloatTensor
*
output_features
,
void
*
rulesBuffer
,
long
nPlanes
);
void
scn_cpu_float3SparseToDense_updateGradInput
(
void
scn_cpu_float3SparseToDense_updateGradInput
(
THLongTensor
*
inputSize
,
void
**
m
,
THFloatTensor
*
input_features
,
THLongTensor
*
inputSize
,
void
**
m
,
THFloatTensor
*
input_features
,
THFloatTensor
*
d_input_features
,
THFloatTensor
*
d_output_features
,
THFloatTensor
*
d_input_features
,
THFloatTensor
*
d_output_features
,
...
@@ -579,7 +579,7 @@ void scn_cpu_float4MaxPooling_updateGradInput(
...
@@ -579,7 +579,7 @@ void scn_cpu_float4MaxPooling_updateGradInput(
// SparseToDense
// SparseToDense
void
scn_cpu_float4SparseToDense_updateOutput
(
void
scn_cpu_float4SparseToDense_updateOutput
(
THLongTensor
*
inputSize
,
void
**
m
,
THFloatTensor
*
input_features
,
THLongTensor
*
inputSize
,
void
**
m
,
THFloatTensor
*
input_features
,
THFloatTensor
*
output_features
,
void
*
rulesBuffer
);
THFloatTensor
*
output_features
,
void
*
rulesBuffer
,
long
nPlanes
);
void
scn_cpu_float4SparseToDense_updateGradInput
(
void
scn_cpu_float4SparseToDense_updateGradInput
(
THLongTensor
*
inputSize
,
void
**
m
,
THFloatTensor
*
input_features
,
THLongTensor
*
inputSize
,
void
**
m
,
THFloatTensor
*
input_features
,
THFloatTensor
*
d_input_features
,
THFloatTensor
*
d_output_features
,
THFloatTensor
*
d_input_features
,
THFloatTensor
*
d_output_features
,
...
@@ -656,7 +656,7 @@ void scn_cpu_float5MaxPooling_updateGradInput(
...
@@ -656,7 +656,7 @@ void scn_cpu_float5MaxPooling_updateGradInput(
// SparseToDense
// SparseToDense
void
scn_cpu_float5SparseToDense_updateOutput
(
void
scn_cpu_float5SparseToDense_updateOutput
(
THLongTensor
*
inputSize
,
void
**
m
,
THFloatTensor
*
input_features
,
THLongTensor
*
inputSize
,
void
**
m
,
THFloatTensor
*
input_features
,
THFloatTensor
*
output_features
,
void
*
rulesBuffer
);
THFloatTensor
*
output_features
,
void
*
rulesBuffer
,
long
nPlanes
);
void
scn_cpu_float5SparseToDense_updateGradInput
(
void
scn_cpu_float5SparseToDense_updateGradInput
(
THLongTensor
*
inputSize
,
void
**
m
,
THFloatTensor
*
input_features
,
THLongTensor
*
inputSize
,
void
**
m
,
THFloatTensor
*
input_features
,
THFloatTensor
*
d_input_features
,
THFloatTensor
*
d_output_features
,
THFloatTensor
*
d_input_features
,
THFloatTensor
*
d_output_features
,
...
@@ -733,7 +733,7 @@ void scn_cpu_float6MaxPooling_updateGradInput(
...
@@ -733,7 +733,7 @@ void scn_cpu_float6MaxPooling_updateGradInput(
// SparseToDense
// SparseToDense
void
scn_cpu_float6SparseToDense_updateOutput
(
void
scn_cpu_float6SparseToDense_updateOutput
(
THLongTensor
*
inputSize
,
void
**
m
,
THFloatTensor
*
input_features
,
THLongTensor
*
inputSize
,
void
**
m
,
THFloatTensor
*
input_features
,
THFloatTensor
*
output_features
,
void
*
rulesBuffer
);
THFloatTensor
*
output_features
,
void
*
rulesBuffer
,
long
nPlanes
);
void
scn_cpu_float6SparseToDense_updateGradInput
(
void
scn_cpu_float6SparseToDense_updateGradInput
(
THLongTensor
*
inputSize
,
void
**
m
,
THFloatTensor
*
input_features
,
THLongTensor
*
inputSize
,
void
**
m
,
THFloatTensor
*
input_features
,
THFloatTensor
*
d_input_features
,
THFloatTensor
*
d_output_features
,
THFloatTensor
*
d_input_features
,
THFloatTensor
*
d_output_features
,
...
@@ -810,7 +810,7 @@ void scn_cpu_float7MaxPooling_updateGradInput(
...
@@ -810,7 +810,7 @@ void scn_cpu_float7MaxPooling_updateGradInput(
// SparseToDense
// SparseToDense
void
scn_cpu_float7SparseToDense_updateOutput
(
void
scn_cpu_float7SparseToDense_updateOutput
(
THLongTensor
*
inputSize
,
void
**
m
,
THFloatTensor
*
input_features
,
THLongTensor
*
inputSize
,
void
**
m
,
THFloatTensor
*
input_features
,
THFloatTensor
*
output_features
,
void
*
rulesBuffer
);
THFloatTensor
*
output_features
,
void
*
rulesBuffer
,
long
nPlanes
);
void
scn_cpu_float7SparseToDense_updateGradInput
(
void
scn_cpu_float7SparseToDense_updateGradInput
(
THLongTensor
*
inputSize
,
void
**
m
,
THFloatTensor
*
input_features
,
THLongTensor
*
inputSize
,
void
**
m
,
THFloatTensor
*
input_features
,
THFloatTensor
*
d_input_features
,
THFloatTensor
*
d_output_features
,
THFloatTensor
*
d_input_features
,
THFloatTensor
*
d_output_features
,
...
@@ -887,7 +887,7 @@ void scn_cpu_float8MaxPooling_updateGradInput(
...
@@ -887,7 +887,7 @@ void scn_cpu_float8MaxPooling_updateGradInput(
// SparseToDense
// SparseToDense
void
scn_cpu_float8SparseToDense_updateOutput
(
void
scn_cpu_float8SparseToDense_updateOutput
(
THLongTensor
*
inputSize
,
void
**
m
,
THFloatTensor
*
input_features
,
THLongTensor
*
inputSize
,
void
**
m
,
THFloatTensor
*
input_features
,
THFloatTensor
*
output_features
,
void
*
rulesBuffer
);
THFloatTensor
*
output_features
,
void
*
rulesBuffer
,
long
nPlanes
);
void
scn_cpu_float8SparseToDense_updateGradInput
(
void
scn_cpu_float8SparseToDense_updateGradInput
(
THLongTensor
*
inputSize
,
void
**
m
,
THFloatTensor
*
input_features
,
THLongTensor
*
inputSize
,
void
**
m
,
THFloatTensor
*
input_features
,
THFloatTensor
*
d_input_features
,
THFloatTensor
*
d_output_features
,
THFloatTensor
*
d_input_features
,
THFloatTensor
*
d_output_features
,
...
@@ -964,7 +964,7 @@ void scn_cpu_float9MaxPooling_updateGradInput(
...
@@ -964,7 +964,7 @@ void scn_cpu_float9MaxPooling_updateGradInput(
// SparseToDense
// SparseToDense
void
scn_cpu_float9SparseToDense_updateOutput
(
void
scn_cpu_float9SparseToDense_updateOutput
(
THLongTensor
*
inputSize
,
void
**
m
,
THFloatTensor
*
input_features
,
THLongTensor
*
inputSize
,
void
**
m
,
THFloatTensor
*
input_features
,
THFloatTensor
*
output_features
,
void
*
rulesBuffer
);
THFloatTensor
*
output_features
,
void
*
rulesBuffer
,
long
nPlanes
);
void
scn_cpu_float9SparseToDense_updateGradInput
(
void
scn_cpu_float9SparseToDense_updateGradInput
(
THLongTensor
*
inputSize
,
void
**
m
,
THFloatTensor
*
input_features
,
THLongTensor
*
inputSize
,
void
**
m
,
THFloatTensor
*
input_features
,
THFloatTensor
*
d_input_features
,
THFloatTensor
*
d_output_features
,
THFloatTensor
*
d_input_features
,
THFloatTensor
*
d_output_features
,
...
@@ -1041,7 +1041,8 @@ void scn_cpu_float10MaxPooling_updateGradInput(
...
@@ -1041,7 +1041,8 @@ void scn_cpu_float10MaxPooling_updateGradInput(
// SparseToDense
// SparseToDense
void
scn_cpu_float10SparseToDense_updateOutput
(
void
scn_cpu_float10SparseToDense_updateOutput
(
THLongTensor
*
inputSize
,
void
**
m
,
THFloatTensor
*
input_features
,
THLongTensor
*
inputSize
,
void
**
m
,
THFloatTensor
*
input_features
,
THFloatTensor
*
output_features
,
void
*
rulesBuffer
);
THFloatTensor
*
output_features
,
void
*
rulesBuffer
,
long
nPlanes
);
void
scn_cpu_float10SparseToDense_updateGradInput
(
void
scn_cpu_float10SparseToDense_updateGradInput
(
THLongTensor
*
inputSize
,
void
**
m
,
THFloatTensor
*
input_features
,
THLongTensor
*
inputSize
,
void
**
m
,
THFloatTensor
*
input_features
,
THFloatTensor
*
d_input_features
,
THFloatTensor
*
d_output_features
,
THFloatTensor
*
d_input_features
,
THFloatTensor
*
d_output_features
,
...
@@ -1118,7 +1119,7 @@ void scn_cpu_double1MaxPooling_updateGradInput(
...
@@ -1118,7 +1119,7 @@ void scn_cpu_double1MaxPooling_updateGradInput(
// SparseToDense
// SparseToDense
void
scn_cpu_double1SparseToDense_updateOutput
(
void
scn_cpu_double1SparseToDense_updateOutput
(
THLongTensor
*
inputSize
,
void
**
m
,
THDoubleTensor
*
input_features
,
THLongTensor
*
inputSize
,
void
**
m
,
THDoubleTensor
*
input_features
,
THDoubleTensor
*
output_features
,
void
*
rulesBuffer
);
THDoubleTensor
*
output_features
,
void
*
rulesBuffer
,
long
nPlanes
);
void
scn_cpu_double1SparseToDense_updateGradInput
(
void
scn_cpu_double1SparseToDense_updateGradInput
(
THLongTensor
*
inputSize
,
void
**
m
,
THDoubleTensor
*
input_features
,
THLongTensor
*
inputSize
,
void
**
m
,
THDoubleTensor
*
input_features
,
THDoubleTensor
*
d_input_features
,
THDoubleTensor
*
d_output_features
,
THDoubleTensor
*
d_input_features
,
THDoubleTensor
*
d_output_features
,
...
@@ -1195,7 +1196,7 @@ void scn_cpu_double2MaxPooling_updateGradInput(
...
@@ -1195,7 +1196,7 @@ void scn_cpu_double2MaxPooling_updateGradInput(
// SparseToDense
// SparseToDense
void
scn_cpu_double2SparseToDense_updateOutput
(
void
scn_cpu_double2SparseToDense_updateOutput
(
THLongTensor
*
inputSize
,
void
**
m
,
THDoubleTensor
*
input_features
,
THLongTensor
*
inputSize
,
void
**
m
,
THDoubleTensor
*
input_features
,
THDoubleTensor
*
output_features
,
void
*
rulesBuffer
);
THDoubleTensor
*
output_features
,
void
*
rulesBuffer
,
long
nPlanes
);
void
scn_cpu_double2SparseToDense_updateGradInput
(
void
scn_cpu_double2SparseToDense_updateGradInput
(
THLongTensor
*
inputSize
,
void
**
m
,
THDoubleTensor
*
input_features
,
THLongTensor
*
inputSize
,
void
**
m
,
THDoubleTensor
*
input_features
,
THDoubleTensor
*
d_input_features
,
THDoubleTensor
*
d_output_features
,
THDoubleTensor
*
d_input_features
,
THDoubleTensor
*
d_output_features
,
...
@@ -1272,7 +1273,7 @@ void scn_cpu_double3MaxPooling_updateGradInput(
...
@@ -1272,7 +1273,7 @@ void scn_cpu_double3MaxPooling_updateGradInput(
// SparseToDense
// SparseToDense
void
scn_cpu_double3SparseToDense_updateOutput
(
void
scn_cpu_double3SparseToDense_updateOutput
(
THLongTensor
*
inputSize
,
void
**
m
,
THDoubleTensor
*
input_features
,
THLongTensor
*
inputSize
,
void
**
m
,
THDoubleTensor
*
input_features
,
THDoubleTensor
*
output_features
,
void
*
rulesBuffer
);
THDoubleTensor
*
output_features
,
void
*
rulesBuffer
,
long
nPlanes
);
void
scn_cpu_double3SparseToDense_updateGradInput
(
void
scn_cpu_double3SparseToDense_updateGradInput
(
THLongTensor
*
inputSize
,
void
**
m
,
THDoubleTensor
*
input_features
,
THLongTensor
*
inputSize
,
void
**
m
,
THDoubleTensor
*
input_features
,
THDoubleTensor
*
d_input_features
,
THDoubleTensor
*
d_output_features
,
THDoubleTensor
*
d_input_features
,
THDoubleTensor
*
d_output_features
,
...
@@ -1349,7 +1350,7 @@ void scn_cpu_double4MaxPooling_updateGradInput(
...
@@ -1349,7 +1350,7 @@ void scn_cpu_double4MaxPooling_updateGradInput(
// SparseToDense
// SparseToDense
void
scn_cpu_double4SparseToDense_updateOutput
(
void
scn_cpu_double4SparseToDense_updateOutput
(
THLongTensor
*
inputSize
,
void
**
m
,
THDoubleTensor
*
input_features
,
THLongTensor
*
inputSize
,
void
**
m
,
THDoubleTensor
*
input_features
,
THDoubleTensor
*
output_features
,
void
*
rulesBuffer
);
THDoubleTensor
*
output_features
,
void
*
rulesBuffer
,
long
nPlanes
);
void
scn_cpu_double4SparseToDense_updateGradInput
(
void
scn_cpu_double4SparseToDense_updateGradInput
(
THLongTensor
*
inputSize
,
void
**
m
,
THDoubleTensor
*
input_features
,
THLongTensor
*
inputSize
,
void
**
m
,
THDoubleTensor
*
input_features
,
THDoubleTensor
*
d_input_features
,
THDoubleTensor
*
d_output_features
,
THDoubleTensor
*
d_input_features
,
THDoubleTensor
*
d_output_features
,
...
@@ -1426,7 +1427,7 @@ void scn_cpu_double5MaxPooling_updateGradInput(
...
@@ -1426,7 +1427,7 @@ void scn_cpu_double5MaxPooling_updateGradInput(
// SparseToDense
// SparseToDense
void
scn_cpu_double5SparseToDense_updateOutput
(
void
scn_cpu_double5SparseToDense_updateOutput
(
THLongTensor
*
inputSize
,
void
**
m
,
THDoubleTensor
*
input_features
,
THLongTensor
*
inputSize
,
void
**
m
,
THDoubleTensor
*
input_features
,
THDoubleTensor
*
output_features
,
void
*
rulesBuffer
);
THDoubleTensor
*
output_features
,
void
*
rulesBuffer
,
long
nPlanes
);
void
scn_cpu_double5SparseToDense_updateGradInput
(
void
scn_cpu_double5SparseToDense_updateGradInput
(
THLongTensor
*
inputSize
,
void
**
m
,
THDoubleTensor
*
input_features
,
THLongTensor
*
inputSize
,
void
**
m
,
THDoubleTensor
*
input_features
,
THDoubleTensor
*
d_input_features
,
THDoubleTensor
*
d_output_features
,
THDoubleTensor
*
d_input_features
,
THDoubleTensor
*
d_output_features
,
...
@@ -1503,7 +1504,7 @@ void scn_cpu_double6MaxPooling_updateGradInput(
...
@@ -1503,7 +1504,7 @@ void scn_cpu_double6MaxPooling_updateGradInput(
// SparseToDense
// SparseToDense
void
scn_cpu_double6SparseToDense_updateOutput
(
void
scn_cpu_double6SparseToDense_updateOutput
(
THLongTensor
*
inputSize
,
void
**
m
,
THDoubleTensor
*
input_features
,
THLongTensor
*
inputSize
,
void
**
m
,
THDoubleTensor
*
input_features
,
THDoubleTensor
*
output_features
,
void
*
rulesBuffer
);
THDoubleTensor
*
output_features
,
void
*
rulesBuffer
,
long
nPlanes
);
void
scn_cpu_double6SparseToDense_updateGradInput
(
void
scn_cpu_double6SparseToDense_updateGradInput
(
THLongTensor
*
inputSize
,
void
**
m
,
THDoubleTensor
*
input_features
,
THLongTensor
*
inputSize
,
void
**
m
,
THDoubleTensor
*
input_features
,
THDoubleTensor
*
d_input_features
,
THDoubleTensor
*
d_output_features
,
THDoubleTensor
*
d_input_features
,
THDoubleTensor
*
d_output_features
,
...
@@ -1580,7 +1581,7 @@ void scn_cpu_double7MaxPooling_updateGradInput(
...
@@ -1580,7 +1581,7 @@ void scn_cpu_double7MaxPooling_updateGradInput(
// SparseToDense
// SparseToDense
void
scn_cpu_double7SparseToDense_updateOutput
(
void
scn_cpu_double7SparseToDense_updateOutput
(
THLongTensor
*
inputSize
,
void
**
m
,
THDoubleTensor
*
input_features
,
THLongTensor
*
inputSize
,
void
**
m
,
THDoubleTensor
*
input_features
,
THDoubleTensor
*
output_features
,
void
*
rulesBuffer
);
THDoubleTensor
*
output_features
,
void
*
rulesBuffer
,
long
nPlanes
);
void
scn_cpu_double7SparseToDense_updateGradInput
(
void
scn_cpu_double7SparseToDense_updateGradInput
(
THLongTensor
*
inputSize
,
void
**
m
,
THDoubleTensor
*
input_features
,
THLongTensor
*
inputSize
,
void
**
m
,
THDoubleTensor
*
input_features
,
THDoubleTensor
*
d_input_features
,
THDoubleTensor
*
d_output_features
,
THDoubleTensor
*
d_input_features
,
THDoubleTensor
*
d_output_features
,
...
@@ -1657,7 +1658,7 @@ void scn_cpu_double8MaxPooling_updateGradInput(
...
@@ -1657,7 +1658,7 @@ void scn_cpu_double8MaxPooling_updateGradInput(
// SparseToDense
// SparseToDense
void
scn_cpu_double8SparseToDense_updateOutput
(
void
scn_cpu_double8SparseToDense_updateOutput
(
THLongTensor
*
inputSize
,
void
**
m
,
THDoubleTensor
*
input_features
,
THLongTensor
*
inputSize
,
void
**
m
,
THDoubleTensor
*
input_features
,
THDoubleTensor
*
output_features
,
void
*
rulesBuffer
);
THDoubleTensor
*
output_features
,
void
*
rulesBuffer
,
long
nPlanes
);
void
scn_cpu_double8SparseToDense_updateGradInput
(
void
scn_cpu_double8SparseToDense_updateGradInput
(
THLongTensor
*
inputSize
,
void
**
m
,
THDoubleTensor
*
input_features
,
THLongTensor
*
inputSize
,
void
**
m
,
THDoubleTensor
*
input_features
,
THDoubleTensor
*
d_input_features
,
THDoubleTensor
*
d_output_features
,
THDoubleTensor
*
d_input_features
,
THDoubleTensor
*
d_output_features
,
...
@@ -1734,7 +1735,7 @@ void scn_cpu_double9MaxPooling_updateGradInput(
...
@@ -1734,7 +1735,7 @@ void scn_cpu_double9MaxPooling_updateGradInput(
// SparseToDense
// SparseToDense
void
scn_cpu_double9SparseToDense_updateOutput
(
void
scn_cpu_double9SparseToDense_updateOutput
(
THLongTensor
*
inputSize
,
void
**
m
,
THDoubleTensor
*
input_features
,
THLongTensor
*
inputSize
,
void
**
m
,
THDoubleTensor
*
input_features
,
THDoubleTensor
*
output_features
,
void
*
rulesBuffer
);
THDoubleTensor
*
output_features
,
void
*
rulesBuffer
,
long
nPlanes
);
void
scn_cpu_double9SparseToDense_updateGradInput
(
void
scn_cpu_double9SparseToDense_updateGradInput
(
THLongTensor
*
inputSize
,
void
**
m
,
THDoubleTensor
*
input_features
,
THLongTensor
*
inputSize
,
void
**
m
,
THDoubleTensor
*
input_features
,
THDoubleTensor
*
d_input_features
,
THDoubleTensor
*
d_output_features
,
THDoubleTensor
*
d_input_features
,
THDoubleTensor
*
d_output_features
,
...
@@ -1811,7 +1812,7 @@ void scn_cpu_double10MaxPooling_updateGradInput(
...
@@ -1811,7 +1812,7 @@ void scn_cpu_double10MaxPooling_updateGradInput(
// SparseToDense
// SparseToDense
void
scn_cpu_double10SparseToDense_updateOutput
(
void
scn_cpu_double10SparseToDense_updateOutput
(
THLongTensor
*
inputSize
,
void
**
m
,
THDoubleTensor
*
input_features
,
THLongTensor
*
inputSize
,
void
**
m
,
THDoubleTensor
*
input_features
,
THDoubleTensor
*
output_features
,
void
*
rulesBuffer
);
THDoubleTensor
*
output_features
,
void
*
rulesBuffer
,
long
nPlanes
);
void
scn_cpu_double10SparseToDense_updateGradInput
(
void
scn_cpu_double10SparseToDense_updateGradInput
(
THLongTensor
*
inputSize
,
void
**
m
,
THDoubleTensor
*
input_features
,
THLongTensor
*
inputSize
,
void
**
m
,
THDoubleTensor
*
input_features
,
THDoubleTensor
*
d_input_features
,
THDoubleTensor
*
d_output_features
,
THDoubleTensor
*
d_input_features
,
THDoubleTensor
*
d_output_features
,
...
...
PyTorch/sparseconvnet/SCN/header_gpu.h
View file @
3ff930b7
...
@@ -120,7 +120,8 @@ void scn_gpu_float1MaxPooling_updateGradInput(
...
@@ -120,7 +120,8 @@ void scn_gpu_float1MaxPooling_updateGradInput(
// SparseToDense
// SparseToDense
void
scn_gpu_float1SparseToDense_updateOutput
(
void
scn_gpu_float1SparseToDense_updateOutput
(
THLongTensor
*
inputSize
,
void
**
m
,
THCudaTensor
*
input_features
,
THLongTensor
*
inputSize
,
void
**
m
,
THCudaTensor
*
input_features
,
THCudaTensor
*
output_features
,
THCudaIntTensor
*
rulesBuffer
);
THCudaTensor
*
output_features
,
THCudaIntTensor
*
rulesBuffer
,
long
nPlanes
);
void
scn_gpu_float1SparseToDense_updateGradInput
(
void
scn_gpu_float1SparseToDense_updateGradInput
(
THLongTensor
*
inputSize
,
void
**
m
,
THCudaTensor
*
input_features
,
THLongTensor
*
inputSize
,
void
**
m
,
THCudaTensor
*
input_features
,
THCudaTensor
*
d_input_features
,
THCudaTensor
*
d_output_features
,
THCudaTensor
*
d_input_features
,
THCudaTensor
*
d_output_features
,
...
@@ -197,7 +198,7 @@ void scn_gpu_float2MaxPooling_updateGradInput(
...
@@ -197,7 +198,7 @@ void scn_gpu_float2MaxPooling_updateGradInput(
// SparseToDense
// SparseToDense
void
scn_gpu_float2SparseToDense_updateOutput
(
void
scn_gpu_float2SparseToDense_updateOutput
(
THLongTensor
*
inputSize
,
void
**
m
,
THCudaTensor
*
input_features
,
THLongTensor
*
inputSize
,
void
**
m
,
THCudaTensor
*
input_features
,
THCudaTensor
*
output_features
,
THCudaIntTensor
*
rulesBuffer
);
THCudaTensor
*
output_features
,
THCudaIntTensor
*
rulesBuffer
,
long
nPlanes
);
void
scn_gpu_float2SparseToDense_updateGradInput
(
void
scn_gpu_float2SparseToDense_updateGradInput
(
THLongTensor
*
inputSize
,
void
**
m
,
THCudaTensor
*
input_features
,
THLongTensor
*
inputSize
,
void
**
m
,
THCudaTensor
*
input_features
,
THCudaTensor
*
d_input_features
,
THCudaTensor
*
d_output_features
,
THCudaTensor
*
d_input_features
,
THCudaTensor
*
d_output_features
,
...
@@ -274,7 +275,7 @@ void scn_gpu_float3MaxPooling_updateGradInput(
...
@@ -274,7 +275,7 @@ void scn_gpu_float3MaxPooling_updateGradInput(
// SparseToDense
// SparseToDense
void
scn_gpu_float3SparseToDense_updateOutput
(
void
scn_gpu_float3SparseToDense_updateOutput
(
THLongTensor
*
inputSize
,
void
**
m
,
THCudaTensor
*
input_features
,
THLongTensor
*
inputSize
,
void
**
m
,
THCudaTensor
*
input_features
,
THCudaTensor
*
output_features
,
THCudaIntTensor
*
rulesBuffer
);
THCudaTensor
*
output_features
,
THCudaIntTensor
*
rulesBuffer
,
long
nPlanes
);
void
scn_gpu_float3SparseToDense_updateGradInput
(
void
scn_gpu_float3SparseToDense_updateGradInput
(
THLongTensor
*
inputSize
,
void
**
m
,
THCudaTensor
*
input_features
,
THLongTensor
*
inputSize
,
void
**
m
,
THCudaTensor
*
input_features
,
THCudaTensor
*
d_input_features
,
THCudaTensor
*
d_output_features
,
THCudaTensor
*
d_input_features
,
THCudaTensor
*
d_output_features
,
...
@@ -351,7 +352,7 @@ void scn_gpu_float4MaxPooling_updateGradInput(
...
@@ -351,7 +352,7 @@ void scn_gpu_float4MaxPooling_updateGradInput(
// SparseToDense
// SparseToDense
void
scn_gpu_float4SparseToDense_updateOutput
(
void
scn_gpu_float4SparseToDense_updateOutput
(
THLongTensor
*
inputSize
,
void
**
m
,
THCudaTensor
*
input_features
,
THLongTensor
*
inputSize
,
void
**
m
,
THCudaTensor
*
input_features
,
THCudaTensor
*
output_features
,
THCudaIntTensor
*
rulesBuffer
);
THCudaTensor
*
output_features
,
THCudaIntTensor
*
rulesBuffer
,
long
nPlanes
);
void
scn_gpu_float4SparseToDense_updateGradInput
(
void
scn_gpu_float4SparseToDense_updateGradInput
(
THLongTensor
*
inputSize
,
void
**
m
,
THCudaTensor
*
input_features
,
THLongTensor
*
inputSize
,
void
**
m
,
THCudaTensor
*
input_features
,
THCudaTensor
*
d_input_features
,
THCudaTensor
*
d_output_features
,
THCudaTensor
*
d_input_features
,
THCudaTensor
*
d_output_features
,
...
@@ -428,7 +429,7 @@ void scn_gpu_float5MaxPooling_updateGradInput(
...
@@ -428,7 +429,7 @@ void scn_gpu_float5MaxPooling_updateGradInput(
// SparseToDense
// SparseToDense
void
scn_gpu_float5SparseToDense_updateOutput
(
void
scn_gpu_float5SparseToDense_updateOutput
(
THLongTensor
*
inputSize
,
void
**
m
,
THCudaTensor
*
input_features
,
THLongTensor
*
inputSize
,
void
**
m
,
THCudaTensor
*
input_features
,
THCudaTensor
*
output_features
,
THCudaIntTensor
*
rulesBuffer
);
THCudaTensor
*
output_features
,
THCudaIntTensor
*
rulesBuffer
,
long
nPlanes
);
void
scn_gpu_float5SparseToDense_updateGradInput
(
void
scn_gpu_float5SparseToDense_updateGradInput
(
THLongTensor
*
inputSize
,
void
**
m
,
THCudaTensor
*
input_features
,
THLongTensor
*
inputSize
,
void
**
m
,
THCudaTensor
*
input_features
,
THCudaTensor
*
d_input_features
,
THCudaTensor
*
d_output_features
,
THCudaTensor
*
d_input_features
,
THCudaTensor
*
d_output_features
,
...
@@ -505,7 +506,7 @@ void scn_gpu_float6MaxPooling_updateGradInput(
...
@@ -505,7 +506,7 @@ void scn_gpu_float6MaxPooling_updateGradInput(
// SparseToDense
// SparseToDense
void
scn_gpu_float6SparseToDense_updateOutput
(
void
scn_gpu_float6SparseToDense_updateOutput
(
THLongTensor
*
inputSize
,
void
**
m
,
THCudaTensor
*
input_features
,
THLongTensor
*
inputSize
,
void
**
m
,
THCudaTensor
*
input_features
,
THCudaTensor
*
output_features
,
THCudaIntTensor
*
rulesBuffer
);
THCudaTensor
*
output_features
,
THCudaIntTensor
*
rulesBuffer
,
long
nPlanes
);
void
scn_gpu_float6SparseToDense_updateGradInput
(
void
scn_gpu_float6SparseToDense_updateGradInput
(
THLongTensor
*
inputSize
,
void
**
m
,
THCudaTensor
*
input_features
,
THLongTensor
*
inputSize
,
void
**
m
,
THCudaTensor
*
input_features
,
THCudaTensor
*
d_input_features
,
THCudaTensor
*
d_output_features
,
THCudaTensor
*
d_input_features
,
THCudaTensor
*
d_output_features
,
...
@@ -582,7 +583,7 @@ void scn_gpu_float7MaxPooling_updateGradInput(
...
@@ -582,7 +583,7 @@ void scn_gpu_float7MaxPooling_updateGradInput(
// SparseToDense
// SparseToDense
void
scn_gpu_float7SparseToDense_updateOutput
(
void
scn_gpu_float7SparseToDense_updateOutput
(
THLongTensor
*
inputSize
,
void
**
m
,
THCudaTensor
*
input_features
,
THLongTensor
*
inputSize
,
void
**
m
,
THCudaTensor
*
input_features
,
THCudaTensor
*
output_features
,
THCudaIntTensor
*
rulesBuffer
);
THCudaTensor
*
output_features
,
THCudaIntTensor
*
rulesBuffer
,
long
nPlanes
);
void
scn_gpu_float7SparseToDense_updateGradInput
(
void
scn_gpu_float7SparseToDense_updateGradInput
(
THLongTensor
*
inputSize
,
void
**
m
,
THCudaTensor
*
input_features
,
THLongTensor
*
inputSize
,
void
**
m
,
THCudaTensor
*
input_features
,
THCudaTensor
*
d_input_features
,
THCudaTensor
*
d_output_features
,
THCudaTensor
*
d_input_features
,
THCudaTensor
*
d_output_features
,
...
@@ -659,7 +660,7 @@ void scn_gpu_float8MaxPooling_updateGradInput(
...
@@ -659,7 +660,7 @@ void scn_gpu_float8MaxPooling_updateGradInput(
// SparseToDense
// SparseToDense
void
scn_gpu_float8SparseToDense_updateOutput
(
void
scn_gpu_float8SparseToDense_updateOutput
(
THLongTensor
*
inputSize
,
void
**
m
,
THCudaTensor
*
input_features
,
THLongTensor
*
inputSize
,
void
**
m
,
THCudaTensor
*
input_features
,
THCudaTensor
*
output_features
,
THCudaIntTensor
*
rulesBuffer
);
THCudaTensor
*
output_features
,
THCudaIntTensor
*
rulesBuffer
,
long
nPlanes
);
void
scn_gpu_float8SparseToDense_updateGradInput
(
void
scn_gpu_float8SparseToDense_updateGradInput
(
THLongTensor
*
inputSize
,
void
**
m
,
THCudaTensor
*
input_features
,
THLongTensor
*
inputSize
,
void
**
m
,
THCudaTensor
*
input_features
,
THCudaTensor
*
d_input_features
,
THCudaTensor
*
d_output_features
,
THCudaTensor
*
d_input_features
,
THCudaTensor
*
d_output_features
,
...
@@ -736,7 +737,7 @@ void scn_gpu_float9MaxPooling_updateGradInput(
...
@@ -736,7 +737,7 @@ void scn_gpu_float9MaxPooling_updateGradInput(
// SparseToDense
// SparseToDense
void
scn_gpu_float9SparseToDense_updateOutput
(
void
scn_gpu_float9SparseToDense_updateOutput
(
THLongTensor
*
inputSize
,
void
**
m
,
THCudaTensor
*
input_features
,
THLongTensor
*
inputSize
,
void
**
m
,
THCudaTensor
*
input_features
,
THCudaTensor
*
output_features
,
THCudaIntTensor
*
rulesBuffer
);
THCudaTensor
*
output_features
,
THCudaIntTensor
*
rulesBuffer
,
long
nPlanes
);
void
scn_gpu_float9SparseToDense_updateGradInput
(
void
scn_gpu_float9SparseToDense_updateGradInput
(
THLongTensor
*
inputSize
,
void
**
m
,
THCudaTensor
*
input_features
,
THLongTensor
*
inputSize
,
void
**
m
,
THCudaTensor
*
input_features
,
THCudaTensor
*
d_input_features
,
THCudaTensor
*
d_output_features
,
THCudaTensor
*
d_input_features
,
THCudaTensor
*
d_output_features
,
...
@@ -813,7 +814,7 @@ void scn_gpu_float10MaxPooling_updateGradInput(
...
@@ -813,7 +814,7 @@ void scn_gpu_float10MaxPooling_updateGradInput(
// SparseToDense
// SparseToDense
void
scn_gpu_float10SparseToDense_updateOutput
(
void
scn_gpu_float10SparseToDense_updateOutput
(
THLongTensor
*
inputSize
,
void
**
m
,
THCudaTensor
*
input_features
,
THLongTensor
*
inputSize
,
void
**
m
,
THCudaTensor
*
input_features
,
THCudaTensor
*
output_features
,
THCudaIntTensor
*
rulesBuffer
);
THCudaTensor
*
output_features
,
THCudaIntTensor
*
rulesBuffer
,
long
nPlanes
);
void
scn_gpu_float10SparseToDense_updateGradInput
(
void
scn_gpu_float10SparseToDense_updateGradInput
(
THLongTensor
*
inputSize
,
void
**
m
,
THCudaTensor
*
input_features
,
THLongTensor
*
inputSize
,
void
**
m
,
THCudaTensor
*
input_features
,
THCudaTensor
*
d_input_features
,
THCudaTensor
*
d_output_features
,
THCudaTensor
*
d_input_features
,
THCudaTensor
*
d_output_features
,
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment