Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
SparseConvNet
Commits
2c4ed608
Commit
2c4ed608
authored
Jun 20, 2018
by
Benjamin Thomas Graham
Browse files
Goodbye THNN. Hello ATen!
parent
6d4475db
Changes
145
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
0 additions
and
2134 deletions
+0
-2134
sparseconvnet/SCN/generic/CPU/Deconvolution.h
sparseconvnet/SCN/generic/CPU/Deconvolution.h
+0
-128
sparseconvnet/SCN/generic/CPU/IOLayers.cpp
sparseconvnet/SCN/generic/CPU/IOLayers.cpp
+0
-181
sparseconvnet/SCN/generic/CPU/LeakyReLU.cpp
sparseconvnet/SCN/generic/CPU/LeakyReLU.cpp
+0
-37
sparseconvnet/SCN/generic/CPU/MaxPooling.cpp
sparseconvnet/SCN/generic/CPU/MaxPooling.cpp
+0
-110
sparseconvnet/SCN/generic/CPU/NetworkInNetwork.cpp
sparseconvnet/SCN/generic/CPU/NetworkInNetwork.cpp
+0
-124
sparseconvnet/SCN/generic/CPU/NetworkInNetwork.h
sparseconvnet/SCN/generic/CPU/NetworkInNetwork.h
+0
-128
sparseconvnet/SCN/generic/CPU/SparseToDense.cpp
sparseconvnet/SCN/generic/CPU/SparseToDense.cpp
+0
-61
sparseconvnet/SCN/generic/CPU/THGenerateDimFloatTypes.h
sparseconvnet/SCN/generic/CPU/THGenerateDimFloatTypes.h
+0
-63
sparseconvnet/SCN/generic/CPU/THGenerateFloatTypes.h
sparseconvnet/SCN/generic/CPU/THGenerateFloatTypes.h
+0
-37
sparseconvnet/SCN/generic/CPU/UnPooling.cpp
sparseconvnet/SCN/generic/CPU/UnPooling.cpp
+0
-60
sparseconvnet/SCN/generic/GPU/ActivePooling.cu
sparseconvnet/SCN/generic/GPU/ActivePooling.cu
+0
-73
sparseconvnet/SCN/generic/GPU/AffineReluTrivialConvolution.cu
...seconvnet/SCN/generic/GPU/AffineReluTrivialConvolution.cu
+0
-50
sparseconvnet/SCN/generic/GPU/AveragePooling.cu
sparseconvnet/SCN/generic/GPU/AveragePooling.cu
+0
-58
sparseconvnet/SCN/generic/GPU/BatchNormalization.cu
sparseconvnet/SCN/generic/GPU/BatchNormalization.cu
+0
-94
sparseconvnet/SCN/generic/GPU/Convolution.cu
sparseconvnet/SCN/generic/GPU/Convolution.cu
+0
-313
sparseconvnet/SCN/generic/GPU/Deconvolution.cu
sparseconvnet/SCN/generic/GPU/Deconvolution.cu
+0
-87
sparseconvnet/SCN/generic/GPU/IOLayers.cu
sparseconvnet/SCN/generic/GPU/IOLayers.cu
+0
-250
sparseconvnet/SCN/generic/GPU/LeakyReLU.cu
sparseconvnet/SCN/generic/GPU/LeakyReLU.cu
+0
-36
sparseconvnet/SCN/generic/GPU/MaxPooling.cu
sparseconvnet/SCN/generic/GPU/MaxPooling.cu
+0
-103
sparseconvnet/SCN/generic/GPU/NetworkInNetwork.cu
sparseconvnet/SCN/generic/GPU/NetworkInNetwork.cu
+0
-141
No files found.
sparseconvnet/SCN/generic/CPU/Deconvolution.h
deleted
100644 → 0
View file @
6d4475db
// Copyright 2016-present, Facebook, Inc.
// All rights reserved.
//
// This source code is licensed under the license found in the
// LICENSE file in the root directory of this source tree.
#ifndef CPU_Deconvolution_H
#define CPU_Deconvolution_H
#include "../SparseConvNet.h"
#include <cstring>
// buffer must have size >= nHot * (nIn+nOut)
template
<
typename
T
>
void
Deconvolution_ForwardPass
(
T
*
input_features
,
uInt
input_nPlanes
,
uInt
input_nPLANES
,
T
*
output_features
,
uInt
output_nPlanes
,
uInt
output_nPLANES
,
T
*
weight
,
T
*
bias
,
RuleBook
&
rules
,
uInt
output_nActive
,
void
(
*
gemm
)(
char
transa
,
char
transb
,
long
m
,
long
n
,
long
k
,
T
alpha
,
T
*
a
,
long
lda
,
T
*
b
,
long
ldb
,
T
beta
,
T
*
c
,
long
ldc
))
{
if
(
bias
!=
nullptr
)
// Set bias
for
(
uInt
row
=
0
;
row
<
output_nActive
;
row
++
)
for
(
uInt
column
=
0
;
column
<
output_nPlanes
;
column
++
)
output_features
[
row
*
output_nPLANES
+
column
]
=
bias
[
column
];
std
::
vector
<
T
>
input_buffer
,
output_buffer
;
for
(
auto
&
r
:
rules
)
{
uInt
nHot
=
r
.
size
()
/
2
;
input_buffer
.
resize
(
nHot
*
input_nPlanes
);
output_buffer
.
resize
(
nHot
*
output_nPlanes
);
for
(
uInt
row
=
0
;
row
<
nHot
;
row
++
)
std
::
memcpy
(
&
input_buffer
[
row
*
input_nPlanes
],
input_features
+
r
[
2
*
row
+
1
]
*
input_nPLANES
,
sizeof
(
T
)
*
input_nPlanes
);
// Do GEMM (note: gemm assumes column-major matrices)
// input_buffer is l*m (row-major)
// weight is m*r (row-major)
// output_buffer is l*r (row-major)
// buffer * weights -> output_buffers
(
*
gemm
)(
'n'
,
'n'
,
output_nPlanes
,
// r
nHot
,
// l
input_nPlanes
,
// m
1
,
// alpha
weight
,
output_nPlanes
,
// r
&
input_buffer
[
0
],
input_nPlanes
,
// m
0
,
// beta
&
output_buffer
[
0
],
output_nPlanes
// r
);
weight
+=
input_nPlanes
*
output_nPlanes
;
for
(
uInt
row
=
0
;
row
<
nHot
;
row
++
)
{
T
*
b
=
&
output_buffer
[
row
*
output_nPlanes
];
T
*
o
=
&
output_features
[
r
[
2
*
row
]
*
output_nPLANES
];
for
(
uInt
k
=
0
;
k
<
output_nPlanes
;
k
++
)
o
[
k
]
+=
b
[
k
];
}
}
}
template
<
typename
T
>
void
Deconvolution_BackwardPass
(
T
*
input_features
,
T
*
d_input_features
,
uInt
input_nPlanes
,
uInt
input_nPLANES
,
T
*
d_output_features
,
uInt
output_nPlanes
,
uInt
output_nPLANES
,
T
*
weight
,
T
*
d_weight
,
T
*
d_bias
,
RuleBook
&
rules
,
uInt
output_nActive
,
void
(
*
gemm
)(
char
transa
,
char
transb
,
long
m
,
long
n
,
long
k
,
T
alpha
,
T
*
a
,
long
lda
,
T
*
b
,
long
ldb
,
T
beta
,
T
*
c
,
long
ldc
))
{
if
(
d_bias
)
for
(
uInt
row
=
0
;
row
<
output_nActive
;
row
++
)
for
(
uInt
i
=
0
;
i
<
output_nPlanes
;
i
++
)
d_bias
[
i
]
+=
d_output_features
[
row
*
output_nPLANES
+
i
];
std
::
vector
<
T
>
input_buffer
,
output_buffer
;
for
(
auto
&
r
:
rules
)
{
uInt
nHot
=
r
.
size
()
/
2
;
input_buffer
.
resize
(
nHot
*
input_nPlanes
);
output_buffer
.
resize
(
nHot
*
output_nPlanes
);
for
(
uInt
row
=
0
;
row
<
nHot
;
row
++
)
std
::
memcpy
(
&
output_buffer
[
row
*
output_nPlanes
],
&
d_output_features
[
r
[
2
*
row
]
*
output_nPLANES
],
sizeof
(
T
)
*
output_nPlanes
);
// Do GEMM (note: gemm assumes column-major matrices)
// output_buffer is l*m (row-major)
// weights is r*m (row-major)
// input_buffer is l*r (row-major)
// output_buffer * T(weight) -> input_buffer
(
*
gemm
)(
't'
,
'n'
,
input_nPlanes
,
// r
nHot
,
// l
output_nPlanes
,
// m
1
,
// alpha
weight
,
output_nPlanes
,
// m
&
output_buffer
[
0
],
output_nPlanes
,
// m
0
,
// beta
&
input_buffer
[
0
],
input_nPlanes
// r
);
weight
+=
input_nPlanes
*
output_nPlanes
;
for
(
uInt
row
=
0
;
row
<
nHot
;
row
++
)
{
T
*
b
=
&
input_buffer
[
row
*
input_nPlanes
];
T
*
i
=
&
d_input_features
[
r
[
2
*
row
+
1
]
*
input_nPLANES
];
for
(
uInt
k
=
0
;
k
<
input_nPlanes
;
k
++
)
i
[
k
]
+=
b
[
k
];
}
for
(
uInt
row
=
0
;
row
<
nHot
;
row
++
)
std
::
memcpy
(
&
input_buffer
[
row
*
input_nPlanes
],
input_features
+
r
[
2
*
row
+
1
]
*
input_nPLANES
,
sizeof
(
T
)
*
input_nPlanes
);
// Do GEMM (note: gemm assumes column-major matrices)
// input_buffer is m*l (row-major)
// output_buffer is m*r (row-major)
// d_weights is l*r (row-major)
// T(input_buffer) * output_buffer -> d_weight
(
*
gemm
)(
'n'
,
't'
,
output_nPlanes
,
// r
input_nPlanes
,
// l
nHot
,
// m
1
,
// alpha
&
output_buffer
[
0
],
output_nPlanes
,
// r
&
input_buffer
[
0
],
input_nPlanes
,
// l
1
,
// beta
d_weight
,
output_nPlanes
// r
);
d_weight
+=
input_nPlanes
*
output_nPlanes
;
}
}
#endif
/* CPU_Deconvolution_H */
sparseconvnet/SCN/generic/CPU/IOLayers.cpp
deleted
100644 → 0
View file @
6d4475db
// Copyright 2016-present, Facebook, Inc.
// All rights reserved.
//
// This source code is licensed under the license found in the
// LICENSE file in the root directory of this source tree.
#ifndef TH_GENERIC_FILE_
#define TH_GENERIC_FILE_ "generic/CPU/IOLayers.cpp"
#else
#include "IOLayers.h"
// Build the input-layer rulebook from the coordinates, then either pass the
// features straight through (mode 0: coordinates already unique) or combine
// rows that share a coordinate via InputLayer_ForwardPass.
extern "C" void scn_DR_(InputLayer_updateOutput)(
    void **m, THLongTensor *spatialSize, THLongTensor *input_coords,
    THTensor *input_features, THTensor *output_features, long batchSize,
    long mode) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
  _m.inputLayer(spatialSize, input_coords, batchSize, mode);
  auto nPlanes = input_features->size[1];
  auto &rb = _m.inputLayerRuleBook;
  auto maxActive = rb[0][1];
  auto nRows = rb[0][3];
  if (mode == 0) {
    // Identity copy.
    THTensor_(resizeAs)(output_features, input_features);
    THTensor_(copy)(output_features, input_features);
  } else {
    THTensor_(resize2d)(output_features, *_m.inputNActive, nPlanes);
    THTensor_(zero)(output_features);
    // The last argument (mode == 4) toggles the kernel's averaging flag.
    InputLayer_ForwardPass<real>(THTensor_(data)(input_features),
                                 THTensor_(data)(output_features), nRows,
                                 maxActive, nPlanes, &rb[1][0], mode == 4);
  }
}
// Backward counterpart of InputLayer_updateOutput: routes output gradients
// back through the stored input-layer rulebook.
extern "C" void scn_DR_(InputLayer_updateGradInput)(
    void **m, THTensor *d_input_features, THTensor *d_output_features) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
  auto &rb = _m.inputLayerRuleBook;
  auto nPlanes = d_output_features->size[1];
  auto mode = rb[0][0];
  auto maxActive = rb[0][1];
  auto nRows = rb[0][3];
  if (mode == 0) {
    THTensor_(resizeAs)(d_input_features, d_output_features);
    THTensor_(copy)(d_input_features, d_output_features);
  } else {
    // rb[0][2] supplies the gradient tensor's row count (rulebook header).
    THTensor_(resize2d)(d_input_features, rb[0][2], nPlanes);
    THTensor_(zero)(d_input_features);
    InputLayer_BackwardPass<real>(THTensor_(data)(d_input_features),
                                  THTensor_(data)(d_output_features), nRows,
                                  maxActive, nPlanes, &rb[1][0], mode == 4);
  }
}
// Output layer forward pass: implemented with the input layer's *backward*
// kernel, mapping sparse features back to one row per original input point.
extern "C" void scn_DR_(OutputLayer_updateOutput)(
    void **m, THTensor *input_features, THTensor *output_features) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
  auto &rb = _m.inputLayerRuleBook;
  auto nPlanes = input_features->size[1];
  auto mode = rb[0][0];
  auto maxActive = rb[0][1];
  auto nRows = rb[0][3];
  if (mode == 0) {
    THTensor_(resizeAs)(output_features, input_features);
    THTensor_(copy)(output_features, input_features);
  } else {
    THTensor_(resize2d)(output_features, rb[0][2], nPlanes);
    THTensor_(zero)(output_features);
    // Note the swapped argument order relative to InputLayer_updateOutput:
    // the backward kernel scatters input_features into output_features.
    InputLayer_BackwardPass<real>(THTensor_(data)(output_features),
                                  THTensor_(data)(input_features), nRows,
                                  maxActive, nPlanes, &rb[1][0], false);
  }
}
// Output layer backward pass: gradients flow through the input layer's
// *forward* kernel (the transpose of OutputLayer_updateOutput's routing).
extern "C" void scn_DR_(OutputLayer_updateGradInput)(
    void **m, THTensor *d_input_features, THTensor *d_output_features) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
  auto &rb = _m.inputLayerRuleBook;
  auto nPlanes = d_output_features->size[1];
  auto mode = rb[0][0];
  auto maxActive = rb[0][1];
  auto nRows = rb[0][3];
  if (mode == 0) {
    THTensor_(resizeAs)(d_input_features, d_output_features);
    THTensor_(copy)(d_input_features, d_output_features);
  } else {
    THTensor_(resize2d)(d_input_features, nRows, nPlanes);
    THTensor_(zero)(d_input_features);
    InputLayer_ForwardPass<real>(THTensor_(data)(d_output_features),
                                 THTensor_(data)(d_input_features), nRows,
                                 maxActive, nPlanes, &rb[1][0], false);
  }
}
// Batch/length ("BL") input layer forward: builds the BL rulebook and
// flattens (batch, length, planes) features to (active, planes), pooling
// duplicate coordinates unless mode == 0.
extern "C" void scn_DR_(BLInputLayer_updateOutput)(
    void **m, THLongTensor *spatialSize, THLongTensor *input_coords,
    THTensor *input_features, THTensor *output_features, long mode) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
  _m.blLayer(spatialSize, input_coords, mode);
  auto nPlanes = input_features->size[2]; // features are 3-D here
  auto &rb = _m.blLayerRuleBook;
  auto maxActive = rb[0][1];
  auto nRows = rb[0][4];
  if (mode == 0) {
    // Copy, then reinterpret as a 2-D (active, planes) tensor.
    THTensor_(resizeAs)(output_features, input_features);
    THTensor_(copy)(output_features, input_features);
    THTensor_(resize2d)(output_features, *_m.inputNActive, nPlanes);
  } else {
    THTensor_(resize2d)(output_features, *_m.inputNActive, nPlanes);
    THTensor_(zero)(output_features);
    InputLayer_ForwardPass<real>(THTensor_(data)(input_features),
                                 THTensor_(data)(output_features), nRows,
                                 maxActive, nPlanes, &rb[1][0], mode == 4);
  }
}
// BL input layer backward: routes gradients back through the BL rulebook and
// restores the 3-D (rb[0][2], rb[0][3], planes) input shape.
extern "C" void scn_DR_(BLInputLayer_updateGradInput)(
    void **m, THTensor *d_input_features, THTensor *d_output_features) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
  auto &rb = _m.blLayerRuleBook;
  auto nPlanes = d_output_features->size[1];
  auto mode = rb[0][0];
  auto maxActive = rb[0][1];
  auto nRows = rb[0][4];
  if (mode == 0) {
    THTensor_(resizeAs)(d_input_features, d_output_features);
    THTensor_(copy)(d_input_features, d_output_features);
    THTensor_(resize3d)(d_input_features, rb[0][2], rb[0][3], nPlanes);
  } else {
    THTensor_(resize3d)(d_input_features, rb[0][2], rb[0][3], nPlanes);
    THTensor_(zero)(d_input_features);
    InputLayer_BackwardPass<real>(THTensor_(data)(d_input_features),
                                  THTensor_(data)(d_output_features), nRows,
                                  maxActive, nPlanes, &rb[1][0], mode == 4);
  }
}
// BL output layer forward: uses the input layer's backward kernel to expand
// (active, planes) features back to the 3-D (rb[0][2], rb[0][3], planes) shape.
extern "C" void scn_DR_(BLOutputLayer_updateOutput)(
    void **m, THTensor *input_features, THTensor *output_features) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
  auto &rb = _m.blLayerRuleBook;
  auto nPlanes = input_features->size[1];
  auto mode = rb[0][0];
  auto maxActive = rb[0][1];
  auto nRows = rb[0][4];
  if (mode == 0) {
    THTensor_(resizeAs)(output_features, input_features);
    THTensor_(copy)(output_features, input_features);
    THTensor_(resize3d)(output_features, rb[0][2], rb[0][3], nPlanes);
  } else {
    THTensor_(resize3d)(output_features, rb[0][2], rb[0][3], nPlanes);
    THTensor_(zero)(output_features);
    InputLayer_BackwardPass<real>(THTensor_(data)(output_features),
                                  THTensor_(data)(input_features), nRows,
                                  maxActive, nPlanes, &rb[1][0], false);
  }
}
// BL output layer backward: gradients flow through the input layer's forward
// kernel; the result is a 2-D (nRows, planes) tensor.
extern "C" void scn_DR_(BLOutputLayer_updateGradInput)(
    void **m, THTensor *d_input_features, THTensor *d_output_features) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
  auto &rb = _m.blLayerRuleBook;
  auto nPlanes = d_output_features->size[2]; // gradient is 3-D here
  auto mode = rb[0][0];
  auto maxActive = rb[0][1];
  auto nRows = rb[0][4];
  if (mode == 0) {
    THTensor_(resizeAs)(d_input_features, d_output_features);
    THTensor_(copy)(d_input_features, d_output_features);
    THTensor_(resize2d)(d_input_features, nRows, nPlanes);
  } else {
    THTensor_(resize2d)(d_input_features, nRows, nPlanes);
    THTensor_(zero)(d_input_features);
    InputLayer_ForwardPass<real>(THTensor_(data)(d_output_features),
                                 THTensor_(data)(d_input_features), nRows,
                                 maxActive, nPlanes, &rb[1][0], false);
  }
}
#endif
sparseconvnet/SCN/generic/CPU/LeakyReLU.cpp
deleted
100644 → 0
View file @
6d4475db
// Copyright 2016-present, Facebook, Inc.
// All rights reserved.
//
// This source code is licensed under the license found in the
// LICENSE file in the root directory of this source tree.
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/CPU/LeakyReLU.cpp"
#else
// Elementwise leaky ReLU: f(x) = x for x > 0, alpha * x otherwise.
// Supports in-place operation (input_features == output_features).
extern "C" void scn_R_(LeakyReLU_updateOutput)(THTensor *input_features,
                                               THTensor *output_features,
                                               float alpha) {
  if (input_features != output_features)
    THTensor_(resizeAs)(output_features, input_features);
  auto in = THTensor_(data)(input_features);
  auto out = THTensor_(data)(output_features);
  auto count = THTensor_(nElement)(input_features);
  for (uInt i = 0; i < count; i++)
    out[i] = (in[i] > 0) ? in[i] : in[i] * alpha;
}
// Leaky ReLU gradient: pass d_output through unchanged where the *input* was
// positive, otherwise scale it by alpha. Safe for in-place use.
extern "C" void scn_R_(LeakyReLU_updateGradInput)(THTensor *input_features,
                                                  THTensor *d_input_features,
                                                  THTensor *d_output_features,
                                                  float alpha) {
  if (d_input_features != d_output_features)
    THTensor_(resizeAs)(d_input_features, d_output_features);
  auto in = THTensor_(data)(input_features);
  auto d_in = THTensor_(data)(d_input_features);
  auto d_out = THTensor_(data)(d_output_features);
  auto count = THTensor_(nElement)(d_input_features);
  for (uInt i = 0; i < count; i++)
    d_in[i] = (in[i] > 0) ? d_out[i] : d_out[i] * alpha;
}
#endif
sparseconvnet/SCN/generic/CPU/MaxPooling.cpp
deleted
100644 → 0
View file @
6d4475db
// Copyright 2016-present, Facebook, Inc.
// All rights reserved.
//
// This source code is licensed under the license found in the
// LICENSE file in the root directory of this source tree.
#ifndef TH_GENERIC_FILE_
#define TH_GENERIC_FILE_ "generic/CPU/MaxPooling.cpp"
#else
#include "MaxPooling.h"
// Max pooling forward: builds (or fetches) the pooling rulebook, sizes and
// zeroes the output, and runs the per-group forward kernel. The first
// nFeaturesToDrop planes of each input row are excluded from pooling.
extern "C" void scn_DR_(MaxPooling_updateOutput)(
    THLongTensor *inputSize, THLongTensor *outputSize, THLongTensor *poolSize,
    THLongTensor *poolStride, void **m, THTensor *input_features,
    THTensor *output_features, long nFeaturesToDrop) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
  uInt nPlanes = input_features->size[1] - nFeaturesToDrop;
  auto rb = _m.getRuleBook(inputSize, outputSize, poolSize, poolStride, true);
  uInt nActive = _m.getNActive(outputSize);
  THTensor_(resize2d)(output_features, nActive,
                      input_features->size[1] - nFeaturesToDrop);
  THTensor_(zero)(output_features);
  // Offset past the dropped planes in every input row.
  auto in = THTensor_(data)(input_features) + nFeaturesToDrop;
  auto out = THTensor_(data)(output_features);
  for (auto &r : rb) {
    uInt nHot = r.size() / 2;
    MaxPooling_ForwardPass<real>(in, out, nPlanes, input_features->stride[0],
                                 output_features->stride[0], &r[0], nHot);
  }
}
// Max pooling backward: routes gradients to the argmax locations via the
// same rulebook used in the forward pass.
extern "C" void scn_DR_(MaxPooling_updateGradInput)(
    THLongTensor *inputSize, THLongTensor *outputSize, THLongTensor *poolSize,
    THLongTensor *poolStride, void **m, THTensor *input_features,
    THTensor *d_input_features, THTensor *output_features,
    THTensor *d_output_features, long nFeaturesToDrop) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
  uInt nPlanes = input_features->size[1] - nFeaturesToDrop;
  auto rb = _m.getRuleBook(inputSize, outputSize, poolSize, poolStride, true);
  uInt nActive = _m.getNActive(outputSize);
  THTensor_(resizeAs)(d_input_features, input_features);
  THTensor_(zero)(d_input_features);
  // NOTE(review): unlike the forward pass, no nFeaturesToDrop offset is
  // applied to these pointers — behavior preserved as written; confirm.
  auto in = THTensor_(data)(input_features);
  auto out = THTensor_(data)(output_features);
  auto d_in = THTensor_(data)(d_input_features);
  auto d_out = THTensor_(data)(d_output_features);
  for (auto &r : rb) {
    uInt nHot = r.size() / 2;
    MaxPooling_BackwardPass<real>(in, d_in, out, d_out, nPlanes,
                                  input_features->stride[0],
                                  output_features->stride[0], &r[0], nHot);
  }
}
// Randomized-stride variant of MaxPooling_updateOutput: identical except the
// rulebook comes from getRandomizedStrideRuleBook.
extern "C" void scn_DR_(RandomizedStrideMaxPooling_updateOutput)(
    THLongTensor *inputSize, THLongTensor *outputSize, THLongTensor *poolSize,
    THLongTensor *poolStride, void **m, THTensor *input_features,
    THTensor *output_features, long nFeaturesToDrop) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
  uInt nPlanes = input_features->size[1] - nFeaturesToDrop;
  auto rb = _m.getRandomizedStrideRuleBook(inputSize, outputSize, poolSize,
                                           poolStride, true);
  uInt nActive = _m.getNActive(outputSize);
  THTensor_(resize2d)(output_features, nActive,
                      input_features->size[1] - nFeaturesToDrop);
  THTensor_(zero)(output_features);
  // Offset past the dropped planes in every input row.
  auto in = THTensor_(data)(input_features) + nFeaturesToDrop;
  auto out = THTensor_(data)(output_features);
  for (auto &r : rb) {
    uInt nHot = r.size() / 2;
    MaxPooling_ForwardPass<real>(in, out, nPlanes, input_features->stride[0],
                                 output_features->stride[0], &r[0], nHot);
  }
}
// Randomized-stride variant of MaxPooling_updateGradInput: identical except
// the rulebook comes from getRandomizedStrideRuleBook.
extern "C" void scn_DR_(RandomizedStrideMaxPooling_updateGradInput)(
    THLongTensor *inputSize, THLongTensor *outputSize, THLongTensor *poolSize,
    THLongTensor *poolStride, void **m, THTensor *input_features,
    THTensor *d_input_features, THTensor *output_features,
    THTensor *d_output_features, long nFeaturesToDrop) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
  uInt nPlanes = input_features->size[1] - nFeaturesToDrop;
  auto rb = _m.getRandomizedStrideRuleBook(inputSize, outputSize, poolSize,
                                           poolStride, true);
  uInt nActive = _m.getNActive(outputSize);
  THTensor_(resizeAs)(d_input_features, input_features);
  THTensor_(zero)(d_input_features);
  // NOTE(review): as in MaxPooling_updateGradInput, no nFeaturesToDrop
  // offset is applied here — behavior preserved as written; confirm.
  auto in = THTensor_(data)(input_features);
  auto out = THTensor_(data)(output_features);
  auto d_in = THTensor_(data)(d_input_features);
  auto d_out = THTensor_(data)(d_output_features);
  for (auto &r : rb) {
    uInt nHot = r.size() / 2;
    MaxPooling_BackwardPass<real>(in, d_in, out, d_out, nPlanes,
                                  input_features->stride[0],
                                  output_features->stride[0], &r[0], nHot);
  }
}
#endif
sparseconvnet/SCN/generic/CPU/NetworkInNetwork.cpp
deleted
100644 → 0
View file @
6d4475db
// Copyright 2016-present, Facebook, Inc.
// All rights reserved.
//
// This source code is licensed under the license found in the
// LICENSE file in the root directory of this source tree.
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/CPU/NetworkInNetwork.cpp"
#else
// Network-in-network (1x1 convolution) forward: output = input * weight
// (+ bias). Returns nActive * input_nPlanes * output_nPlanes, the size of
// the GEMM performed (a work estimate for the caller).
extern "C" double scn_R_(NetworkInNetwork_updateOutput)(
    THTensor *input_features_, THTensor *output_features_, THTensor *weight_,
    THTensor *bias_) {
  auto nActive = input_features_->size[0];
  auto input_nPlanes = weight_->size[0];
  auto output_nPlanes = weight_->size[1];
  THTensor_(resize2d)(output_features_, nActive, output_nPlanes);
  auto input_features = THTensor_(data)(input_features_);
  auto output_features = THTensor_(data)(output_features_);
  auto weight = THTensor_(data)(weight_);
  if (bias_ != nullptr) {
    // Pre-fill each output row with the bias, then accumulate (beta = 1).
    auto bias = THTensor_(data)(bias_);
    for (uInt i = 0; i < nActive; i++)
      for (uInt j = 0; j < output_nPlanes; j++)
        output_features[i * output_nPlanes + j] = bias[j];
    // Row-major input (l x m) * weight (m x r) -> output (l x r); gemm is
    // column-major, so this maps to ('n','n') with swapped operand order.
    THBlas_(gemm)('n', 'n', output_nPlanes /*r*/, nActive /*l*/,
                  input_nPlanes /*m*/, 1 /*alpha*/, weight,
                  output_nPlanes /*r*/, input_features, input_nPlanes /*m*/,
                  1 /*beta*/, output_features, output_nPlanes /*r*/);
  } else {
    THTensor_(zero)(output_features_);
    THBlas_(gemm)('n', 'n', output_nPlanes, nActive, input_nPlanes,
                  1 /*alpha*/, weight, output_nPlanes, input_features,
                  input_nPlanes, 0 /*beta: overwrite*/, output_features,
                  output_nPlanes);
  }
  return nActive * input_nPlanes * output_nPlanes;
}
// NIN backward w.r.t. input: d_input = d_output * T(weight).
extern "C" void scn_R_(NetworkInNetwork_updateGradInput)(
    THTensor *d_input_features_, THTensor *d_output_features_,
    THTensor *weight_) {
  auto nActive = d_output_features_->size[0];
  auto input_nPlanes = weight_->size[0];
  auto output_nPlanes = weight_->size[1];
  THTensor_(resize2d)(d_input_features_, nActive, input_nPlanes);
  THTensor_(zero)(d_input_features_);
  auto d_input_features = THTensor_(data)(d_input_features_);
  auto d_output_features = THTensor_(data)(d_output_features_);
  auto weight = THTensor_(data)(weight_);
  // Column-major gemm: transpose the weight operand ('t','n').
  THBlas_(gemm)('t', 'n', input_nPlanes /*r*/, nActive /*l*/,
                output_nPlanes /*m*/, 1 /*alpha*/, weight,
                output_nPlanes /*m*/, d_output_features,
                output_nPlanes /*m*/, 0 /*beta*/, d_input_features,
                input_nPlanes /*r*/);
}
// NIN gradient accumulation: d_weight += T(input) * d_output, and, when a
// bias gradient tensor is supplied, d_bias += column sums of d_output.
extern "C" void scn_R_(NetworkInNetwork_accGradParameters)(
    THTensor *input_features_, THTensor *d_output_features_,
    THTensor *d_weight_, THTensor *d_bias_) {
  auto nActive = input_features_->size[0];
  auto input_nPlanes = d_weight_->size[0];
  auto output_nPlanes = d_weight_->size[1];
  auto input_features = THTensor_(data)(input_features_);
  auto d_output_features = THTensor_(data)(d_output_features_);
  auto d_weight = THTensor_(data)(d_weight_);
  // BUGFIX: removed `auto d_bias = d_bias_ and THTensor_(data)(d_bias_);`.
  // Because of the `and` operator that local was an unused *bool*, not a
  // pointer; the real data pointer is obtained inside the guard below.
  // Do GEMM (note: gemm assumes column-major matrices)
  // d_output_features is m*l (row-major), input_features is m*r (row-major),
  // d_weight is l*r (row-major): T(d_output_features) * input -> d_weight.
  THBlas_(gemm)('n', 't', output_nPlanes /*r*/, input_nPlanes /*l*/,
                nActive /*m*/, 1 /*alpha*/, d_output_features,
                output_nPlanes /*r*/, input_features, input_nPlanes /*l*/,
                1 /*beta: accumulate*/, d_weight, output_nPlanes /*r*/);
  if (d_bias_) {
    auto d_bias = THTensor_(data)(d_bias_);
    for (uInt row = 0; row < nActive; row++)
      for (uInt i = 0; i < output_nPlanes; i++)
        d_bias[i] += d_output_features[row * output_nPlanes + i];
  }
}
#endif
sparseconvnet/SCN/generic/CPU/NetworkInNetwork.h
deleted
100644 → 0
View file @
6d4475db
// Copyright 2016-present, Facebook, Inc.
// All rights reserved.
//
// This source code is licensed under the license found in the
// LICENSE file in the root directory of this source tree.
#ifndef CPU_NetworkInNetwork_H
#define CPU_NetworkInNetwork_H
#include "../SparseConvNet.h"
#include "Convolution.h"
// Network-in-network (1x1 convolution) forward pass:
// output_features = input_features * weight (+ bias), over output_nActive rows.
template <typename T>
void NetworkInNetwork_ForwardPass(
    T *input_features, uInt input_nPlanes, T *output_features,
    uInt output_nPlanes, T *weight, T *bias, uInt output_nActive,
    void (*gemm)(char transa, char transb, long m, long n, long k, T alpha,
                 T *a, long lda, T *b, long ldb, T beta, T *c, long ldc)) {
  // BUGFIX: the original body referenced `buffer` and `filterVolume`, which
  // are not parameters of this function (an apparent copy-paste from the
  // Convolution variant) and would fail to compile on instantiation. A NIN
  // layer is a 1x1 convolution, so the GEMM operands are the input features
  // themselves with inner dimension input_nPlanes — matching
  // NetworkInNetwork_updateOutput in NetworkInNetwork.cpp.
  if (bias != nullptr) {
    // Pre-fill each active output row with the bias, then accumulate.
    for (uInt row = 0; row < output_nActive; row++)
      for (uInt column = 0; column < output_nPlanes; column++)
        output_features[row * output_nPlanes + column] = bias[column];
    // Do GEMM (note: gemm assumes column-major matrices)
    // input_features (l x m, row-major) * weight (m x r, row-major)
    //   -> output_features (l x r, row-major), beta = 1 keeps the bias.
    (*gemm)('n', 'n', output_nPlanes /*r*/, output_nActive /*l*/,
            input_nPlanes /*m*/, 1 /*alpha*/, weight, output_nPlanes /*r*/,
            input_features, input_nPlanes /*m*/, 1 /*beta*/, output_features,
            output_nPlanes /*r*/);
  } else {
    // No bias: beta = 0 overwrites output_features directly.
    (*gemm)('n', 'n', output_nPlanes /*r*/, output_nActive /*l*/,
            input_nPlanes /*m*/, 1 /*alpha*/, weight, output_nPlanes /*r*/,
            input_features, input_nPlanes /*m*/, 0 /*beta*/, output_features,
            output_nPlanes /*r*/);
  }
}
// Propagate output gradients through the weights into d_buffer, then
// scatter-add d_buffer rows back into d_input_features using `rules`.
// rules has output_nActive * filterVolume entries; uInt_MAX (2^32-1) marks
// "no corresponding input row". d_buffer must hold
// output_nActive * filterVolume * input_nPlanes values.
template <typename T>
void NetworkInNetwork_BackwardPass(
    T *d_input_features, uInt input_nPlanes, T *d_output_features,
    uInt output_nPlanes, T *weight, uInt *rules, uInt filterVolume,
    uInt output_nActive, T *d_buffer,
    void (*gemm)(char transa, char transb, long m, long n, long k, T alpha,
                 T *a, long lda, T *b, long ldb, T beta, T *c, long ldc)) {
  // d_buffer = d_output_features * weight (column-major gemm: 't','n').
  (*gemm)('t', 'n', input_nPlanes * filterVolume /*r*/, output_nActive /*l*/,
          output_nPlanes /*m*/, 1 /*alpha*/, weight, output_nPlanes /*m*/,
          d_output_features, output_nPlanes /*m*/, 0 /*beta*/, d_buffer,
          input_nPlanes * filterVolume /*r*/);
  // Scatter-accumulate each d_buffer row into its source input row.
  for (uInt row = 0; row < output_nActive * filterVolume; row++) {
    auto r = rules[row];
    if (r != uInt_MAX) // skip inactive sites
      for (uInt i = 0; i < input_nPlanes; i++)
        d_input_features[r * input_nPlanes + i] +=
            d_buffer[row * input_nPlanes + i];
  }
}
// Accumulate weight (and optionally bias) gradients. `buffer` is scratch
// space with room for output_nActive * filterVolume * input_nPlanes values;
// rules entries equal to uInt_MAX (2^32-1) denote inactive sites.
template <typename T>
void NetworkInNetwork_GradWeights(
    T *input_features, uInt input_nPlanes, T *d_output_features,
    uInt output_nPlanes, T *d_weight, T *d_bias, uInt *rules,
    uInt filterVolume, uInt output_nActive, T *buffer,
    void (*gemm)(char transa, char transb, long m, long n, long k, T alpha,
                 T *a, long lda, T *b, long ldb, T beta, T *c, long ldc)) {
  // Gather: copy each referenced input row into buffer; zero rows whose
  // rule entry marks an inactive site.
  for (uInt row = 0; row < output_nActive * filterVolume; row++) {
    if (rules[row] == uInt_MAX) {
      std::memset(buffer + row * input_nPlanes, 0, sizeof(T) * input_nPlanes);
    } else {
      std::memcpy(buffer + row * input_nPlanes,
                  input_features + rules[row] * input_nPlanes,
                  sizeof(T) * input_nPlanes);
    }
  }
  // d_weight += T(d_output_features) * buffer (column-major: 'n','t',
  // beta = 1 accumulates across calls).
  (*gemm)('n', 't', output_nPlanes /*r*/, input_nPlanes * filterVolume /*l*/,
          output_nActive /*m*/, 1 /*alpha*/, d_output_features,
          output_nPlanes /*r*/, buffer, input_nPlanes * filterVolume /*l*/,
          1 /*beta*/, d_weight, output_nPlanes /*r*/);
  // d_bias[i] accumulates the column sums of d_output_features.
  if (d_bias)
    for (uInt row = 0; row < output_nActive; row++)
      for (uInt i = 0; i < output_nPlanes; i++)
        d_bias[i] += d_output_features[row * output_nPlanes + i];
}
#endif
/* CPU_NetworkInNetwork_H */
sparseconvnet/SCN/generic/CPU/SparseToDense.cpp
deleted
100644 → 0
View file @
6d4475db
// Copyright 2016-present, Facebook, Inc.
// All rights reserved.
//
// This source code is licensed under the license found in the
// LICENSE file in the root directory of this source tree.
#ifndef TH_GENERIC_FILE_
#define TH_GENERIC_FILE_ "generic/CPU/SparseToDense.cpp"
#else
#include "SparseToDense.h"
// Convert sparse features to a dense (batch, nPlanes, spatial...) tensor.
// The output is always allocated and zeroed; the copy runs only when the
// sparse features are in the expected 2-D (active, planes) layout.
extern "C" void scn_DR_(SparseToDense_updateOutput)(
    THLongTensor *inputSize, void **m, THTensor *input_features,
    THTensor *output_features, long nPlanes) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
  {
    long sz[Dimension + 2];
    sz[0] = _m.grids.begin()->second.size(); // batch size
    sz[1] = nPlanes;
    std::memcpy(sz + 2, THLongTensor_data(inputSize),
                sizeof(long) * Dimension);
    THTensor_(resizeNd)(output_features, Dimension + 2, sz, NULL);
    THTensor_(zero)(output_features);
  }
  if (input_features->nDimension == 2) {
    auto rb = _m.getSparseToDenseRuleBook(inputSize, true);
    uInt planes = input_features->size[1];
    auto in = THTensor_(data)(input_features);
    auto out = THTensor_(data)(output_features);
    long spatialVolume = THLongTensor_prodall(inputSize);
    for (auto &r : rb) {
      uInt nHot = r.size() / 2;
      SparseToDense_ForwardPass<real>(in, out, planes, spatialVolume, &r[0],
                                      nHot);
      out += planes * spatialVolume; // advance to the next batch item
    }
  }
}
// Backward of SparseToDense: gather gradients from the dense tensor back
// into the sparse (active, planes) layout.
extern "C" void scn_DR_(SparseToDense_updateGradInput)(
    THLongTensor *inputSize, void **m, THTensor *input_features,
    THTensor *d_input_features, THTensor *d_output_features) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
  THTensor_(resizeAs)(d_input_features, input_features);
  THTensor_(zero)(d_input_features);
  if (input_features->nDimension == 2) {
    auto rb = _m.getSparseToDenseRuleBook(inputSize, true);
    long spatialVolume = THLongTensor_prodall(inputSize);
    uInt planes = d_input_features->size[1];
    auto d_in = THTensor_(data)(d_input_features);
    auto d_out = THTensor_(data)(d_output_features);
    for (auto &r : rb) {
      uInt nHot = r.size() / 2;
      SparseToDense_BackwardPass<real>(d_in, d_out, planes, spatialVolume,
                                       &r[0], nHot);
      d_out += planes * spatialVolume; // next batch item's dense gradient
    }
  }
}
#endif
sparseconvnet/SCN/generic/CPU/THGenerateDimFloatTypes.h
deleted
100644 → 0
View file @
6d4475db
// Copyright 2016-present, Facebook, Inc.
// All rights reserved.
//
// This source code is licensed under the license found in the
// LICENSE file in the root directory of this source tree.
#ifndef TH_GENERIC_FILE_
#error "Define TH_GENERIC_FILE_ before including THGenerateDimFloatTypes.h"
#endif
// Instantiate TH_GENERIC_FILE_ for Dimension = 1..10, each in float and
// double precision. THGenerateFloatTypes.h #undefs TH_GENERIC_FILE at the
// end of each pass, so it must be re-defined before every include.
// Cleanup: dropped the original's stray leading
// `#define TH_GENERIC_FILE TH_GENERIC_FILE_`, which was immediately (and
// identically) re-defined before the first include.
#define Dimension 1
#define TH_GENERIC_FILE TH_GENERIC_FILE_
#include "THGenerateFloatTypes.h"
#undef Dimension
#define Dimension 2
#define TH_GENERIC_FILE TH_GENERIC_FILE_
#include "THGenerateFloatTypes.h"
#undef Dimension
#define Dimension 3
#define TH_GENERIC_FILE TH_GENERIC_FILE_
#include "THGenerateFloatTypes.h"
#undef Dimension
#define Dimension 4
#define TH_GENERIC_FILE TH_GENERIC_FILE_
#include "THGenerateFloatTypes.h"
#undef Dimension
#define Dimension 5
#define TH_GENERIC_FILE TH_GENERIC_FILE_
#include "THGenerateFloatTypes.h"
#undef Dimension
#define Dimension 6
#define TH_GENERIC_FILE TH_GENERIC_FILE_
#include "THGenerateFloatTypes.h"
#undef Dimension
#define Dimension 7
#define TH_GENERIC_FILE TH_GENERIC_FILE_
#include "THGenerateFloatTypes.h"
#undef Dimension
#define Dimension 8
#define TH_GENERIC_FILE TH_GENERIC_FILE_
#include "THGenerateFloatTypes.h"
#undef Dimension
#define Dimension 9
#define TH_GENERIC_FILE TH_GENERIC_FILE_
#include "THGenerateFloatTypes.h"
#undef Dimension
#define Dimension 10
#define TH_GENERIC_FILE TH_GENERIC_FILE_
#include "THGenerateFloatTypes.h"
#undef Dimension
#undef TH_GENERIC_FILE_
sparseconvnet/SCN/generic/CPU/THGenerateFloatTypes.h
deleted
100644 → 0
View file @
6d4475db
// Copyright 2016-present, Facebook, Inc.
// All rights reserved.
//
// This source code is licensed under the license found in the
// LICENSE file in the root directory of this source tree.
#ifndef TH_GENERIC_FILE
#error "You must define TH_GENERIC_FILE before including THGenerateFloatTypes.h"
#endif
// Include TH_GENERIC_FILE twice: once with real = float and once with
// real = double. The #line directive resets the reported line number to the
// generic file so compiler diagnostics point at the real source.
#define real float
#define accreal double
#define Real Float
#define TH_REAL_IS_FLOAT
#line 1 TH_GENERIC_FILE
#include TH_GENERIC_FILE
#undef accreal
#undef real
#undef Real
#undef TH_REAL_IS_FLOAT
#define real double
#define accreal double
#define Real Double
#define TH_REAL_IS_DOUBLE
#line 1 TH_GENERIC_FILE
#include TH_GENERIC_FILE
#undef accreal
#undef real
#undef Real
#undef TH_REAL_IS_DOUBLE
#undef TH_GENERIC_FILE
sparseconvnet/SCN/generic/CPU/UnPooling.cpp
deleted
100644 → 0
View file @
6d4475db
// Copyright 2016-present, Facebook, Inc.
// All rights reserved.
//
// This source code is licensed under the license found in the
// LICENSE file in the root directory of this source tree.
#ifndef TH_GENERIC_FILE_
#define TH_GENERIC_FILE_ "generic/CPU/UnPooling.cpp"
#else
#include "UnPooling.h"
// CPU forward pass of UnPooling. The rule book is built with output/input
// sizes swapped relative to pooling, so pooled features are spread back to
// the finer grid. The first nFeaturesToDrop planes of the input are skipped.
extern "C" void scn_DR_(UnPooling_updateOutput)(
    THLongTensor *inputSize, THLongTensor *outputSize, THLongTensor *poolSize,
    THLongTensor *poolStride, void **m, THTensor *input_features,
    THTensor *output_features, long nFeaturesToDrop) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
  // Planes actually processed after dropping the leading ones.
  uInt nPlanes = input_features->size[1] - nFeaturesToDrop;
  // Note: (outputSize, inputSize) order — the transpose of pooling.
  auto _rules =
      _m.getRuleBook(outputSize, inputSize, poolSize, poolStride, true);
  uInt nActive = _m.getNActive(outputSize);
  THTensor_(resize2d)(output_features, nActive,
                      input_features->size[1] - nFeaturesToDrop);
  THTensor_(zero)(output_features);
  // Offset the input pointer past the dropped planes; the full row strides
  // (size[1]) are passed to the kernel separately.
  auto iF = THTensor_(data)(input_features) + nFeaturesToDrop;
  auto oF = THTensor_(data)(output_features);
  for (auto &r : _rules) {
    // Rule lists hold index pairs, hence size()/2 entries.
    uInt nHot = r.size() / 2;
    UnPooling_ForwardPass<real>(iF, oF, nPlanes, input_features->size[1],
                                output_features->size[1], &r[0], nHot,
                                _rules.size());
  }
}
// CPU backward pass of UnPooling: accumulates gradients from the unpooled
// (fine) grid back onto the pooled input sites. Mirrors updateOutput's
// rule-book construction and nFeaturesToDrop handling.
extern "C" void scn_DR_(UnPooling_updateGradInput)(
    THLongTensor *inputSize, THLongTensor *outputSize, THLongTensor *poolSize,
    THLongTensor *poolStride, void **m, THTensor *input_features,
    THTensor *d_input_features, THTensor *d_output_features,
    long nFeaturesToDrop) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
  uInt nPlanes = input_features->size[1] - nFeaturesToDrop;
  // Same transposed (outputSize, inputSize) rule book as the forward pass.
  auto _rules =
      _m.getRuleBook(outputSize, inputSize, poolSize, poolStride, true);
  uInt nActive = _m.getNActive(outputSize);
  // Gradient mirrors the input shape; dropped planes stay zero.
  THTensor_(resizeAs)(d_input_features, input_features);
  THTensor_(zero)(d_input_features);
  auto diF = THTensor_(data)(d_input_features) + nFeaturesToDrop;
  auto doF = THTensor_(data)(d_output_features);
  for (auto &r : _rules) {
    uInt nHot = r.size() / 2;
    UnPooling_BackwardPass<real>(diF, doF, nPlanes, input_features->size[1],
                                 d_output_features->size[1], &r[0], nHot,
                                 _rules.size());
  }
}
#endif
sparseconvnet/SCN/generic/GPU/ActivePooling.cu
deleted
100644 → 0
View file @
6d4475db
// Copyright 2016-present, Facebook, Inc.
// All rights reserved.
//
// This source code is licensed under the license found in the
// LICENSE file in the root directory of this source tree.
#ifndef TH_GENERIC_FILE_
#define TH_GENERIC_FILE_ "generic/GPU/ActivePooling.cu"
#else
#include "ActivePooling.h"
// GPU forward pass of ActivePooling: pools every active site of each sample
// down to a single feature row (sum or, if `average`, mean), producing a
// (batchSize x nPlanes) output. Host-side rule rows of (maxActive + 1)
// uInts per sample are copied to a device buffer in chunks.
extern "C" void scn_DR_(ActivePooling_updateOutput)(
    THLongTensor *inputSize, void **m, THCTensor *input_features,
    THCTensor *output_features, bool average) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
  uInt nPlanes = input_features->size[1];
  auto _rules = _m.getActivePoolingRuleBook(inputSize);
  // _rules[1] carries metadata: [0] = batch size, [1] = max active sites.
  uInt batchSize = _rules[1][0];
  uInt maxActive = _rules[1][1];
  THCTensor_(resize2d)(state, output_features, batchSize, nPlanes);
  THCTensor_(zero)(state, output_features);
  // Scratch device buffer for rule rows; kept at a fixed 4M-element size.
  auto rulesBuffer = THCITensor_(new)(state);
  if (THCITensor_(nElement)(state, rulesBuffer) < 1 << 22)
    THCITensor_(resize1d)(state, rulesBuffer, 1 << 22);
  uInt *rb = (uInt *)THCITensor_(data)(state, rulesBuffer);
  // How many samples' rule rows fit in the buffer per transfer (capped).
  uInt rowBatchSize = std::min((uInt)32768, (1 << 22) / (maxActive + 1));
  THAssert(rowBatchSize > 0);
  auto iF = THCTensor_(data)(state, input_features);
  auto oF = THCTensor_(data)(state, output_features);
  for (uInt o = 0; o < batchSize; o += rowBatchSize) {
    uInt batchSize_ = std::min(rowBatchSize, (uInt)(batchSize - o));
    // Synchronous host-to-device copy of this chunk's rule rows.
    cudaMemcpy(rb, &_rules[0][o * (maxActive + 1)],
               sizeof(uInt) * (maxActive + 1) * batchSize_,
               cudaMemcpyHostToDevice);
    ActivePooling_ForwardPass<real>(iF, oF + o * nPlanes, batchSize_,
                                    maxActive, nPlanes, rb, average);
  }
  THCITensor_(free)(state, rulesBuffer);
}
// GPU backward pass of ActivePooling: distributes each sample's pooled
// gradient row back over that sample's active input sites. Uses the same
// chunked host-to-device rule transfer scheme as the forward pass.
extern "C" void scn_DR_(ActivePooling_updateGradInput)(
    THLongTensor *inputSize, void **m, THCTensor *input_features,
    THCTensor *d_input_features, THCTensor *d_output_features, bool average) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
  uInt nPlanes = input_features->size[1];
  auto _rules = _m.getActivePoolingRuleBook(inputSize);
  // _rules[1] carries metadata: [0] = batch size, [1] = max active sites.
  uInt batchSize = _rules[1][0];
  uInt maxActive = _rules[1][1];
  THCTensor_(resizeAs)(state, d_input_features, input_features);
  THCTensor_(zero)(state, d_input_features);
  // Scratch device buffer for rule rows; kept at a fixed 4M-element size.
  auto rulesBuffer = THCITensor_(new)(state);
  if (THCITensor_(nElement)(state, rulesBuffer) < 1 << 22)
    THCITensor_(resize1d)(state, rulesBuffer, 1 << 22);
  uInt *rb = (uInt *)THCITensor_(data)(state, rulesBuffer);
  uInt rowBatchSize = std::min((uInt)32768, (1 << 22) / (maxActive + 1));
  THAssert(rowBatchSize > 0);
  auto diF = THCTensor_(data)(state, d_input_features);
  auto doF = THCTensor_(data)(state, d_output_features);
  for (uInt o = 0; o < batchSize; o += rowBatchSize) {
    uInt batchSize_ = std::min(rowBatchSize, (uInt)(batchSize - o));
    cudaMemcpy(rb, &_rules[0][o * (maxActive + 1)],
               sizeof(uInt) * (maxActive + 1) * batchSize_,
               cudaMemcpyHostToDevice);
    ActivePooling_BackwardPass<real>(diF, doF + o * nPlanes, batchSize_,
                                     maxActive, nPlanes, rb, average);
  }
  THCITensor_(free)(state, rulesBuffer);
}
#endif
sparseconvnet/SCN/generic/GPU/AffineReluTrivialConvolution.cu
deleted
100644 → 0
View file @
6d4475db
// Copyright 2016-present, Facebook, Inc.
// All rights reserved.
//
// This source code is licensed under the license found in the
// LICENSE file in the root directory of this source tree.
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/GPU/AffineReluTrivialConvolution.cu"
#else
#include "AffineReluTrivialConvolution.h"
#include <algorithm>
// GPU forward pass of fused affine + ReLU + 1x1 ("trivial") convolution.
// Output is (nActive x convWeight->size[1]). All real work happens in the
// templated device routine; this wrapper only resizes and unpacks pointers
// and dimensions (input planes, input row stride, output planes, output
// row stride — here taken from size[1] — and the active-site count).
extern "C" void scn_R_(AffineReluTrivialConvolution_updateOutput)(
    THCTensor *input_features, THCTensor *output_features,
    THCTensor *affineWeight, THCTensor *affineBias, THCTensor *convWeight) {
  THCTensor_(resize2d)(state, output_features, input_features->size[0],
                       convWeight->size[1]);
  dAffineReluTrivialConvolution_forward<real>(
      THCTensor_(data)(state, input_features),
      THCTensor_(data)(state, output_features),
      THCTensor_(data)(state, affineWeight),
      THCTensor_(data)(state, affineBias),
      THCTensor_(data)(state, convWeight), convWeight->size[0],
      input_features->stride[0], convWeight->size[1],
      output_features->size[1], input_features->size[0]);
}
// GPU backward pass of the fused affine + ReLU + 1x1 convolution: one call
// computes input gradients and the gradients of the affine weight/bias and
// convolution weight. `additiveGrad` is forwarded to the device routine
// (presumably accumulate-into vs overwrite gradients — confirm in
// AffineReluTrivialConvolution.h).
extern "C" void scn_R_(AffineReluTrivialConvolution_backward)(
    THCTensor *input_features, THCTensor *d_input_features,
    THCTensor *d_output_features, THCTensor *affineWeight,
    THCTensor *d_affineWeight, THCTensor *affineBias, THCTensor *d_affineBias,
    THCTensor *convWeight, THCTensor *d_convWeight, bool additiveGrad) {
  THCTensor_(resizeAs)(state, d_input_features, input_features);
  dAffineReluTrivialConvolution_backward_dW<real>(
      THCTensor_(data)(state, input_features),
      THCTensor_(data)(state, d_input_features),
      THCTensor_(data)(state, d_output_features),
      THCTensor_(data)(state, affineWeight),
      THCTensor_(data)(state, d_affineWeight),
      THCTensor_(data)(state, affineBias),
      THCTensor_(data)(state, d_affineBias),
      THCTensor_(data)(state, convWeight),
      THCTensor_(data)(state, d_convWeight), convWeight->size[0],
      input_features->stride[0], convWeight->size[1],
      d_output_features->stride[0], input_features->size[0], additiveGrad);
}
#endif
sparseconvnet/SCN/generic/GPU/AveragePooling.cu
deleted
100644 → 0
View file @
6d4475db
// Copyright 2016-present, Facebook, Inc.
// All rights reserved.
//
// This source code is licensed under the license found in the
// LICENSE file in the root directory of this source tree.
#ifndef TH_GENERIC_FILE_
#define TH_GENERIC_FILE_ "generic/GPU/AveragePooling.cu"
#else
#include "AveragePooling.h"
#include "RuleBookIterator.h"
// GPU forward pass of AveragePooling over active sites. The RULEBOOKITERATOR
// macro iterates over the rule book; its first argument is the per-chunk
// statement (rbB / nHotB are supplied by the macro), its second argument —
// empty here — runs between chunks.
extern "C" void scn_DR_(AveragePooling_updateOutput)(
    THLongTensor *inputSize, THLongTensor *outputSize, THLongTensor *poolSize,
    THLongTensor *poolStride, void **m, THCTensor *input_features,
    THCTensor *output_features, long nFeaturesToDrop) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
  // Planes actually pooled after skipping the leading nFeaturesToDrop.
  uInt nPlanes = input_features->size[1] - nFeaturesToDrop;
  auto _rules =
      _m.getRuleBook(inputSize, outputSize, poolSize, poolStride, true);
  uInt nActive = _m.getNActive(outputSize);
  THCTensor_(resize2d)(state, output_features, nActive,
                       input_features->size[1] - nFeaturesToDrop);
  THCTensor_(zero)(state, output_features);
  // Skip dropped planes via pointer offset; row strides passed separately.
  auto iF = THCTensor_(data)(state, input_features) + nFeaturesToDrop;
  auto oF = THCTensor_(data)(state, output_features);
  RULEBOOKITERATOR(AveragePooling_ForwardPass<real>(
                       THCState_getCurrentStream(state), iF, oF, nPlanes,
                       input_features->size[1], output_features->size[1], rbB,
                       nHotB, _rules.size());
                   , )
}
// GPU backward pass of AveragePooling: spreads output gradients back over
// the pooled input sites. Same rule book and nFeaturesToDrop handling as
// the forward pass.
extern "C" void scn_DR_(AveragePooling_updateGradInput)(
    THLongTensor *inputSize, THLongTensor *outputSize, THLongTensor *poolSize,
    THLongTensor *poolStride, void **m, THCTensor *input_features,
    THCTensor *d_input_features, THCTensor *d_output_features,
    long nFeaturesToDrop) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
  uInt nPlanes = input_features->size[1] - nFeaturesToDrop;
  auto _rules =
      _m.getRuleBook(inputSize, outputSize, poolSize, poolStride, true);
  uInt nActive = _m.getNActive(outputSize);
  // Gradient mirrors input shape; dropped planes stay zero.
  THCTensor_(resizeAs)(state, d_input_features, input_features);
  THCTensor_(zero)(state, d_input_features);
  auto diF = THCTensor_(data)(state, d_input_features) + nFeaturesToDrop;
  auto doF = THCTensor_(data)(state, d_output_features);
  RULEBOOKITERATOR(AveragePooling_BackwardPass<real>(
                       THCState_getCurrentStream(state), diF, doF, nPlanes,
                       input_features->size[1], d_output_features->size[1],
                       rbB, nHotB, _rules.size());
                   , )
}
#endif
sparseconvnet/SCN/generic/GPU/BatchNormalization.cu
deleted
100644 → 0
View file @
6d4475db
// Copyright 2016-present, Facebook, Inc.
// All rights reserved.
//
// This source code is licensed under the license found in the
// LICENSE file in the root directory of this source tree.
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/GPU/BatchNormalization.cu"
#else
#include "BatchNormalization.h"
// Forward-pass dispatch helper: if the plane count is divisible by N, launch
// the kernel instantiated for width N (with 64 threads). Expands to a plain
// "if (...) { ... }", so invocations can be chained with "else" to pick the
// largest divisor first.
#define BN_F_MACRO(N) \
if (nPlanes % N == 0) { \
BatchNormalization_ForwardPass<real, N, 64>( \
THCTensor_(data)(state, input_features), \
THCTensor_(data)(state, output_features), nPlanes, input_stride, \
output_stride, nActive, THCTensor_(data)(state, saveMean), \
THCTensor_(data)(state, saveInvStd), \
THCTensor_(data)(state, runningMean), \
THCTensor_(data)(state, runningVar), \
weight ? THCTensor_(data)(state, weight) : 0, \
bias ? THCTensor_(data)(state, bias) : 0, eps, momentum, train, \
leakiness); \
}
// GPU forward pass of batch normalization (with optional affine weight/bias
// and leaky-ReLU fused in via `leakiness`). Resizes the output to match the
// input, then dispatches on the largest of {16,12,8,4,1} dividing nPlanes.
extern "C" void scn_R_(BatchNormalization_updateOutput)(
    THCTensor *input_features, THCTensor *output_features, THCTensor *saveMean,
    THCTensor *saveInvStd, THCTensor *runningMean, THCTensor *runningVar,
    THCTensor *weight, THCTensor *bias, real eps, real momentum, bool train,
    real leakiness) {
  THCTensor_(resizeAs)(state, output_features, input_features);
  // Only the 2-D (nActive x nPlanes) feature layout is handled.
  if (input_features->nDimension == 2) {
    auto nActive = input_features->size[0];
    auto nPlanes = input_features->size[1];
    auto input_stride = input_features->stride[0];
    auto output_stride = output_features->stride[0];
    // BN_F_MACRO expands to "if (...) {...}", forming an if/else ladder.
    BN_F_MACRO(16)
    else BN_F_MACRO(12) else BN_F_MACRO(8) else BN_F_MACRO(4) else BN_F_MACRO(1)
  }
}
// Variant of BatchNormalization_updateOutput that writes into an
// already-allocated output tensor: identical dispatch, but deliberately
// performs no resizeAs on output_features.
extern "C" void scn_R_(BatchNormalizationInTensor_updateOutput)(
    THCTensor *input_features, THCTensor *output_features, THCTensor *saveMean,
    THCTensor *saveInvStd, THCTensor *runningMean, THCTensor *runningVar,
    THCTensor *weight, THCTensor *bias, real eps, real momentum, bool train,
    real leakiness) {
  // Only the 2-D (nActive x nPlanes) feature layout is handled.
  if (input_features->nDimension == 2) {
    auto nActive = input_features->size[0];
    auto nPlanes = input_features->size[1];
    auto input_stride = input_features->stride[0];
    auto output_stride = output_features->stride[0];
    // BN_F_MACRO expands to "if (...) {...}", forming an if/else ladder.
    BN_F_MACRO(16)
    else BN_F_MACRO(12) else BN_F_MACRO(8) else BN_F_MACRO(4) else BN_F_MACRO(1)
  }
}
#undef BN_F_MACRO
// Backward-pass dispatch helper, mirroring BN_F_MACRO: launches the backward
// kernel for width N when N divides nPlanes; chainable with "else".
#define BN_B_MACRO(N) \
if (nPlanes % N == 0) { \
BatchNormalization_BackwardPass<real, N, 64>( \
THCTensor_(data)(state, input_features), \
THCTensor_(data)(state, d_input_features), \
THCTensor_(data)(state, output_features), \
THCTensor_(data)(state, d_output_features), nPlanes, input_stride, \
output_stride, nActive, THCTensor_(data)(state, saveMean), \
THCTensor_(data)(state, saveInvStd), \
THCTensor_(data)(state, runningMean), \
THCTensor_(data)(state, runningVar), \
weight ? THCTensor_(data)(state, weight) : 0, \
bias ? THCTensor_(data)(state, bias) : 0, \
d_weight ? THCTensor_(data)(state, d_weight) : 0, \
d_bias ? THCTensor_(data)(state, d_bias) : 0, leakiness); \
}
// GPU backward pass of batch normalization: computes input gradients and,
// when present, affine weight/bias gradients. Dispatches on the largest of
// {16,12,8,4,1} dividing nPlanes, as in the forward pass.
extern "C" void scn_R_(BatchNormalization_backward)(
    THCTensor *input_features, THCTensor *d_input_features,
    THCTensor *output_features, THCTensor *d_output_features,
    THCTensor *saveMean, THCTensor *saveInvStd, THCTensor *runningMean,
    THCTensor *runningVar, THCTensor *weight, THCTensor *bias,
    THCTensor *d_weight, THCTensor *d_bias, real leakiness) {
  THCTensor_(resizeAs)(state, d_input_features, d_output_features);
  // Only the 2-D (nActive x nPlanes) feature layout is handled.
  if (input_features->nDimension == 2) {
    auto nActive = input_features->size[0];
    auto nPlanes = input_features->size[1];
    auto input_stride = input_features->stride[0];
    auto output_stride = output_features->stride[0];
    // BN_B_MACRO expands to "if (...) {...}", forming an if/else ladder.
    BN_B_MACRO(16)
    else BN_B_MACRO(12) else BN_B_MACRO(8) else BN_B_MACRO(4) else BN_B_MACRO(1)
  }
}
#endif
sparseconvnet/SCN/generic/GPU/Convolution.cu
deleted
100644 → 0
View file @
6d4475db
// Copyright 2016-present, Facebook, Inc.
// All rights reserved.
//
// This source code is licensed under the license found in the
// LICENSE file in the root directory of this source tree.
#ifndef TH_GENERIC_FILE_
#define TH_GENERIC_FILE_ "generic/GPU/Convolution.cu"
#else
#include "Convolution.h"
#include "RuleBookIterator.h"
#include <algorithm>
#include <cstring>
// GPU forward pass of sparse convolution. Returns the multiply-accumulate
// count (flops) performed. When a bias is given it is broadcast into the
// output first (so the output need not be zeroed); otherwise the output is
// zero-initialised and the rule-book GEMMs accumulate into it.
extern "C" double scn_DR_(Convolution_updateOutput)(
    THLongTensor *inputSize, THLongTensor *outputSize, THLongTensor *filterSize,
    THLongTensor *filterStride, void **m, THCTensor *input_features,
    THCTensor *output_features, THCTensor *weight, THCTensor *bias,
    long filterVolume) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
  auto _rules =
      _m.getRuleBook(inputSize, outputSize, filterSize, filterStride, true);
  uInt nActive = _m.getNActive(outputSize);
  THCTensor_(resize2d)(state, output_features, nActive, weight->size[1]);
  if (not bias)
    THCTensor_(zero)(state, output_features);
  double flops = 0;
  if (nActive) {
    auto iF = THCTensor_(data)(state, input_features);
    auto oF = THCTensor_(data)(state, output_features);
    auto ip = input_features->size[1];
    auto op = output_features->size[1];
    auto w = THCTensor_(data)(state, weight);
    if (bias) {
      auto b = THCTensor_(data)(state, bias);
      // Fill the output with the bias, 32 planes per kernel launch.
      for (uInt i = 0; i < op; i += 32) {
        uInt blockDim = min(32L, op - i);
        uInt gridDim = min(4096, nActive);
        Convolution_fp_bias<<<gridDim, blockDim, 0,
                              THCState_getCurrentStream(state)>>>(
            oF + i, b + i, op, op, nActive);
      }
    }
    // c = parameters per filter offset; w advances by c each rule chunk.
    uInt c = ip * op;
    RULEBOOKITERATOR(dConvolution_forward2<real>(
                         iF, oF, w, rbB, nHotB, ip, ip, op, op,
                         THCState_getCurrentStream(state));
                     , w += c; flops += nHotB * c;)
  }
  return flops;
}
// GPU backward pass of sparse convolution: computes input gradients and
// weight gradients chunk-by-chunk over the rule book, plus bias gradients
// (column sums of d_output) when d_bias is given.
extern "C" void scn_DR_(Convolution_backward)(
    THLongTensor *inputSize, THLongTensor *outputSize, THLongTensor *filterSize,
    THLongTensor *filterStride, void **m, THCTensor *input_features,
    THCTensor *d_input_features, THCTensor *d_output_features,
    THCTensor *weight, THCTensor *d_weight, THCTensor *d_bias,
    long filterVolume) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
  auto _rules =
      _m.getRuleBook(inputSize, outputSize, filterSize, filterStride, true);
  uInt nActive = _m.getNActive(outputSize);
  THCTensor_(resizeAs)(state, d_input_features, input_features);
  THCTensor_(zero)(state, d_input_features);
  if (nActive) {
    auto iF = THCTensor_(data)(state, input_features);
    auto diF = THCTensor_(data)(state, d_input_features);
    auto doF = THCTensor_(data)(state, d_output_features);
    auto ip = input_features->size[1];
    auto op = d_output_features->size[1];
    auto w = THCTensor_(data)(state, weight);
    auto dw = THCTensor_(data)(state, d_weight);
    // c = parameters per filter offset; w/dw advance by c per rule chunk.
    uInt c = ip * op;
    RULEBOOKITERATOR(dConvolution_backward_dW2<real>(
                         iF, diF, doF, w, dw, rbB, nHotB, ip, ip, op, op,
                         THCState_getCurrentStream(state));
                     , w += c; dw += c;)
    if (d_bias) {
      auto db = THCTensor_(data)(state, d_bias);
      Convolution_bp_bias(doF, db, op, op, nActive,
                          THCState_getCurrentStream(state));
    }
  }
}
// GPU forward pass of submanifold convolution (output sites = input sites,
// so nActive comes from inputSize and no stride parameters exist).
// Otherwise identical in structure to Convolution_updateOutput; returns the
// multiply-accumulate count.
extern "C" double scn_DR_(SubmanifoldConvolution_updateOutput)(
    THLongTensor *inputSize, THLongTensor *filterSize, void **m,
    THCTensor *input_features, THCTensor *output_features, THCTensor *weight,
    THCTensor *bias, long filterVolume) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
  auto _rules = _m.getSubmanifoldRuleBook(inputSize, filterSize, true);
  uInt nActive = _m.getNActive(inputSize);
  THCTensor_(resize2d)(state, output_features, nActive, weight->size[1]);
  if (not bias)
    THCTensor_(zero)(state, output_features);
  double flops = 0;
  if (nActive) {
    auto iF = THCTensor_(data)(state, input_features);
    auto oF = THCTensor_(data)(state, output_features);
    auto ip = input_features->size[1];
    auto op = output_features->size[1];
    auto w = THCTensor_(data)(state, weight);
    if (bias) {
      auto b = THCTensor_(data)(state, bias);
      // Fill the output with the bias, 32 planes per kernel launch.
      for (uInt i = 0; i < op; i += 32) {
        uInt blockDim = min(32L, op - i);
        uInt gridDim = min(4096, nActive);
        Convolution_fp_bias<<<gridDim, blockDim, 0,
                              THCState_getCurrentStream(state)>>>(
            oF + i, b + i, op, op, nActive);
      }
    }
    uInt c = ip * op;
    RULEBOOKITERATOR(dConvolution_forward2<real>(
                         iF, oF, w, rbB, nHotB, ip, ip, op, op,
                         THCState_getCurrentStream(state));
                     , w += c; flops += nHotB * c;)
  }
  return flops;
}
// GPU backward pass of submanifold convolution; mirrors
// Convolution_backward but uses the submanifold rule book and nActive from
// inputSize.
extern "C" void scn_DR_(SubmanifoldConvolution_backward)(
    THLongTensor *inputSize, THLongTensor *filterSize, void **m,
    THCTensor *input_features, THCTensor *d_input_features,
    THCTensor *d_output_features, THCTensor *weight, THCTensor *d_weight,
    THCTensor *d_bias, long filterVolume) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
  auto _rules = _m.getSubmanifoldRuleBook(inputSize, filterSize, true);
  uInt nActive = _m.getNActive(inputSize);
  THCTensor_(resizeAs)(state, d_input_features, input_features);
  THCTensor_(zero)(state, d_input_features);
  if (nActive) {
    auto iF = THCTensor_(data)(state, input_features);
    auto diF = THCTensor_(data)(state, d_input_features);
    auto doF = THCTensor_(data)(state, d_output_features);
    auto ip = input_features->size[1];
    auto op = d_output_features->size[1];
    auto w = THCTensor_(data)(state, weight);
    auto dw = THCTensor_(data)(state, d_weight);
    // c = parameters per filter offset; w/dw advance by c per rule chunk.
    uInt c = ip * op;
    RULEBOOKITERATOR(dConvolution_backward_dW2<real>(
                         iF, diF, doF, w, dw, rbB, nHotB, ip, ip, op, op,
                         THCState_getCurrentStream(state));
                     , w += c; dw += c;)
    if (d_bias) {
      auto db = THCTensor_(data)(state, d_bias);
      Convolution_bp_bias(doF, db, op, op, nActive,
                          THCState_getCurrentStream(state));
    }
  }
}
// GPU forward pass of "full" convolution, which maps between two metadata
// objects: the rule book is built by the input metadata (_mIn) and also
// populates the output metadata (_mOut). The rulesBuffer parameter is
// accepted but unused here. Returns the multiply-accumulate count.
extern "C" double scn_DR_(FullConvolution_updateOutput)(
    THLongTensor *inputSize, THLongTensor *outputSize, THLongTensor *filterSize,
    THLongTensor *filterStride, void **mIn, void **mOut,
    THCTensor *input_features, THCTensor *output_features, THCTensor *weight,
    THCTensor *bias, long filterVolume, THCITensor *rulesBuffer) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, mIn)
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, mOut)
  auto _rules = _mIn.getFullConvolutionRuleBook(inputSize, outputSize,
                                                filterSize, filterStride, _mOut);
  uInt nActive = _mOut.getNActive(outputSize);
  THCTensor_(resize2d)(state, output_features, nActive, weight->size[1]);
  if (not bias)
    THCTensor_(zero)(state, output_features);
  double flops = 0;
  if (nActive) {
    auto iF = THCTensor_(data)(state, input_features);
    auto oF = THCTensor_(data)(state, output_features);
    auto ip = input_features->size[1];
    auto op = output_features->size[1];
    auto w = THCTensor_(data)(state, weight);
    if (bias) {
      auto b = THCTensor_(data)(state, bias);
      // Fill the output with the bias, 32 planes per kernel launch.
      for (uInt i = 0; i < op; i += 32) {
        uInt blockDim = min(32L, op - i);
        uInt gridDim = min(4096, nActive);
        Convolution_fp_bias<<<gridDim, blockDim, 0,
                              THCState_getCurrentStream(state)>>>(
            oF + i, b + i, op, op, nActive);
      }
    }
    uInt c = ip * op;
    RULEBOOKITERATOR(dConvolution_forward2<real>(
                         iF, oF, w, rbB, nHotB, ip, ip, op, op,
                         THCState_getCurrentStream(state));
                     , w += c; flops += nHotB * c;)
  }
  return flops;
}
// GPU backward pass of "full" convolution; rule book comes from the input
// metadata (_mIn) while nActive comes from the output metadata (_mOut).
// Structure mirrors Convolution_backward.
extern "C" void scn_DR_(FullConvolution_backward)(
    THLongTensor *inputSize, THLongTensor *outputSize, THLongTensor *filterSize,
    THLongTensor *filterStride, void **mIn, void **mOut,
    THCTensor *input_features, THCTensor *d_input_features,
    THCTensor *d_output_features, THCTensor *weight, THCTensor *d_weight,
    THCTensor *d_bias, long filterVolume) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, mIn)
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, mOut)
  auto _rules = _mIn.getFullConvolutionRuleBook(inputSize, outputSize,
                                                filterSize, filterStride, _mOut);
  uInt nActive = _mOut.getNActive(outputSize);
  THCTensor_(resizeAs)(state, d_input_features, input_features);
  THCTensor_(zero)(state, d_input_features);
  if (nActive) {
    auto iF = THCTensor_(data)(state, input_features);
    auto diF = THCTensor_(data)(state, d_input_features);
    auto doF = THCTensor_(data)(state, d_output_features);
    auto ip = input_features->size[1];
    auto op = d_output_features->size[1];
    auto w = THCTensor_(data)(state, weight);
    auto dw = THCTensor_(data)(state, d_weight);
    // c = parameters per filter offset; w/dw advance by c per rule chunk.
    uInt c = ip * op;
    RULEBOOKITERATOR(dConvolution_backward_dW2<real>(
                         iF, diF, doF, w, dw, rbB, nHotB, ip, ip, op, op,
                         THCState_getCurrentStream(state));
                     , w += c; dw += c;)
    if (d_bias) {
      auto db = THCTensor_(data)(state, d_bias);
      Convolution_bp_bias(doF, db, op, op, nActive,
                          THCState_getCurrentStream(state));
    }
  }
}
// GPU forward pass of convolution with randomized strides; identical in
// structure to Convolution_updateOutput except that the rule book comes
// from getRandomizedStrideRuleBook. rulesBuffer is accepted but unused.
// Returns the multiply-accumulate count.
extern "C" double scn_DR_(RandomizedStrideConvolution_updateOutput)(
    THLongTensor *inputSize, THLongTensor *outputSize, THLongTensor *filterSize,
    THLongTensor *filterStride, void **m, THCTensor *input_features,
    THCTensor *output_features, THCTensor *weight, THCTensor *bias,
    long filterVolume, THCITensor *rulesBuffer) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
  auto _rules = _m.getRandomizedStrideRuleBook(inputSize, outputSize,
                                               filterSize, filterStride, true);
  uInt nActive = _m.getNActive(outputSize);
  THCTensor_(resize2d)(state, output_features, nActive, weight->size[1]);
  if (not bias)
    THCTensor_(zero)(state, output_features);
  double flops = 0;
  if (nActive) {
    auto iF = THCTensor_(data)(state, input_features);
    auto oF = THCTensor_(data)(state, output_features);
    auto ip = input_features->size[1];
    auto op = output_features->size[1];
    auto w = THCTensor_(data)(state, weight);
    if (bias) {
      auto b = THCTensor_(data)(state, bias);
      // Fill the output with the bias, 32 planes per kernel launch.
      for (uInt i = 0; i < op; i += 32) {
        uInt blockDim = min(32L, op - i);
        uInt gridDim = min(4096, nActive);
        Convolution_fp_bias<<<gridDim, blockDim, 0,
                              THCState_getCurrentStream(state)>>>(
            oF + i, b + i, op, op, nActive);
      }
    }
    uInt c = ip * op;
    RULEBOOKITERATOR(dConvolution_forward2<real>(
                         iF, oF, w, rbB, nHotB, ip, ip, op, op,
                         THCState_getCurrentStream(state));
                     , w += c; flops += nHotB * c;)
  }
  return flops;
}
// GPU backward pass of randomized-stride convolution; mirrors
// Convolution_backward with the randomized-stride rule book.
extern "C" void scn_DR_(RandomizedStrideConvolution_backward)(
    THLongTensor *inputSize, THLongTensor *outputSize, THLongTensor *filterSize,
    THLongTensor *filterStride, void **m, THCTensor *input_features,
    THCTensor *d_input_features, THCTensor *d_output_features,
    THCTensor *weight, THCTensor *d_weight, THCTensor *d_bias,
    long filterVolume) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
  auto _rules = _m.getRandomizedStrideRuleBook(inputSize, outputSize,
                                               filterSize, filterStride, true);
  uInt nActive = _m.getNActive(outputSize);
  THCTensor_(resizeAs)(state, d_input_features, input_features);
  THCTensor_(zero)(state, d_input_features);
  if (nActive) {
    auto iF = THCTensor_(data)(state, input_features);
    auto diF = THCTensor_(data)(state, d_input_features);
    auto doF = THCTensor_(data)(state, d_output_features);
    auto ip = input_features->size[1];
    auto op = d_output_features->size[1];
    auto w = THCTensor_(data)(state, weight);
    auto dw = THCTensor_(data)(state, d_weight);
    // c = parameters per filter offset; w/dw advance by c per rule chunk.
    uInt c = ip * op;
    RULEBOOKITERATOR(dConvolution_backward_dW2<real>(
                         iF, diF, doF, w, dw, rbB, nHotB, ip, ip, op, op,
                         THCState_getCurrentStream(state));
                     , w += c; dw += c;)
    if (d_bias) {
      auto db = THCTensor_(data)(state, d_bias);
      Convolution_bp_bias(doF, db, op, op, nActive,
                          THCState_getCurrentStream(state));
    }
  }
}
#endif
sparseconvnet/SCN/generic/GPU/Deconvolution.cu
deleted
100644 → 0
View file @
6d4475db
// Copyright 2016-present, Facebook, Inc.
// All rights reserved.
//
// This source code is licensed under the license found in the
// LICENSE file in the root directory of this source tree.
#ifndef TH_GENERIC_FILE_
#define TH_GENERIC_FILE_ "generic/GPU/Deconvolution.cu"
#else
#include "Convolution.h"
#include "Deconvolution.h"
#include <algorithm>
// GPU forward pass of sparse deconvolution. The rule book is built with
// (outputSize, inputSize) swapped — the transpose of the convolution case.
// Returns the multiply-accumulate count (flops). Note: unlike the
// convolution kernels, this function has no `if (nActive)` guard; it
// proceeds unconditionally after resizing the output.
extern "C" double scn_DR_(Deconvolution_updateOutput)(
    THLongTensor *inputSize, THLongTensor *outputSize, THLongTensor *filterSize,
    THLongTensor *filterStride, void **m, THCTensor *input_features,
    THCTensor *output_features, THCTensor *weight, THCTensor *bias,
    long filterVolume) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
  auto _rules =
      _m.getRuleBook(outputSize, inputSize, filterSize, filterStride, true);
  uInt nActive = _m.getNActive(outputSize);
  THCTensor_(resize2d)(state, output_features, nActive, weight->size[1]);
  if (not bias)
    THCTensor_(zero)(state, output_features);
  auto iF = THCTensor_(data)(state, input_features);
  auto oF = THCTensor_(data)(state, output_features);
  auto ip = input_features->size[1];
  auto op = output_features->size[1];
  auto w = THCTensor_(data)(state, weight);
  double flops = 0;
  if (bias) {
    auto b = THCTensor_(data)(state, bias);
    // Fill the output with the bias, 32 planes per kernel launch.
    for (uInt i = 0; i < op; i += 32) {
      uInt blockDim = min(32L, op - i);
      uInt gridDim = min(4096, nActive);
      // Fixed: the launch was written "<< <gridDim, ...", which is not valid
      // CUDA launch syntax; it must be the single "<<<" token, matching
      // every other kernel launch in this file.
      Convolution_fp_bias<<<gridDim, blockDim, 0,
                            THCState_getCurrentStream(state)>>>(
          oF + i, b + i, op, op, nActive);
    }
  }
  // c = parameters per filter offset; w advances by c each rule chunk.
  uInt c = ip * op;
  RULEBOOKITERATOR(dDeconvolution_forward2<real>(
                       iF, oF, w, rbB, nHotB, ip, ip, op, op,
                       THCState_getCurrentStream(state));
                   , w += c; flops += nHotB * c;)
  return flops;
}
// GPU backward pass of sparse deconvolution: input gradients, weight
// gradients per rule chunk, and bias gradients when d_bias is given. Uses
// the same transposed (outputSize, inputSize) rule book as the forward
// pass, and like it has no `if (nActive)` guard.
extern "C" void scn_DR_(Deconvolution_backward)(
    THLongTensor *inputSize, THLongTensor *outputSize, THLongTensor *filterSize,
    THLongTensor *filterStride, void **m, THCTensor *input_features,
    THCTensor *d_input_features, THCTensor *d_output_features,
    THCTensor *weight, THCTensor *d_weight, THCTensor *d_bias,
    long filterVolume) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
  auto _rules =
      _m.getRuleBook(outputSize, inputSize, filterSize, filterStride, true);
  uInt nActive = _m.getNActive(outputSize);
  THCTensor_(resizeAs)(state, d_input_features, input_features);
  THCTensor_(zero)(state, d_input_features);
  auto iF = THCTensor_(data)(state, input_features);
  auto diF = THCTensor_(data)(state, d_input_features);
  auto doF = THCTensor_(data)(state, d_output_features);
  auto ip = input_features->size[1];
  auto op = d_output_features->size[1];
  auto w = THCTensor_(data)(state, weight);
  auto dw = THCTensor_(data)(state, d_weight);
  // c = parameters per filter offset; w/dw advance by c per rule chunk.
  uInt c = ip * op;
  RULEBOOKITERATOR(dDeconvolution_backward_dW2<real>(
                       iF, diF, doF, w, dw, rbB, nHotB, ip, ip, op, op,
                       THCState_getCurrentStream(state));
                   , w += c; dw += c;)
  if (d_bias) {
    auto db = THCTensor_(data)(state, d_bias);
    Convolution_bp_bias(doF, db, op, op, nActive,
                        THCState_getCurrentStream(state));
  }
}
#endif
sparseconvnet/SCN/generic/GPU/IOLayers.cu
deleted
100644 → 0
View file @
6d4475db
// Copyright 2016-present, Facebook, Inc.
// All rights reserved.
//
// This source code is licensed under the license found in the
// LICENSE file in the root directory of this source tree.
#ifndef TH_GENERIC_FILE_
#define TH_GENERIC_FILE_ "generic/GPU/IOLayers.cu"
#else
#include "IOLayers.h"
// Input-layer forward pass: registers the raw coordinates with the metadata
// (building _m.inputLayerRuleBook) and produces the initial feature matrix.
// mode == 0 copies the features through unchanged; other modes combine rows
// via the InputLayer_fp kernel (mode == 4 is passed to the kernel as a
// boolean flag).
extern "C" void scn_DR_(InputLayer_updateOutput)(
    void **m, THLongTensor *spatialSize, THLongTensor *input_coords,
    THCTensor *input_features, THCTensor *output_features, long batchSize,
    long mode) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
  // Populates _m.inputLayerRuleBook and _m.inputNActive for this input.
  _m.inputLayer(spatialSize, input_coords, batchSize, mode);
  uInt nPlanes = input_features->size[1];
  auto &rules = _m.inputLayerRuleBook;
  // rules[0] is a small header; [1] and [3] are read as maxActive / nRows.
  uInt maxActive = rules[0][1];
  uInt nRows = rules[0][3];
  if (mode == 0) {
    // One input row per active site: plain copy.
    THCTensor_(resizeAs)(state, output_features, input_features);
    THCTensor_(copy)(state, output_features, input_features);
  } else {
    THCTensor_(resize2d)(state, output_features, *_m.inputNActive, nPlanes);
    THCTensor_(zero)(state, output_features);
    // Stage the host-side rule book (rules[1]) in a temporary device buffer.
    // NOTE(review): resize1d receives sizeof(uInt)*size() as an ELEMENT
    // count, which over-allocates unless the tensor element is one byte —
    // harmless but worth confirming.
    auto rulesBuffer = THCITensor_(new)(state);
    THCITensor_(resize1d)(state, rulesBuffer, sizeof(uInt) * rules[1].size());
    // Device pointers taken after the resizes above.
    auto iF = THCTensor_(data)(state, input_features);
    auto oF = THCTensor_(data)(state, output_features);
    auto rb = (uInt *)THCITensor_(data)(state, rulesBuffer);
    cudaMemcpy(rb, &rules[1][0], sizeof(uInt) * rules[1].size(),
               cudaMemcpyHostToDevice);
    InputLayer_fp<real><<<std::min(nRows, 32768U), std::min(nPlanes, 32U), 0,
                          THCState_getCurrentStream(state)>>>(
        iF, oF, nRows, maxActive, nPlanes, rb, mode == 4);
    THCITensor_(free)(state, rulesBuffer);
  }
}
// Input-layer backward pass: routes d_output_features back to the original
// input rows using the rule book built in InputLayer_updateOutput.
// mode is recovered from the rule-book header rather than passed in.
extern "C" void scn_DR_(InputLayer_updateGradInput)(
    void **m, THCTensor *d_input_features, THCTensor *d_output_features) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
  auto &rules = _m.inputLayerRuleBook;
  uInt nPlanes = d_output_features->size[1];
  // Header fields stored by the forward pass: mode, maxActive, nRows.
  auto mode = rules[0][0];
  uInt maxActive = rules[0][1];
  uInt nRows = rules[0][3];
  if (mode == 0) {
    // Forward was a copy, so the gradient is a copy too.
    THCTensor_(resizeAs)(state, d_input_features, d_output_features);
    THCTensor_(copy)(state, d_input_features, d_output_features);
  } else {
    // rules[0][2] is the original number of input rows.
    THCTensor_(resize2d)(state, d_input_features, rules[0][2], nPlanes);
    THCTensor_(zero)(state, d_input_features);
    // Stage the host-side rule book on the device (see NOTE in
    // InputLayer_updateOutput about the resize1d element count).
    auto rulesBuffer = THCITensor_(new)(state);
    THCITensor_(resize1d)(state, rulesBuffer, sizeof(uInt) * rules[1].size());
    auto diF = THCTensor_(data)(state, d_input_features);
    auto doF = THCTensor_(data)(state, d_output_features);
    auto rb = (uInt *)THCITensor_(data)(state, rulesBuffer);
    cudaMemcpy(rb, &rules[1][0], sizeof(uInt) * rules[1].size(),
               cudaMemcpyHostToDevice);
    InputLayer_bp<real><<<std::min(nRows, 32768U), std::min(nPlanes, 32U), 0,
                          THCState_getCurrentStream(state)>>>(
        diF, doF, nRows, maxActive, nPlanes, rb, mode == 4);
    THCITensor_(free)(state, rulesBuffer);
  }
}
// Output-layer forward pass: maps active-site features back to the original
// input-row order. Implemented by running the input layer's BACKWARD kernel
// (InputLayer_bp) with input/output swapped, since the output layer is the
// inverse mapping of the input layer.
extern "C" void scn_DR_(OutputLayer_updateOutput)(
    void **m, THCTensor *input_features, THCTensor *output_features) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
  auto &rules = _m.inputLayerRuleBook;
  uInt nPlanes = input_features->size[1];
  // Header fields stored by the input layer: mode, maxActive, nRows.
  auto mode = rules[0][0];
  auto maxActive = rules[0][1];
  auto nRows = rules[0][3];
  if (mode == 0) {
    THCTensor_(resizeAs)(state, output_features, input_features);
    THCTensor_(copy)(state, output_features, input_features);
  } else {
    // rules[0][2] is the original number of input rows.
    THCTensor_(resize2d)(state, output_features, rules[0][2], nPlanes);
    THCTensor_(zero)(state, output_features);
    // Stage the host-side rule book on the device (see NOTE in
    // InputLayer_updateOutput about the resize1d element count).
    auto rulesBuffer = THCITensor_(new)(state);
    THCITensor_(resize1d)(state, rulesBuffer, sizeof(uInt) * rules[1].size());
    auto iF = THCTensor_(data)(state, input_features);
    auto oF = THCTensor_(data)(state, output_features);
    auto rb = (uInt *)THCITensor_(data)(state, rulesBuffer);
    cudaMemcpy(rb, &rules[1][0], sizeof(uInt) * rules[1].size(),
               cudaMemcpyHostToDevice);
    // Note the (oF, iF) argument order and the hard-coded `false` flag.
    InputLayer_bp<real><<<std::min(nRows, 32768U), std::min(nPlanes, 32U), 0,
                          THCState_getCurrentStream(state)>>>(
        oF, iF, nRows, maxActive, nPlanes, rb, false);
    THCITensor_(free)(state, rulesBuffer);
  }
}
// Output-layer backward pass: the inverse of OutputLayer_updateOutput,
// implemented with the input layer's FORWARD kernel (InputLayer_fp) and
// swapped gradient arguments.
extern "C" void scn_DR_(OutputLayer_updateGradInput)(
    void **m, THCTensor *d_input_features, THCTensor *d_output_features) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
  auto &rules = _m.inputLayerRuleBook;
  uInt nPlanes = d_output_features->size[1];
  // Header fields stored by the input layer: mode, maxActive, nRows.
  auto mode = rules[0][0];
  auto maxActive = rules[0][1];
  auto nRows = rules[0][3];
  if (mode == 0) {
    THCTensor_(resizeAs)(state, d_input_features, d_output_features);
    THCTensor_(copy)(state, d_input_features, d_output_features);
  } else {
    THCTensor_(resize2d)(state, d_input_features, nRows, nPlanes);
    THCTensor_(zero)(state, d_input_features);
    // Stage the host-side rule book on the device (see NOTE in
    // InputLayer_updateOutput about the resize1d element count).
    auto rulesBuffer = THCITensor_(new)(state);
    THCITensor_(resize1d)(state, rulesBuffer, sizeof(uInt) * rules[1].size());
    auto diF = THCTensor_(data)(state, d_input_features);
    auto doF = THCTensor_(data)(state, d_output_features);
    auto rb = (uInt *)THCITensor_(data)(state, rulesBuffer);
    cudaMemcpy(rb, &rules[1][0], sizeof(uInt) * rules[1].size(),
               cudaMemcpyHostToDevice);
    // Note the (doF, diF) argument order and the hard-coded `false` flag.
    InputLayer_fp<real><<<std::min(nRows, 32768U), std::min(nPlanes, 32U), 0,
                          THCState_getCurrentStream(state)>>>(
        doF, diF, nRows, maxActive, nPlanes, rb, false);
    THCITensor_(free)(state, rulesBuffer);
  }
}
// Batch-list ("BL") input-layer forward pass: input_features is a 3-d
// (batch, length, nPlanes) tensor that is flattened to the 2-d
// (nActive, nPlanes) sparse layout. mode == 0 reshapes/copies; other modes
// combine rows via InputLayer_fp (mode == 4 is passed to the kernel as a
// boolean flag).
extern "C" void scn_DR_(BLInputLayer_updateOutput)(
    void **m, THLongTensor *spatialSize, THLongTensor *input_coords,
    THCTensor *input_features, THCTensor *output_features, long mode) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
  // Builds _m.blLayerRuleBook and _m.inputNActive for this input.
  _m.blLayer(spatialSize, input_coords, mode);
  // Planes are the last axis of the 3-d input.
  uInt nPlanes = input_features->size[2];
  THCTensor_(resize2d)(state, output_features, *_m.inputNActive, nPlanes);
  THCTensor_(zero)(state, output_features);
  auto &rules = _m.blLayerRuleBook;
  // Header fields: maxActive at [1], nRows at [4] (BL layout differs from
  // the plain input layer, which keeps nRows at [3]).
  uInt maxActive = rules[0][1];
  uInt nRows = rules[0][4];
  if (mode == 0) {
    // Copy through, then flatten the 3-d shape to (nActive, nPlanes).
    THCTensor_(resizeAs)(state, output_features, input_features);
    THCTensor_(copy)(state, output_features, input_features);
    THCTensor_(resize2d)(state, output_features, *_m.inputNActive, nPlanes);
  } else {
    // Stage the host-side rule book on the device.
    // NOTE(review): resize1d receives sizeof(uInt)*size() as an ELEMENT
    // count, which over-allocates unless the tensor element is one byte.
    auto rulesBuffer = THCITensor_(new)(state);
    THCITensor_(resize1d)(state, rulesBuffer, sizeof(uInt) * rules[1].size());
    auto iF = THCTensor_(data)(state, input_features);
    auto oF = THCTensor_(data)(state, output_features);
    auto rb = (uInt *)THCITensor_(data)(state, rulesBuffer);
    cudaMemcpy(rb, &rules[1][0], sizeof(uInt) * rules[1].size(),
               cudaMemcpyHostToDevice);
    InputLayer_fp<real><<<std::min(nRows, 32768U), std::min(nPlanes, 32U), 0,
                          THCState_getCurrentStream(state)>>>(
        iF, oF, nRows, maxActive, nPlanes, rb, mode == 4);
    THCITensor_(free)(state, rulesBuffer);
  }
}
// Batch-list input-layer backward pass: scatters d_output_features back to
// the 3-d (rules[0][2], rules[0][3], nPlanes) input shape using the rule
// book built by BLInputLayer_updateOutput.
extern "C" void scn_DR_(BLInputLayer_updateGradInput)(
    void **m, THCTensor *d_input_features, THCTensor *d_output_features) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
  auto &rules = _m.blLayerRuleBook;
  uInt nPlanes = d_output_features->size[1];
  // Header fields stored by the forward pass: mode, maxActive, nRows.
  uInt mode = rules[0][0];
  uInt maxActive = rules[0][1];
  uInt nRows = rules[0][4];
  if (mode == 0) {
    // Forward was a reshape/copy; copy back and restore the 3-d shape.
    THCTensor_(resizeAs)(state, d_input_features, d_output_features);
    THCTensor_(copy)(state, d_input_features, d_output_features);
    THCTensor_(resize3d)(state, d_input_features, rules[0][2], rules[0][3],
                         nPlanes);
  } else {
    THCTensor_(resize3d)(state, d_input_features, rules[0][2], rules[0][3],
                         nPlanes);
    THCTensor_(zero)(state, d_input_features);
    // Stage the host-side rule book on the device (see NOTE in
    // BLInputLayer_updateOutput about the resize1d element count).
    auto rulesBuffer = THCITensor_(new)(state);
    THCITensor_(resize1d)(state, rulesBuffer, sizeof(uInt) * rules[1].size());
    auto diF = THCTensor_(data)(state, d_input_features);
    auto doF = THCTensor_(data)(state, d_output_features);
    auto rb = (uInt *)THCITensor_(data)(state, rulesBuffer);
    cudaMemcpy(rb, &rules[1][0], sizeof(uInt) * rules[1].size(),
               cudaMemcpyHostToDevice);
    InputLayer_bp<real><<<std::min(nRows, 32768U), std::min(nPlanes, 32U), 0,
                          THCState_getCurrentStream(state)>>>(
        diF, doF, nRows, maxActive, nPlanes, rb, mode == 4);
    THCITensor_(free)(state, rulesBuffer);
  }
}
// Batch-list output-layer forward pass: expands the 2-d sparse features back
// to the 3-d (rules[0][2], rules[0][3], nPlanes) batch-list shape. Uses the
// input layer's BACKWARD kernel (InputLayer_bp) with swapped arguments,
// since the output layer inverts the input-layer mapping.
extern "C" void scn_DR_(BLOutputLayer_updateOutput)(
    void **m, THCTensor *input_features, THCTensor *output_features) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
  auto &rules = _m.blLayerRuleBook;
  uInt nPlanes = input_features->size[1];
  // Header fields stored by the BL input layer: mode, maxActive, nRows.
  auto mode = rules[0][0];
  uInt maxActive = rules[0][1];
  uInt nRows = rules[0][4];
  if (mode == 0) {
    // Copy through, then restore the 3-d batch-list shape.
    THCTensor_(resizeAs)(state, output_features, input_features);
    THCTensor_(copy)(state, output_features, input_features);
    THCTensor_(resize3d)(state, output_features, rules[0][2], rules[0][3],
                         nPlanes);
  } else {
    THCTensor_(resize3d)(state, output_features, rules[0][2], rules[0][3],
                         nPlanes);
    THCTensor_(zero)(state, output_features);
    // Stage the host-side rule book on the device (see NOTE in
    // BLInputLayer_updateOutput about the resize1d element count).
    auto rulesBuffer = THCITensor_(new)(state);
    THCITensor_(resize1d)(state, rulesBuffer, sizeof(uInt) * rules[1].size());
    auto iF = THCTensor_(data)(state, input_features);
    auto oF = THCTensor_(data)(state, output_features);
    auto rb = (uInt *)THCITensor_(data)(state, rulesBuffer);
    cudaMemcpy(rb, &rules[1][0], sizeof(uInt) * rules[1].size(),
               cudaMemcpyHostToDevice);
    // Note the (oF, iF) argument order and the hard-coded `false` flag.
    InputLayer_bp<real><<<std::min(nRows, 32768U), std::min(nPlanes, 32U), 0,
                          THCState_getCurrentStream(state)>>>(
        oF, iF, nRows, maxActive, nPlanes, rb, false);
    THCITensor_(free)(state, rulesBuffer);
  }
}
// Batch-list output-layer backward pass: flattens the 3-d gradient back to
// the 2-d (nRows, nPlanes) sparse layout, using the input layer's FORWARD
// kernel (InputLayer_fp) with swapped gradient arguments.
extern "C" void scn_DR_(BLOutputLayer_updateGradInput)(
    void **m, THCTensor *d_input_features, THCTensor *d_output_features) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
  auto &rules = _m.blLayerRuleBook;
  // Planes are the last axis of the 3-d output gradient.
  uInt nPlanes = d_output_features->size[2];
  // Header fields stored by the BL input layer: mode, maxActive, nRows.
  uInt mode = rules[0][0];
  uInt maxActive = rules[0][1];
  uInt nRows = rules[0][4];
  if (mode == 0) {
    // Copy through, then flatten to the 2-d sparse shape.
    THCTensor_(resizeAs)(state, d_input_features, d_output_features);
    THCTensor_(copy)(state, d_input_features, d_output_features);
    THCTensor_(resize2d)(state, d_input_features, nRows, nPlanes);
  } else {
    THCTensor_(resize2d)(state, d_input_features, nRows, nPlanes);
    THCTensor_(zero)(state, d_input_features);
    // Stage the host-side rule book on the device (see NOTE in
    // BLInputLayer_updateOutput about the resize1d element count).
    auto rulesBuffer = THCITensor_(new)(state);
    THCITensor_(resize1d)(state, rulesBuffer, sizeof(uInt) * rules[1].size());
    auto diF = THCTensor_(data)(state, d_input_features);
    auto doF = THCTensor_(data)(state, d_output_features);
    auto rb = (uInt *)THCITensor_(data)(state, rulesBuffer);
    cudaMemcpy(rb, &rules[1][0], sizeof(uInt) * rules[1].size(),
               cudaMemcpyHostToDevice);
    // Note the (doF, diF) argument order and the hard-coded `false` flag.
    InputLayer_fp<real><<<std::min(nRows, 32768U), std::min(nPlanes, 32U), 0,
                          THCState_getCurrentStream(state)>>>(
        doF, diF, nRows, maxActive, nPlanes, rb, false);
    THCITensor_(free)(state, rulesBuffer);
  }
}
#endif
sparseconvnet/SCN/generic/GPU/LeakyReLU.cu
deleted
100644 → 0
View file @
6d4475db
// Copyright 2016-present, Facebook, Inc.
// All rights reserved.
//
// This source code is licensed under the license found in the
// LICENSE file in the root directory of this source tree.
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/GPU/LeakyReLU.cu"
#else
#include "LeakyReLU.h"
// Elementwise leaky-ReLU forward pass on the GPU.
// Supports in-place operation: when input_features == output_features the
// resize is skipped and the kernel reads and writes the same buffer.
// `alpha` is forwarded to the LeakyReLU_fp kernel unchanged.
extern "C" void scn_R_(LeakyReLU_updateOutput)(THCTensor *input_features,
                                               THCTensor *output_features,
                                               float alpha) {
  const bool inPlace = (input_features == output_features);
  if (!inPlace)
    THCTensor_(resizeAs)(state, output_features, input_features);
  // Total element count, passed to the kernel; the launch configuration is a
  // fixed 16 x 1024 grid regardless of size.
  auto elementCount = THCTensor_(nElement)(state, input_features);
  // Device pointers, taken after the (possible) resize above.
  auto src = THCTensor_(data)(state, input_features);
  auto dst = THCTensor_(data)(state, output_features);
  LeakyReLU_fp<real><<<16, 1024, 0, THCState_getCurrentStream(state)>>>(
      src, dst, elementCount, alpha);
}
// Elementwise leaky-ReLU backward pass on the GPU.
// The kernel needs the original input_features to decide which slope was
// taken in the forward pass. Supports in-place gradients: when
// d_input_features == d_output_features the resize is skipped.
extern "C" void scn_R_(LeakyReLU_updateGradInput)(THCTensor *input_features,
                                                  THCTensor *d_input_features,
                                                  THCTensor *d_output_features,
                                                  float alpha) {
  const bool inPlace = (d_input_features == d_output_features);
  if (!inPlace)
    THCTensor_(resizeAs)(state, d_input_features, d_output_features);
  // Total element count, passed to the kernel; the launch configuration is a
  // fixed 16 x 1024 grid regardless of size.
  auto elementCount = THCTensor_(nElement)(state, d_input_features);
  // Device pointers, taken after the (possible) resize above.
  auto fwdIn = THCTensor_(data)(state, input_features);
  auto gradIn = THCTensor_(data)(state, d_input_features);
  auto gradOut = THCTensor_(data)(state, d_output_features);
  LeakyReLU_bp<real><<<16, 1024, 0, THCState_getCurrentStream(state)>>>(
      fwdIn, gradIn, gradOut, elementCount, alpha);
}
#endif
sparseconvnet/SCN/generic/GPU/MaxPooling.cu
deleted
100644 → 0
View file @
6d4475db
// Copyright 2016-present, Facebook, Inc.
// All rights reserved.
//
// This source code is licensed under the license found in the
// LICENSE file in the root directory of this source tree.
#ifndef TH_GENERIC_FILE_
#define TH_GENERIC_FILE_ "generic/GPU/MaxPooling.cu"
#else
#include "MaxPooling.h"
#include "RuleBookIterator.h"
// Sparse max-pooling forward pass. The first nFeaturesToDrop feature planes
// are excluded from pooling (the input pointer is offset past them).
extern "C" void scn_DR_(MaxPooling_updateOutput)(
    THLongTensor *inputSize, THLongTensor *outputSize, THLongTensor *poolSize,
    THLongTensor *poolStride, void **m, THCTensor *input_features,
    THCTensor *output_features, long nFeaturesToDrop) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
  // Planes actually pooled; the full row widths are still passed to the
  // kernel below as row strides.
  uInt nPlanes = input_features->size[1] - nFeaturesToDrop;
  auto _rules =
      _m.getRuleBook(inputSize, outputSize, poolSize, poolStride, true);
  uInt nActive = _m.getNActive(outputSize);
  THCTensor_(resize2d)(state, output_features, nActive, nPlanes);
  THCTensor_(zero)(state, output_features);
  // Input pointer offset past the dropped planes (pointer taken after the
  // resizes above).
  auto iF = THCTensor_(data)(state, input_features) + nFeaturesToDrop;
  auto oF = THCTensor_(data)(state, output_features);
  // Second macro argument is empty: no per-batch pointer adjustment needed.
  RULEBOOKITERATOR(MaxPooling_ForwardPass<real>(
                       THCState_getCurrentStream(state), iF, oF, nPlanes,
                       input_features->size[1], output_features->size[1],
                       rbB, nHotB);
                   , )
}
// Sparse max-pooling backward pass: routes each output gradient back to the
// argmax input row. Needs the forward input AND output to re-identify the
// maxima.
// NOTE(review): unlike the forward pass, the data pointers here are NOT
// offset by nFeaturesToDrop — presumably the backward kernel applies the
// offset itself; confirm against MaxPooling_BackwardPass.
extern "C" void scn_DR_(MaxPooling_updateGradInput)(
    THLongTensor *inputSize, THLongTensor *outputSize, THLongTensor *poolSize,
    THLongTensor *poolStride, void **m, THCTensor *input_features,
    THCTensor *d_input_features, THCTensor *output_features,
    THCTensor *d_output_features, long nFeaturesToDrop) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
  uInt nPlanes = input_features->size[1] - nFeaturesToDrop;
  auto _rules =
      _m.getRuleBook(inputSize, outputSize, poolSize, poolStride, true);
  // NOTE(review): nActive is unused below; getNActive looks like a pure
  // accessor, kept for symmetry with the forward pass.
  uInt nActive = _m.getNActive(outputSize);
  // Gradients are accumulated, so d_input must start at zero.
  THCTensor_(resizeAs)(state, d_input_features, input_features);
  THCTensor_(zero)(state, d_input_features);
  auto iF = THCTensor_(data)(state, input_features);
  auto oF = THCTensor_(data)(state, output_features);
  auto diF = THCTensor_(data)(state, d_input_features);
  auto doF = THCTensor_(data)(state, d_output_features);
  RULEBOOKITERATOR(MaxPooling_BackwardPass<real>(
                       THCState_getCurrentStream(state), iF, diF, oF, doF,
                       nPlanes, input_features->size[1],
                       d_output_features->size[1], rbB, nHotB);
                   , )
}
// Randomized-stride variant of max-pooling forward: identical to
// MaxPooling_updateOutput except the rule book comes from
// getRandomizedStrideRuleBook (random pooling-window offsets).
extern "C" void scn_DR_(RandomizedStrideMaxPooling_updateOutput)(
    THLongTensor *inputSize, THLongTensor *outputSize, THLongTensor *poolSize,
    THLongTensor *poolStride, void **m, THCTensor *input_features,
    THCTensor *output_features, long nFeaturesToDrop) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
  // Planes actually pooled; the first nFeaturesToDrop planes are skipped.
  uInt nPlanes = input_features->size[1] - nFeaturesToDrop;
  auto _rules = _m.getRandomizedStrideRuleBook(inputSize, outputSize, poolSize,
                                               poolStride, true);
  uInt nActive = _m.getNActive(outputSize);
  THCTensor_(resize2d)(state, output_features, nActive, nPlanes);
  THCTensor_(zero)(state, output_features);
  // Input pointer offset past the dropped planes (taken after the resizes).
  auto iF = THCTensor_(data)(state, input_features) + nFeaturesToDrop;
  auto oF = THCTensor_(data)(state, output_features);
  RULEBOOKITERATOR(MaxPooling_ForwardPass<real>(
                       THCState_getCurrentStream(state), iF, oF, nPlanes,
                       input_features->size[1], output_features->size[1],
                       rbB, nHotB);
                   , )
}
// Randomized-stride variant of max-pooling backward: identical to
// MaxPooling_updateGradInput except the rule book comes from
// getRandomizedStrideRuleBook (must match the forward pass's random offsets).
extern "C" void scn_DR_(RandomizedStrideMaxPooling_updateGradInput)(
    THLongTensor *inputSize, THLongTensor *outputSize, THLongTensor *poolSize,
    THLongTensor *poolStride, void **m, THCTensor *input_features,
    THCTensor *d_input_features, THCTensor *output_features,
    THCTensor *d_output_features, long nFeaturesToDrop) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
  uInt nPlanes = input_features->size[1] - nFeaturesToDrop;
  auto _rules = _m.getRandomizedStrideRuleBook(inputSize, outputSize, poolSize,
                                               poolStride, true);
  // NOTE(review): nActive is unused below; kept for symmetry with the
  // forward pass.
  uInt nActive = _m.getNActive(outputSize);
  // Gradients are accumulated, so d_input must start at zero.
  THCTensor_(resizeAs)(state, d_input_features, input_features);
  THCTensor_(zero)(state, d_input_features);
  auto iF = THCTensor_(data)(state, input_features);
  auto oF = THCTensor_(data)(state, output_features);
  auto diF = THCTensor_(data)(state, d_input_features);
  auto doF = THCTensor_(data)(state, d_output_features);
  RULEBOOKITERATOR(MaxPooling_BackwardPass<real>(
                       THCState_getCurrentStream(state), iF, diF, oF, doF,
                       nPlanes, input_features->size[1],
                       d_output_features->size[1], rbB, nHotB);
                   , )
}
#endif
sparseconvnet/SCN/generic/GPU/NetworkInNetwork.cu
deleted
100644 → 0
View file @
6d4475db
// Copyright 2016-present, Facebook, Inc.
// All rights reserved.
//
// This source code is licensed under the license found in the
// LICENSE file in the root directory of this source tree.
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/GPU/NetworkInNetwork.cu"
#else
#include "Convolution.h"
#include <algorithm>
// 1x1 ("network in network") convolution forward pass:
//   output_features = input_features * weight (+ bias)
// as a single GEMM over all active sites.
// Returns nActive * input_nPlanes * output_nPlanes (a multiply count, used
// by the caller as a flop statistic).
extern "C" double scn_R_(NetworkInNetwork_updateOutput)(
    THCTensor *input_features_, THCTensor *output_features_,
    THCTensor *weight_, THCTensor *bias_) {
  auto nActive = input_features_->size[0];
  auto input_nPlanes = weight_->size[0];
  auto output_nPlanes = weight_->size[1];
  THCTensor_(resize2d)(state, output_features_, nActive, output_nPlanes);
  // Device pointers, taken after the resize above.
  auto input_features = THCTensor_(data)(state, input_features_);
  auto output_features = THCTensor_(data)(state, output_features_);
  auto weight = THCTensor_(data)(state, weight_);
  if (bias_ != nullptr) {
    auto bias = THCTensor_(data)(state, bias_);
    // Broadcast the bias into output_features first, 32 planes per launch;
    // the GEMM below then accumulates on top of it (beta = 1).
    for (uInt i = 0; i < output_nPlanes; i += 32) {
      uInt blockDim = min(32L, output_nPlanes - i);
      uInt gridDim = min(4096L, nActive);
      Convolution_fp_bias<<<gridDim, blockDim, 0,
                            THCState_getCurrentStream(state)>>>(
          output_features + i, bias + i, output_nPlanes, output_nPlanes,
          nActive);
    }
    // Do GEMM (note: gemm assumes column-major matrices)
    // buffer is l*m (row-major)
    // weight is m*r (row-major)
    // output_features is l*r (row-major)
    // buffer * weights + bias -> output_features
    THBLAS_GEMM(state, 'n', 'n',
                output_nPlanes, // r
                nActive,        // l
                input_nPlanes,  // m
                1,              // alpha
                weight,
                output_nPlanes, // r
                input_features,
                input_nPlanes,  // m
                1,              // beta
                output_features,
                output_nPlanes  // r
                );
  } else {
    // No bias: zero the output and overwrite it with the product (beta = 0).
    THCTensor_(zero)(state, output_features_);
    THBLAS_GEMM(state, 'n', 'n',
                output_nPlanes, // r
                nActive,        // l
                input_nPlanes,  // m
                1,              // alpha
                weight,
                output_nPlanes, // r
                input_features,
                input_nPlanes,  // m
                0,              // beta
                output_features,
                output_nPlanes  // r
                );
  }
  return nActive * input_nPlanes * output_nPlanes;
}
// 1x1 convolution backward pass w.r.t. the input:
//   d_input_features = d_output_features * weight^T
// as a single GEMM over all active sites.
extern "C" void scn_R_(NetworkInNetwork_updateGradInput)(
    THCTensor *d_input_features_, THCTensor *d_output_features_,
    THCTensor *weight_) {
  auto nActive = d_output_features_->size[0];
  auto input_nPlanes = weight_->size[0];
  auto output_nPlanes = weight_->size[1];
  THCTensor_(resize2d)(state, d_input_features_, nActive, input_nPlanes);
  // NOTE(review): the zero() is redundant with beta = 0 in the GEMM below,
  // but harmless.
  THCTensor_(zero)(state, d_input_features_);
  // Device pointers, taken after the resize above.
  auto d_input_features = THCTensor_(data)(state, d_input_features_);
  auto d_output_features = THCTensor_(data)(state, d_output_features_);
  auto weight = THCTensor_(data)(state, weight_);
  // Do GEMM (note: gemm assumes column-major matrices)
  // d_output_features is l*m (row-major)
  // weights is r*m (row-major)
  // d_buffer is l*r (row-major)
  // d_output_features * T(weight) -> d_buffer
  THBLAS_GEMM(state, 't', 'n',
              input_nPlanes,  // r
              nActive,        // l
              output_nPlanes, // m
              1,              // alpha
              weight,
              output_nPlanes, // m
              d_output_features,
              output_nPlanes, // m
              0,              // beta
              d_input_features,
              input_nPlanes   // r
              );
}
// 1x1 convolution parameter-gradient accumulation:
//   d_weight += input_features^T * d_output_features   (GEMM, beta = 1)
//   d_bias   += reduction of d_output_features          (if d_bias_ given)
// Accumulates (does not overwrite), matching torch accGradParameters
// semantics.
extern "C" void scn_R_(NetworkInNetwork_accGradParameters)(
    THCTensor *input_features_, THCTensor *d_output_features_,
    THCTensor *d_weight_, THCTensor *d_bias_) {
  auto nActive = input_features_->size[0];
  auto input_nPlanes = d_weight_->size[0];
  auto output_nPlanes = d_weight_->size[1];
  auto input_features = THCTensor_(data)(state, input_features_);
  auto d_output_features = THCTensor_(data)(state, d_output_features_);
  auto d_weight = THCTensor_(data)(state, d_weight_);
  // Do GEMM (note: gemm assumes column-major matrices)
  // buffer is m*l (row-major)
  // d_output_features is m*r (row-major)
  // weights is l*r (row-major)
  // T(buffer) * d_output_features -> d_weight
  THBLAS_GEMM(state, 'n', 't',
              output_nPlanes, // r
              input_nPlanes,  // l
              nActive,        // m
              1,              // alpha
              d_output_features,
              output_nPlanes, // r
              input_features,
              input_nPlanes,  // l
              1,              // beta
              d_weight,
              output_nPlanes  // r
              );
  if (d_bias_) {
    auto d_bias = THCTensor_(data)(state, d_bias_);
    // Bias gradient — presumably reduces d_output_features over the nActive
    // rows (kernel body not visible here).
    Convolution_bp_bias(d_output_features, d_bias, output_nPlanes,
                        output_nPlanes, nActive,
                        THCState_getCurrentStream(state));
  }
}
#endif
Prev
1
2
3
4
5
6
7
8
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment