Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
SparseConvNet
Commits
2c4ed608
"pcdet/vscode:/vscode.git/clone" did not exist on "cddcf9ba4ec4a9f16f3b303723895efc870c8c2f"
Commit
2c4ed608
authored
Jun 20, 2018
by
Benjamin Thomas Graham
Browse files
Goodbye THNN. Hello ATen!
parent
6d4475db
Changes
145
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
0 additions
and
2134 deletions
+0
-2134
sparseconvnet/SCN/generic/CPU/Deconvolution.h
sparseconvnet/SCN/generic/CPU/Deconvolution.h
+0
-128
sparseconvnet/SCN/generic/CPU/IOLayers.cpp
sparseconvnet/SCN/generic/CPU/IOLayers.cpp
+0
-181
sparseconvnet/SCN/generic/CPU/LeakyReLU.cpp
sparseconvnet/SCN/generic/CPU/LeakyReLU.cpp
+0
-37
sparseconvnet/SCN/generic/CPU/MaxPooling.cpp
sparseconvnet/SCN/generic/CPU/MaxPooling.cpp
+0
-110
sparseconvnet/SCN/generic/CPU/NetworkInNetwork.cpp
sparseconvnet/SCN/generic/CPU/NetworkInNetwork.cpp
+0
-124
sparseconvnet/SCN/generic/CPU/NetworkInNetwork.h
sparseconvnet/SCN/generic/CPU/NetworkInNetwork.h
+0
-128
sparseconvnet/SCN/generic/CPU/SparseToDense.cpp
sparseconvnet/SCN/generic/CPU/SparseToDense.cpp
+0
-61
sparseconvnet/SCN/generic/CPU/THGenerateDimFloatTypes.h
sparseconvnet/SCN/generic/CPU/THGenerateDimFloatTypes.h
+0
-63
sparseconvnet/SCN/generic/CPU/THGenerateFloatTypes.h
sparseconvnet/SCN/generic/CPU/THGenerateFloatTypes.h
+0
-37
sparseconvnet/SCN/generic/CPU/UnPooling.cpp
sparseconvnet/SCN/generic/CPU/UnPooling.cpp
+0
-60
sparseconvnet/SCN/generic/GPU/ActivePooling.cu
sparseconvnet/SCN/generic/GPU/ActivePooling.cu
+0
-73
sparseconvnet/SCN/generic/GPU/AffineReluTrivialConvolution.cu
...seconvnet/SCN/generic/GPU/AffineReluTrivialConvolution.cu
+0
-50
sparseconvnet/SCN/generic/GPU/AveragePooling.cu
sparseconvnet/SCN/generic/GPU/AveragePooling.cu
+0
-58
sparseconvnet/SCN/generic/GPU/BatchNormalization.cu
sparseconvnet/SCN/generic/GPU/BatchNormalization.cu
+0
-94
sparseconvnet/SCN/generic/GPU/Convolution.cu
sparseconvnet/SCN/generic/GPU/Convolution.cu
+0
-313
sparseconvnet/SCN/generic/GPU/Deconvolution.cu
sparseconvnet/SCN/generic/GPU/Deconvolution.cu
+0
-87
sparseconvnet/SCN/generic/GPU/IOLayers.cu
sparseconvnet/SCN/generic/GPU/IOLayers.cu
+0
-250
sparseconvnet/SCN/generic/GPU/LeakyReLU.cu
sparseconvnet/SCN/generic/GPU/LeakyReLU.cu
+0
-36
sparseconvnet/SCN/generic/GPU/MaxPooling.cu
sparseconvnet/SCN/generic/GPU/MaxPooling.cu
+0
-103
sparseconvnet/SCN/generic/GPU/NetworkInNetwork.cu
sparseconvnet/SCN/generic/GPU/NetworkInNetwork.cu
+0
-141
No files found.
sparseconvnet/SCN/generic/CPU/Deconvolution.h
deleted
100644 → 0
View file @
6d4475db
// Copyright 2016-present, Facebook, Inc.
// All rights reserved.
//
// This source code is licensed under the license found in the
// LICENSE file in the root directory of this source tree.
#ifndef CPU_Deconvolution_H
#define CPU_Deconvolution_H
#include "../SparseConvNet.h"
#include <cstring>
// buffer must have size >= nHot * (nIn+nOut)
template
<
typename
T
>
void
Deconvolution_ForwardPass
(
T
*
input_features
,
uInt
input_nPlanes
,
uInt
input_nPLANES
,
T
*
output_features
,
uInt
output_nPlanes
,
uInt
output_nPLANES
,
T
*
weight
,
T
*
bias
,
RuleBook
&
rules
,
uInt
output_nActive
,
void
(
*
gemm
)(
char
transa
,
char
transb
,
long
m
,
long
n
,
long
k
,
T
alpha
,
T
*
a
,
long
lda
,
T
*
b
,
long
ldb
,
T
beta
,
T
*
c
,
long
ldc
))
{
if
(
bias
!=
nullptr
)
// Set bias
for
(
uInt
row
=
0
;
row
<
output_nActive
;
row
++
)
for
(
uInt
column
=
0
;
column
<
output_nPlanes
;
column
++
)
output_features
[
row
*
output_nPLANES
+
column
]
=
bias
[
column
];
std
::
vector
<
T
>
input_buffer
,
output_buffer
;
for
(
auto
&
r
:
rules
)
{
uInt
nHot
=
r
.
size
()
/
2
;
input_buffer
.
resize
(
nHot
*
input_nPlanes
);
output_buffer
.
resize
(
nHot
*
output_nPlanes
);
for
(
uInt
row
=
0
;
row
<
nHot
;
row
++
)
std
::
memcpy
(
&
input_buffer
[
row
*
input_nPlanes
],
input_features
+
r
[
2
*
row
+
1
]
*
input_nPLANES
,
sizeof
(
T
)
*
input_nPlanes
);
// Do GEMM (note: gemm assumes column-major matrices)
// input_buffer is l*m (row-major)
// weight is m*r (row-major)
// output_buffer is l*r (row-major)
// buffer * weights -> output_buffers
(
*
gemm
)(
'n'
,
'n'
,
output_nPlanes
,
// r
nHot
,
// l
input_nPlanes
,
// m
1
,
// alpha
weight
,
output_nPlanes
,
// r
&
input_buffer
[
0
],
input_nPlanes
,
// m
0
,
// beta
&
output_buffer
[
0
],
output_nPlanes
// r
);
weight
+=
input_nPlanes
*
output_nPlanes
;
for
(
uInt
row
=
0
;
row
<
nHot
;
row
++
)
{
T
*
b
=
&
output_buffer
[
row
*
output_nPlanes
];
T
*
o
=
&
output_features
[
r
[
2
*
row
]
*
output_nPLANES
];
for
(
uInt
k
=
0
;
k
<
output_nPlanes
;
k
++
)
o
[
k
]
+=
b
[
k
];
}
}
}
template
<
typename
T
>
void
Deconvolution_BackwardPass
(
T
*
input_features
,
T
*
d_input_features
,
uInt
input_nPlanes
,
uInt
input_nPLANES
,
T
*
d_output_features
,
uInt
output_nPlanes
,
uInt
output_nPLANES
,
T
*
weight
,
T
*
d_weight
,
T
*
d_bias
,
RuleBook
&
rules
,
uInt
output_nActive
,
void
(
*
gemm
)(
char
transa
,
char
transb
,
long
m
,
long
n
,
long
k
,
T
alpha
,
T
*
a
,
long
lda
,
T
*
b
,
long
ldb
,
T
beta
,
T
*
c
,
long
ldc
))
{
if
(
d_bias
)
for
(
uInt
row
=
0
;
row
<
output_nActive
;
row
++
)
for
(
uInt
i
=
0
;
i
<
output_nPlanes
;
i
++
)
d_bias
[
i
]
+=
d_output_features
[
row
*
output_nPLANES
+
i
];
std
::
vector
<
T
>
input_buffer
,
output_buffer
;
for
(
auto
&
r
:
rules
)
{
uInt
nHot
=
r
.
size
()
/
2
;
input_buffer
.
resize
(
nHot
*
input_nPlanes
);
output_buffer
.
resize
(
nHot
*
output_nPlanes
);
for
(
uInt
row
=
0
;
row
<
nHot
;
row
++
)
std
::
memcpy
(
&
output_buffer
[
row
*
output_nPlanes
],
&
d_output_features
[
r
[
2
*
row
]
*
output_nPLANES
],
sizeof
(
T
)
*
output_nPlanes
);
// Do GEMM (note: gemm assumes column-major matrices)
// output_buffer is l*m (row-major)
// weights is r*m (row-major)
// input_buffer is l*r (row-major)
// output_buffer * T(weight) -> input_buffer
(
*
gemm
)(
't'
,
'n'
,
input_nPlanes
,
// r
nHot
,
// l
output_nPlanes
,
// m
1
,
// alpha
weight
,
output_nPlanes
,
// m
&
output_buffer
[
0
],
output_nPlanes
,
// m
0
,
// beta
&
input_buffer
[
0
],
input_nPlanes
// r
);
weight
+=
input_nPlanes
*
output_nPlanes
;
for
(
uInt
row
=
0
;
row
<
nHot
;
row
++
)
{
T
*
b
=
&
input_buffer
[
row
*
input_nPlanes
];
T
*
i
=
&
d_input_features
[
r
[
2
*
row
+
1
]
*
input_nPLANES
];
for
(
uInt
k
=
0
;
k
<
input_nPlanes
;
k
++
)
i
[
k
]
+=
b
[
k
];
}
for
(
uInt
row
=
0
;
row
<
nHot
;
row
++
)
std
::
memcpy
(
&
input_buffer
[
row
*
input_nPlanes
],
input_features
+
r
[
2
*
row
+
1
]
*
input_nPLANES
,
sizeof
(
T
)
*
input_nPlanes
);
// Do GEMM (note: gemm assumes column-major matrices)
// input_buffer is m*l (row-major)
// output_buffer is m*r (row-major)
// d_weights is l*r (row-major)
// T(input_buffer) * output_buffer -> d_weight
(
*
gemm
)(
'n'
,
't'
,
output_nPlanes
,
// r
input_nPlanes
,
// l
nHot
,
// m
1
,
// alpha
&
output_buffer
[
0
],
output_nPlanes
,
// r
&
input_buffer
[
0
],
input_nPlanes
,
// l
1
,
// beta
d_weight
,
output_nPlanes
// r
);
d_weight
+=
input_nPlanes
*
output_nPlanes
;
}
}
#endif
/* CPU_Deconvolution_H */
sparseconvnet/SCN/generic/CPU/IOLayers.cpp
deleted
100644 → 0
View file @
6d4475db
// Copyright 2016-present, Facebook, Inc.
// All rights reserved.
//
// This source code is licensed under the license found in the
// LICENSE file in the root directory of this source tree.
#ifndef TH_GENERIC_FILE_
#define TH_GENERIC_FILE_ "generic/CPU/IOLayers.cpp"
#else
#include "IOLayers.h"
// Builds the input-layer rulebook from the coordinates, then materialises
// output_features.  mode 0 copies the features through unchanged; other
// modes run InputLayer_ForwardPass over the rulebook (the final flag is set
// when mode == 4 — presumably selecting averaging; confirm in the kernel).
extern "C" void scn_DR_(InputLayer_updateOutput)(
    void **m, THLongTensor *spatialSize, THLongTensor *input_coords,
    THTensor *input_features, THTensor *output_features, long batchSize,
    long mode) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
  _m.inputLayer(spatialSize, input_coords, batchSize, mode);
  auto planeCount = input_features->size[1];
  auto &rb = _m.inputLayerRuleBook;
  auto maxActive = rb[0][1];
  auto rowCount = rb[0][3];
  if (mode == 0) {
    THTensor_(resizeAs)(output_features, input_features);
    THTensor_(copy)(output_features, input_features);
    return;
  }
  THTensor_(resize2d)(output_features, *_m.inputNActive, planeCount);
  THTensor_(zero)(output_features);
  InputLayer_ForwardPass<real>(THTensor_(data)(input_features),
                               THTensor_(data)(output_features), rowCount,
                               maxActive, planeCount, &rb[1][0], mode == 4);
}
// Backward companion of InputLayer_updateOutput: routes d_output_features
// back through the stored input-layer rulebook into d_input_features.
extern "C" void scn_DR_(InputLayer_updateGradInput)(
    void **m, THTensor *d_input_features, THTensor *d_output_features) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
  auto &rb = _m.inputLayerRuleBook;
  auto planeCount = d_output_features->size[1];
  auto mode = rb[0][0]; // mode the forward pass was run with
  auto maxActive = rb[0][1];
  auto rowCount = rb[0][3];
  if (mode == 0) {
    THTensor_(resizeAs)(d_input_features, d_output_features);
    THTensor_(copy)(d_input_features, d_output_features);
    return;
  }
  THTensor_(resize2d)(d_input_features, rb[0][2], planeCount);
  THTensor_(zero)(d_input_features);
  InputLayer_BackwardPass<real>(THTensor_(data)(d_input_features),
                                THTensor_(data)(d_output_features), rowCount,
                                maxActive, planeCount, &rb[1][0], mode == 4);
}
// Output layer forward pass.  Reuses the input-layer rulebook; note it runs
// the input layer's BACKWARD kernel to scatter rows in the inverse
// direction of the input layer.
extern "C" void scn_DR_(OutputLayer_updateOutput)(
    void **m, THTensor *input_features, THTensor *output_features) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
  auto &rb = _m.inputLayerRuleBook;
  auto planeCount = input_features->size[1];
  auto mode = rb[0][0];
  auto maxActive = rb[0][1];
  auto rowCount = rb[0][3];
  if (mode == 0) {
    THTensor_(resizeAs)(output_features, input_features);
    THTensor_(copy)(output_features, input_features);
    return;
  }
  THTensor_(resize2d)(output_features, rb[0][2], planeCount);
  THTensor_(zero)(output_features);
  InputLayer_BackwardPass<real>(THTensor_(data)(output_features),
                                THTensor_(data)(input_features), rowCount,
                                maxActive, planeCount, &rb[1][0], false);
}
// Output layer backward pass: the mirror of OutputLayer_updateOutput, so it
// runs the input layer's FORWARD kernel on the gradients.
extern "C" void scn_DR_(OutputLayer_updateGradInput)(
    void **m, THTensor *d_input_features, THTensor *d_output_features) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
  auto &rb = _m.inputLayerRuleBook;
  auto planeCount = d_output_features->size[1];
  auto mode = rb[0][0];
  auto maxActive = rb[0][1];
  auto rowCount = rb[0][3];
  if (mode == 0) {
    THTensor_(resizeAs)(d_input_features, d_output_features);
    THTensor_(copy)(d_input_features, d_output_features);
    return;
  }
  THTensor_(resize2d)(d_input_features, rowCount, planeCount);
  THTensor_(zero)(d_input_features);
  InputLayer_ForwardPass<real>(THTensor_(data)(d_output_features),
                               THTensor_(data)(d_input_features), rowCount,
                               maxActive, planeCount, &rb[1][0], false);
}
// Batch+length ("BL") input layer forward pass.  Input features are
// (batch, length, planes); the output is flattened to (nActive, planes).
extern "C" void scn_DR_(BLInputLayer_updateOutput)(
    void **m, THLongTensor *spatialSize, THLongTensor *input_coords,
    THTensor *input_features, THTensor *output_features, long mode) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
  _m.blLayer(spatialSize, input_coords, mode);
  auto planeCount = input_features->size[2];
  auto &rb = _m.blLayerRuleBook;
  auto maxActive = rb[0][1];
  auto rowCount = rb[0][4];
  if (mode == 0) {
    // Plain copy, then reshape the 3-d input into the flat 2-d layout.
    THTensor_(resizeAs)(output_features, input_features);
    THTensor_(copy)(output_features, input_features);
    THTensor_(resize2d)(output_features, *_m.inputNActive, planeCount);
  } else {
    THTensor_(resize2d)(output_features, *_m.inputNActive, planeCount);
    THTensor_(zero)(output_features);
    InputLayer_ForwardPass<real>(THTensor_(data)(input_features),
                                 THTensor_(data)(output_features), rowCount,
                                 maxActive, planeCount, &rb[1][0], mode == 4);
  }
}
// Backward companion of BLInputLayer_updateOutput: restores gradients to
// the original (batch, length, planes) layout.
extern "C" void scn_DR_(BLInputLayer_updateGradInput)(
    void **m, THTensor *d_input_features, THTensor *d_output_features) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
  auto &rb = _m.blLayerRuleBook;
  auto planeCount = d_output_features->size[1];
  auto mode = rb[0][0];
  auto maxActive = rb[0][1];
  auto rowCount = rb[0][4];
  if (mode == 0) {
    THTensor_(resizeAs)(d_input_features, d_output_features);
    THTensor_(copy)(d_input_features, d_output_features);
    THTensor_(resize3d)(d_input_features, rb[0][2], rb[0][3], planeCount);
  } else {
    THTensor_(resize3d)(d_input_features, rb[0][2], rb[0][3], planeCount);
    THTensor_(zero)(d_input_features);
    InputLayer_BackwardPass<real>(THTensor_(data)(d_input_features),
                                  THTensor_(data)(d_output_features), rowCount,
                                  maxActive, planeCount, &rb[1][0], mode == 4);
  }
}
// BL output layer forward pass: expands flat (nActive, planes) features back
// to (batch, length, planes) using the stored BL rulebook (note it applies
// the input layer's BACKWARD kernel for the scatter).
extern "C" void scn_DR_(BLOutputLayer_updateOutput)(
    void **m, THTensor *input_features, THTensor *output_features) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
  auto &rb = _m.blLayerRuleBook;
  auto planeCount = input_features->size[1];
  auto mode = rb[0][0];
  auto maxActive = rb[0][1];
  auto rowCount = rb[0][4];
  if (mode == 0) {
    THTensor_(resizeAs)(output_features, input_features);
    THTensor_(copy)(output_features, input_features);
    THTensor_(resize3d)(output_features, rb[0][2], rb[0][3], planeCount);
  } else {
    THTensor_(resize3d)(output_features, rb[0][2], rb[0][3], planeCount);
    THTensor_(zero)(output_features);
    InputLayer_BackwardPass<real>(THTensor_(data)(output_features),
                                  THTensor_(data)(input_features), rowCount,
                                  maxActive, planeCount, &rb[1][0], false);
  }
}
// Backward companion of BLOutputLayer_updateOutput: flattens the 3-d output
// gradient back to the (nRows, planes) layout via the FORWARD kernel.
extern "C" void scn_DR_(BLOutputLayer_updateGradInput)(
    void **m, THTensor *d_input_features, THTensor *d_output_features) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
  auto &rb = _m.blLayerRuleBook;
  auto planeCount = d_output_features->size[2];
  auto mode = rb[0][0];
  auto maxActive = rb[0][1];
  auto rowCount = rb[0][4];
  if (mode == 0) {
    THTensor_(resizeAs)(d_input_features, d_output_features);
    THTensor_(copy)(d_input_features, d_output_features);
    THTensor_(resize2d)(d_input_features, rowCount, planeCount);
  } else {
    THTensor_(resize2d)(d_input_features, rowCount, planeCount);
    THTensor_(zero)(d_input_features);
    InputLayer_ForwardPass<real>(THTensor_(data)(d_output_features),
                                 THTensor_(data)(d_input_features), rowCount,
                                 maxActive, planeCount, &rb[1][0], false);
  }
}
#endif
sparseconvnet/SCN/generic/CPU/LeakyReLU.cpp
deleted
100644 → 0
View file @
6d4475db
// Copyright 2016-present, Facebook, Inc.
// All rights reserved.
//
// This source code is licensed under the license found in the
// LICENSE file in the root directory of this source tree.
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/CPU/LeakyReLU.cpp"
#else
// Element-wise leaky ReLU: y = x for x > 0, y = alpha * x otherwise.
// Supports in-place operation when input and output tensors are the same.
extern "C" void scn_R_(LeakyReLU_updateOutput)(THTensor *input_features,
                                               THTensor *output_features,
                                               float alpha) {
  if (input_features != output_features)
    THTensor_(resizeAs)(output_features, input_features);
  auto src = THTensor_(data)(input_features);
  auto dst = THTensor_(data)(output_features);
  auto count = THTensor_(nElement)(input_features);
  for (uInt k = 0; k < count; ++k) {
    const auto v = src[k];
    dst[k] = (v > 0) ? v : v * alpha;
  }
}
// Leaky-ReLU gradient: pass d_output through where the forward input was
// positive, scale by alpha elsewhere.  In-place safe when the gradient
// tensors alias.
extern "C" void scn_R_(LeakyReLU_updateGradInput)(THTensor *input_features,
                                                  THTensor *d_input_features,
                                                  THTensor *d_output_features,
                                                  float alpha) {
  if (d_input_features != d_output_features)
    THTensor_(resizeAs)(d_input_features, d_output_features);
  auto src = THTensor_(data)(input_features);
  auto dGradIn = THTensor_(data)(d_input_features);
  auto dGradOut = THTensor_(data)(d_output_features);
  auto count = THTensor_(nElement)(d_input_features);
  for (uInt k = 0; k < count; ++k)
    dGradIn[k] = (src[k] > 0) ? dGradOut[k] : dGradOut[k] * alpha;
}
#endif
sparseconvnet/SCN/generic/CPU/MaxPooling.cpp
deleted
100644 → 0
View file @
6d4475db
// Copyright 2016-present, Facebook, Inc.
// All rights reserved.
//
// This source code is licensed under the license found in the
// LICENSE file in the root directory of this source tree.
#ifndef TH_GENERIC_FILE_
#define TH_GENERIC_FILE_ "generic/CPU/MaxPooling.cpp"
#else
#include "MaxPooling.h"
// Sparse max pooling forward pass.  The first nFeaturesToDrop feature
// planes of each input row are skipped.
extern "C" void scn_DR_(MaxPooling_updateOutput)(
    THLongTensor *inputSize, THLongTensor *outputSize, THLongTensor *poolSize,
    THLongTensor *poolStride, void **m, THTensor *input_features,
    THTensor *output_features, long nFeaturesToDrop) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
  uInt planeCount = input_features->size[1] - nFeaturesToDrop;
  auto rb = _m.getRuleBook(inputSize, outputSize, poolSize, poolStride, true);
  uInt activeCount = _m.getNActive(outputSize);
  THTensor_(resize2d)(output_features, activeCount,
                      input_features->size[1] - nFeaturesToDrop);
  THTensor_(zero)(output_features);
  auto srcPtr = THTensor_(data)(input_features) + nFeaturesToDrop;
  auto dstPtr = THTensor_(data)(output_features);
  for (auto &rule : rb) {
    uInt nHot = rule.size() / 2;
    MaxPooling_ForwardPass<real>(srcPtr, dstPtr, planeCount,
                                 input_features->stride[0],
                                 output_features->stride[0], &rule[0], nHot);
  }
}
// Sparse max pooling backward pass.
extern "C" void scn_DR_(MaxPooling_updateGradInput)(
    THLongTensor *inputSize, THLongTensor *outputSize, THLongTensor *poolSize,
    THLongTensor *poolStride, void **m, THTensor *input_features,
    THTensor *d_input_features, THTensor *output_features,
    THTensor *d_output_features, long nFeaturesToDrop) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
  uInt planeCount = input_features->size[1] - nFeaturesToDrop;
  auto rb = _m.getRuleBook(inputSize, outputSize, poolSize, poolStride, true);
  uInt activeCount = _m.getNActive(outputSize); // value unused; call preserved
  THTensor_(resizeAs)(d_input_features, input_features);
  THTensor_(zero)(d_input_features);
  // NOTE(review): unlike the forward pass, these pointers are NOT offset by
  // nFeaturesToDrop — preserved as in the original; confirm intentional.
  auto srcPtr = THTensor_(data)(input_features);
  auto outPtr = THTensor_(data)(output_features);
  auto dSrcPtr = THTensor_(data)(d_input_features);
  auto dOutPtr = THTensor_(data)(d_output_features);
  for (auto &rule : rb) {
    uInt nHot = rule.size() / 2;
    MaxPooling_BackwardPass<real>(srcPtr, dSrcPtr, outPtr, dOutPtr, planeCount,
                                  input_features->stride[0],
                                  output_features->stride[0], &rule[0], nHot);
  }
}
// Max pooling forward pass with a randomized-stride rulebook; otherwise
// identical in structure to MaxPooling_updateOutput.
extern "C" void scn_DR_(RandomizedStrideMaxPooling_updateOutput)(
    THLongTensor *inputSize, THLongTensor *outputSize, THLongTensor *poolSize,
    THLongTensor *poolStride, void **m, THTensor *input_features,
    THTensor *output_features, long nFeaturesToDrop) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
  uInt planeCount = input_features->size[1] - nFeaturesToDrop;
  auto rb = _m.getRandomizedStrideRuleBook(inputSize, outputSize, poolSize,
                                           poolStride, true);
  uInt activeCount = _m.getNActive(outputSize);
  THTensor_(resize2d)(output_features, activeCount,
                      input_features->size[1] - nFeaturesToDrop);
  THTensor_(zero)(output_features);
  auto srcPtr = THTensor_(data)(input_features) + nFeaturesToDrop;
  auto dstPtr = THTensor_(data)(output_features);
  for (auto &rule : rb) {
    uInt nHot = rule.size() / 2;
    MaxPooling_ForwardPass<real>(srcPtr, dstPtr, planeCount,
                                 input_features->stride[0],
                                 output_features->stride[0], &rule[0], nHot);
  }
}
// Backward pass for randomized-stride max pooling; mirrors
// MaxPooling_updateGradInput but with the randomized-stride rulebook.
extern "C" void scn_DR_(RandomizedStrideMaxPooling_updateGradInput)(
    THLongTensor *inputSize, THLongTensor *outputSize, THLongTensor *poolSize,
    THLongTensor *poolStride, void **m, THTensor *input_features,
    THTensor *d_input_features, THTensor *output_features,
    THTensor *d_output_features, long nFeaturesToDrop) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
  uInt planeCount = input_features->size[1] - nFeaturesToDrop;
  auto rb = _m.getRandomizedStrideRuleBook(inputSize, outputSize, poolSize,
                                           poolStride, true);
  uInt activeCount = _m.getNActive(outputSize); // value unused; call preserved
  THTensor_(resizeAs)(d_input_features, input_features);
  THTensor_(zero)(d_input_features);
  auto srcPtr = THTensor_(data)(input_features);
  auto outPtr = THTensor_(data)(output_features);
  auto dSrcPtr = THTensor_(data)(d_input_features);
  auto dOutPtr = THTensor_(data)(d_output_features);
  for (auto &rule : rb) {
    uInt nHot = rule.size() / 2;
    MaxPooling_BackwardPass<real>(srcPtr, dSrcPtr, outPtr, dOutPtr, planeCount,
                                  input_features->stride[0],
                                  output_features->stride[0], &rule[0], nHot);
  }
}
#endif
sparseconvnet/SCN/generic/CPU/NetworkInNetwork.cpp
deleted
100644 → 0
View file @
6d4475db
// Copyright 2016-present, Facebook, Inc.
// All rights reserved.
//
// This source code is licensed under the license found in the
// LICENSE file in the root directory of this source tree.
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/CPU/NetworkInNetwork.cpp"
#else
// 1x1 "network in network" convolution: output = input * weight (+ bias).
// Returns nActive * input_nPlanes * output_nPlanes (presumably a FLOP/work
// estimate for the caller — confirm against call sites).
extern "C" double scn_R_(NetworkInNetwork_updateOutput)(
    THTensor *input_features_, THTensor *output_features_, THTensor *weight_,
    THTensor *bias_) {
  auto activeCount = input_features_->size[0];
  auto inPlanes = weight_->size[0];
  auto outPlanes = weight_->size[1];
  THTensor_(resize2d)(output_features_, activeCount, outPlanes);
  auto inPtr = THTensor_(data)(input_features_);
  auto outPtr = THTensor_(data)(output_features_);
  auto wPtr = THTensor_(data)(weight_);
  if (bias_ != nullptr) {
    // Seed every output row with the bias, then accumulate with beta = 1.
    auto bPtr = THTensor_(data)(bias_);
    for (uInt i = 0; i < activeCount; ++i)
      for (uInt j = 0; j < outPlanes; ++j)
        outPtr[i * outPlanes + j] = bPtr[j];
    // Column-major gemm form of the row-major input (l x m) * weight (m x r).
    THBlas_(gemm)('n', 'n',
                  outPlanes,   // r
                  activeCount, // l
                  inPlanes,    // m
                  1,           // alpha
                  wPtr, outPlanes,    // r
                  inPtr, inPlanes,    // m
                  1,                  // beta
                  outPtr, outPlanes); // r
  } else {
    THTensor_(zero)(output_features_);
    THBlas_(gemm)('n', 'n',
                  outPlanes,   // r
                  activeCount, // l
                  inPlanes,    // m
                  1,           // alpha
                  wPtr, outPlanes,    // r
                  inPtr, inPlanes,    // m
                  0,                  // beta
                  outPtr, outPlanes); // r
  }
  return activeCount * inPlanes * outPlanes;
}
// Gradient w.r.t. the input of the 1x1 convolution:
// d_input = d_output * weight^T.
extern "C" void scn_R_(NetworkInNetwork_updateGradInput)(
    THTensor *d_input_features_, THTensor *d_output_features_,
    THTensor *weight_) {
  auto activeCount = d_output_features_->size[0];
  auto inPlanes = weight_->size[0];
  auto outPlanes = weight_->size[1];
  THTensor_(resize2d)(d_input_features_, activeCount, inPlanes);
  THTensor_(zero)(d_input_features_);
  auto dInPtr = THTensor_(data)(d_input_features_);
  auto dOutPtr = THTensor_(data)(d_output_features_);
  auto wPtr = THTensor_(data)(weight_);
  // Column-major gemm form of d_output (l x m) * weight^T (m x r).
  THBlas_(gemm)('t', 'n',
                inPlanes,    // r
                activeCount, // l
                outPlanes,   // m
                1,           // alpha
                wPtr, outPlanes,    // m
                dOutPtr, outPlanes, // m
                0,                  // beta
                dInPtr, inPlanes);  // r
}
// Accumulates parameter gradients for the 1x1 convolution:
//   d_weight += input^T * d_output, and (optionally) d_bias += column sums
// of d_output.  Fix: the original declared an outer
// `auto d_bias = d_bias_ and THTensor_(data)(d_bias_);` whose `and`
// (operator &&) collapsed the expression to an unused bool that was then
// shadowed by the real declaration inside `if (d_bias_)`; the dead
// declaration is removed.
extern "C" void scn_R_(NetworkInNetwork_accGradParameters)(
    THTensor *input_features_, THTensor *d_output_features_,
    THTensor *d_weight_, THTensor *d_bias_) {
  auto nActive = input_features_->size[0];
  auto input_nPlanes = d_weight_->size[0];
  auto output_nPlanes = d_weight_->size[1];
  auto input_features = THTensor_(data)(input_features_);
  auto d_output_features = THTensor_(data)(d_output_features_);
  auto d_weight = THTensor_(data)(d_weight_);
  // Do GEMM (note: gemm assumes column-major matrices)
  // d_output_features is m*l (row-major)
  // input_features is m*r (row-major)
  // d_weight is l*r (row-major); beta = 1 accumulates.
  THBlas_(gemm)('n', 't',
                output_nPlanes, // r
                input_nPlanes,  // l
                nActive,        // m
                1,              // alpha
                d_output_features, output_nPlanes, // r
                input_features, input_nPlanes,     // l
                1,                                 // beta
                d_weight, output_nPlanes);         // r
  if (d_bias_) {
    // d_bias is the column-sum of the output gradient over all active rows.
    auto d_bias = THTensor_(data)(d_bias_);
    for (uInt row = 0; row < nActive; row++)
      for (uInt i = 0; i < output_nPlanes; i++)
        d_bias[i] += d_output_features[row * output_nPlanes + i];
  }
}
#endif
sparseconvnet/SCN/generic/CPU/NetworkInNetwork.h
deleted
100644 → 0
View file @
6d4475db
// Copyright 2016-present, Facebook, Inc.
// All rights reserved.
//
// This source code is licensed under the license found in the
// LICENSE file in the root directory of this source tree.
#ifndef CPU_NetworkInNetwork_H
#define CPU_NetworkInNetwork_H
#include "../SparseConvNet.h"
#include "Convolution.h"
// 1x1 "network in network" forward pass: output = input * weight (+ bias).
// BUG FIX: the original body referenced `buffer` and `filterVolume`, which
// are not parameters of this function (copy/paste from the generic
// convolution kernel) and made it ill-formed if instantiated.  A 1x1
// convolution needs no gather buffer — the input feature matrix is already
// dense — so the input is fed to gemm directly with leading dimension
// input_nPlanes, matching scn_R_(NetworkInNetwork_updateOutput).
template <typename T>
void NetworkInNetwork_ForwardPass(
    T *input_features, uInt input_nPlanes, T *output_features,
    uInt output_nPlanes, T *weight, T *bias, uInt output_nActive,
    void (*gemm)(char transa, char transb, long m, long n, long k, T alpha,
                 T *a, long lda, T *b, long ldb, T beta, T *c, long ldc)) {
  if (bias != nullptr) {
    // Seed every output row with the bias, then accumulate with beta = 1.
    for (uInt row = 0; row < output_nActive; row++)
      for (uInt column = 0; column < output_nPlanes; column++)
        output_features[row * output_nPlanes + column] = bias[column];
    // Do GEMM (note: gemm assumes column-major matrices)
    // input_features is l*m (row-major)
    // weight is m*r (row-major)
    // output_features is l*r (row-major)
    (*gemm)('n', 'n',
            output_nPlanes, // r
            output_nActive, // l
            input_nPlanes,  // m
            1,              // alpha
            weight, output_nPlanes,         // r
            input_features, input_nPlanes,  // m
            1,                              // beta
            output_features, output_nPlanes // r
            );
  } else {
    (*gemm)('n', 'n',
            output_nPlanes, // r
            output_nActive, // l
            input_nPlanes,  // m
            1,              // alpha
            weight, output_nPlanes,         // r
            input_features, input_nPlanes,  // m
            0,                              // beta
            output_features, output_nPlanes // r
            );
  }
}
// Backward pass used by the convolution family: computes
// d_buffer = d_output_features * weight^T, then scatters d_buffer rows into
// d_input_features through `rules`.
template <typename T>
void NetworkInNetwork_BackwardPass(
    T *d_input_features, uInt input_nPlanes, T *d_output_features,
    uInt output_nPlanes, T *weight, uInt *rules, uInt filterVolume,
    uInt output_nActive, T *d_buffer,
    void (*gemm)(char transa, char transb, long m, long n, long k, T alpha,
                 T *a, long lda, T *b, long ldb, T beta, T *c, long ldc)) {
  // Column-major gemm form of d_output (l x m) * weight^T (m x r).
  (*gemm)('t', 'n',
          input_nPlanes * filterVolume, // r
          output_nActive,               // l
          output_nPlanes,               // m
          1,                            // alpha
          weight, output_nPlanes,                  // m
          d_output_features, output_nPlanes,       // m
          0,                                       // beta
          d_buffer, input_nPlanes * filterVolume); // r
  // Scatter-add d_buffer rows into d_input_features; a rule entry equal to
  // uInt_MAX (2^32-1) marks an inactive site and is skipped.
  for (uInt row = 0; row < output_nActive * filterVolume; ++row) {
    const auto target = rules[row];
    if (target == uInt_MAX)
      continue;
    T *dst = d_input_features + target * input_nPlanes;
    const T *src = d_buffer + row * input_nPlanes;
    for (uInt i = 0; i < input_nPlanes; ++i)
      dst[i] += src[i];
  }
}
// Weight/bias gradient kernel shared by the convolution family: stages the
// input rows addressed by `rules` into `buffer`, then accumulates
// d_weight += d_output_features^T * buffer and the bias column sums.
template <typename T>
void NetworkInNetwork_GradWeights(
    T *input_features, uInt input_nPlanes, T *d_output_features,
    uInt output_nPlanes, T *d_weight, T *d_bias, uInt *rules,
    uInt filterVolume, uInt output_nActive, T *buffer,
    void (*gemm)(char transa, char transb, long m, long n, long k, T alpha,
                 T *a, long lda, T *b, long ldb, T beta, T *c, long ldc)) {
  // Gather phase: inactive sites (rule entry == uInt_MAX, i.e. 2^32-1)
  // become zero rows so they contribute nothing to the products below.
  for (uInt row = 0; row < output_nActive * filterVolume; ++row) {
    T *dst = buffer + row * input_nPlanes;
    const auto src = rules[row];
    if (src == uInt_MAX)
      std::memset(dst, 0, sizeof(T) * input_nPlanes);
    else
      std::memcpy(dst, input_features + src * input_nPlanes,
                  sizeof(T) * input_nPlanes);
  }
  // Column-major gemm form of d_output^T (l x m) * buffer (m x r);
  // beta = 1 accumulates into the existing d_weight.
  (*gemm)('n', 't',
          output_nPlanes,               // r
          input_nPlanes * filterVolume, // l
          output_nActive,               // m
          1,                            // alpha
          d_output_features, output_nPlanes,    // r
          buffer, input_nPlanes * filterVolume, // l
          1,                                    // beta
          d_weight, output_nPlanes);            // r
  // d_bias is the column-sum of the output gradient over all active rows.
  if (d_bias)
    for (uInt row = 0; row < output_nActive; ++row)
      for (uInt i = 0; i < output_nPlanes; ++i)
        d_bias[i] += d_output_features[row * output_nPlanes + i];
}
#endif
/* CPU_NetworkInNetwork_H */
sparseconvnet/SCN/generic/CPU/SparseToDense.cpp
deleted
100644 → 0
View file @
6d4475db
// Copyright 2016-present, Facebook, Inc.
// All rights reserved.
//
// This source code is licensed under the license found in the
// LICENSE file in the root directory of this source tree.
#ifndef TH_GENERIC_FILE_
#define TH_GENERIC_FILE_ "generic/CPU/SparseToDense.cpp"
#else
#include "SparseToDense.h"
// Expands sparse features into a dense (batch, nPlanes, spatial...) tensor,
// zero-filling inactive sites.
extern "C" void scn_DR_(SparseToDense_updateOutput)(
    THLongTensor *inputSize, void **m, THTensor *input_features,
    THTensor *output_features, long nPlanes) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
  {
    // Shape the dense output as (batch, nPlanes, spatial dims...).
    long dims[Dimension + 2];
    dims[0] = _m.grids.begin()->second.size(); // batch size
    dims[1] = nPlanes;
    std::memcpy(dims + 2, THLongTensor_data(inputSize),
                sizeof(long) * Dimension);
    THTensor_(resizeNd)(output_features, Dimension + 2, dims, NULL);
    THTensor_(zero)(output_features);
  }
  if (input_features->nDimension != 2)
    return;
  auto rb = _m.getSparseToDenseRuleBook(inputSize, true);
  uInt planeCount = input_features->size[1];
  auto srcPtr = THTensor_(data)(input_features);
  auto dstPtr = THTensor_(data)(output_features);
  long denseVolume = THLongTensor_prodall(inputSize);
  for (auto &rule : rb) {
    uInt nHot = rule.size() / 2;
    SparseToDense_ForwardPass<real>(srcPtr, dstPtr, planeCount, denseVolume,
                                    &rule[0], nHot);
    dstPtr += planeCount * denseVolume; // next sample's dense grid
  }
}
// Backward companion of SparseToDense_updateOutput: gathers the dense
// gradient back into the sparse feature layout.
extern "C" void scn_DR_(SparseToDense_updateGradInput)(
    THLongTensor *inputSize, void **m, THTensor *input_features,
    THTensor *d_input_features, THTensor *d_output_features) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
  THTensor_(resizeAs)(d_input_features, input_features);
  THTensor_(zero)(d_input_features);
  if (input_features->nDimension != 2)
    return;
  auto rb = _m.getSparseToDenseRuleBook(inputSize, true);
  long denseVolume = THLongTensor_prodall(inputSize);
  uInt planeCount = d_input_features->size[1];
  auto dInPtr = THTensor_(data)(d_input_features);
  auto dOutPtr = THTensor_(data)(d_output_features);
  for (auto &rule : rb) {
    uInt nHot = rule.size() / 2;
    SparseToDense_BackwardPass<real>(dInPtr, dOutPtr, planeCount, denseVolume,
                                     &rule[0], nHot);
    dOutPtr += planeCount * denseVolume; // next sample's dense grid
  }
}
#endif
sparseconvnet/SCN/generic/CPU/THGenerateDimFloatTypes.h
deleted
100644 → 0
View file @
6d4475db
// Copyright 2016-present, Facebook, Inc.
// All rights reserved.
//
// This source code is licensed under the license found in the
// LICENSE file in the root directory of this source tree.

// X-macro driver: instantiates the client file named by TH_GENERIC_FILE_
// once per spatial Dimension 1..10 and, within each dimension (via
// THGenerateFloatTypes.h), once per floating-point type.
// THGenerateFloatTypes.h #undef-s TH_GENERIC_FILE when it finishes, which
// is why TH_GENERIC_FILE is re-defined before every include below.
#ifndef TH_GENERIC_FILE_
#error "Define TH_GENERIC_FILE_ before including THGenerateDimFloatTypes.h"
#endif
// NOTE(review): this first define is immediately repeated below; an
// identical redefinition is legal C++, so it is harmless but redundant.
#define TH_GENERIC_FILE TH_GENERIC_FILE_
#define Dimension 1
#define TH_GENERIC_FILE TH_GENERIC_FILE_
#include "THGenerateFloatTypes.h"
#undef Dimension
#define Dimension 2
#define TH_GENERIC_FILE TH_GENERIC_FILE_
#include "THGenerateFloatTypes.h"
#undef Dimension
#define Dimension 3
#define TH_GENERIC_FILE TH_GENERIC_FILE_
#include "THGenerateFloatTypes.h"
#undef Dimension
#define Dimension 4
#define TH_GENERIC_FILE TH_GENERIC_FILE_
#include "THGenerateFloatTypes.h"
#undef Dimension
#define Dimension 5
#define TH_GENERIC_FILE TH_GENERIC_FILE_
#include "THGenerateFloatTypes.h"
#undef Dimension
#define Dimension 6
#define TH_GENERIC_FILE TH_GENERIC_FILE_
#include "THGenerateFloatTypes.h"
#undef Dimension
#define Dimension 7
#define TH_GENERIC_FILE TH_GENERIC_FILE_
#include "THGenerateFloatTypes.h"
#undef Dimension
#define Dimension 8
#define TH_GENERIC_FILE TH_GENERIC_FILE_
#include "THGenerateFloatTypes.h"
#undef Dimension
#define Dimension 9
#define TH_GENERIC_FILE TH_GENERIC_FILE_
#include "THGenerateFloatTypes.h"
#undef Dimension
#define Dimension 10
#define TH_GENERIC_FILE TH_GENERIC_FILE_
#include "THGenerateFloatTypes.h"
#undef Dimension
#undef TH_GENERIC_FILE_
sparseconvnet/SCN/generic/CPU/THGenerateFloatTypes.h
deleted
100644 → 0
View file @
6d4475db
// Copyright 2016-present, Facebook, Inc.
// All rights reserved.
//
// This source code is licensed under the license found in the
// LICENSE file in the root directory of this source tree.
#ifndef TH_GENERIC_FILE
#error "You must define TH_GENERIC_FILE before including THGenerateFloatTypes.h"
#endif
// Instantiate TH_GENERIC_FILE twice: once with real=float, once with
// real=double.  `#line 1` resets diagnostics so compiler errors point into
// the generic file rather than this generator.
#define real float
#define accreal double
#define Real Float
#define TH_REAL_IS_FLOAT
#line 1 TH_GENERIC_FILE
#include TH_GENERIC_FILE
#undef accreal
#undef real
#undef Real
#undef TH_REAL_IS_FLOAT
// Second instantiation: double precision.
#define real double
#define accreal double
#define Real Double
#define TH_REAL_IS_DOUBLE
#line 1 TH_GENERIC_FILE
#include TH_GENERIC_FILE
#undef accreal
#undef real
#undef Real
#undef TH_REAL_IS_DOUBLE
#undef TH_GENERIC_FILE
sparseconvnet/SCN/generic/CPU/UnPooling.cpp
deleted
100644 → 0
View file @
6d4475db
// Copyright 2016-present, Facebook, Inc.
// All rights reserved.
//
// This source code is licensed under the license found in the
// LICENSE file in the root directory of this source tree.
#ifndef TH_GENERIC_FILE_
#define TH_GENERIC_FILE_ "generic/CPU/UnPooling.cpp"
#else
#include "UnPooling.h"
// Forward pass of UnPooling (CPU): copies each input feature row to all of
// its child output sites.  The first nFeaturesToDrop feature planes of the
// input are skipped.  Note the reversed size arguments: unpooling reuses the
// pooling rule book with the input/output roles swapped.
extern "C" void scn_DR_(UnPooling_updateOutput)(
    THLongTensor *inputSize, THLongTensor *outputSize, THLongTensor *poolSize,
    THLongTensor *poolStride, void **m, THTensor *input_features,
    THTensor *output_features, long nFeaturesToDrop) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
  uInt nPlanes = input_features->size[1] - nFeaturesToDrop;
  auto _rules =
      _m.getRuleBook(outputSize, inputSize, poolSize, poolStride, true);
  uInt nActive = _m.getNActive(outputSize);
  THTensor_(resize2d)(output_features, nActive,
                      input_features->size[1] - nFeaturesToDrop);
  THTensor_(zero)(output_features);
  // Offset past the dropped leading feature planes.
  auto iF = THTensor_(data)(input_features) + nFeaturesToDrop;
  auto oF = THTensor_(data)(output_features);
  for (auto &r : _rules) {
    uInt nHot = r.size() / 2;
    UnPooling_ForwardPass<real>(iF, oF, nPlanes, input_features->size[1],
                                output_features->size[1], &r[0], nHot,
                                _rules.size());
  }
}
// Backward pass of UnPooling (CPU): accumulates gradients from all child
// output sites back into each parent input site.  Dropped feature planes get
// zero gradient.
extern "C" void scn_DR_(UnPooling_updateGradInput)(
    THLongTensor *inputSize, THLongTensor *outputSize, THLongTensor *poolSize,
    THLongTensor *poolStride, void **m, THTensor *input_features,
    THTensor *d_input_features, THTensor *d_output_features,
    long nFeaturesToDrop) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
  uInt nPlanes = input_features->size[1] - nFeaturesToDrop;
  auto _rules =
      _m.getRuleBook(outputSize, inputSize, poolSize, poolStride, true);
  // Result unused here, but the call may populate metadata; kept as-is.
  uInt nActive = _m.getNActive(outputSize);
  THTensor_(resizeAs)(d_input_features, input_features);
  THTensor_(zero)(d_input_features);
  auto diF = THTensor_(data)(d_input_features) + nFeaturesToDrop;
  auto doF = THTensor_(data)(d_output_features);
  for (auto &r : _rules) {
    uInt nHot = r.size() / 2;
    UnPooling_BackwardPass<real>(diF, doF, nPlanes, input_features->size[1],
                                 d_output_features->size[1], &r[0], nHot,
                                 _rules.size());
  }
}
#endif
sparseconvnet/SCN/generic/GPU/ActivePooling.cu
deleted
100644 → 0
View file @
6d4475db
// Copyright 2016-present, Facebook, Inc.
// All rights reserved.
//
// This source code is licensed under the license found in the
// LICENSE file in the root directory of this source tree.
#ifndef TH_GENERIC_FILE_
#define TH_GENERIC_FILE_ "generic/GPU/ActivePooling.cu"
#else
#include "ActivePooling.h"
// Forward pass of ActivePooling (GPU): pools every active site of each batch
// sample down to a single feature row (sum, or mean when `average`).  Host
// rules are staged to the device in chunks through a 4M-element scratch
// tensor; each chunk covers rowBatchSize samples of (maxActive+1) entries.
extern "C" void scn_DR_(ActivePooling_updateOutput)(
    THLongTensor *inputSize, void **m, THCTensor *input_features,
    THCTensor *output_features, bool average) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
  uInt nPlanes = input_features->size[1];
  auto _rules = _m.getActivePoolingRuleBook(inputSize);
  uInt batchSize = _rules[1][0];
  uInt maxActive = _rules[1][1];
  THCTensor_(resize2d)(state, output_features, batchSize, nPlanes);
  THCTensor_(zero)(state, output_features);
  auto rulesBuffer = THCITensor_(new)(state);
  if (THCITensor_(nElement)(state, rulesBuffer) < 1 << 22)
    THCITensor_(resize1d)(state, rulesBuffer, 1 << 22);
  uInt *rb = (uInt *)THCITensor_(data)(state, rulesBuffer);
  uInt rowBatchSize = std::min((uInt)32768, (1 << 22) / (maxActive + 1));
  THAssert(rowBatchSize > 0);
  auto iF = THCTensor_(data)(state, input_features);
  auto oF = THCTensor_(data)(state, output_features);
  for (uInt o = 0; o < batchSize; o += rowBatchSize) {
    uInt batchSize_ = std::min(rowBatchSize, (uInt)(batchSize - o));
    cudaMemcpy(rb, &_rules[0][o * (maxActive + 1)],
               sizeof(uInt) * (maxActive + 1) * batchSize_,
               cudaMemcpyHostToDevice);
    ActivePooling_ForwardPass<real>(iF, oF + o * nPlanes, batchSize_,
                                    maxActive, nPlanes, rb, average);
  }
  THCITensor_(free)(state, rulesBuffer);
}
// Backward pass of ActivePooling (GPU): redistributes each pooled gradient
// row back over the active sites it was pooled from.  Mirrors the forward
// pass's chunked host-to-device staging of the rule book.
extern "C" void scn_DR_(ActivePooling_updateGradInput)(
    THLongTensor *inputSize, void **m, THCTensor *input_features,
    THCTensor *d_input_features, THCTensor *d_output_features, bool average) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
  uInt nPlanes = input_features->size[1];
  auto _rules = _m.getActivePoolingRuleBook(inputSize);
  uInt batchSize = _rules[1][0];
  uInt maxActive = _rules[1][1];
  THCTensor_(resizeAs)(state, d_input_features, input_features);
  THCTensor_(zero)(state, d_input_features);
  auto rulesBuffer = THCITensor_(new)(state);
  if (THCITensor_(nElement)(state, rulesBuffer) < 1 << 22)
    THCITensor_(resize1d)(state, rulesBuffer, 1 << 22);
  uInt *rb = (uInt *)THCITensor_(data)(state, rulesBuffer);
  uInt rowBatchSize = std::min((uInt)32768, (1 << 22) / (maxActive + 1));
  THAssert(rowBatchSize > 0);
  auto diF = THCTensor_(data)(state, d_input_features);
  auto doF = THCTensor_(data)(state, d_output_features);
  for (uInt o = 0; o < batchSize; o += rowBatchSize) {
    uInt batchSize_ = std::min(rowBatchSize, (uInt)(batchSize - o));
    cudaMemcpy(rb, &_rules[0][o * (maxActive + 1)],
               sizeof(uInt) * (maxActive + 1) * batchSize_,
               cudaMemcpyHostToDevice);
    ActivePooling_BackwardPass<real>(diF, doF + o * nPlanes, batchSize_,
                                     maxActive, nPlanes, rb, average);
  }
  THCITensor_(free)(state, rulesBuffer);
}
#endif
sparseconvnet/SCN/generic/GPU/AffineReluTrivialConvolution.cu
deleted
100644 → 0
View file @
6d4475db
// Copyright 2016-present, Facebook, Inc.
// All rights reserved.
//
// This source code is licensed under the license found in the
// LICENSE file in the root directory of this source tree.
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/GPU/AffineReluTrivialConvolution.cu"
#else
#include "AffineReluTrivialConvolution.h"
#include <algorithm>
// Fused affine transform + ReLU + 1x1 ("trivial") convolution, forward pass.
// Output is resized to (nActive, convWeight->size[1]) and fully overwritten
// by the device kernel.
extern "C" void scn_R_(AffineReluTrivialConvolution_updateOutput)(
    THCTensor *input_features, THCTensor *output_features,
    THCTensor *affineWeight, THCTensor *affineBias, THCTensor *convWeight) {
  THCTensor_(resize2d)(state, output_features, input_features->size[0],
                       convWeight->size[1]);
  dAffineReluTrivialConvolution_forward<real>(
      THCTensor_(data)(state, input_features),
      THCTensor_(data)(state, output_features),
      THCTensor_(data)(state, affineWeight),
      THCTensor_(data)(state, affineBias),
      THCTensor_(data)(state, convWeight), convWeight->size[0],
      input_features->stride[0], convWeight->size[1],
      output_features->size[1], input_features->size[0]);
}
// Backward pass of the fused affine+ReLU+1x1 convolution: computes input
// gradients and accumulates parameter gradients (affine weight/bias and conv
// weight).  `additiveGrad` selects whether d_input_features is added to or
// overwritten by the kernel.
extern "C" void scn_R_(AffineReluTrivialConvolution_backward)(
    THCTensor *input_features, THCTensor *d_input_features,
    THCTensor *d_output_features, THCTensor *affineWeight,
    THCTensor *d_affineWeight, THCTensor *affineBias, THCTensor *d_affineBias,
    THCTensor *convWeight, THCTensor *d_convWeight, bool additiveGrad) {
  THCTensor_(resizeAs)(state, d_input_features, input_features);
  dAffineReluTrivialConvolution_backward_dW<real>(
      THCTensor_(data)(state, input_features),
      THCTensor_(data)(state, d_input_features),
      THCTensor_(data)(state, d_output_features),
      THCTensor_(data)(state, affineWeight),
      THCTensor_(data)(state, d_affineWeight),
      THCTensor_(data)(state, affineBias),
      THCTensor_(data)(state, d_affineBias),
      THCTensor_(data)(state, convWeight),
      THCTensor_(data)(state, d_convWeight), convWeight->size[0],
      input_features->stride[0], convWeight->size[1],
      d_output_features->stride[0], input_features->size[0], additiveGrad);
}
#endif
sparseconvnet/SCN/generic/GPU/AveragePooling.cu
deleted
100644 → 0
View file @
6d4475db
// Copyright 2016-present, Facebook, Inc.
// All rights reserved.
//
// This source code is licensed under the license found in the
// LICENSE file in the root directory of this source tree.
#ifndef TH_GENERIC_FILE_
#define TH_GENERIC_FILE_ "generic/GPU/AveragePooling.cu"
#else
#include "AveragePooling.h"
#include "RuleBookIterator.h"
// Forward pass of AveragePooling (GPU).  The first nFeaturesToDrop feature
// planes are skipped.  RULEBOOKITERATOR stages each rule-book chunk on the
// device, exposing `rbB` (device rules pointer) and `nHotB` (pair count) to
// the body expression; the second macro argument (per-chunk epilogue) is
// empty here.
extern "C" void scn_DR_(AveragePooling_updateOutput)(
    THLongTensor *inputSize, THLongTensor *outputSize, THLongTensor *poolSize,
    THLongTensor *poolStride, void **m, THCTensor *input_features,
    THCTensor *output_features, long nFeaturesToDrop) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
  uInt nPlanes = input_features->size[1] - nFeaturesToDrop;
  auto _rules =
      _m.getRuleBook(inputSize, outputSize, poolSize, poolStride, true);
  uInt nActive = _m.getNActive(outputSize);
  THCTensor_(resize2d)(state, output_features, nActive,
                       input_features->size[1] - nFeaturesToDrop);
  THCTensor_(zero)(state, output_features);
  auto iF = THCTensor_(data)(state, input_features) + nFeaturesToDrop;
  auto oF = THCTensor_(data)(state, output_features);
  RULEBOOKITERATOR(AveragePooling_ForwardPass<real>(
                       THCState_getCurrentStream(state), iF, oF, nPlanes,
                       input_features->size[1], output_features->size[1], rbB,
                       nHotB, _rules.size());
                   , )
}
// Backward pass of AveragePooling (GPU): spreads each output gradient evenly
// back over its pooling window.  Dropped feature planes receive zero
// gradient.
extern "C" void scn_DR_(AveragePooling_updateGradInput)(
    THLongTensor *inputSize, THLongTensor *outputSize, THLongTensor *poolSize,
    THLongTensor *poolStride, void **m, THCTensor *input_features,
    THCTensor *d_input_features, THCTensor *d_output_features,
    long nFeaturesToDrop) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
  uInt nPlanes = input_features->size[1] - nFeaturesToDrop;
  auto _rules =
      _m.getRuleBook(inputSize, outputSize, poolSize, poolStride, true);
  // Result unused here, but the call may populate metadata; kept as-is.
  uInt nActive = _m.getNActive(outputSize);
  THCTensor_(resizeAs)(state, d_input_features, input_features);
  THCTensor_(zero)(state, d_input_features);
  auto diF = THCTensor_(data)(state, d_input_features) + nFeaturesToDrop;
  auto doF = THCTensor_(data)(state, d_output_features);
  RULEBOOKITERATOR(AveragePooling_BackwardPass<real>(
                       THCState_getCurrentStream(state), diF, doF, nPlanes,
                       input_features->size[1], d_output_features->size[1],
                       rbB, nHotB, _rules.size());
                   , )
}
#endif
sparseconvnet/SCN/generic/GPU/BatchNormalization.cu
deleted
100644 → 0
View file @
6d4475db
// Copyright 2016-present, Facebook, Inc.
// All rights reserved.
//
// This source code is licensed under the license found in the
// LICENSE file in the root directory of this source tree.
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/GPU/BatchNormalization.cu"
#else
#include "BatchNormalization.h"
// Dispatch helper for the batch-norm forward kernel: expands to an
// `if (nPlanes % N == 0) { ... }` guard around the templated kernel so call
// sites can chain instances with `else` and pick the largest vector width N
// dividing nPlanes.  Expects nPlanes/input_stride/output_stride/nActive and
// the tensor arguments to be in scope at the expansion site.
#define BN_F_MACRO(N) \
if (nPlanes % N == 0) { \
BatchNormalization_ForwardPass<real, N, 64>( \
THCTensor_(data)(state, input_features), \
THCTensor_(data)(state, output_features), nPlanes, input_stride, \
output_stride, nActive, THCTensor_(data)(state, saveMean), \
THCTensor_(data)(state, saveInvStd), \
THCTensor_(data)(state, runningMean), \
THCTensor_(data)(state, runningVar), \
weight ? THCTensor_(data)(state, weight) : 0, \
bias ? THCTensor_(data)(state, bias) : 0, eps, momentum, train, \
leakiness); \
}
// Batch normalization forward (GPU), with optional affine parameters and a
// fused leaky-ReLU (`leakiness`).  Resizes the output, then dispatches to
// the kernel specialization with the largest plane-divisor via the chained
// BN_F_MACRO tests.
extern "C" void scn_R_(BatchNormalization_updateOutput)(
    THCTensor *input_features, THCTensor *output_features,
    THCTensor *saveMean, THCTensor *saveInvStd, THCTensor *runningMean,
    THCTensor *runningVar, THCTensor *weight, THCTensor *bias, real eps,
    real momentum, bool train, real leakiness) {
  THCTensor_(resizeAs)(state, output_features, input_features);
  if (input_features->nDimension == 2) {
    auto nActive = input_features->size[0];
    auto nPlanes = input_features->size[1];
    auto input_stride = input_features->stride[0];
    auto output_stride = output_features->stride[0];
    // Pick the widest specialization whose width divides nPlanes.
    BN_F_MACRO(16)
    else BN_F_MACRO(12) else BN_F_MACRO(8) else BN_F_MACRO(4) else BN_F_MACRO(1)
  }
}
// In-tensor variant of the batch-norm forward pass: identical dispatch, but
// the output tensor is NOT resized — the caller supplies a correctly sized
// (possibly shared) output buffer.
extern "C" void scn_R_(BatchNormalizationInTensor_updateOutput)(
    THCTensor *input_features, THCTensor *output_features,
    THCTensor *saveMean, THCTensor *saveInvStd, THCTensor *runningMean,
    THCTensor *runningVar, THCTensor *weight, THCTensor *bias, real eps,
    real momentum, bool train, real leakiness) {
  if (input_features->nDimension == 2) {
    auto nActive = input_features->size[0];
    auto nPlanes = input_features->size[1];
    auto input_stride = input_features->stride[0];
    auto output_stride = output_features->stride[0];
    BN_F_MACRO(16)
    else BN_F_MACRO(12) else BN_F_MACRO(8) else BN_F_MACRO(4) else BN_F_MACRO(1)
  }
}
#undef BN_F_MACRO
// Dispatch helper for the batch-norm backward kernel; mirrors BN_F_MACRO:
// expands to an `if (nPlanes % N == 0) { ... }` guard so call sites chain
// instances with `else` to select the widest applicable specialization.
#define BN_B_MACRO(N) \
if (nPlanes % N == 0) { \
BatchNormalization_BackwardPass<real, N, 64>( \
THCTensor_(data)(state, input_features), \
THCTensor_(data)(state, d_input_features), \
THCTensor_(data)(state, output_features), \
THCTensor_(data)(state, d_output_features), nPlanes, input_stride, \
output_stride, nActive, THCTensor_(data)(state, saveMean), \
THCTensor_(data)(state, saveInvStd), \
THCTensor_(data)(state, runningMean), \
THCTensor_(data)(state, runningVar), \
weight ? THCTensor_(data)(state, weight) : 0, \
bias ? THCTensor_(data)(state, bias) : 0, \
d_weight ? THCTensor_(data)(state, d_weight) : 0, \
d_bias ? THCTensor_(data)(state, d_bias) : 0, leakiness); \
}
// Batch normalization backward (GPU): computes input gradients and, when the
// d_weight/d_bias tensors are supplied, the affine parameter gradients.
// Dispatches via the chained BN_B_MACRO tests.
extern "C" void scn_R_(BatchNormalization_backward)(
    THCTensor *input_features, THCTensor *d_input_features,
    THCTensor *output_features, THCTensor *d_output_features,
    THCTensor *saveMean, THCTensor *saveInvStd, THCTensor *runningMean,
    THCTensor *runningVar, THCTensor *weight, THCTensor *bias,
    THCTensor *d_weight, THCTensor *d_bias, real leakiness) {
  THCTensor_(resizeAs)(state, d_input_features, d_output_features);
  if (input_features->nDimension == 2) {
    auto nActive = input_features->size[0];
    auto nPlanes = input_features->size[1];
    auto input_stride = input_features->stride[0];
    auto output_stride = output_features->stride[0];
    BN_B_MACRO(16)
    else BN_B_MACRO(12) else BN_B_MACRO(8) else BN_B_MACRO(4) else BN_B_MACRO(1)
  }
}
#endif
sparseconvnet/SCN/generic/GPU/Convolution.cu
deleted
100644 → 0
View file @
6d4475db
// Copyright 2016-present, Facebook, Inc.
// All rights reserved.
//
// This source code is licensed under the license found in the
// LICENSE file in the root directory of this source tree.
#ifndef TH_GENERIC_FILE_
#define TH_GENERIC_FILE_ "generic/GPU/Convolution.cu"
#else
#include "Convolution.h"
#include "RuleBookIterator.h"
#include <algorithm>
#include <cstring>
// Sparse convolution forward (GPU).  Builds/fetches the rule book, resizes
// the output to (nActive, out-planes), adds the bias by a kernel launch per
// 32-plane slice, then runs the convolution kernel over each rule-book
// chunk, advancing the weight pointer by in*out planes per chunk.
// Returns the multiply-accumulate count (flops) performed.
extern "C" double scn_DR_(Convolution_updateOutput)(
    THLongTensor *inputSize, THLongTensor *outputSize, THLongTensor *filterSize,
    THLongTensor *filterStride, void **m, THCTensor *input_features,
    THCTensor *output_features, THCTensor *weight, THCTensor *bias,
    long filterVolume) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
  auto _rules =
      _m.getRuleBook(inputSize, outputSize, filterSize, filterStride, true);
  uInt nActive = _m.getNActive(outputSize);
  THCTensor_(resize2d)(state, output_features, nActive, weight->size[1]);
  if (not bias)
    THCTensor_(zero)(state, output_features);
  double flops = 0;
  if (nActive) {
    auto iF = THCTensor_(data)(state, input_features);
    auto oF = THCTensor_(data)(state, output_features);
    auto ip = input_features->size[1];
    auto op = output_features->size[1];
    auto w = THCTensor_(data)(state, weight);
    if (bias) {
      // Broadcast the bias into the output, 32 planes at a time.
      auto b = THCTensor_(data)(state, bias);
      for (uInt i = 0; i < op; i += 32) {
        uInt blockDim = min(32L, op - i);
        uInt gridDim = min(4096, nActive);
        Convolution_fp_bias<<<gridDim, blockDim, 0,
                              THCState_getCurrentStream(state)>>>(
            oF + i, b + i, op, op, nActive);
      }
    }
    uInt c = ip * op;
    RULEBOOKITERATOR(dConvolution_forward2<real>(
                         iF, oF, w, rbB, nHotB, ip, ip, op, op,
                         THCState_getCurrentStream(state));
                     , w += c; flops += nHotB * c;)
  }
  return flops;
}
// Sparse convolution backward (GPU): computes input gradients and
// accumulates weight gradients per rule-book chunk; bias gradients are
// reduced at the end when d_bias is supplied.
extern "C" void scn_DR_(Convolution_backward)(
    THLongTensor *inputSize, THLongTensor *outputSize, THLongTensor *filterSize,
    THLongTensor *filterStride, void **m, THCTensor *input_features,
    THCTensor *d_input_features, THCTensor *d_output_features,
    THCTensor *weight, THCTensor *d_weight, THCTensor *d_bias,
    long filterVolume) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
  auto _rules =
      _m.getRuleBook(inputSize, outputSize, filterSize, filterStride, true);
  uInt nActive = _m.getNActive(outputSize);
  THCTensor_(resizeAs)(state, d_input_features, input_features);
  THCTensor_(zero)(state, d_input_features);
  if (nActive) {
    auto iF = THCTensor_(data)(state, input_features);
    auto diF = THCTensor_(data)(state, d_input_features);
    auto doF = THCTensor_(data)(state, d_output_features);
    auto ip = input_features->size[1];
    auto op = d_output_features->size[1];
    auto w = THCTensor_(data)(state, weight);
    auto dw = THCTensor_(data)(state, d_weight);
    uInt c = ip * op;
    RULEBOOKITERATOR(dConvolution_backward_dW2<real>(
                         iF, diF, doF, w, dw, rbB, nHotB, ip, ip, op, op,
                         THCState_getCurrentStream(state));
                     , w += c; dw += c;)
    if (d_bias) {
      auto db = THCTensor_(data)(state, d_bias);
      Convolution_bp_bias(doF, db, op, op, nActive,
                          THCState_getCurrentStream(state));
    }
  }
}
// Submanifold convolution forward (GPU): like Convolution_updateOutput but
// input and output share the same active-site set, so the rule book comes
// from getSubmanifoldRuleBook and nActive is taken from inputSize.
// Returns the multiply-accumulate count (flops) performed.
extern "C" double scn_DR_(SubmanifoldConvolution_updateOutput)(
    THLongTensor *inputSize, THLongTensor *filterSize, void **m,
    THCTensor *input_features, THCTensor *output_features, THCTensor *weight,
    THCTensor *bias, long filterVolume) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
  auto _rules = _m.getSubmanifoldRuleBook(inputSize, filterSize, true);
  uInt nActive = _m.getNActive(inputSize);
  THCTensor_(resize2d)(state, output_features, nActive, weight->size[1]);
  if (not bias)
    THCTensor_(zero)(state, output_features);
  double flops = 0;
  if (nActive) {
    auto iF = THCTensor_(data)(state, input_features);
    auto oF = THCTensor_(data)(state, output_features);
    auto ip = input_features->size[1];
    auto op = output_features->size[1];
    auto w = THCTensor_(data)(state, weight);
    if (bias) {
      auto b = THCTensor_(data)(state, bias);
      for (uInt i = 0; i < op; i += 32) {
        uInt blockDim = min(32L, op - i);
        uInt gridDim = min(4096, nActive);
        Convolution_fp_bias<<<gridDim, blockDim, 0,
                              THCState_getCurrentStream(state)>>>(
            oF + i, b + i, op, op, nActive);
      }
    }
    uInt c = ip * op;
    RULEBOOKITERATOR(dConvolution_forward2<real>(
                         iF, oF, w, rbB, nHotB, ip, ip, op, op,
                         THCState_getCurrentStream(state));
                     , w += c; flops += nHotB * c;)
  }
  return flops;
}
// Submanifold convolution backward (GPU): input/weight gradients per
// rule-book chunk; bias gradient reduction when d_bias is supplied.
extern "C" void scn_DR_(SubmanifoldConvolution_backward)(
    THLongTensor *inputSize, THLongTensor *filterSize, void **m,
    THCTensor *input_features, THCTensor *d_input_features,
    THCTensor *d_output_features, THCTensor *weight, THCTensor *d_weight,
    THCTensor *d_bias, long filterVolume) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
  auto _rules = _m.getSubmanifoldRuleBook(inputSize, filterSize, true);
  uInt nActive = _m.getNActive(inputSize);
  THCTensor_(resizeAs)(state, d_input_features, input_features);
  THCTensor_(zero)(state, d_input_features);
  if (nActive) {
    auto iF = THCTensor_(data)(state, input_features);
    auto diF = THCTensor_(data)(state, d_input_features);
    auto doF = THCTensor_(data)(state, d_output_features);
    auto ip = input_features->size[1];
    auto op = d_output_features->size[1];
    auto w = THCTensor_(data)(state, weight);
    auto dw = THCTensor_(data)(state, d_weight);
    uInt c = ip * op;
    RULEBOOKITERATOR(dConvolution_backward_dW2<real>(
                         iF, diF, doF, w, dw, rbB, nHotB, ip, ip, op, op,
                         THCState_getCurrentStream(state));
                     , w += c; dw += c;)
    if (d_bias) {
      auto db = THCTensor_(data)(state, d_bias);
      Convolution_bp_bias(doF, db, op, op, nActive,
                          THCState_getCurrentStream(state));
    }
  }
}
// Full ("dense-support") convolution forward (GPU): the output active-site
// set lives in a second metadata object (mOut), which the rule-book builder
// populates from the input metadata (mIn).  `rulesBuffer` is accepted for
// interface compatibility but unused here.  Returns the flop count.
extern "C" double scn_DR_(FullConvolution_updateOutput)(
    THLongTensor *inputSize, THLongTensor *outputSize, THLongTensor *filterSize,
    THLongTensor *filterStride, void **mIn, void **mOut,
    THCTensor *input_features, THCTensor *output_features, THCTensor *weight,
    THCTensor *bias, long filterVolume, THCITensor *rulesBuffer) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, mIn)
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, mOut)
  auto _rules = _mIn.getFullConvolutionRuleBook(inputSize, outputSize,
                                                filterSize, filterStride,
                                                _mOut);
  uInt nActive = _mOut.getNActive(outputSize);
  THCTensor_(resize2d)(state, output_features, nActive, weight->size[1]);
  if (not bias)
    THCTensor_(zero)(state, output_features);
  double flops = 0;
  if (nActive) {
    auto iF = THCTensor_(data)(state, input_features);
    auto oF = THCTensor_(data)(state, output_features);
    auto ip = input_features->size[1];
    auto op = output_features->size[1];
    auto w = THCTensor_(data)(state, weight);
    if (bias) {
      auto b = THCTensor_(data)(state, bias);
      for (uInt i = 0; i < op; i += 32) {
        uInt blockDim = min(32L, op - i);
        uInt gridDim = min(4096, nActive);
        Convolution_fp_bias<<<gridDim, blockDim, 0,
                              THCState_getCurrentStream(state)>>>(
            oF + i, b + i, op, op, nActive);
      }
    }
    uInt c = ip * op;
    RULEBOOKITERATOR(dConvolution_forward2<real>(
                         iF, oF, w, rbB, nHotB, ip, ip, op, op,
                         THCState_getCurrentStream(state));
                     , w += c; flops += nHotB * c;)
  }
  return flops;
}
// Full convolution backward (GPU): rebuilds the same two-metadata rule book
// as the forward pass, then computes input/weight (and optional bias)
// gradients per rule-book chunk.
extern "C" void scn_DR_(FullConvolution_backward)(
    THLongTensor *inputSize, THLongTensor *outputSize, THLongTensor *filterSize,
    THLongTensor *filterStride, void **mIn, void **mOut,
    THCTensor *input_features, THCTensor *d_input_features,
    THCTensor *d_output_features, THCTensor *weight, THCTensor *d_weight,
    THCTensor *d_bias, long filterVolume) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, mIn)
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, mOut)
  auto _rules = _mIn.getFullConvolutionRuleBook(inputSize, outputSize,
                                                filterSize, filterStride,
                                                _mOut);
  uInt nActive = _mOut.getNActive(outputSize);
  THCTensor_(resizeAs)(state, d_input_features, input_features);
  THCTensor_(zero)(state, d_input_features);
  if (nActive) {
    auto iF = THCTensor_(data)(state, input_features);
    auto diF = THCTensor_(data)(state, d_input_features);
    auto doF = THCTensor_(data)(state, d_output_features);
    auto ip = input_features->size[1];
    auto op = d_output_features->size[1];
    auto w = THCTensor_(data)(state, weight);
    auto dw = THCTensor_(data)(state, d_weight);
    uInt c = ip * op;
    RULEBOOKITERATOR(dConvolution_backward_dW2<real>(
                         iF, diF, doF, w, dw, rbB, nHotB, ip, ip, op, op,
                         THCState_getCurrentStream(state));
                     , w += c; dw += c;)
    if (d_bias) {
      auto db = THCTensor_(data)(state, d_bias);
      Convolution_bp_bias(doF, db, op, op, nActive,
                          THCState_getCurrentStream(state));
    }
  }
}
// Randomized-stride convolution forward (GPU): identical structure to
// Convolution_updateOutput but the rule book comes from
// getRandomizedStrideRuleBook.  `rulesBuffer` is accepted for interface
// compatibility but unused here.  Returns the flop count.
extern "C" double scn_DR_(RandomizedStrideConvolution_updateOutput)(
    THLongTensor *inputSize, THLongTensor *outputSize, THLongTensor *filterSize,
    THLongTensor *filterStride, void **m, THCTensor *input_features,
    THCTensor *output_features, THCTensor *weight, THCTensor *bias,
    long filterVolume, THCITensor *rulesBuffer) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
  auto _rules = _m.getRandomizedStrideRuleBook(inputSize, outputSize,
                                               filterSize, filterStride, true);
  uInt nActive = _m.getNActive(outputSize);
  THCTensor_(resize2d)(state, output_features, nActive, weight->size[1]);
  if (not bias)
    THCTensor_(zero)(state, output_features);
  double flops = 0;
  if (nActive) {
    auto iF = THCTensor_(data)(state, input_features);
    auto oF = THCTensor_(data)(state, output_features);
    auto ip = input_features->size[1];
    auto op = output_features->size[1];
    auto w = THCTensor_(data)(state, weight);
    if (bias) {
      auto b = THCTensor_(data)(state, bias);
      for (uInt i = 0; i < op; i += 32) {
        uInt blockDim = min(32L, op - i);
        uInt gridDim = min(4096, nActive);
        Convolution_fp_bias<<<gridDim, blockDim, 0,
                              THCState_getCurrentStream(state)>>>(
            oF + i, b + i, op, op, nActive);
      }
    }
    uInt c = ip * op;
    RULEBOOKITERATOR(dConvolution_forward2<real>(
                         iF, oF, w, rbB, nHotB, ip, ip, op, op,
                         THCState_getCurrentStream(state));
                     , w += c; flops += nHotB * c;)
  }
  return flops;
}
// Randomized-stride convolution backward (GPU): input/weight gradients per
// rule-book chunk; bias gradient reduction when d_bias is supplied.
extern "C" void scn_DR_(RandomizedStrideConvolution_backward)(
    THLongTensor *inputSize, THLongTensor *outputSize, THLongTensor *filterSize,
    THLongTensor *filterStride, void **m, THCTensor *input_features,
    THCTensor *d_input_features, THCTensor *d_output_features,
    THCTensor *weight, THCTensor *d_weight, THCTensor *d_bias,
    long filterVolume) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
  auto _rules = _m.getRandomizedStrideRuleBook(inputSize, outputSize,
                                               filterSize, filterStride, true);
  uInt nActive = _m.getNActive(outputSize);
  THCTensor_(resizeAs)(state, d_input_features, input_features);
  THCTensor_(zero)(state, d_input_features);
  if (nActive) {
    auto iF = THCTensor_(data)(state, input_features);
    auto diF = THCTensor_(data)(state, d_input_features);
    auto doF = THCTensor_(data)(state, d_output_features);
    auto ip = input_features->size[1];
    auto op = d_output_features->size[1];
    auto w = THCTensor_(data)(state, weight);
    auto dw = THCTensor_(data)(state, d_weight);
    uInt c = ip * op;
    RULEBOOKITERATOR(dConvolution_backward_dW2<real>(
                         iF, diF, doF, w, dw, rbB, nHotB, ip, ip, op, op,
                         THCState_getCurrentStream(state));
                     , w += c; dw += c;)
    if (d_bias) {
      auto db = THCTensor_(data)(state, d_bias);
      Convolution_bp_bias(doF, db, op, op, nActive,
                          THCState_getCurrentStream(state));
    }
  }
}
#endif
sparseconvnet/SCN/generic/GPU/Deconvolution.cu
deleted
100644 → 0
View file @
6d4475db
// Copyright 2016-present, Facebook, Inc.
// All rights reserved.
//
// This source code is licensed under the license found in the
// LICENSE file in the root directory of this source tree.
#ifndef TH_GENERIC_FILE_
#define TH_GENERIC_FILE_ "generic/GPU/Deconvolution.cu"
#else
#include "Convolution.h"
#include "Deconvolution.h"
#include <algorithm>
// Sparse deconvolution forward (GPU): reuses the pooling/convolution rule
// book with input/output sizes swapped.  Returns the multiply-accumulate
// count (flops) performed.
//
// FIX: the kernel launch was written as `Convolution_fp_bias << < gridDim,
// ...` — `<< <` is two tokens, not the CUDA `<<<` launch operator, and does
// not compile under nvcc.  Restored to `<<<`, matching every other launch
// in this file.
extern "C" double scn_DR_(Deconvolution_updateOutput)(
    THLongTensor *inputSize, THLongTensor *outputSize, THLongTensor *filterSize,
    THLongTensor *filterStride, void **m, THCTensor *input_features,
    THCTensor *output_features, THCTensor *weight, THCTensor *bias,
    long filterVolume) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
  // Sizes are swapped relative to Convolution: the rule book is built as if
  // convolving output -> input.
  auto _rules =
      _m.getRuleBook(outputSize, inputSize, filterSize, filterStride, true);
  uInt nActive = _m.getNActive(outputSize);
  THCTensor_(resize2d)(state, output_features, nActive, weight->size[1]);
  if (not bias)
    THCTensor_(zero)(state, output_features);
  auto iF = THCTensor_(data)(state, input_features);
  auto oF = THCTensor_(data)(state, output_features);
  auto ip = input_features->size[1];
  auto op = output_features->size[1];
  auto w = THCTensor_(data)(state, weight);
  double flops = 0;
  if (bias) {
    // Broadcast the bias into the output, 32 planes at a time.
    auto b = THCTensor_(data)(state, bias);
    for (uInt i = 0; i < op; i += 32) {
      uInt blockDim = min(32L, op - i);
      uInt gridDim = min(4096, nActive);
      Convolution_fp_bias<<<gridDim, blockDim, 0,
                            THCState_getCurrentStream(state)>>>(
          oF + i, b + i, op, op, nActive);
    }
  }
  uInt c = ip * op;
  RULEBOOKITERATOR(dDeconvolution_forward2<real>(
                       iF, oF, w, rbB, nHotB, ip, ip, op, op,
                       THCState_getCurrentStream(state));
                   , w += c; flops += nHotB * c;)
  return flops;
}
// Sparse deconvolution backward (GPU): computes input gradients and
// accumulates weight gradients per rule-book chunk (same swapped-size rule
// book as the forward pass); bias gradients are reduced when d_bias is
// supplied.
extern "C" void scn_DR_(Deconvolution_backward)(
    THLongTensor *inputSize, THLongTensor *outputSize, THLongTensor *filterSize,
    THLongTensor *filterStride, void **m, THCTensor *input_features,
    THCTensor *d_input_features, THCTensor *d_output_features,
    THCTensor *weight, THCTensor *d_weight, THCTensor *d_bias,
    long filterVolume) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
  auto _rules =
      _m.getRuleBook(outputSize, inputSize, filterSize, filterStride, true);
  uInt nActive = _m.getNActive(outputSize);
  THCTensor_(resizeAs)(state, d_input_features, input_features);
  THCTensor_(zero)(state, d_input_features);
  auto iF = THCTensor_(data)(state, input_features);
  auto diF = THCTensor_(data)(state, d_input_features);
  auto doF = THCTensor_(data)(state, d_output_features);
  auto ip = input_features->size[1];
  auto op = d_output_features->size[1];
  auto w = THCTensor_(data)(state, weight);
  auto dw = THCTensor_(data)(state, d_weight);
  uInt c = ip * op;
  RULEBOOKITERATOR(dDeconvolution_backward_dW2<real>(
                       iF, diF, doF, w, dw, rbB, nHotB, ip, ip, op, op,
                       THCState_getCurrentStream(state));
                   , w += c; dw += c;)
  if (d_bias) {
    auto db = THCTensor_(data)(state, d_bias);
    Convolution_bp_bias(doF, db, op, op, nActive,
                        THCState_getCurrentStream(state));
  }
}
#endif
sparseconvnet/SCN/generic/GPU/IOLayers.cu
deleted
100644 → 0
View file @
6d4475db
// Copyright 2016-present, Facebook, Inc.
// All rights reserved.
//
// This source code is licensed under the license found in the
// LICENSE file in the root directory of this source tree.
#ifndef TH_GENERIC_FILE_
#define TH_GENERIC_FILE_ "generic/GPU/IOLayers.cu"
#else
#include "IOLayers.h"
// GPU input layer, forward. Registers the input coordinates with the Metadata
// object (building the input-layer rulebook), then produces the compacted
// 2-D feature matrix. mode == 0 is a pure pass-through copy; other modes
// merge rows sharing a spatial location via the InputLayer_fp kernel.
// The kernel's last argument is the boolean (mode == 4) — presumably an
// averaging variant; confirm against IOLayers.h.
extern "C" void scn_DR_(InputLayer_updateOutput)(
    void **m, THLongTensor *spatialSize, THLongTensor *input_coords,
    THCTensor *input_features, THCTensor *output_features, long batchSize,
    long mode) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
  // Build/refresh the coordinate bookkeeping for this spatial size and batch.
  _m.inputLayer(spatialSize, input_coords, batchSize, mode);
  uInt nPlanes = input_features->size[1];
  auto &rules = _m.inputLayerRuleBook;
  // rules[0] is a small header; by usage here: [1] = max input rows merged
  // into one output row, [3] = number of output rows. rules[1] is the flat
  // rule data consumed by the kernel.
  uInt maxActive = rules[0][1];
  uInt nRows = rules[0][3];
  if (mode == 0) {
    THCTensor_(resizeAs)(state, output_features, input_features);
    THCTensor_(copy)(state, output_features, input_features);
  } else {
    THCTensor_(resize2d)(state, output_features, *_m.inputNActive, nPlanes);
    THCTensor_(zero)(state, output_features);
    // Stage the host-side rulebook in a device integer tensor.
    // NOTE(review): resize1d takes an element count, so the sizeof(uInt)
    // factor looks like a bytes-vs-elements over-allocation; the cudaMemcpy
    // below copies sizeof(uInt) * size() bytes. Harmless but worth confirming.
    auto rulesBuffer = THCITensor_(new)(state);
    THCITensor_(resize1d)(state, rulesBuffer, sizeof(uInt) * rules[1].size());
    auto iF = THCTensor_(data)(state, input_features);
    auto oF = THCTensor_(data)(state, output_features);
    auto rb = (uInt *)THCITensor_(data)(state, rulesBuffer);
    cudaMemcpy(rb, &rules[1][0], sizeof(uInt) * rules[1].size(),
               cudaMemcpyHostToDevice);
    // Grid capped at 32768 blocks, block capped at 32 threads.
    InputLayer_fp<real><<<std::min(nRows, 32768U), std::min(nPlanes, 32U), 0,
                          THCState_getCurrentStream(state)>>>(
        iF, oF, nRows, maxActive, nPlanes, rb, mode == 4);
    THCITensor_(free)(state, rulesBuffer);
  }
}
// GPU input layer, backward: propagates d_output rows back to the original
// (pre-merge) input rows using the saved input-layer rulebook.
// mode == 0 is a straight copy; other modes run the InputLayer_bp kernel.
extern "C" void scn_DR_(InputLayer_updateGradInput)(
    void **m, THCTensor *d_input_features, THCTensor *d_output_features) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
  auto &rules = _m.inputLayerRuleBook;
  uInt nPlanes = d_output_features->size[1];
  // rules[0] header, by usage here: [0] = mode, [1] = maxActive,
  // [2] = original input row count, [3] = merged output row count.
  auto mode = rules[0][0];
  uInt maxActive = rules[0][1];
  uInt nRows = rules[0][3];
  if (mode == 0) {
    THCTensor_(resizeAs)(state, d_input_features, d_output_features);
    THCTensor_(copy)(state, d_input_features, d_output_features);
  } else {
    // Gradient w.r.t. the input has one row per original input row.
    THCTensor_(resize2d)(state, d_input_features, rules[0][2], nPlanes);
    THCTensor_(zero)(state, d_input_features);
    // Stage the host rulebook on the device.
    // NOTE(review): resize1d takes an element count; the sizeof(uInt) factor
    // looks like a bytes-vs-elements over-allocation — confirm.
    auto rulesBuffer = THCITensor_(new)(state);
    THCITensor_(resize1d)(state, rulesBuffer, sizeof(uInt) * rules[1].size());
    auto diF = THCTensor_(data)(state, d_input_features);
    auto doF = THCTensor_(data)(state, d_output_features);
    auto rb = (uInt *)THCITensor_(data)(state, rulesBuffer);
    cudaMemcpy(rb, &rules[1][0], sizeof(uInt) * rules[1].size(),
               cudaMemcpyHostToDevice);
    // Grid capped at 32768 blocks, block capped at 32 threads.
    InputLayer_bp<real><<<std::min(nRows, 32768U), std::min(nPlanes, 32U), 0,
                          THCState_getCurrentStream(state)>>>(
        diF, doF, nRows, maxActive, nPlanes, rb, mode == 4);
    THCITensor_(free)(state, rulesBuffer);
  }
}
// GPU output layer, forward: maps the compact internal rows back to the
// caller's row order, reusing the input layer's rulebook. Implemented with
// the InputLayer_bp kernel with (output, input) argument order — the output
// layer is the inverse mapping of the input layer. The kernel's final flag
// (which the input-layer forward sets to mode == 4) is passed as false here.
extern "C" void scn_DR_(OutputLayer_updateOutput)(
    void **m, THCTensor *input_features, THCTensor *output_features) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
  auto &rules = _m.inputLayerRuleBook;
  uInt nPlanes = input_features->size[1];
  // rules[0] header, by usage: [0] = mode, [1] = maxActive,
  // [2] = external row count, [3] = internal row count.
  auto mode = rules[0][0];
  auto maxActive = rules[0][1];
  auto nRows = rules[0][3];
  if (mode == 0) {
    THCTensor_(resizeAs)(state, output_features, input_features);
    THCTensor_(copy)(state, output_features, input_features);
  } else {
    THCTensor_(resize2d)(state, output_features, rules[0][2], nPlanes);
    THCTensor_(zero)(state, output_features);
    // Device copy of the host rulebook.
    // NOTE(review): resize1d takes an element count; sizeof(uInt) factor
    // looks like a bytes-vs-elements over-allocation — confirm.
    auto rulesBuffer = THCITensor_(new)(state);
    THCITensor_(resize1d)(state, rulesBuffer, sizeof(uInt) * rules[1].size());
    auto iF = THCTensor_(data)(state, input_features);
    auto oF = THCTensor_(data)(state, output_features);
    auto rb = (uInt *)THCITensor_(data)(state, rulesBuffer);
    cudaMemcpy(rb, &rules[1][0], sizeof(uInt) * rules[1].size(),
               cudaMemcpyHostToDevice);
    InputLayer_bp<real><<<std::min(nRows, 32768U), std::min(nPlanes, 32U), 0,
                          THCState_getCurrentStream(state)>>>(
        oF, iF, nRows, maxActive, nPlanes, rb, false);
    THCITensor_(free)(state, rulesBuffer);
  }
}
// GPU output layer, backward: gathers d_output rows back into the compact
// internal ordering via the InputLayer_fp kernel (arguments reversed, flag
// forced to false).
extern "C" void scn_DR_(OutputLayer_updateGradInput)(
    void **m, THCTensor *d_input_features, THCTensor *d_output_features) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
  auto &rules = _m.inputLayerRuleBook;
  uInt nPlanes = d_output_features->size[1];
  // rules[0] header, by usage: [0] = mode, [1] = maxActive,
  // [3] = internal row count.
  auto mode = rules[0][0];
  auto maxActive = rules[0][1];
  auto nRows = rules[0][3];
  if (mode == 0) {
    THCTensor_(resizeAs)(state, d_input_features, d_output_features);
    THCTensor_(copy)(state, d_input_features, d_output_features);
  } else {
    THCTensor_(resize2d)(state, d_input_features, nRows, nPlanes);
    THCTensor_(zero)(state, d_input_features);
    // Device copy of the host rulebook.
    // NOTE(review): resize1d takes an element count; sizeof(uInt) factor
    // looks like a bytes-vs-elements over-allocation — confirm.
    auto rulesBuffer = THCITensor_(new)(state);
    THCITensor_(resize1d)(state, rulesBuffer, sizeof(uInt) * rules[1].size());
    auto diF = THCTensor_(data)(state, d_input_features);
    auto doF = THCTensor_(data)(state, d_output_features);
    auto rb = (uInt *)THCITensor_(data)(state, rulesBuffer);
    cudaMemcpy(rb, &rules[1][0], sizeof(uInt) * rules[1].size(),
               cudaMemcpyHostToDevice);
    InputLayer_fp<real><<<std::min(nRows, 32768U), std::min(nPlanes, 32U), 0,
                          THCState_getCurrentStream(state)>>>(
        doF, diF, nRows, maxActive, nPlanes, rb, false);
    THCITensor_(free)(state, rulesBuffer);
  }
}
// GPU batch-line (BL) input layer, forward. input_features is 3-D (feature
// planes on axis 2); the output is the flattened 2-D internal matrix with
// *_m.inputNActive rows. mode == 0 copies then reshapes; other modes merge
// rows via InputLayer_fp using the BL rulebook. The kernel's last argument
// is (mode == 4) — presumably an averaging variant; confirm in IOLayers.h.
extern "C" void scn_DR_(BLInputLayer_updateOutput)(
    void **m, THLongTensor *spatialSize, THLongTensor *input_coords,
    THCTensor *input_features, THCTensor *output_features, long mode) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
  // Build/refresh the batch-line coordinate bookkeeping and rulebook.
  _m.blLayer(spatialSize, input_coords, mode);
  uInt nPlanes = input_features->size[2];
  THCTensor_(resize2d)(state, output_features, *_m.inputNActive, nPlanes);
  THCTensor_(zero)(state, output_features);
  auto &rules = _m.blLayerRuleBook;
  // rules[0] header, by usage here: [1] = maxActive, [4] = output row count.
  uInt maxActive = rules[0][1];
  uInt nRows = rules[0][4];
  if (mode == 0) {
    // Pass-through: copy the 3-D input, then flatten to the 2-D layout.
    THCTensor_(resizeAs)(state, output_features, input_features);
    THCTensor_(copy)(state, output_features, input_features);
    THCTensor_(resize2d)(state, output_features, *_m.inputNActive, nPlanes);
  } else {
    // Device copy of the host rulebook.
    // NOTE(review): resize1d takes an element count; sizeof(uInt) factor
    // looks like a bytes-vs-elements over-allocation — confirm.
    auto rulesBuffer = THCITensor_(new)(state);
    THCITensor_(resize1d)(state, rulesBuffer, sizeof(uInt) * rules[1].size());
    auto iF = THCTensor_(data)(state, input_features);
    auto oF = THCTensor_(data)(state, output_features);
    auto rb = (uInt *)THCITensor_(data)(state, rulesBuffer);
    cudaMemcpy(rb, &rules[1][0], sizeof(uInt) * rules[1].size(),
               cudaMemcpyHostToDevice);
    InputLayer_fp<real><<<std::min(nRows, 32768U), std::min(nPlanes, 32U), 0,
                          THCState_getCurrentStream(state)>>>(
        iF, oF, nRows, maxActive, nPlanes, rb, mode == 4);
    THCITensor_(free)(state, rulesBuffer);
  }
}
// GPU batch-line input layer, backward: produces a 3-D gradient of shape
// (rules[0][2], rules[0][3], nPlanes) from the 2-D d_output_features.
extern "C" void scn_DR_(BLInputLayer_updateGradInput)(
    void **m, THCTensor *d_input_features, THCTensor *d_output_features) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
  auto &rules = _m.blLayerRuleBook;
  uInt nPlanes = d_output_features->size[1];
  // rules[0] header, by usage: [0] = mode, [1] = maxActive, [2] and [3] =
  // the two leading dims of the original 3-D input, [4] = output row count.
  uInt mode = rules[0][0];
  uInt maxActive = rules[0][1];
  uInt nRows = rules[0][4];
  if (mode == 0) {
    // Pass-through: copy, then restore the 3-D batch-line shape.
    THCTensor_(resizeAs)(state, d_input_features, d_output_features);
    THCTensor_(copy)(state, d_input_features, d_output_features);
    THCTensor_(resize3d)(state, d_input_features, rules[0][2], rules[0][3],
                         nPlanes);
  } else {
    THCTensor_(resize3d)(state, d_input_features, rules[0][2], rules[0][3],
                         nPlanes);
    THCTensor_(zero)(state, d_input_features);
    // Device copy of the host rulebook.
    // NOTE(review): resize1d takes an element count; sizeof(uInt) factor
    // looks like a bytes-vs-elements over-allocation — confirm.
    auto rulesBuffer = THCITensor_(new)(state);
    THCITensor_(resize1d)(state, rulesBuffer, sizeof(uInt) * rules[1].size());
    auto diF = THCTensor_(data)(state, d_input_features);
    auto doF = THCTensor_(data)(state, d_output_features);
    auto rb = (uInt *)THCITensor_(data)(state, rulesBuffer);
    cudaMemcpy(rb, &rules[1][0], sizeof(uInt) * rules[1].size(),
               cudaMemcpyHostToDevice);
    InputLayer_bp<real><<<std::min(nRows, 32768U), std::min(nPlanes, 32U), 0,
                          THCState_getCurrentStream(state)>>>(
        diF, doF, nRows, maxActive, nPlanes, rb, mode == 4);
    THCITensor_(free)(state, rulesBuffer);
  }
}
// GPU batch-line output layer, forward: maps the compact 2-D internal rows
// back to the 3-D batch-line layout (rules[0][2], rules[0][3], nPlanes),
// reusing the BL rulebook via the InputLayer_bp kernel (flag forced false).
extern "C" void scn_DR_(BLOutputLayer_updateOutput)(
    void **m, THCTensor *input_features, THCTensor *output_features) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
  auto &rules = _m.blLayerRuleBook;
  uInt nPlanes = input_features->size[1];
  // rules[0] header, by usage: [0] = mode, [1] = maxActive, [2] and [3] =
  // leading dims of the 3-D output, [4] = internal row count.
  auto mode = rules[0][0];
  uInt maxActive = rules[0][1];
  uInt nRows = rules[0][4];
  if (mode == 0) {
    // Pass-through: copy, then restore the 3-D batch-line shape.
    THCTensor_(resizeAs)(state, output_features, input_features);
    THCTensor_(copy)(state, output_features, input_features);
    THCTensor_(resize3d)(state, output_features, rules[0][2], rules[0][3],
                         nPlanes);
  } else {
    THCTensor_(resize3d)(state, output_features, rules[0][2], rules[0][3],
                         nPlanes);
    THCTensor_(zero)(state, output_features);
    // Device copy of the host rulebook.
    // NOTE(review): resize1d takes an element count; sizeof(uInt) factor
    // looks like a bytes-vs-elements over-allocation — confirm.
    auto rulesBuffer = THCITensor_(new)(state);
    THCITensor_(resize1d)(state, rulesBuffer, sizeof(uInt) * rules[1].size());
    auto iF = THCTensor_(data)(state, input_features);
    auto oF = THCTensor_(data)(state, output_features);
    auto rb = (uInt *)THCITensor_(data)(state, rulesBuffer);
    cudaMemcpy(rb, &rules[1][0], sizeof(uInt) * rules[1].size(),
               cudaMemcpyHostToDevice);
    InputLayer_bp<real><<<std::min(nRows, 32768U), std::min(nPlanes, 32U), 0,
                          THCState_getCurrentStream(state)>>>(
        oF, iF, nRows, maxActive, nPlanes, rb, false);
    THCITensor_(free)(state, rulesBuffer);
  }
}
// GPU batch-line output layer, backward: gathers the 3-D d_output_features
// (planes on axis 2) back into the compact 2-D internal gradient of shape
// (nRows, nPlanes) via the InputLayer_fp kernel (flag forced false).
extern "C" void scn_DR_(BLOutputLayer_updateGradInput)(
    void **m, THCTensor *d_input_features, THCTensor *d_output_features) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
  auto &rules = _m.blLayerRuleBook;
  uInt nPlanes = d_output_features->size[2];
  // rules[0] header, by usage: [0] = mode, [1] = maxActive,
  // [4] = internal row count.
  uInt mode = rules[0][0];
  uInt maxActive = rules[0][1];
  uInt nRows = rules[0][4];
  if (mode == 0) {
    // Pass-through: copy, then flatten to the 2-D internal layout.
    THCTensor_(resizeAs)(state, d_input_features, d_output_features);
    THCTensor_(copy)(state, d_input_features, d_output_features);
    THCTensor_(resize2d)(state, d_input_features, nRows, nPlanes);
  } else {
    THCTensor_(resize2d)(state, d_input_features, nRows, nPlanes);
    THCTensor_(zero)(state, d_input_features);
    // Device copy of the host rulebook.
    // NOTE(review): resize1d takes an element count; sizeof(uInt) factor
    // looks like a bytes-vs-elements over-allocation — confirm.
    auto rulesBuffer = THCITensor_(new)(state);
    THCITensor_(resize1d)(state, rulesBuffer, sizeof(uInt) * rules[1].size());
    auto diF = THCTensor_(data)(state, d_input_features);
    auto doF = THCTensor_(data)(state, d_output_features);
    auto rb = (uInt *)THCITensor_(data)(state, rulesBuffer);
    cudaMemcpy(rb, &rules[1][0], sizeof(uInt) * rules[1].size(),
               cudaMemcpyHostToDevice);
    InputLayer_fp<real><<<std::min(nRows, 32768U), std::min(nPlanes, 32U), 0,
                          THCState_getCurrentStream(state)>>>(
        doF, diF, nRows, maxActive, nPlanes, rb, false);
    THCITensor_(free)(state, rulesBuffer);
  }
}
#endif
sparseconvnet/SCN/generic/GPU/LeakyReLU.cu
deleted
100644 → 0
View file @
6d4475db
// Copyright 2016-present, Facebook, Inc.
// All rights reserved.
//
// This source code is licensed under the license found in the
// LICENSE file in the root directory of this source tree.
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/GPU/LeakyReLU.cu"
#else
#include "LeakyReLU.h"
// Elementwise leaky-ReLU forward on the GPU via the LeakyReLU_fp kernel,
// with negative-slope factor `alpha`. Supports in-place operation: when the
// caller passes the same tensor for input and output, no resize happens and
// the kernel reads and writes the same storage.
extern "C" void scn_R_(LeakyReLU_updateOutput)(THCTensor *input_features,
                                               THCTensor *output_features,
                                               float alpha) {
  // Reshape the destination only when it is a distinct tensor.
  if (input_features != output_features)
    THCTensor_(resizeAs)(state, output_features, input_features);
  auto count = THCTensor_(nElement)(state, input_features);
  // Extract raw pointers after any resize so they reflect final storage.
  auto src = THCTensor_(data)(state, input_features);
  auto dst = THCTensor_(data)(state, output_features);
  // Fixed 16 x 1024 launch; the kernel covers all `count` elements.
  LeakyReLU_fp<real><<<16, 1024, 0, THCState_getCurrentStream(state)>>>(
      src, dst, count, alpha);
}
// Elementwise leaky-ReLU backward on the GPU via the LeakyReLU_bp kernel.
// Reads the forward input to decide which slope applied per element.
// Supports in-place gradients (d_input aliasing d_output).
extern "C" void scn_R_(LeakyReLU_updateGradInput)(THCTensor *input_features,
                                                  THCTensor *d_input_features,
                                                  THCTensor *d_output_features,
                                                  float alpha) {
  // Reshape the gradient destination only when it is a distinct tensor.
  if (d_input_features != d_output_features)
    THCTensor_(resizeAs)(state, d_input_features, d_output_features);
  auto count = THCTensor_(nElement)(state, d_input_features);
  // Extract raw pointers after any resize so they reflect final storage.
  auto fwdIn = THCTensor_(data)(state, input_features);
  auto gradIn = THCTensor_(data)(state, d_input_features);
  auto gradOut = THCTensor_(data)(state, d_output_features);
  // Fixed 16 x 1024 launch; the kernel covers all `count` elements.
  LeakyReLU_bp<real><<<16, 1024, 0, THCState_getCurrentStream(state)>>>(
      fwdIn, gradIn, gradOut, count, alpha);
}
#endif
sparseconvnet/SCN/generic/GPU/MaxPooling.cu
deleted
100644 → 0
View file @
6d4475db
// Copyright 2016-present, Facebook, Inc.
// All rights reserved.
//
// This source code is licensed under the license found in the
// LICENSE file in the root directory of this source tree.
#ifndef TH_GENERIC_FILE_
#define TH_GENERIC_FILE_ "generic/GPU/MaxPooling.cu"
#else
#include "MaxPooling.h"
#include "RuleBookIterator.h"
// Sparse max pooling, forward (GPU). Builds/fetches the pooling rulebook and
// runs MaxPooling_ForwardPass per rulebook group. The first nFeaturesToDrop
// feature columns are excluded: the input pointer is offset and the kernel is
// given both the reduced plane count and the full row strides.
extern "C" void scn_DR_(MaxPooling_updateOutput)(
    THLongTensor *inputSize, THLongTensor *outputSize, THLongTensor *poolSize,
    THLongTensor *poolStride, void **m, THCTensor *input_features,
    THCTensor *output_features, long nFeaturesToDrop) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
  uInt nPlanes = input_features->size[1] - nFeaturesToDrop;
  auto _rules =
      _m.getRuleBook(inputSize, outputSize, poolSize, poolStride, true);
  uInt nActive = _m.getNActive(outputSize);
  THCTensor_(resize2d)(state, output_features, nActive, nPlanes);
  THCTensor_(zero)(state, output_features);
  // Skip the dropped leading feature columns.
  auto iF = THCTensor_(data)(state, input_features) + nFeaturesToDrop;
  auto oF = THCTensor_(data)(state, output_features);
  // Second macro argument (per-group epilogue) is intentionally empty.
  RULEBOOKITERATOR(MaxPooling_ForwardPass<real>(
                       THCState_getCurrentStream(state), iF, oF, nPlanes,
                       input_features->size[1], output_features->size[1], rbB,
                       nHotB);
                   , )
}
// Sparse max pooling, backward (GPU). Routes each d_output row's gradient to
// the input row that produced the max, via MaxPooling_BackwardPass per
// rulebook group.
extern "C" void scn_DR_(MaxPooling_updateGradInput)(
    THLongTensor *inputSize, THLongTensor *outputSize, THLongTensor *poolSize,
    THLongTensor *poolStride, void **m, THCTensor *input_features,
    THCTensor *d_input_features, THCTensor *output_features,
    THCTensor *d_output_features, long nFeaturesToDrop) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
  uInt nPlanes = input_features->size[1] - nFeaturesToDrop;
  auto _rules =
      _m.getRuleBook(inputSize, outputSize, poolSize, poolStride, true);
  // nActive is computed (rulebook side effect) but not used below.
  uInt nActive = _m.getNActive(outputSize);
  THCTensor_(resizeAs)(state, d_input_features, input_features);
  THCTensor_(zero)(state, d_input_features);
  // NOTE(review): the forward pass offsets iF by nFeaturesToDrop but this
  // backward pass does not — verify intended behavior when
  // nFeaturesToDrop > 0.
  auto iF = THCTensor_(data)(state, input_features);
  auto oF = THCTensor_(data)(state, output_features);
  auto diF = THCTensor_(data)(state, d_input_features);
  auto doF = THCTensor_(data)(state, d_output_features);
  // Second macro argument (per-group epilogue) is intentionally empty.
  RULEBOOKITERATOR(MaxPooling_BackwardPass<real>(
                       THCState_getCurrentStream(state), iF, diF, oF, doF,
                       nPlanes, input_features->size[1],
                       d_output_features->size[1], rbB, nHotB);
                   , )
}
// Randomized-stride variant of sparse max pooling, forward (GPU). Identical
// to MaxPooling_updateOutput except the rulebook comes from
// getRandomizedStrideRuleBook.
extern "C" void scn_DR_(RandomizedStrideMaxPooling_updateOutput)(
    THLongTensor *inputSize, THLongTensor *outputSize, THLongTensor *poolSize,
    THLongTensor *poolStride, void **m, THCTensor *input_features,
    THCTensor *output_features, long nFeaturesToDrop) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
  uInt nPlanes = input_features->size[1] - nFeaturesToDrop;
  auto _rules = _m.getRandomizedStrideRuleBook(inputSize, outputSize, poolSize,
                                               poolStride, true);
  uInt nActive = _m.getNActive(outputSize);
  THCTensor_(resize2d)(state, output_features, nActive, nPlanes);
  THCTensor_(zero)(state, output_features);
  // Skip the dropped leading feature columns.
  auto iF = THCTensor_(data)(state, input_features) + nFeaturesToDrop;
  auto oF = THCTensor_(data)(state, output_features);
  // Second macro argument (per-group epilogue) is intentionally empty.
  RULEBOOKITERATOR(MaxPooling_ForwardPass<real>(
                       THCState_getCurrentStream(state), iF, oF, nPlanes,
                       input_features->size[1], output_features->size[1], rbB,
                       nHotB);
                   , )
}
// Randomized-stride variant of sparse max pooling, backward (GPU). Identical
// to MaxPooling_updateGradInput except the rulebook comes from
// getRandomizedStrideRuleBook.
extern "C" void scn_DR_(RandomizedStrideMaxPooling_updateGradInput)(
    THLongTensor *inputSize, THLongTensor *outputSize, THLongTensor *poolSize,
    THLongTensor *poolStride, void **m, THCTensor *input_features,
    THCTensor *d_input_features, THCTensor *output_features,
    THCTensor *d_output_features, long nFeaturesToDrop) {
  SCN_INITIALIZE_AND_REFERENCE(Metadata<Dimension>, m)
  uInt nPlanes = input_features->size[1] - nFeaturesToDrop;
  auto _rules = _m.getRandomizedStrideRuleBook(inputSize, outputSize, poolSize,
                                               poolStride, true);
  // nActive is computed (rulebook side effect) but not used below.
  uInt nActive = _m.getNActive(outputSize);
  THCTensor_(resizeAs)(state, d_input_features, input_features);
  THCTensor_(zero)(state, d_input_features);
  // NOTE(review): unlike the forward pass, iF is not offset by
  // nFeaturesToDrop here — verify intended when nFeaturesToDrop > 0.
  auto iF = THCTensor_(data)(state, input_features);
  auto oF = THCTensor_(data)(state, output_features);
  auto diF = THCTensor_(data)(state, d_input_features);
  auto doF = THCTensor_(data)(state, d_output_features);
  // Second macro argument (per-group epilogue) is intentionally empty.
  RULEBOOKITERATOR(MaxPooling_BackwardPass<real>(
                       THCState_getCurrentStream(state), iF, diF, oF, doF,
                       nPlanes, input_features->size[1],
                       d_output_features->size[1], rbB, nHotB);
                   , )
}
#endif
sparseconvnet/SCN/generic/GPU/NetworkInNetwork.cu
deleted
100644 → 0
View file @
6d4475db
// Copyright 2016-present, Facebook, Inc.
// All rights reserved.
//
// This source code is licensed under the license found in the
// LICENSE file in the root directory of this source tree.
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/GPU/NetworkInNetwork.cu"
#else
#include "Convolution.h"
#include <algorithm>
// 1x1 "network in network" linear layer, forward (GPU):
// output = input * weight (+ bias broadcast over rows).
// Returns the multiply-add count nActive * input_nPlanes * output_nPlanes
// (used by callers as a FLOP estimate).
extern "C" double scn_R_(NetworkInNetwork_updateOutput)(
    THCTensor *input_features_, THCTensor *output_features_,
    THCTensor *weight_, THCTensor *bias_) {
  auto nActive = input_features_->size[0]; // number of active rows
  auto input_nPlanes = weight_->size[0];
  auto output_nPlanes = weight_->size[1];
  THCTensor_(resize2d)(state, output_features_, nActive, output_nPlanes);
  auto input_features = THCTensor_(data)(state, input_features_);
  auto output_features = THCTensor_(data)(state, output_features_);
  auto weight = THCTensor_(data)(state, weight_);
  if (bias_ != nullptr) {
    auto bias = THCTensor_(data)(state, bias_);
    // Pre-fill the output with the bias, 32 output planes per launch;
    // the GEMM below then accumulates on top (beta = 1).
    for (uInt i = 0; i < output_nPlanes; i += 32) {
      uInt blockDim = min(32L, output_nPlanes - i);
      uInt gridDim = min(4096L, nActive);
      Convolution_fp_bias<<<gridDim, blockDim, 0,
                            THCState_getCurrentStream(state)>>>(
          output_features + i, bias + i, output_nPlanes, output_nPlanes,
          nActive);
    }
    // Do GEMM (note: gemm assumes column-major matrices)
    // input_features is l*m (row-major)
    // weight is m*r (row-major)
    // output_features is l*r (row-major)
    // input_features * weight + bias -> output_features
    THBLAS_GEMM(state, 'n', 'n',
                output_nPlanes, // r
                nActive,        // l
                input_nPlanes,  // m
                1,              // alpha
                weight, output_nPlanes,        // r
                input_features, input_nPlanes, // m
                1,              // beta (keep the bias already written)
                output_features, output_nPlanes // r
                );
  } else {
    // No bias: zero the output and overwrite (beta = 0).
    THCTensor_(zero)(state, output_features_);
    THBLAS_GEMM(state, 'n', 'n',
                output_nPlanes, // r
                nActive,        // l
                input_nPlanes,  // m
                1,              // alpha
                weight, output_nPlanes,        // r
                input_features, input_nPlanes, // m
                0,              // beta
                output_features, output_nPlanes // r
                );
  }
  return nActive * input_nPlanes * output_nPlanes;
}
// 1x1 "network in network" linear layer, backward w.r.t. the input (GPU):
// d_input = d_output * transpose(weight), computed with a single GEMM.
extern "C" void scn_R_(NetworkInNetwork_updateGradInput)(
    THCTensor *d_input_features_, THCTensor *d_output_features_,
    THCTensor *weight_) {
  auto rows = d_output_features_->size[0]; // active-row count
  auto inPlanes = weight_->size[0];
  auto outPlanes = weight_->size[1];
  // Shape the destination before extracting its raw pointer.
  THCTensor_(resize2d)(state, d_input_features_, rows, inPlanes);
  THCTensor_(zero)(state, d_input_features_);
  auto gradIn = THCTensor_(data)(state, d_input_features_);
  auto gradOut = THCTensor_(data)(state, d_output_features_);
  auto W = THCTensor_(data)(state, weight_);
  // GEMM works on column-major matrices, so the row-major
  // d_output [l x m] times W-transposed [m x r] is expressed as
  // ('t','n') with the dimensions below; beta = 0 overwrites gradIn.
  THBLAS_GEMM(state, 't', 'n',
              inPlanes,  // r
              rows,      // l
              outPlanes, // m
              1,         // alpha
              W, outPlanes,       // m
              gradOut, outPlanes, // m
              0,         // beta
              gradIn, inPlanes    // r
              );
}
// 1x1 "network in network" linear layer, parameter gradients (GPU).
// Accumulates (beta = 1) d_weight += transpose(input) * d_output and, when
// d_bias_ is non-null, adds the row reduction of d_output into d_bias.
extern "C" void scn_R_(NetworkInNetwork_accGradParameters)(
    THCTensor *input_features_, THCTensor *d_output_features_,
    THCTensor *d_weight_, THCTensor *d_bias_) {
  auto nActive = input_features_->size[0]; // number of active rows
  auto input_nPlanes = d_weight_->size[0];
  auto output_nPlanes = d_weight_->size[1];
  auto input_features = THCTensor_(data)(state, input_features_);
  auto d_output_features = THCTensor_(data)(state, d_output_features_);
  auto d_weight = THCTensor_(data)(state, d_weight_);
  // Do GEMM (note: gemm assumes column-major matrices)
  // input_features is m*l (row-major)
  // d_output_features is m*r (row-major)
  // d_weight is l*r (row-major)
  // T(input_features) * d_output_features -> d_weight (accumulated, beta = 1)
  THBLAS_GEMM(state, 'n', 't',
              output_nPlanes, // r
              input_nPlanes,  // l
              nActive,        // m
              1,              // alpha
              d_output_features, output_nPlanes, // r
              input_features, input_nPlanes,     // l
              1,              // beta (accumulate into existing d_weight)
              d_weight, output_nPlanes // r
              );
  if (d_bias_) {
    auto d_bias = THCTensor_(data)(state, d_bias_);
    // Bias gradient: reduction of d_output_features over the nActive rows.
    Convolution_bp_bias(d_output_features, d_bias, output_nPlanes,
                        output_nPlanes, nActive,
                        THCState_getCurrentStream(state));
  }
}
#endif
Prev
1
2
3
4
5
6
7
8
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment