Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
SparseConvNet
Commits
de3743f6
Commit
de3743f6
authored
Jul 13, 2018
by
Benjamin Thomas Graham
Browse files
Factor out CUDA code
parent
f0407b36
Changes
96
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
296 additions
and
372 deletions
+296
-372
.gitignore
.gitignore
+3
-2
build.sh
build.sh
+1
-1
examples/3d_segmentation/fully_convolutional.py
examples/3d_segmentation/fully_convolutional.py
+1
-1
examples/3d_segmentation/unet.py
examples/3d_segmentation/unet.py
+1
-1
examples/Assamese_handwriting/data.py
examples/Assamese_handwriting/data.py
+1
-2
examples/Assamese_handwriting/process.sh
examples/Assamese_handwriting/process.sh
+1
-0
setup.py
setup.py
+3
-2
sparseconvnet/SCN/CPU/ActivePooling.cpp
sparseconvnet/SCN/CPU/ActivePooling.cpp
+33
-1
sparseconvnet/SCN/CPU/ActivePooling.h
sparseconvnet/SCN/CPU/ActivePooling.h
+0
-44
sparseconvnet/SCN/CPU/AffineReluTrivialConvolution.cpp
sparseconvnet/SCN/CPU/AffineReluTrivialConvolution.cpp
+62
-1
sparseconvnet/SCN/CPU/AffineReluTrivialConvolution.h
sparseconvnet/SCN/CPU/AffineReluTrivialConvolution.h
+0
-72
sparseconvnet/SCN/CPU/AveragePooling.cpp
sparseconvnet/SCN/CPU/AveragePooling.cpp
+25
-1
sparseconvnet/SCN/CPU/AveragePooling.h
sparseconvnet/SCN/CPU/AveragePooling.h
+0
-36
sparseconvnet/SCN/CPU/BatchNormalization.cpp
sparseconvnet/SCN/CPU/BatchNormalization.cpp
+102
-23
sparseconvnet/SCN/CPU/BatchNormalization.h
sparseconvnet/SCN/CPU/BatchNormalization.h
+0
-114
sparseconvnet/SCN/CPU/BatchwiseMultiplicativeDropout.cpp
sparseconvnet/SCN/CPU/BatchwiseMultiplicativeDropout.cpp
+2
-2
sparseconvnet/SCN/CPU/Convolution.cpp
sparseconvnet/SCN/CPU/Convolution.cpp
+6
-2
sparseconvnet/SCN/CPU/IOLayers.cpp
sparseconvnet/SCN/CPU/IOLayers.cpp
+53
-17
sparseconvnet/SCN/CPU/IOLayers.h
sparseconvnet/SCN/CPU/IOLayers.h
+0
-47
sparseconvnet/SCN/CPU/LeakyReLU.cpp
sparseconvnet/SCN/CPU/LeakyReLU.cpp
+2
-3
No files found.
.gitignore
View file @
de3743f6
SparseConvNetTorch/build/
SparseConvNetTorch/build/
*.t7
*.pth
t7/
*.o
*.o
*.a
*.a
*.so
*.so
...
@@ -11,3 +10,5 @@ pickle
...
@@ -11,3 +10,5 @@ pickle
PyTorch/sparseconvnet.egg-info/
PyTorch/sparseconvnet.egg-info/
PyTorch/sparseconvnet/SCN/__init__.py
PyTorch/sparseconvnet/SCN/__init__.py
sparseconvnet.egg-info
sparseconvnet.egg-info
*.zip
*.rar
build.sh
View file @
de3743f6
#!/bin/bash
#!/bin/bash
rm
-rf
build/ sparseconvnet.egg-info sparseconvnet_SCN
*
.so
rm
-rf
build/
dist/
sparseconvnet.egg-info sparseconvnet_SCN
*
.so
python setup.py
install
python setup.py
install
examples/3d_segmentation/fully_convolutional.py
View file @
de3743f6
...
@@ -47,7 +47,7 @@ p['initial_lr'] = 1e-1
...
@@ -47,7 +47,7 @@ p['initial_lr'] = 1e-1
p
[
'lr_decay'
]
=
4e-2
p
[
'lr_decay'
]
=
4e-2
p
[
'weight_decay'
]
=
1e-4
p
[
'weight_decay'
]
=
1e-4
p
[
'momentum'
]
=
0.9
p
[
'momentum'
]
=
0.9
p
[
'check_point'
]
=
Tru
e
p
[
'check_point'
]
=
Fals
e
p
[
'use_cuda'
]
=
torch
.
cuda
.
is_available
()
p
[
'use_cuda'
]
=
torch
.
cuda
.
is_available
()
dtype
=
'torch.cuda.FloatTensor'
if
p
[
'use_cuda'
]
else
'torch.FloatTensor'
dtype
=
'torch.cuda.FloatTensor'
if
p
[
'use_cuda'
]
else
'torch.FloatTensor'
dtypei
=
'torch.cuda.LongTensor'
if
p
[
'use_cuda'
]
else
'torch.LongTensor'
dtypei
=
'torch.cuda.LongTensor'
if
p
[
'use_cuda'
]
else
'torch.LongTensor'
...
...
examples/3d_segmentation/unet.py
View file @
de3743f6
...
@@ -47,7 +47,7 @@ p['initial_lr'] = 1e-1
...
@@ -47,7 +47,7 @@ p['initial_lr'] = 1e-1
p
[
'lr_decay'
]
=
4e-2
p
[
'lr_decay'
]
=
4e-2
p
[
'weight_decay'
]
=
1e-4
p
[
'weight_decay'
]
=
1e-4
p
[
'momentum'
]
=
0.9
p
[
'momentum'
]
=
0.9
p
[
'check_point'
]
=
Tru
e
p
[
'check_point'
]
=
Fals
e
p
[
'use_cuda'
]
=
torch
.
cuda
.
is_available
()
p
[
'use_cuda'
]
=
torch
.
cuda
.
is_available
()
dtype
=
'torch.cuda.FloatTensor'
if
p
[
'use_cuda'
]
else
'torch.FloatTensor'
dtype
=
'torch.cuda.FloatTensor'
if
p
[
'use_cuda'
]
else
'torch.FloatTensor'
dtypei
=
'torch.cuda.LongTensor'
if
p
[
'use_cuda'
]
else
'torch.LongTensor'
dtypei
=
'torch.cuda.LongTensor'
if
p
[
'use_cuda'
]
else
'torch.LongTensor'
...
...
examples/Assamese_handwriting/data.py
View file @
de3743f6
...
@@ -4,8 +4,7 @@
...
@@ -4,8 +4,7 @@
# This source code is licensed under the license found in the
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
# LICENSE file in the root directory of this source tree.
import
torch
import
torch
,
torch
.
utils
.
data
import
torchnet
import
sparseconvnet
as
scn
import
sparseconvnet
as
scn
import
pickle
import
pickle
import
math
import
math
...
...
examples/Assamese_handwriting/process.sh
View file @
de3743f6
...
@@ -4,6 +4,7 @@
...
@@ -4,6 +4,7 @@
# This source code is licensed under the license found in the
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
# LICENSE file in the root directory of this source tree.
#!/bin/bash
#!/bin/bash
set
-e
wget https://archive.ics.uci.edu/ml/machine-learning-databases/00208/Online%20Handwritten%20Assamese%20Characters%20Dataset.rar
wget https://archive.ics.uci.edu/ml/machine-learning-databases/00208/Online%20Handwritten%20Assamese%20Characters%20Dataset.rar
unrar e
-cl
-y
"Online Handwritten Assamese Characters Dataset.rar"
unrar e
-cl
-y
"Online Handwritten Assamese Characters Dataset.rar"
mkdir
tmp
mkdir
tmp
...
...
setup.py
View file @
de3743f6
...
@@ -24,12 +24,13 @@ setup(
...
@@ -24,12 +24,13 @@ setup(
packages
=
[
'sparseconvnet'
,
'sparseconvnet.SCN'
],
packages
=
[
'sparseconvnet'
,
'sparseconvnet.SCN'
],
ext_modules
=
[
ext_modules
=
[
CUDAExtension
(
'sparseconvnet_SCN'
,
CUDAExtension
(
'sparseconvnet_SCN'
,
[
'sparseconvnet/SCN/pybind_cuda.cpp'
,
'sparseconvnet/SCN/instantiate_cpu.cpp'
,
'sparseconvnet/SCN/instantiate_cuda.cu'
],
[
'sparseconvnet/SCN/cuda.cu'
,
'sparseconvnet/SCN/sparseconvnet_cuda.cpp'
,
'sparseconvnet/SCN/pybind.cpp'
],
include_dirs
=
[
conda_include_dir
,
this_dir
+
'/sparseconvnet/SCN/'
],
include_dirs
=
[
conda_include_dir
,
this_dir
+
'/sparseconvnet/SCN/'
],
extra_compile_args
=
extra
)
extra_compile_args
=
extra
)
if
torch
.
cuda
.
is_available
()
else
if
torch
.
cuda
.
is_available
()
else
CppExtension
(
'sparseconvnet_SCN'
,
CppExtension
(
'sparseconvnet_SCN'
,
[
'sparseconvnet/SCN/pybind
_cpu
.cpp'
,
'sparseconvnet/SCN/
instantiate
_cpu.cpp'
],
[
'sparseconvnet/SCN/pybind.cpp'
,
'sparseconvnet/SCN/
sparseconvnet
_cpu.cpp'
],
include_dirs
=
[
conda_include_dir
,
this_dir
+
'/sparseconvnet/SCN/'
],
include_dirs
=
[
conda_include_dir
,
this_dir
+
'/sparseconvnet/SCN/'
],
extra_compile_args
=
extra
[
'cxx'
])],
extra_compile_args
=
extra
[
'cxx'
])],
cmdclass
=
{
'build_ext'
:
BuildExtension
},
cmdclass
=
{
'build_ext'
:
BuildExtension
},
...
...
sparseconvnet/SCN/CPU/ActivePooling.cpp
View file @
de3743f6
...
@@ -4,7 +4,39 @@
...
@@ -4,7 +4,39 @@
// This source code is licensed under the license found in the
// This source code is licensed under the license found in the
// LICENSE file in the root directory of this source tree.
// LICENSE file in the root directory of this source tree.
#include "ActivePooling.h"
// Assume output_features and d_input_features have been zero-ed
template
<
typename
T
>
void
ActivePooling_ForwardPass
(
T
*
input_features
,
T
*
output_features
,
Int
batchSize
,
Int
maxActive
,
Int
nPlanes
,
RuleBook
&
rules
,
bool
average
)
{
for
(
Int
outSite
=
0
;
outSite
<
batchSize
;
outSite
++
)
{
T
*
out
=
&
output_features
[
outSite
*
nPlanes
];
Int
*
r
=
&
rules
[
0
][
outSite
*
(
maxActive
+
1
)];
Int
nActive
=
*
r
++
;
T
multiplier
=
(
average
and
nActive
>
0
)
?
1.0
f
/
nActive
:
1.0
f
;
while
(
nActive
--
>
0
)
{
T
*
inp
=
&
input_features
[(
*
r
++
)
*
nPlanes
];
for
(
Int
plane
=
0
;
plane
<
nPlanes
;
plane
++
)
out
[
plane
]
+=
inp
[
plane
]
*
multiplier
;
}
}
}
template
<
typename
T
>
void
ActivePooling_BackwardPass
(
T
*
d_input_features
,
T
*
d_output_features
,
Int
batchSize
,
Int
maxActive
,
Int
nPlanes
,
RuleBook
&
rules
,
bool
average
)
{
for
(
Int
outSite
=
0
;
outSite
<
batchSize
;
outSite
++
)
{
T
*
out
=
&
d_output_features
[
outSite
*
nPlanes
];
Int
*
r
=
&
rules
[
0
][
outSite
*
(
maxActive
+
1
)];
Int
nActive
=
*
r
++
;
T
multiplier
=
(
average
and
nActive
>
0
)
?
1.0
f
/
nActive
:
1.0
f
;
while
(
nActive
--
>
0
)
{
T
*
inp
=
&
d_input_features
[(
*
r
++
)
*
nPlanes
];
for
(
Int
plane
=
0
;
plane
<
nPlanes
;
plane
++
)
inp
[
plane
]
=
out
[
plane
]
*
multiplier
;
}
}
}
template
<
typename
T
,
Int
Dimension
>
template
<
typename
T
,
Int
Dimension
>
void
cpu_ActivePooling_updateOutput
(
void
cpu_ActivePooling_updateOutput
(
...
...
sparseconvnet/SCN/CPU/ActivePooling.h
deleted
100644 → 0
View file @
f0407b36
// Copyright 2016-present, Facebook, Inc.
// All rights reserved.
//
// This source code is licensed under the license found in the
// LICENSE file in the root directory of this source tree.
#ifndef CPU_ACTIVEPOOLING_H
#define CPU_ACTIVEPOOLING_H
// Assume output_features and d_input_features have been zero-ed
template
<
typename
T
>
void
ActivePooling_ForwardPass
(
T
*
input_features
,
T
*
output_features
,
Int
batchSize
,
Int
maxActive
,
Int
nPlanes
,
RuleBook
&
rules
,
bool
average
)
{
for
(
Int
outSite
=
0
;
outSite
<
batchSize
;
outSite
++
)
{
T
*
out
=
&
output_features
[
outSite
*
nPlanes
];
Int
*
r
=
&
rules
[
0
][
outSite
*
(
maxActive
+
1
)];
Int
nActive
=
*
r
++
;
T
multiplier
=
(
average
and
nActive
>
0
)
?
1.0
f
/
nActive
:
1.0
f
;
while
(
nActive
--
>
0
)
{
T
*
inp
=
&
input_features
[(
*
r
++
)
*
nPlanes
];
for
(
Int
plane
=
0
;
plane
<
nPlanes
;
plane
++
)
out
[
plane
]
+=
inp
[
plane
]
*
multiplier
;
}
}
}
template
<
typename
T
>
void
ActivePooling_BackwardPass
(
T
*
d_input_features
,
T
*
d_output_features
,
Int
batchSize
,
Int
maxActive
,
Int
nPlanes
,
RuleBook
&
rules
,
bool
average
)
{
for
(
Int
outSite
=
0
;
outSite
<
batchSize
;
outSite
++
)
{
T
*
out
=
&
d_output_features
[
outSite
*
nPlanes
];
Int
*
r
=
&
rules
[
0
][
outSite
*
(
maxActive
+
1
)];
Int
nActive
=
*
r
++
;
T
multiplier
=
(
average
and
nActive
>
0
)
?
1.0
f
/
nActive
:
1.0
f
;
while
(
nActive
--
>
0
)
{
T
*
inp
=
&
d_input_features
[(
*
r
++
)
*
nPlanes
];
for
(
Int
plane
=
0
;
plane
<
nPlanes
;
plane
++
)
inp
[
plane
]
=
out
[
plane
]
*
multiplier
;
}
}
}
#endif
/* CPU_ACTIVEPOOLING_H */
sparseconvnet/SCN/CPU/AffineReluTrivialConvolution.cpp
View file @
de3743f6
...
@@ -4,7 +4,68 @@
...
@@ -4,7 +4,68 @@
// This source code is licensed under the license found in the
// This source code is licensed under the license found in the
// LICENSE file in the root directory of this source tree.
// LICENSE file in the root directory of this source tree.
#include "AffineReluTrivialConvolution.h"
#include <cstring>
template
<
typename
T
>
void
AffineReluTrivialConvolution_ForwardPass
(
T
*
input_features
,
Int
input_nPlanes
,
Int
input_stride
,
T
*
output_features
,
Int
output_nPlanes
,
Int
output_stride
,
T
*
affineWeight
,
T
*
affineBias
,
T
*
convWeight
,
Int
nActive
)
{
for
(
Int
row
=
0
;
row
<
nActive
;
row
++
)
{
for
(
Int
column
=
0
;
column
<
output_nPlanes
;
column
++
)
{
T
sum
=
0
;
for
(
Int
j
=
0
;
j
<
input_nPlanes
;
j
++
)
{
T
i
=
input_features
[
row
*
input_stride
+
j
]
*
affineWeight
[
j
]
+
affineBias
[
j
];
i
=
(
i
>
0
)
?
i
:
0
;
sum
+=
i
*
convWeight
[
j
*
output_nPlanes
+
column
];
}
output_features
[
row
*
output_stride
+
column
]
=
sum
;
}
}
}
template
<
typename
T
>
void
AffineReluTrivialConvolution_BackwardPass
(
T
*
input_features
,
T
*
d_input_features
,
Int
input_nPlanes
,
Int
input_stride
,
T
*
d_output_features
,
Int
output_nPlanes
,
Int
output_stride
,
T
*
affineWeight
,
T
*
dAffineWeight
,
T
*
affineBias
,
T
*
dAffineBias
,
T
*
convWeight
,
T
*
dConvWeight
,
Int
nActive
,
bool
additiveGrad
)
{
for
(
Int
row
=
0
;
row
<
input_nPlanes
;
row
++
)
{
for
(
Int
column
=
0
;
column
<
output_nPlanes
;
column
++
)
{
T
sum
=
0
;
for
(
Int
j
=
0
;
j
<
nActive
;
j
++
)
{
T
i
=
input_features
[
j
*
input_stride
+
row
]
*
affineWeight
[
row
]
+
affineBias
[
row
];
i
=
(
i
>
0
)
?
i
:
0
;
sum
+=
i
*
d_output_features
[
j
*
output_stride
+
column
];
}
dConvWeight
[
row
*
output_nPlanes
+
column
]
+=
sum
;
}
}
for
(
Int
row
=
0
;
row
<
nActive
;
row
++
)
{
for
(
Int
column
=
0
;
column
<
input_nPlanes
;
column
++
)
{
T
sum
=
0
;
for
(
Int
j
=
0
;
j
<
output_nPlanes
;
j
++
)
{
sum
+=
d_output_features
[
row
*
output_stride
+
j
]
*
convWeight
[
column
*
output_nPlanes
+
j
];
}
T
i
=
input_features
[
row
*
input_stride
+
column
]
*
affineWeight
[
column
]
+
affineBias
[
column
];
if
(
i
<=
0
)
// d_ReLU
sum
=
0
;
dAffineWeight
[
column
]
+=
sum
*
i
;
dAffineBias
[
column
]
+=
sum
;
sum
*=
affineWeight
[
column
];
if
(
additiveGrad
)
d_input_features
[
row
*
input_stride
+
column
]
+=
sum
;
else
d_input_features
[
row
*
input_stride
+
column
]
=
sum
;
}
}
}
template
<
typename
T
>
template
<
typename
T
>
double
cpu_AffineReluTrivialConvolution_updateOutput
(
double
cpu_AffineReluTrivialConvolution_updateOutput
(
...
...
sparseconvnet/SCN/CPU/AffineReluTrivialConvolution.h
deleted
100644 → 0
View file @
f0407b36
// Copyright 2016-present, Facebook, Inc.
// All rights reserved.
//
// This source code is licensed under the license found in the
// LICENSE file in the root directory of this source tree.
#ifndef CPU_AffineReluTrivialConvolution_H
#define CPU_AffineReluTrivialConvolution_H
#include <cstring>
template
<
typename
T
>
void
AffineReluTrivialConvolution_ForwardPass
(
T
*
input_features
,
Int
input_nPlanes
,
Int
input_stride
,
T
*
output_features
,
Int
output_nPlanes
,
Int
output_stride
,
T
*
affineWeight
,
T
*
affineBias
,
T
*
convWeight
,
Int
nActive
)
{
for
(
Int
row
=
0
;
row
<
nActive
;
row
++
)
{
for
(
Int
column
=
0
;
column
<
output_nPlanes
;
column
++
)
{
T
sum
=
0
;
for
(
Int
j
=
0
;
j
<
input_nPlanes
;
j
++
)
{
T
i
=
input_features
[
row
*
input_stride
+
j
]
*
affineWeight
[
j
]
+
affineBias
[
j
];
i
=
(
i
>
0
)
?
i
:
0
;
sum
+=
i
*
convWeight
[
j
*
output_nPlanes
+
column
];
}
output_features
[
row
*
output_stride
+
column
]
=
sum
;
}
}
}
template
<
typename
T
>
void
AffineReluTrivialConvolution_BackwardPass
(
T
*
input_features
,
T
*
d_input_features
,
Int
input_nPlanes
,
Int
input_stride
,
T
*
d_output_features
,
Int
output_nPlanes
,
Int
output_stride
,
T
*
affineWeight
,
T
*
dAffineWeight
,
T
*
affineBias
,
T
*
dAffineBias
,
T
*
convWeight
,
T
*
dConvWeight
,
Int
nActive
,
bool
additiveGrad
)
{
for
(
Int
row
=
0
;
row
<
input_nPlanes
;
row
++
)
{
for
(
Int
column
=
0
;
column
<
output_nPlanes
;
column
++
)
{
T
sum
=
0
;
for
(
Int
j
=
0
;
j
<
nActive
;
j
++
)
{
T
i
=
input_features
[
j
*
input_stride
+
row
]
*
affineWeight
[
row
]
+
affineBias
[
row
];
i
=
(
i
>
0
)
?
i
:
0
;
sum
+=
i
*
d_output_features
[
j
*
output_stride
+
column
];
}
dConvWeight
[
row
*
output_nPlanes
+
column
]
+=
sum
;
}
}
for
(
Int
row
=
0
;
row
<
nActive
;
row
++
)
{
for
(
Int
column
=
0
;
column
<
input_nPlanes
;
column
++
)
{
T
sum
=
0
;
for
(
Int
j
=
0
;
j
<
output_nPlanes
;
j
++
)
{
sum
+=
d_output_features
[
row
*
output_stride
+
j
]
*
convWeight
[
column
*
output_nPlanes
+
j
];
}
T
i
=
input_features
[
row
*
input_stride
+
column
]
*
affineWeight
[
column
]
+
affineBias
[
column
];
if
(
i
<=
0
)
// d_ReLU
sum
=
0
;
dAffineWeight
[
column
]
+=
sum
*
i
;
dAffineBias
[
column
]
+=
sum
;
sum
*=
affineWeight
[
column
];
if
(
additiveGrad
)
d_input_features
[
row
*
input_stride
+
column
]
+=
sum
;
else
d_input_features
[
row
*
input_stride
+
column
]
=
sum
;
}
}
}
#endif
/* CPU_AffineReluTrivialConvolution_H */
sparseconvnet/SCN/CPU/AveragePooling.cpp
View file @
de3743f6
...
@@ -4,7 +4,31 @@
...
@@ -4,7 +4,31 @@
// This source code is licensed under the license found in the
// This source code is licensed under the license found in the
// LICENSE file in the root directory of this source tree.
// LICENSE file in the root directory of this source tree.
#include "AveragePooling.h"
template
<
typename
T
>
void
AveragePooling_ForwardPass
(
T
*
input_features
,
T
*
output_features
,
Int
nPlanes
,
Int
input_stride
,
Int
output_stride
,
Int
*
rules
,
Int
nHot
,
Int
filterVolume
)
{
for
(
Int
outSite
=
0
;
outSite
<
nHot
;
outSite
++
)
{
Int
i
=
rules
[
2
*
outSite
]
*
input_stride
;
Int
o
=
rules
[
2
*
outSite
+
1
]
*
output_stride
;
for
(
Int
plane
=
0
;
plane
<
nPlanes
;
plane
++
)
output_features
[
o
+
plane
]
+=
input_features
[
i
+
plane
]
/
filterVolume
;
}
}
template
<
typename
T
>
void
AveragePooling_BackwardPass
(
T
*
d_input_features
,
T
*
d_output_features
,
Int
nPlanes
,
Int
input_stride
,
Int
output_stride
,
Int
*
rules
,
Int
nHot
,
Int
filterVolume
)
{
for
(
Int
outSite
=
0
;
outSite
<
nHot
;
outSite
++
)
{
Int
i
=
rules
[
2
*
outSite
]
*
input_stride
;
Int
o
=
rules
[
2
*
outSite
+
1
]
*
output_stride
;
for
(
Int
plane
=
0
;
plane
<
nPlanes
;
plane
++
)
d_input_features
[
i
+
plane
]
+=
d_output_features
[
o
+
plane
]
/
filterVolume
;
}
}
template
<
typename
T
,
Int
Dimension
>
template
<
typename
T
,
Int
Dimension
>
void
cpu_AveragePooling_updateOutput
(
void
cpu_AveragePooling_updateOutput
(
...
...
sparseconvnet/SCN/CPU/AveragePooling.h
deleted
100644 → 0
View file @
f0407b36
// Copyright 2016-present, Facebook, Inc.
// All rights reserved.
//
// This source code is licensed under the license found in the
// LICENSE file in the root directory of this source tree.
#ifndef CPU_AVERAGEPOOLING_H
#define CPU_AVERAGEPOOLING_H
template
<
typename
T
>
void
AveragePooling_ForwardPass
(
T
*
input_features
,
T
*
output_features
,
Int
nPlanes
,
Int
input_stride
,
Int
output_stride
,
Int
*
rules
,
Int
nHot
,
Int
filterVolume
)
{
for
(
Int
outSite
=
0
;
outSite
<
nHot
;
outSite
++
)
{
Int
i
=
rules
[
2
*
outSite
]
*
input_stride
;
Int
o
=
rules
[
2
*
outSite
+
1
]
*
output_stride
;
for
(
Int
plane
=
0
;
plane
<
nPlanes
;
plane
++
)
output_features
[
o
+
plane
]
+=
input_features
[
i
+
plane
]
/
filterVolume
;
}
}
template
<
typename
T
>
void
AveragePooling_BackwardPass
(
T
*
d_input_features
,
T
*
d_output_features
,
Int
nPlanes
,
Int
input_stride
,
Int
output_stride
,
Int
*
rules
,
Int
nHot
,
Int
filterVolume
)
{
for
(
Int
outSite
=
0
;
outSite
<
nHot
;
outSite
++
)
{
Int
i
=
rules
[
2
*
outSite
]
*
input_stride
;
Int
o
=
rules
[
2
*
outSite
+
1
]
*
output_stride
;
for
(
Int
plane
=
0
;
plane
<
nPlanes
;
plane
++
)
d_input_features
[
i
+
plane
]
+=
d_output_features
[
o
+
plane
]
/
filterVolume
;
}
}
#endif
/* CPU_AVERAGEPOOLING_H */
sparseconvnet/SCN/CPU/BatchNormalization.cpp
View file @
de3743f6
...
@@ -4,46 +4,125 @@
...
@@ -4,46 +4,125 @@
// This source code is licensed under the license found in the
// This source code is licensed under the license found in the
// LICENSE file in the root directory of this source tree.
// LICENSE file in the root directory of this source tree.
#include "BatchNormalization.h"
#include <vector>
// in/output_stride is normally the same as nPlanes; allow other values to act
// on a subset of columns, i.e. an inplace DenseNet blocks
template
<
typename
T
>
template
<
typename
T
>
void
cpu_BatchNormalization_updateOutput
(
void
BatchNormalization_ForwardPass
(
T
*
input_features
,
T
*
output_features
,
/*float*/
at
::
Tensor
input_features
,
/*float*/
at
::
Tensor
output_features
,
Int
nPlanes
,
Int
input_stride
,
/*float*/
at
::
Tensor
saveMean
,
Int
output_stride
,
Int
nActive
,
T
*
saveMean
,
/*float*/
at
::
Tensor
saveInvStd
,
/*float*/
at
::
Tensor
runningMean
,
T
*
saveInvStd
,
T
*
runningMean
,
/*float*/
at
::
Tensor
runningVar
,
T
*
runningVar
,
T
*
weight
,
T
*
bias
,
T
eps
,
/*float*/
at
::
Tensor
weight
,
/*float*/
at
::
Tensor
bias
,
T
eps
,
T
momentum
,
T
momentum
,
bool
train
,
T
leakiness
)
{
bool
train
,
T
leakiness
)
{
if
(
train
)
{
output_features
.
resize_as_
(
input_features
);
std
::
memset
(
saveMean
,
0
,
nPlanes
*
sizeof
(
T
));
if
(
input_features
.
ndimension
()
==
2
)
{
std
::
memset
(
saveInvStd
,
0
,
nPlanes
*
sizeof
(
T
));
auto
nActive
=
input_features
.
size
(
0
);
for
(
Int
row
=
0
,
ci
=
0
;
row
<
nActive
;
auto
nPlanes
=
input_features
.
size
(
1
);
row
++
,
ci
+=
input_stride
-
nPlanes
)
{
auto
input_stride
=
input_features
.
stride
(
0
);
for
(
Int
plane
=
0
;
plane
<
nPlanes
;
plane
++
,
ci
++
)
{
auto
output_stride
=
output_features
.
stride
(
0
);
saveMean
[
plane
]
+=
input_features
[
ci
];
BatchNormalization_ForwardPass
<
T
>
(
}
input_features
.
data
<
T
>
(),
output_features
.
data
<
T
>
(),
nPlanes
,
}
input_stride
,
output_stride
,
nActive
,
saveMean
.
data
<
T
>
(),
for
(
Int
plane
=
0
;
plane
<
nPlanes
;
plane
++
)
{
saveInvStd
.
data
<
T
>
(),
runningMean
.
data
<
T
>
(),
runningVar
.
data
<
T
>
(),
saveMean
[
plane
]
/=
nActive
;
OptionalTensorData
<
T
>
(
weight
),
OptionalTensorData
<
T
>
(
bias
),
eps
,
runningMean
[
plane
]
=
momentum
,
train
,
leakiness
);
momentum
*
runningMean
[
plane
]
+
(
1
-
momentum
)
*
saveMean
[
plane
];
}
for
(
Int
row
=
0
,
ci
=
0
;
row
<
nActive
;
row
++
,
ci
+=
input_stride
-
nPlanes
)
{
for
(
Int
plane
=
0
;
plane
<
nPlanes
;
plane
++
,
ci
++
)
{
saveInvStd
[
plane
]
+=
(
input_features
[
ci
]
-
saveMean
[
plane
])
*
(
input_features
[
ci
]
-
saveMean
[
plane
]);
// accumulate sum-squares
// before inverse square
// rooting
}
}
for
(
Int
plane
=
0
;
plane
<
nPlanes
;
plane
++
)
{
runningVar
[
plane
]
=
momentum
*
runningVar
[
plane
]
+
(
1
-
momentum
)
*
saveInvStd
[
plane
]
/
(
nActive
-
1
);
saveInvStd
[
plane
]
=
powf
(
saveInvStd
[
plane
]
/
nActive
+
eps
,
-
0.5
);
}
}
else
{
for
(
Int
plane
=
0
;
plane
<
nPlanes
;
plane
++
)
{
saveMean
[
plane
]
=
runningMean
[
plane
];
saveInvStd
[
plane
]
=
powf
(
runningVar
[
plane
]
+
eps
,
-
0.5
);
}
}
std
::
vector
<
T
>
w
(
nPlanes
);
std
::
vector
<
T
>
b
(
nPlanes
);
for
(
Int
plane
=
0
;
plane
<
nPlanes
;
plane
++
)
{
w
[
plane
]
=
saveInvStd
[
plane
]
*
(
weight
?
weight
[
plane
]
:
1
);
b
[
plane
]
=
-
saveMean
[
plane
]
*
w
[
plane
]
+
(
bias
?
bias
[
plane
]
:
0
);
}
for
(
Int
row
=
0
,
ci
=
0
,
co
=
0
;
row
<
nActive
;
row
++
,
ci
+=
input_stride
-
nPlanes
,
co
+=
output_stride
-
nPlanes
)
{
for
(
Int
plane
=
0
;
plane
<
nPlanes
;
plane
++
,
ci
++
,
co
++
)
{
T
out
=
input_features
[
ci
]
*
w
[
plane
]
+
b
[
plane
];
out
=
(
out
>
0
)
?
out
:
(
out
*
leakiness
);
output_features
[
co
]
=
out
;
}
}
}
template
<
typename
T
>
void
BatchNormalization_BackwardPass
(
T
*
input_features
,
T
*
d_input_features
,
T
*
output_features
,
T
*
d_output_features
,
Int
nPlanes
,
Int
input_stride
,
Int
output_stride
,
Int
nActive
,
T
*
saveMean
,
T
*
saveInvStd
,
T
*
runningMean
,
T
*
runningVar
,
T
*
weight
,
T
*
bias
,
T
*
d_weight
,
T
*
d_bias
,
T
leakiness
)
{
std
::
vector
<
T
>
gradMean
(
nPlanes
);
std
::
vector
<
T
>
dotp
(
nPlanes
);
std
::
vector
<
T
>
k
(
nPlanes
);
for
(
Int
row
=
0
,
ci
=
0
,
co
=
0
;
row
<
nActive
;
row
++
,
ci
+=
input_stride
-
nPlanes
,
co
+=
output_stride
-
nPlanes
)
{
for
(
Int
plane
=
0
;
plane
<
nPlanes
;
plane
++
,
ci
++
,
co
++
)
{
T
d
=
d_output_features
[
co
];
d
=
(
output_features
[
co
]
>
0
)
?
d
:
(
d
*
leakiness
);
d_output_features
[
co
]
=
d
;
gradMean
[
plane
]
+=
d
;
dotp
[
plane
]
+=
(
input_features
[
ci
]
-
saveMean
[
plane
])
*
d
;
}
}
for
(
Int
plane
=
0
;
plane
<
nPlanes
;
plane
++
)
{
if
(
d_bias
)
d_bias
[
plane
]
=
gradMean
[
plane
];
// sum of grads, really, until ...
gradMean
[
plane
]
/=
nActive
;
// ...now
k
[
plane
]
=
dotp
[
plane
]
*
saveInvStd
[
plane
]
*
saveInvStd
[
plane
]
/
nActive
;
}
for
(
Int
row
=
0
,
ci
=
0
,
co
=
0
;
row
<
nActive
;
row
++
,
ci
+=
input_stride
-
nPlanes
,
co
+=
output_stride
-
nPlanes
)
{
for
(
Int
plane
=
0
;
plane
<
nPlanes
;
plane
++
,
ci
++
,
co
++
)
{
d_input_features
[
ci
]
=
(
d_output_features
[
co
]
-
gradMean
[
plane
]
-
(
input_features
[
ci
]
-
saveMean
[
plane
])
*
k
[
plane
])
*
saveInvStd
[
plane
]
*
(
weight
?
weight
[
plane
]
:
1
);
}
}
}
if
(
d_weight
)
for
(
Int
plane
=
0
;
plane
<
nPlanes
;
plane
++
)
{
d_weight
[
plane
]
=
dotp
[
plane
]
*
saveInvStd
[
plane
];
}
}
}
template
<
typename
T
>
template
<
typename
T
>
void
cpu_BatchNormalization
InTensor
_updateOutput
(
void
cpu_BatchNormalization_updateOutput
(
/*float*/
at
::
Tensor
input_features
,
/*float*/
at
::
Tensor
output_features
,
/*float*/
at
::
Tensor
input_features
,
/*float*/
at
::
Tensor
output_features
,
/*float*/
at
::
Tensor
saveMean
,
/*float*/
at
::
Tensor
saveMean
,
/*float*/
at
::
Tensor
saveInvStd
,
/*float*/
at
::
Tensor
runningMean
,
/*float*/
at
::
Tensor
saveInvStd
,
/*float*/
at
::
Tensor
runningMean
,
/*float*/
at
::
Tensor
runningVar
,
/*float*/
at
::
Tensor
runningVar
,
/*float*/
at
::
Tensor
weight
,
/*float*/
at
::
Tensor
bias
,
T
eps
,
T
momentum
,
/*float*/
at
::
Tensor
weight
,
/*float*/
at
::
Tensor
bias
,
T
eps
,
T
momentum
,
bool
train
,
T
leakiness
)
{
bool
train
,
T
leakiness
)
{
output_features
.
resize_as_
(
input_features
);
if
(
input_features
.
ndimension
()
==
2
)
{
if
(
input_features
.
ndimension
()
==
2
)
{
auto
nActive
=
input_features
.
size
(
0
);
auto
nActive
=
input_features
.
size
(
0
);
auto
nPlanes
=
input_features
.
size
(
1
);
auto
nPlanes
=
input_features
.
size
(
1
);
auto
input_stride
=
input_features
.
stride
(
0
);
auto
input_stride
=
input_features
.
stride
(
0
);
auto
output_stride
=
output_features
.
stride
(
0
);
auto
output_stride
=
output_features
.
stride
(
0
);
BatchNormalization_ForwardPass
<
T
>
(
BatchNormalization_ForwardPass
<
T
>
(
input_features
.
data
<
T
>
(),
output_features
.
data
<
T
>
(),
nPlanes
,
input_features
.
data
<
T
>
(),
output_features
.
data
<
T
>
(),
nPlanes
,
input_stride
,
output_stride
,
nActive
,
saveMean
.
data
<
T
>
(),
input_stride
,
output_stride
,
nActive
,
saveMean
.
data
<
T
>
(),
...
...
sparseconvnet/SCN/CPU/BatchNormalization.h
deleted
100644 → 0
View file @
f0407b36
// Copyright 2016-present, Facebook, Inc.
// All rights reserved.
//
// This source code is licensed under the license found in the
// LICENSE file in the root directory of this source tree.
#ifndef CPU_BATCHNORMALIZATION_H
#define CPU_BATCHNORMALIZATION_H
#include <vector>
// in/output_stride is normally the same as nPlanes; allow other values to act
// on a subset of columns, i.e. an inplace DenseNet blocks
template
<
typename
T
>
void
BatchNormalization_ForwardPass
(
T
*
input_features
,
T
*
output_features
,
Int
nPlanes
,
Int
input_stride
,
Int
output_stride
,
Int
nActive
,
T
*
saveMean
,
T
*
saveInvStd
,
T
*
runningMean
,
T
*
runningVar
,
T
*
weight
,
T
*
bias
,
T
eps
,
T
momentum
,
bool
train
,
T
leakiness
)
{
if
(
train
)
{
std
::
memset
(
saveMean
,
0
,
nPlanes
*
sizeof
(
T
));
std
::
memset
(
saveInvStd
,
0
,
nPlanes
*
sizeof
(
T
));
for
(
Int
row
=
0
,
ci
=
0
;
row
<
nActive
;
row
++
,
ci
+=
input_stride
-
nPlanes
)
{
for
(
Int
plane
=
0
;
plane
<
nPlanes
;
plane
++
,
ci
++
)
{
saveMean
[
plane
]
+=
input_features
[
ci
];
}
}
for
(
Int
plane
=
0
;
plane
<
nPlanes
;
plane
++
)
{
saveMean
[
plane
]
/=
nActive
;
runningMean
[
plane
]
=
momentum
*
runningMean
[
plane
]
+
(
1
-
momentum
)
*
saveMean
[
plane
];
}
for
(
Int
row
=
0
,
ci
=
0
;
row
<
nActive
;
row
++
,
ci
+=
input_stride
-
nPlanes
)
{
for
(
Int
plane
=
0
;
plane
<
nPlanes
;
plane
++
,
ci
++
)
{
saveInvStd
[
plane
]
+=
(
input_features
[
ci
]
-
saveMean
[
plane
])
*
(
input_features
[
ci
]
-
saveMean
[
plane
]);
// accumulate sum-squares
// before inverse square
// rooting
}
}
for
(
Int
plane
=
0
;
plane
<
nPlanes
;
plane
++
)
{
runningVar
[
plane
]
=
momentum
*
runningVar
[
plane
]
+
(
1
-
momentum
)
*
saveInvStd
[
plane
]
/
(
nActive
-
1
);
saveInvStd
[
plane
]
=
powf
(
saveInvStd
[
plane
]
/
nActive
+
eps
,
-
0.5
);
}
}
else
{
for
(
Int
plane
=
0
;
plane
<
nPlanes
;
plane
++
)
{
saveMean
[
plane
]
=
runningMean
[
plane
];
saveInvStd
[
plane
]
=
powf
(
runningVar
[
plane
]
+
eps
,
-
0.5
);
}
}
std
::
vector
<
T
>
w
(
nPlanes
);
std
::
vector
<
T
>
b
(
nPlanes
);
for
(
Int
plane
=
0
;
plane
<
nPlanes
;
plane
++
)
{
w
[
plane
]
=
saveInvStd
[
plane
]
*
(
weight
?
weight
[
plane
]
:
1
);
b
[
plane
]
=
-
saveMean
[
plane
]
*
w
[
plane
]
+
(
bias
?
bias
[
plane
]
:
0
);
}
for
(
Int
row
=
0
,
ci
=
0
,
co
=
0
;
row
<
nActive
;
row
++
,
ci
+=
input_stride
-
nPlanes
,
co
+=
output_stride
-
nPlanes
)
{
for
(
Int
plane
=
0
;
plane
<
nPlanes
;
plane
++
,
ci
++
,
co
++
)
{
T
out
=
input_features
[
ci
]
*
w
[
plane
]
+
b
[
plane
];
out
=
(
out
>
0
)
?
out
:
(
out
*
leakiness
);
output_features
[
co
]
=
out
;
}
}
}
template
<
typename
T
>
void
BatchNormalization_BackwardPass
(
T
*
input_features
,
T
*
d_input_features
,
T
*
output_features
,
T
*
d_output_features
,
Int
nPlanes
,
Int
input_stride
,
Int
output_stride
,
Int
nActive
,
T
*
saveMean
,
T
*
saveInvStd
,
T
*
runningMean
,
T
*
runningVar
,
T
*
weight
,
T
*
bias
,
T
*
d_weight
,
T
*
d_bias
,
T
leakiness
)
{
std
::
vector
<
T
>
gradMean
(
nPlanes
);
std
::
vector
<
T
>
dotp
(
nPlanes
);
std
::
vector
<
T
>
k
(
nPlanes
);
for
(
Int
row
=
0
,
ci
=
0
,
co
=
0
;
row
<
nActive
;
row
++
,
ci
+=
input_stride
-
nPlanes
,
co
+=
output_stride
-
nPlanes
)
{
for
(
Int
plane
=
0
;
plane
<
nPlanes
;
plane
++
,
ci
++
,
co
++
)
{
T
d
=
d_output_features
[
co
];
d
=
(
output_features
[
co
]
>
0
)
?
d
:
(
d
*
leakiness
);
d_output_features
[
co
]
=
d
;
gradMean
[
plane
]
+=
d
;
dotp
[
plane
]
+=
(
input_features
[
ci
]
-
saveMean
[
plane
])
*
d
;
}
}
for
(
Int
plane
=
0
;
plane
<
nPlanes
;
plane
++
)
{
if
(
d_bias
)
d_bias
[
plane
]
=
gradMean
[
plane
];
// sum of grads, really, until ...
gradMean
[
plane
]
/=
nActive
;
// ...now
k
[
plane
]
=
dotp
[
plane
]
*
saveInvStd
[
plane
]
*
saveInvStd
[
plane
]
/
nActive
;
}
for
(
Int
row
=
0
,
ci
=
0
,
co
=
0
;
row
<
nActive
;
row
++
,
ci
+=
input_stride
-
nPlanes
,
co
+=
output_stride
-
nPlanes
)
{
for
(
Int
plane
=
0
;
plane
<
nPlanes
;
plane
++
,
ci
++
,
co
++
)
{
d_input_features
[
ci
]
=
(
d_output_features
[
co
]
-
gradMean
[
plane
]
-
(
input_features
[
ci
]
-
saveMean
[
plane
])
*
k
[
plane
])
*
saveInvStd
[
plane
]
*
(
weight
?
weight
[
plane
]
:
1
);
}
}
if
(
d_weight
)
for
(
Int
plane
=
0
;
plane
<
nPlanes
;
plane
++
)
{
d_weight
[
plane
]
=
dotp
[
plane
]
*
saveInvStd
[
plane
];
}
}
#endif
/* CPU_BATCHNORMALIZATION_H */
sparseconvnet/SCN/CPU/BatchwiseMultiplicativeDropout.cpp
View file @
de3743f6
...
@@ -7,7 +7,7 @@
...
@@ -7,7 +7,7 @@
template
<
typename
T
>
template
<
typename
T
>
void
cpu_BatchwiseMultiplicativeDropout_updateOutput
(
void
cpu_BatchwiseMultiplicativeDropout_updateOutput
(
/*float*/
at
::
Tensor
input_features
,
/*float*/
at
::
Tensor
output_features
,
/*float*/
at
::
Tensor
input_features
,
/*float*/
at
::
Tensor
output_features
,
/*float*/
at
::
Tensor
noise
,
float
alpha
)
{
/*float*/
at
::
Tensor
noise
,
T
alpha
)
{
output_features
.
resize_as_
(
input_features
);
output_features
.
resize_as_
(
input_features
);
auto
nActive
=
input_features
.
size
(
0
);
auto
nActive
=
input_features
.
size
(
0
);
auto
nPlanes
=
input_features
.
size
(
1
);
auto
nPlanes
=
input_features
.
size
(
1
);
...
@@ -23,7 +23,7 @@ template <typename T>
...
@@ -23,7 +23,7 @@ template <typename T>
void
cpu_BatchwiseMultiplicativeDropout_updateGradInput
(
void
cpu_BatchwiseMultiplicativeDropout_updateGradInput
(
/*float*/
at
::
Tensor
input_features
,
/*float*/
at
::
Tensor
d_input_features
,
/*float*/
at
::
Tensor
input_features
,
/*float*/
at
::
Tensor
d_input_features
,
/*float*/
at
::
Tensor
d_output_features
,
/*float*/
at
::
Tensor
noise
,
/*float*/
at
::
Tensor
d_output_features
,
/*float*/
at
::
Tensor
noise
,
float
alpha
)
{
T
alpha
)
{
d_input_features
.
resize_as_
(
d_output_features
);
d_input_features
.
resize_as_
(
d_output_features
);
auto
nActive
=
input_features
.
size
(
0
);
auto
nActive
=
input_features
.
size
(
0
);
auto
nPlanes
=
input_features
.
size
(
1
);
auto
nPlanes
=
input_features
.
size
(
1
);
...
...
sparseconvnet/SCN/CPU/Convolution.cpp
View file @
de3743f6
...
@@ -11,7 +11,9 @@ void rule_index_select(at::Tensor target, at::Tensor src, Int nRules,
...
@@ -11,7 +11,9 @@ void rule_index_select(at::Tensor target, at::Tensor src, Int nRules,
auto
t_ptr
=
target
.
data
<
T
>
();
auto
t_ptr
=
target
.
data
<
T
>
();
auto
s_ptr
=
src
.
data
<
T
>
();
auto
s_ptr
=
src
.
data
<
T
>
();
auto
n
=
target
.
size
(
1
);
auto
n
=
target
.
size
(
1
);
for
(
int
i
=
0
;
i
<
nRules
;
++
i
)
Int
i
;
#pragma omp parallel for private(i)
for
(
i
=
0
;
i
<
nRules
;
++
i
)
std
::
memcpy
(
t_ptr
+
i
*
n
,
s_ptr
+
rules
[
2
*
i
]
*
n
,
sizeof
(
T
)
*
n
);
std
::
memcpy
(
t_ptr
+
i
*
n
,
s_ptr
+
rules
[
2
*
i
]
*
n
,
sizeof
(
T
)
*
n
);
}
}
template
<
typename
T
>
template
<
typename
T
>
...
@@ -20,7 +22,9 @@ void rule_index_add_(at::Tensor target, at::Tensor src, Int nRules,
...
@@ -20,7 +22,9 @@ void rule_index_add_(at::Tensor target, at::Tensor src, Int nRules,
auto
t_ptr
=
target
.
data
<
T
>
();
auto
t_ptr
=
target
.
data
<
T
>
();
auto
s_ptr
=
src
.
data
<
T
>
();
auto
s_ptr
=
src
.
data
<
T
>
();
auto
n
=
target
.
size
(
1
);
auto
n
=
target
.
size
(
1
);
for
(
int
i
=
0
;
i
<
nRules
;
++
i
)
{
Int
i
;
#pragma omp parallel for private(i)
for
(
i
=
0
;
i
<
nRules
;
++
i
)
{
auto
t
=
t_ptr
+
rules
[
2
*
i
]
*
n
;
auto
t
=
t_ptr
+
rules
[
2
*
i
]
*
n
;
auto
s
=
s_ptr
+
i
*
n
;
auto
s
=
s_ptr
+
i
*
n
;
for
(
int
j
=
0
;
j
<
n
;
++
j
)
for
(
int
j
=
0
;
j
<
n
;
++
j
)
...
...
sparseconvnet/SCN/CPU/IOLayers.cpp
View file @
de3743f6
...
@@ -4,7 +4,43 @@
...
@@ -4,7 +4,43 @@
// This source code is licensed under the license found in the
// This source code is licensed under the license found in the
// LICENSE file in the root directory of this source tree.
// LICENSE file in the root directory of this source tree.
#include "IOLayers.h"
#include <cstring>
// Assume output and d_input_features have been zero-ed
template
<
typename
T
>
void
InputLayer_ForwardPass
(
T
*
input_features
,
T
*
output_features
,
Int
nRows
,
Int
maxActive
,
Int
nPlanes
,
Int
*
rules
,
bool
average
)
{
for
(
Int
row
=
0
;
row
<
nRows
;
row
++
)
{
auto
nActive
=
rules
[
0
];
T
multiplier
=
(
average
and
nActive
>
0
)
?
1.0
f
/
nActive
:
1.0
f
;
for
(
Int
i
=
1
;
i
<=
nActive
;
++
i
)
{
auto
in_f
=
input_features
+
nPlanes
*
rules
[
i
];
for
(
Int
plane
=
0
;
plane
<
nPlanes
;
plane
++
)
{
output_features
[
plane
]
+=
multiplier
*
in_f
[
plane
];
}
}
output_features
+=
nPlanes
;
rules
+=
1
+
maxActive
;
}
}
template
<
typename
T
>
void
InputLayer_BackwardPass
(
T
*
d_input_features
,
T
*
d_output_features
,
Int
nRows
,
Int
maxActive
,
Int
nPlanes
,
Int
*
rules
,
bool
average
)
{
for
(
Int
row
=
0
;
row
<
nRows
;
row
++
)
{
auto
nActive
=
rules
[
0
];
T
multiplier
=
(
average
and
nActive
>
0
)
?
1.0
f
/
nActive
:
1.0
f
;
for
(
Int
i
=
1
;
i
<=
nActive
;
++
i
)
{
auto
d_in_f
=
d_input_features
+
nPlanes
*
rules
[
i
];
for
(
Int
plane
=
0
;
plane
<
nPlanes
;
plane
++
)
d_in_f
[
plane
]
+=
multiplier
*
d_output_features
[
plane
];
}
d_output_features
+=
nPlanes
;
rules
+=
1
+
maxActive
;
}
}
template
<
typename
T
,
Int
Dimension
>
template
<
typename
T
,
Int
Dimension
>
void
cpu_InputLayer_updateOutput
(
Metadata
<
Dimension
>
&
m
,
void
cpu_InputLayer_updateOutput
(
Metadata
<
Dimension
>
&
m
,
...
@@ -26,8 +62,8 @@ void cpu_InputLayer_updateOutput(Metadata<Dimension> &m,
...
@@ -26,8 +62,8 @@ void cpu_InputLayer_updateOutput(Metadata<Dimension> &m,
output_features
.
resize_
({
*
m
.
inputNActive
,
nPlanes
});
output_features
.
resize_
({
*
m
.
inputNActive
,
nPlanes
});
output_features
.
zero_
();
output_features
.
zero_
();
InputLayer_ForwardPass
<
T
>
(
input_features
.
data
<
T
>
(),
InputLayer_ForwardPass
<
T
>
(
input_features
.
data
<
T
>
(),
output_features
.
data
<
T
>
(),
nRows
,
output_features
.
data
<
T
>
(),
nRows
,
maxActive
,
maxActive
,
nPlanes
,
&
rules
[
1
][
0
],
mode
==
4
);
nPlanes
,
&
rules
[
1
][
0
],
mode
==
4
);
}
}
}
}
template
<
typename
T
,
Int
Dimension
>
template
<
typename
T
,
Int
Dimension
>
...
@@ -47,8 +83,8 @@ void cpu_InputLayer_updateGradInput(Metadata<Dimension> &m,
...
@@ -47,8 +83,8 @@ void cpu_InputLayer_updateGradInput(Metadata<Dimension> &m,
d_input_features
.
resize_
({
rules
[
0
][
2
],
nPlanes
});
d_input_features
.
resize_
({
rules
[
0
][
2
],
nPlanes
});
d_input_features
.
zero_
();
d_input_features
.
zero_
();
InputLayer_BackwardPass
<
T
>
(
d_input_features
.
data
<
T
>
(),
InputLayer_BackwardPass
<
T
>
(
d_input_features
.
data
<
T
>
(),
d_output_features
.
data
<
T
>
(),
nRows
,
d_output_features
.
data
<
T
>
(),
nRows
,
maxActive
,
maxActive
,
nPlanes
,
&
rules
[
1
][
0
],
mode
==
4
);
nPlanes
,
&
rules
[
1
][
0
],
mode
==
4
);
}
}
}
}
...
@@ -69,8 +105,8 @@ void cpu_OutputLayer_updateOutput(Metadata<Dimension> &m,
...
@@ -69,8 +105,8 @@ void cpu_OutputLayer_updateOutput(Metadata<Dimension> &m,
output_features
.
resize_
({
rules
[
0
][
2
],
nPlanes
});
output_features
.
resize_
({
rules
[
0
][
2
],
nPlanes
});
output_features
.
zero_
();
output_features
.
zero_
();
InputLayer_BackwardPass
<
T
>
(
output_features
.
data
<
T
>
(),
InputLayer_BackwardPass
<
T
>
(
output_features
.
data
<
T
>
(),
input_features
.
data
<
T
>
(),
nRows
,
input_features
.
data
<
T
>
(),
nRows
,
maxActive
,
maxActive
,
nPlanes
,
&
rules
[
1
][
0
],
false
);
nPlanes
,
&
rules
[
1
][
0
],
false
);
}
}
}
}
template
<
typename
T
,
Int
Dimension
>
template
<
typename
T
,
Int
Dimension
>
...
@@ -90,8 +126,8 @@ void cpu_OutputLayer_updateGradInput(Metadata<Dimension> &m,
...
@@ -90,8 +126,8 @@ void cpu_OutputLayer_updateGradInput(Metadata<Dimension> &m,
d_input_features
.
resize_
({
nRows
,
nPlanes
});
d_input_features
.
resize_
({
nRows
,
nPlanes
});
d_input_features
.
zero_
();
d_input_features
.
zero_
();
InputLayer_ForwardPass
<
T
>
(
d_output_features
.
data
<
T
>
(),
InputLayer_ForwardPass
<
T
>
(
d_output_features
.
data
<
T
>
(),
d_input_features
.
data
<
T
>
(),
nRows
,
d_input_features
.
data
<
T
>
(),
nRows
,
maxActive
,
maxActive
,
nPlanes
,
&
rules
[
1
][
0
],
false
);
nPlanes
,
&
rules
[
1
][
0
],
false
);
}
}
}
}
...
@@ -116,8 +152,8 @@ void cpu_BLInputLayer_updateOutput(Metadata<Dimension> &m,
...
@@ -116,8 +152,8 @@ void cpu_BLInputLayer_updateOutput(Metadata<Dimension> &m,
output_features
.
resize_
({
*
m
.
inputNActive
,
nPlanes
});
output_features
.
resize_
({
*
m
.
inputNActive
,
nPlanes
});
output_features
.
zero_
();
output_features
.
zero_
();
InputLayer_ForwardPass
<
T
>
(
input_features
.
data
<
T
>
(),
InputLayer_ForwardPass
<
T
>
(
input_features
.
data
<
T
>
(),
output_features
.
data
<
T
>
(),
nRows
,
output_features
.
data
<
T
>
(),
nRows
,
maxActive
,
maxActive
,
nPlanes
,
&
rules
[
1
][
0
],
mode
==
4
);
nPlanes
,
&
rules
[
1
][
0
],
mode
==
4
);
}
}
}
}
template
<
typename
T
,
Int
Dimension
>
template
<
typename
T
,
Int
Dimension
>
...
@@ -139,8 +175,8 @@ void cpu_BLInputLayer_updateGradInput(Metadata<Dimension> &m,
...
@@ -139,8 +175,8 @@ void cpu_BLInputLayer_updateGradInput(Metadata<Dimension> &m,
d_input_features
.
resize_
({
rules
[
0
][
2
],
rules
[
0
][
3
],
nPlanes
});
d_input_features
.
resize_
({
rules
[
0
][
2
],
rules
[
0
][
3
],
nPlanes
});
d_input_features
.
zero_
();
d_input_features
.
zero_
();
InputLayer_BackwardPass
<
T
>
(
d_input_features
.
data
<
T
>
(),
InputLayer_BackwardPass
<
T
>
(
d_input_features
.
data
<
T
>
(),
d_output_features
.
data
<
T
>
(),
nRows
,
d_output_features
.
data
<
T
>
(),
nRows
,
maxActive
,
maxActive
,
nPlanes
,
&
rules
[
1
][
0
],
mode
==
4
);
nPlanes
,
&
rules
[
1
][
0
],
mode
==
4
);
}
}
}
}
...
@@ -162,8 +198,8 @@ void cpu_BLOutputLayer_updateOutput(Metadata<Dimension> &m,
...
@@ -162,8 +198,8 @@ void cpu_BLOutputLayer_updateOutput(Metadata<Dimension> &m,
output_features
.
resize_
({
rules
[
0
][
2
],
rules
[
0
][
3
],
nPlanes
});
output_features
.
resize_
({
rules
[
0
][
2
],
rules
[
0
][
3
],
nPlanes
});
output_features
.
zero_
();
output_features
.
zero_
();
InputLayer_BackwardPass
<
T
>
(
output_features
.
data
<
T
>
(),
InputLayer_BackwardPass
<
T
>
(
output_features
.
data
<
T
>
(),
input_features
.
data
<
T
>
(),
nRows
,
input_features
.
data
<
T
>
(),
nRows
,
maxActive
,
maxActive
,
nPlanes
,
&
rules
[
1
][
0
],
false
);
nPlanes
,
&
rules
[
1
][
0
],
false
);
}
}
}
}
template
<
typename
T
,
Int
Dimension
>
template
<
typename
T
,
Int
Dimension
>
...
@@ -184,7 +220,7 @@ void cpu_BLOutputLayer_updateGradInput(Metadata<Dimension> &m,
...
@@ -184,7 +220,7 @@ void cpu_BLOutputLayer_updateGradInput(Metadata<Dimension> &m,
d_input_features
.
resize_
({
nRows
,
nPlanes
});
d_input_features
.
resize_
({
nRows
,
nPlanes
});
d_input_features
.
zero_
();
d_input_features
.
zero_
();
InputLayer_ForwardPass
<
T
>
(
d_output_features
.
data
<
T
>
(),
InputLayer_ForwardPass
<
T
>
(
d_output_features
.
data
<
T
>
(),
d_input_features
.
data
<
T
>
(),
nRows
,
d_input_features
.
data
<
T
>
(),
nRows
,
maxActive
,
maxActive
,
nPlanes
,
&
rules
[
1
][
0
],
false
);
nPlanes
,
&
rules
[
1
][
0
],
false
);
}
}
}
}
sparseconvnet/SCN/CPU/IOLayers.h
deleted
100644 → 0
View file @
f0407b36
// Copyright 2016-present, Facebook, Inc.
// All rights reserved.
//
// This source code is licensed under the license found in the
// LICENSE file in the root directory of this source tree.
#ifndef CPU_IOLAYERS_H
#define CPU_IOLAYERS_H
#include <cstring>
// Assume output and d_input_features have been zero-ed
// Accumulate active input rows into each output row.
// `rules` holds, for every output row, a fixed-stride record of
// 1 + maxActive Ints: [nActive, idx_1, ..., idx_maxActive]; only the
// first nActive indices are read. With `average` true and nActive > 0,
// each contribution is scaled by 1/nActive (computed in float).
// Assumes output_features was zero-ed by the caller (see file comment).
template <typename T>
void InputLayer_ForwardPass(T *input_features, T *output_features, Int nRows,
                            Int maxActive, Int nPlanes, Int *rules,
                            bool average) {
  for (Int row = 0; row < nRows; row++) {
    // First entry of this row's rule record: number of active inputs.
    auto nActive = rules[0];
    T multiplier = (average and nActive > 0) ? 1.0f / nActive : 1.0f;
    for (Int i = 1; i <= nActive; ++i) {
      // Pointer to the i-th contributing input row.
      auto in_f = input_features + nPlanes * rules[i];
      for (Int plane = 0; plane < nPlanes; plane++) {
        output_features[plane] += multiplier * in_f[plane];
      }
    }
    // Advance to the next output row and its rule record.
    output_features += nPlanes;
    rules += 1 + maxActive;
  }
}
// Backward counterpart of InputLayer_ForwardPass: scatter each output
// row's gradient back to the input rows named in that row's rule record
// ([nActive, idx_1, ..., idx_maxActive], stride 1 + maxActive), with the
// same optional 1/nActive averaging factor (computed in float).
// Assumes d_input_features was zero-ed by the caller (see file comment).
template <typename T>
void InputLayer_BackwardPass(T *d_input_features, T *d_output_features,
                             Int nRows, Int maxActive, Int nPlanes, Int *rules,
                             bool average) {
  for (Int row = 0; row < nRows; row++) {
    // First entry of this row's rule record: number of active inputs.
    auto nActive = rules[0];
    T multiplier = (average and nActive > 0) ? 1.0f / nActive : 1.0f;
    for (Int i = 1; i <= nActive; ++i) {
      // Gradient destination: the i-th contributing input row.
      auto d_in_f = d_input_features + nPlanes * rules[i];
      for (Int plane = 0; plane < nPlanes; plane++)
        d_in_f[plane] += multiplier * d_output_features[plane];
    }
    // Advance to the next output row's gradient and rule record.
    d_output_features += nPlanes;
    rules += 1 + maxActive;
  }
}
#endif
/* CPU_IOLAYERS_H */
sparseconvnet/SCN/CPU/LeakyReLU.cpp
View file @
de3743f6
...
@@ -6,8 +6,7 @@
...
@@ -6,8 +6,7 @@
template
<
typename
T
>
template
<
typename
T
>
void
cpu_LeakyReLU_updateOutput
(
/*float*/
at
::
Tensor
input_features
,
void
cpu_LeakyReLU_updateOutput
(
/*float*/
at
::
Tensor
input_features
,
/*float*/
at
::
Tensor
output_features
,
/*float*/
at
::
Tensor
output_features
,
T
alpha
)
{
float
alpha
)
{
output_features
.
resize_as_
(
input_features
);
output_features
.
resize_as_
(
input_features
);
auto
iF
=
input_features
.
data
<
T
>
();
auto
iF
=
input_features
.
data
<
T
>
();
auto
oF
=
output_features
.
data
<
T
>
();
auto
oF
=
output_features
.
data
<
T
>
();
...
@@ -20,7 +19,7 @@ template <typename T>
...
@@ -20,7 +19,7 @@ template <typename T>
void
cpu_LeakyReLU_updateGradInput
(
/*float*/
at
::
Tensor
input_features
,
void
cpu_LeakyReLU_updateGradInput
(
/*float*/
at
::
Tensor
input_features
,
/*float*/
at
::
Tensor
d_input_features
,
/*float*/
at
::
Tensor
d_input_features
,
/*float*/
at
::
Tensor
d_output_features
,
/*float*/
at
::
Tensor
d_output_features
,
float
alpha
)
{
T
alpha
)
{
d_input_features
.
resize_as_
(
d_output_features
);
d_input_features
.
resize_as_
(
d_output_features
);
auto
iF
=
input_features
.
data
<
T
>
();
auto
iF
=
input_features
.
data
<
T
>
();
auto
diF
=
d_input_features
.
data
<
T
>
();
auto
diF
=
d_input_features
.
data
<
T
>
();
...
...
Prev
1
2
3
4
5
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment