OpenDAS / Pytorch-Encoding · Commits · 1177a80b

Commit 1177a80b, authored Oct 04, 2017 by Hang Zhang

    memory efficient implementation and test script
Parent: 8dd870b1

Showing 7 changed files with 1086 additions and 209 deletions (+1086 −209)
build.py                                     +4    −2
encoding/__init__.py                         +240  −43
encoding/kernel/generic/encoding_kernel.c    +608  −114
encoding/kernel/generic/encoding_kernel.h    +40   −14
encoding/src/encoding_lib.h                  +81   −26
encoding/src/generic/encoding_generic.c      +99   −6
setup.py                                     +14   −4
build.py

@@ -28,9 +28,11 @@ else:
     os.environ['THC_LIBRARIES'] = os.path.join(lib_path, 'libTHC.so.1')
 ENCODING_LIB = os.path.join(lib_path, 'libENCODING.so')
+clean_cmd = ['bash', 'clean.sh']
+subprocess.check_call(clean_cmd)
 build_all_cmd = ['bash', 'encoding/make.sh']
-if subprocess.call(build_all_cmd, env=dict(os.environ)) != 0:
-    sys.exit(1)
+subprocess.check_call(build_all_cmd, env=dict(os.environ))
 sources = ['encoding/src/encoding_lib.cpp']
 headers = ['encoding/src/encoding_lib.h']
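The change above swaps manual exit-code handling for subprocess.check_call, which raises CalledProcessError on a non-zero exit instead of requiring an explicit sys.exit. A minimal sketch of the equivalent behavior:

import subprocess

try:
    subprocess.check_call(['bash', 'encoding/make.sh'])
except subprocess.CalledProcessError as e:
    # replaces the removed pattern: if subprocess.call(...) != 0: sys.exit(1)
    raise SystemExit(e.returncode)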
encoding/__init__.py

@@ -10,55 +10,253 @@
import threading

import torch
import torch.cuda.nccl as nccl
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Function, Variable
from torch.nn.parameter import Parameter

from ._ext import encoding_lib
class aggregateE(Function):
    def forward(self, A, X, C):
        # A \in (BxNxK), X \in (BxNxD), C \in (KxD) => E \in (BxKxD)
        self.save_for_backward(A, X, C)
        B, N, K = A.size()
        D = X.size(2)
        with torch.cuda.device_of(A):
            E = A.new(B, K, D)
        if isinstance(A, torch.cuda.FloatTensor):
            with torch.cuda.device_of(A):
                encoding_lib.Encoding_Float_aggregateE_forward(E, A, X, C)
        elif isinstance(A, torch.cuda.DoubleTensor):
            with torch.cuda.device_of(A):
                encoding_lib.Encoding_Double_aggregateE_forward(E, A, X, C)
        else:
            raise RuntimeError('Unimplemented data type!')
        return E

    def backward(self, gradE):
        A, X, C = self.saved_tensors
        with torch.cuda.device_of(A):
            gradA = A.new().resize_as_(A)
            gradX = A.new().resize_as_(X)
            gradC = A.new().resize_as_(C)
        if isinstance(A, torch.cuda.FloatTensor):
            with torch.cuda.device_of(A):
                encoding_lib.Encoding_Float_aggregateE_backward(gradA, gradE, A, X, C)
        elif isinstance(A, torch.cuda.DoubleTensor):
            with torch.cuda.device_of(A):
                encoding_lib.Encoding_Double_aggregateE_backward(gradA, gradE, A, X, C)
        else:
            raise RuntimeError('Unimplemented data type!')
        gradX.copy_(torch.bmm(A, gradE))
        gradC.copy_((-gradE * A.sum(1).unsqueeze(2)).sum(0))
        return gradA, gradX, gradC
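For reference, a dense autograd-free sketch of what the fused aggregateE kernel appears to compute, inferred from the gradX/gradC expressions in backward() above (the kernel itself lives in encoding_kernel.c, whose diff is collapsed below); aggregateE_reference is a hypothetical name, not part of this commit:

def aggregateE_reference(A, X, C):
    # A: BxNxK assignments, X: BxNxD features, C: KxD codewords
    # assumed semantics: E[b,k,:] = sum_n A[b,n,k] * (X[b,n,:] - C[k,:])
    B, N, K = A.size()
    D = X.size(2)
    R = X.view(B, N, 1, D).expand(B, N, K, D) \
        - C.view(1, 1, K, D).expand(B, N, K, D)
    return (A.view(B, N, K, 1).expand_as(R) * R).sum(1)  # BxKxD

This is consistent with gradX = bmm(A, gradE) and gradC = -(gradE * A.sum(1).unsqueeze(2)).sum(0), but is only a sketch of the kernel's contract.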
class ScaledL2(Function):
    def forward(self, X, C, S):
        B, N, D = X.size()
        K = C.size(0)
        with torch.cuda.device_of(X):
            SL = X.new(B, N, K)
        if isinstance(X, torch.cuda.FloatTensor):
            with torch.cuda.device_of(X):
                encoding_lib.Encoding_Float_scaledl2_forward(SL, X, C, S)
        elif isinstance(X, torch.cuda.DoubleTensor):
            with torch.cuda.device_of(X):
                encoding_lib.Encoding_Double_scaledl2_forward(SL, X, C, S)
        else:
            raise RuntimeError('Unimplemented data type!')
        self.save_for_backward(X, C, S, SL)
        return SL

    def backward(self, gradSL):
        X, C, S, SL = self.saved_tensors
        K = C.size(0)
        with torch.cuda.device_of(X):
            gradX = X.new().resize_as_(X)
            gradC = X.new().resize_as_(C)
            gradS = X.new().resize_as_(S)
        if isinstance(X, torch.cuda.FloatTensor):
            with torch.cuda.device_of(X):
                encoding_lib.Encoding_Float_scaledl2_backward(gradSL, gradX, gradC, X, C, S)
        elif isinstance(X, torch.cuda.DoubleTensor):
            with torch.cuda.device_of(X):
                encoding_lib.Encoding_Double_scaledl2_backward(gradSL, gradX, gradC, X, C, S)
        else:
            raise RuntimeError('Unimplemented data type!')
        gradS.copy_((gradSL * (SL / S.view(1, 1, K))).sum(0).sum(0))
        return gradX, gradC, gradS
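A dense sketch of ScaledL2's apparent contract: since backward computes gradS from SL / S, the forward plausibly produces SL[b,n,k] = S[k] * ||X[b,n,:] - C[k,:]||^2. scaled_l2_reference is a hypothetical name for illustration only:

def scaled_l2_reference(X, C, S):
    # X: BxNxD, C: KxD, S: K (per-codeword smoothing factors)
    B, N, D = X.size()
    K = C.size(0)
    R = X.view(B, N, 1, D).expand(B, N, K, D) \
        - C.view(1, 1, K, D).expand(B, N, K, D)
    return S.view(1, 1, K).expand(B, N, K) * R.pow(2).sum(3)  # BxNxK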
class Encoding(nn.Module):
    def __init__(self, D, K):
        super(Encoding, self).__init__()
        # init codewords and smoothing factor
        self.D, self.K = D, K
        self.codewords = nn.Parameter(torch.Tensor(K, D), requires_grad=True)
        self.scale = nn.Parameter(torch.Tensor(K), requires_grad=True)
        self.reset_params()

    def reset_params(self):
        std1 = 1. / ((self.K * self.D) ** 0.5)
        std2 = 1. / (self.K ** 0.5)
        self.codewords.data.uniform_(-std1, std1)
        self.scale.data.uniform_(-std2, std2)

    def forward(self, X):
        # input X is a 3D (BxDxN) or 4D (BxDxHxW) tensor
        assert X.size(1) == self.D, "Encoding Layer wrong channels!"
        if X.dim() == 3:
            # BxDxN
            B, N, K, D = X.size(0), X.size(2), self.K, self.D
            X = X.transpose(1, 2).contiguous()
        elif X.dim() == 4:
            # BxDxHxW
            B, N, K, D = X.size(0), X.size(2) * X.size(3), self.K, self.D
            X = X.view(B, D, -1).transpose(1, 2).contiguous()
        else:
            raise RuntimeError('Encoding Layer unknown input dims!')
        # assignment weights
        A = F.softmax(ScaledL2()(X, self.codewords, self.scale))
        # aggregate
        E = aggregateE()(A, X, self.codewords)
        return E

    def __repr__(self):
        return self.__class__.__name__ + '(' \
            + 'N x ' + str(self.D) + '=>' + str(self.K) + 'x' \
            + str(self.D) + ')'
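A hypothetical usage sketch of the new memory-efficient layer, assuming a successful CUDA build of encoding_lib; shapes follow the forward() path above:

import torch
from torch.autograd import Variable
import encoding

layer = encoding.Encoding(D=256, K=32).cuda()   # 32 codewords of dim 256
X = Variable(torch.randn(4, 256, 8, 8).cuda())  # BxDxHxW feature map
E = layer(X)                                    # -> 4x32x256 (BxKxD)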
##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
class aggregate(Function):
    def forward(self, A, R):
        # A \in (BxNxK), R \in (BxNxKxD) => E \in (BxKxD)
        self.save_for_backward(A, R)
        B, N, K, D = R.size()
        with torch.cuda.device_of(A):
            E = A.new(B, K, D)
        # TODO support cpu backend
        if isinstance(A, torch.cuda.FloatTensor):
            with torch.cuda.device_of(A):
                encoding_lib.Encoding_Float_aggregate_forward(E, A, R)
        elif isinstance(A, torch.cuda.DoubleTensor):
            with torch.cuda.device_of(A):
                encoding_lib.Encoding_Double_aggregate_forward(E, A, R)
        else:
-           raise RuntimeError('unimplemented')
+           raise RuntimeError('Unimplemented data type!')
        return E

    def backward(self, gradE):
        A, R = self.saved_tensors
        with torch.cuda.device_of(A):
            gradA = A.new().resize_as_(A)
            gradR = R.new().resize_as_(R)
        if isinstance(A, torch.cuda.FloatTensor):
-           encoding_lib.Encoding_Float_aggregate_backward(gradA, gradR, gradE, A, R)
+           with torch.cuda.device_of(A):
+               encoding_lib.Encoding_Float_aggregate_backward(gradA, gradR, gradE, A, R)
        elif isinstance(A, torch.cuda.DoubleTensor):
-           encoding_lib.Encoding_Double_aggregate_backward(gradA, gradR, gradE, A, R)
+           with torch.cuda.device_of(A):
+               encoding_lib.Encoding_Double_aggregate_backward(gradA, gradR, gradE, A, R)
        else:
-           raise RuntimeError('unimplemented')
+           raise RuntimeError('Unimplemented data type!')
        return gradA, gradR
class residual(Function):
    def forward(self, X, C):
        # X \in (BxNxD), codewords C \in (KxD) => R \in (BxNxKxD)
        B, N, D = X.size()
        K = C.size(0)
        with torch.cuda.device_of(X):
            R = X.new(B, N, K, D)
        if isinstance(X, torch.cuda.FloatTensor):
            with torch.cuda.device_of(X):
                encoding_lib.Encoding_Float_residual_forward(R, X, C)
        elif isinstance(X, torch.cuda.DoubleTensor):
            with torch.cuda.device_of(X):
                encoding_lib.Encoding_Double_residual_forward(R, X, C)
        else:
            raise RuntimeError('Unimplemented data type!')
        return R

    def backward(self, gradR):
        B, N, K, D = gradR.size()
        with torch.cuda.device_of(gradR):
            gradX = gradR.new(B, N, D)
            gradD = gradR.new(K, D)
        if isinstance(gradR, torch.cuda.FloatTensor):
            with torch.cuda.device_of(gradR):
                encoding_lib.Encoding_Float_residual_backward(gradR, gradX, gradD)
        elif isinstance(gradR, torch.cuda.DoubleTensor):
            with torch.cuda.device_of(gradR):
                encoding_lib.Encoding_Double_residual_backward(gradR, gradX, gradD)
        else:
            raise RuntimeError('Unimplemented data type!')
        return gradX, gradD
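The dense computation this kernel replaces appears verbatim in the removed lines of EncodingP.forward further down; as a self-contained sketch (residual_reference is a hypothetical name):

def residual_reference(X, C):
    # R[b,n,k,:] = X[b,n,:] - C[k,:]
    B, N, D = X.size()
    K = C.size(0)
    return X.view(B, N, 1, D).expand(B, N, K, D) \
        - C.view(1, 1, K, D).expand(B, N, K, D)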
class square_squeeze(Function):
    def forward(self, R):
        B, N, K, D = R.size()
        with torch.cuda.device_of(R):
            L = R.new(B, N, K)
        if isinstance(R, torch.cuda.FloatTensor):
            with torch.cuda.device_of(R):
                encoding_lib.Encoding_Float_squaresqueeze_forward(L, R)
        elif isinstance(R, torch.cuda.DoubleTensor):
            with torch.cuda.device_of(R):
                encoding_lib.Encoding_Double_squaresqueeze_forward(L, R)
        else:
            raise RuntimeError('Unimplemented data type!')
        self.save_for_backward(L, R)
        return L

    def backward(self, gradL):
        L, R = self.saved_tensors
        B, N, K, D = R.size()
        with torch.cuda.device_of(R):
            gradR = R.new(B, N, K, D)
        if isinstance(R, torch.cuda.FloatTensor):
            with torch.cuda.device_of(gradL):
                encoding_lib.Encoding_Float_squaresqueeze_backward(gradL, gradR, R)
        elif isinstance(R, torch.cuda.DoubleTensor):
            with torch.cuda.device_of(gradL):
                encoding_lib.Encoding_Double_squaresqueeze_backward(gradL, gradR, R)
        else:
            raise RuntimeError('Unimplemented data type!')
        return gradR
def assign(R, S):
    L = square_squeeze()(R)
    K = S.size(0)
    SL = L * S.view(1, 1, K)
    return F.softmax(SL)
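assign() composes square_squeeze with a per-codeword scale and a softmax; the removed dense lines in EncodingP.forward below compute the same thing, which this sketch mirrors (assign_reference is a hypothetical name):

def assign_reference(R, S):
    # L[b,n,k] = ||R[b,n,k,:]||^2, scaled per codeword, then softmax over K
    B, N, K, D = R.size()
    A = R.pow(2).sum(3)                    # BxNxK, what square_squeeze computes
    A = A * S.view(1, 1, K).expand_as(A)
    return F.softmax(A.view(B * N, K)).view(B, N, K)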
class Aggregate(nn.Module):
    def forward(self, A, R):
        return aggregate()(A, R)
-class Encoding(nn.Module):
+class EncodingP(nn.Module):
     def __init__(self, D, K):
-        super(Encoding, self).__init__()
+        super(EncodingP, self).__init__()
         # init codewords and smoothing factor
         self.D, self.K = D, K
         self.codewords = nn.Parameter(torch.Tensor(K, D), requires_grad=True)
         self.scale = nn.Parameter(torch.Tensor(K), requires_grad=True)
         self.softmax = nn.Softmax()
         self.reset_params()

     def reset_params(self):
...
@@ -69,34 +267,33 @@ class Encoding(nn.Module):
     def forward(self, X):
         # input X is a 4D tensor
-        assert(X.size(1) == self.D, "Encoding Layer incompatible input channels!")
+        unpacked = False
+        assert X.size(1) == self.D, "Encoding Layer wrong channels!"
         if X.dim() == 3:
+            unpacked = True
+            X = X.unsqueeze(0)
             # BxDxN
             B, N, K, D = X.size(0), X.size(2), self.K, self.D
             X = X.transpose(1, 2)
         elif X.dim() == 4:
             # BxDxHxW
             B, N, K, D = X.size(0), X.size(2) * X.size(3), self.K, self.D
+            # reshape input
             X = X.view(B, D, -1).transpose(1, 2)
         else:
             raise RuntimeError('Encoding Layer unknown input dims!')
         # calculate residuals
-        R = X.contiguous().view(B, N, 1, D).expand(B, N, K, D) \
-            - self.codewords.view(1, 1, K, D).expand(B, N, K, D)
+        R = residual()(X.contiguous(), self.codewords)
         # assignment weights
-        A = R
-        A = A.pow(2).sum(3).view(B, N, K)
-        A = A * self.scale.view(1, 1, K).expand_as(A)
-        A = self.softmax(A.view(B * N, K)).view(B, N, K)
+        A = assign(R, self.scale)
         # aggregate
         E = aggregate()(A, R)
+        if unpacked:
+            E = E.squeeze(0)
         return E
     def __repr__(self):
         return self.__class__.__name__ + '(' \
-            + 'N x ' + str(self.D) + '=>' + str(self.K) + 'x' + str(self.D) + ')'
+            + 'N x ' + str(self.D) + '=>' + str(self.K) + 'x' \
+            + str(self.D) + ')'
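Where the memory saving in the commit title comes from, as a rough assumed count: EncodingP still materializes the full BxNxKxD residual tensor R, while the new Encoding class above fuses ScaledL2 and aggregateE so only BxNxK and BxKxD buffers are ever allocated.

# Rough element counts for B=4, a 64x64 feature map, K=32, D=256:
B, N, K, D = 4, 64 * 64, 32, 256
dense = B * N * K * D           # residual tensor R: ~134M elements
fused = B * N * K + B * K * D   # ScaledL2 + aggregateE outputs: ~0.6M elements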
##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
class sum_square(Function):
    def forward(ctx, input):
        ctx.save_for_backward(input)
...
@@ -113,7 +310,7 @@ class sum_square(Function):
             encoding_lib.Encoding_Double_sum_square_Forward(
                 input.view(B, C, -1), xsum, xsquare)
         else:
-            raise RuntimeError('unimplemented')
+            raise RuntimeError('Unimplemented data type!')
         return xsum, xsquare

     def backward(ctx, gradSum, gradSquare):
...
@@ -121,8 +318,6 @@ class sum_square(Function):
         B, C, H, W = input.size()
         with torch.cuda.device_of(input):
             gradInput = input.new().resize_(B, C, H * W).zero_()
-        # gradSum.view(1,C,1,1).expand_as(input) + \
-        # 2*gradSquare.view(1,C,1,1).expand_as(input)*input
         if isinstance(input, torch.cuda.FloatTensor):
             with torch.cuda.device_of(input):
                 encoding_lib.Encoding_Float_sum_square_Backward(
...
@@ -132,9 +327,10 @@ class sum_square(Function):
             encoding_lib.Encoding_Double_sum_square_Backward(
                 gradInput, input.view(B, C, -1), gradSum, gradSquare)
         else:
-            raise RuntimeError('unimplemented')
+            raise RuntimeError('Unimplemented data type!')
         return gradInput.view(B, C, H, W)


class batchnormtrain(Function):
    def forward(ctx, input, gamma, beta, mean, std):
        ctx.save_for_backward(input, gamma, beta, mean, std)
...
@@ -151,7 +347,7 @@ class batchnormtrain(Function):
             encoding_lib.Encoding_Double_batchnorm_Forward(
                 output, input, mean, invstd, gamma, beta)
         else:
-            raise RuntimeError('unimplemented')
+            raise RuntimeError('Unimplemented data type!')
         return output

     def backward(ctx, gradOutput):
...
@@ -177,9 +373,10 @@ class batchnormtrain(Function):
                 mean, invstd, gamma, beta, gradMean, gradStd, True)
         else:
-            raise RuntimeError('unimplemented')
+            raise RuntimeError('Unimplemented data type!')
         return gradInput, gradGamma, gradBeta, gradMean, gradStd


class batchnormeval(Function):
    def forward(ctx, input, gamma, beta, mean, std):
        ctx.save_for_backward(input, gamma, beta, mean, std)
...
@@ -196,7 +393,7 @@ class batchnormeval(Function):
             encoding_lib.Encoding_Double_batchnorm_Forward(
                 output, input, mean, invstd, gamma, beta)
         else:
-            raise RuntimeError('unimplemented')
+            raise RuntimeError('Unimplemented data type!')
         return output

     def backward(ctx, gradOutput):
...
@@ -221,6 +418,6 @@ class batchnormeval(Function):
                 mean, invstd, gamma, beta, gradMean, gradStd, False)
         else:
-            raise RuntimeError('unimplemented')
+            raise RuntimeError('Unimplemented data type!')
         return gradInput, gradGamma, gradBeta, gradMean, gradStd
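A heavily hedged sketch of how sum_square and batchnormtrain might compose into a batch-norm building block; the eps value and any cross-device reduction are assumptions not shown in this diff, and sync_bn_sketch is a hypothetical name:

def sync_bn_sketch(input, gamma, beta, eps=1e-5):
    # input: BxCxHxW; fused per-channel sums from the kernel above
    xsum, xsquare = sum_square()(input)
    count = input.size(0) * input.size(2) * input.size(3)
    mean = xsum / count
    std = (xsquare / count - mean * mean + eps).sqrt()
    return batchnormtrain()(input, gamma, beta, mean, std)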
encoding/kernel/generic/encoding_kernel.c

(This diff is collapsed in the original view and not reproduced here.)
encoding/kernel/generic/encoding_kernel.h

@@ -12,11 +12,37 @@
 #define THC_GENERIC_FILE "generic/encoding_kernel.h"
 #else

-void Encoding_(Aggregate_Forward)(THCState *state, THCTensor *E_,
+void Encoding_(AggregateE_Forward)(THCState *state, THCTensor *E_,
     THCTensor *A_, THCTensor *X_, THCTensor *C_);

+void Encoding_(AggregateE_Backward)(THCState *state, THCTensor *GA_,
+    THCTensor *GE_, THCTensor *A_, THCTensor *X_, THCTensor *C_);
+
+void Encoding_(ScaledL2_Forward)(THCState *state, THCTensor *SL_,
+    THCTensor *X_, THCTensor *C_, THCTensor *S_);
+
+void Encoding_(ScaledL2_Backward)(THCState *state, THCTensor *GSL_,
+    THCTensor *GX_, THCTensor *GC_, THCTensor *X_, THCTensor *C_,
+    THCTensor *S_);
+
+void Encoding_(Aggregate_Forward)(THCState *state, THCTensor *E_,
+    THCTensor *A_, THCTensor *R_);

 void Encoding_(Aggregate_Backward)(THCState *state, THCTensor *GA_,
     THCTensor *GR_, THCTensor *L_, THCTensor *A_, THCTensor *R_);

+void Encoding_(Residual_Forward)(THCState *state, THCTensor *R_,
+    THCTensor *X_, THCTensor *D_);
+
+void Encoding_(Residual_Backward)(THCState *state, THCTensor *GR_,
+    THCTensor *GX_, THCTensor *GD_);
+
+void Encoding_(SquareSqueeze_Forward)(THCState *state, THCTensor *L_,
+    THCTensor *R_);
+
+void Encoding_(SquareSqueeze_Backward)(THCState *state, THCTensor *GL_,
+    THCTensor *GR_, THCTensor *R_);

 void Encoding_(BatchNorm_Forward)(THCState *state, THCTensor *output_,
     THCTensor *input_,
...
encoding/src/encoding_lib.h

@@ -20,11 +20,36 @@
 #include "THC/THCGenerateFloatType.h"
 */

+int Encoding_Float_scaledl2_forward(THCudaTensor *SL, THCudaTensor *X,
+    THCudaTensor *C, THCudaTensor *S);
+
+int Encoding_Float_scaledl2_backward(THCudaTensor *GSL, THCudaTensor *GX,
+    THCudaTensor *GC, THCudaTensor *X, THCudaTensor *C, THCudaTensor *S);
+
+int Encoding_Float_aggregateE_forward(THCudaTensor *E, THCudaTensor *A,
+    THCudaTensor *X, THCudaTensor *C);
+
+int Encoding_Float_aggregateE_backward(THCudaTensor *GA, THCudaTensor *GE,
+    THCudaTensor *A, THCudaTensor *X, THCudaTensor *C);
+
 int Encoding_Float_aggregate_forward(THCudaTensor *E, THCudaTensor *A,
     THCudaTensor *R);

 int Encoding_Float_aggregate_backward(THCudaTensor *GA, THCudaTensor *GR,
     THCudaTensor *L, THCudaTensor *A, THCudaTensor *R);

+int Encoding_Float_residual_forward(THCudaTensor *R, THCudaTensor *X,
+    THCudaTensor *D);
+
+int Encoding_Float_residual_backward(THCudaTensor *GR, THCudaTensor *GX,
+    THCudaTensor *GD);
+
+int Encoding_Float_squaresqueeze_forward(THCudaTensor *L, THCudaTensor *R);
+
+int Encoding_Float_squaresqueeze_backward(THCudaTensor *GL, THCudaTensor *GR,
+    THCudaTensor *R);
+
 int Encoding_Float_batchnorm_Forward(THCudaTensor *output_, THCudaTensor *input_,
     THCudaTensor *mean_, THCudaTensor *invstd_, THCudaTensor *gamma_,
     THCudaTensor *beta_);
...
@@ -43,25 +68,55 @@ void Encoding_Float_sum_square_Backward(
     THCudaTensor *gradInput, THCudaTensor *input_, THCudaTensor *gradSum_,
     THCudaTensor *gradSquare_);

-/*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
-+ */
+/*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/

-int Encoding_Double_aggregate_forward(THCudaDoubleTensor *E, THCudaDoubleTensor *A,
-    THCudaDoubleTensor *R);
+int Encoding_Double_scaledl2_forward(THCudaDoubleTensor *SL, THCudaDoubleTensor *X,
+    THCudaDoubleTensor *C, THCudaDoubleTensor *S);
+
+int Encoding_Double_scaledl2_backward(THCudaDoubleTensor *GSL, THCudaDoubleTensor *GX,
+    THCudaDoubleTensor *GC, THCudaDoubleTensor *X, THCudaDoubleTensor *C,
+    THCudaDoubleTensor *S);
+
+int Encoding_Double_aggregateE_forward(THCudaDoubleTensor *E, THCudaDoubleTensor *A,
+    THCudaDoubleTensor *X, THCudaDoubleTensor *C);
+
+int Encoding_Double_aggregateE_backward(THCudaDoubleTensor *GA, THCudaDoubleTensor *GE,
+    THCudaDoubleTensor *A, THCudaDoubleTensor *X, THCudaDoubleTensor *C);
+
+int Encoding_Double_aggregate_forward(THCudaDoubleTensor *E, THCudaDoubleTensor *A,
+    THCudaDoubleTensor *R);

 int Encoding_Double_aggregate_backward(THCudaDoubleTensor *GA, THCudaDoubleTensor *GR,
     THCudaDoubleTensor *L, THCudaDoubleTensor *A, THCudaDoubleTensor *R);

+int Encoding_Double_residual_forward(THCudaDoubleTensor *R, THCudaDoubleTensor *X,
+    THCudaDoubleTensor *D);
+
+int Encoding_Double_residual_backward(THCudaDoubleTensor *GR, THCudaDoubleTensor *GX,
+    THCudaDoubleTensor *GD);
+
+int Encoding_Double_squaresqueeze_forward(THCudaDoubleTensor *L,
+    THCudaDoubleTensor *R);
+
+int Encoding_Double_squaresqueeze_backward(THCudaDoubleTensor *GL,
+    THCudaDoubleTensor *GR, THCudaDoubleTensor *R);
+
 int Encoding_Double_batchnorm_Forward(THCudaDoubleTensor *output_,
     THCudaDoubleTensor *input_, THCudaDoubleTensor *mean_,
     THCudaDoubleTensor *invstd_, THCudaDoubleTensor *gamma_,
     THCudaDoubleTensor *beta_);

 int Encoding_Double_batchnorm_Backward(THCudaDoubleTensor *gradoutput_,
     THCudaDoubleTensor *input_, THCudaDoubleTensor *gradinput_,
     THCudaDoubleTensor *gradgamma_, THCudaDoubleTensor *gradbeta_,
     THCudaDoubleTensor *mean_, THCudaDoubleTensor *invstd_,
     THCudaDoubleTensor *gamma_, THCudaDoubleTensor *beta_,
     THCudaDoubleTensor *gradMean_, THCudaDoubleTensor *gradStd_, int train);

 int Encoding_Double_sum_square_Forward(THCudaDoubleTensor *input_,
     THCudaDoubleTensor *sum_, THCudaDoubleTensor *square_);
...
encoding/src/generic/encoding_generic.c

@@ -12,6 +12,56 @@
 #define THC_GENERIC_FILE "generic/encoding_generic.c"
 #else

+int Encoding_(scaledl2_forward)(THCTensor *SL, THCTensor *X, THCTensor *C,
+    THCTensor *S)
+/*
+ * ScaledL2 operation
+ */
+{
+    Encoding_(ScaledL2_Forward)(state, SL, X, C, S);
+    /* C function return number of the outputs */
+    return 0;
+}
+
+int Encoding_(scaledl2_backward)(THCTensor *GSL, THCTensor *GX, THCTensor *GC,
+    THCTensor *X, THCTensor *C, THCTensor *S)
+/*
+ * ScaledL2 operation
+ */
+{
+    Encoding_(ScaledL2_Backward)(state, GSL, GX, GC, X, C, S);
+    /* C function return number of the outputs */
+    return 0;
+}
+
+int Encoding_(aggregateE_forward)(THCTensor *E, THCTensor *A, THCTensor *X,
+    THCTensor *C)
+/*
+ * Aggregate operation
+ */
+{
+    Encoding_(AggregateE_Forward)(state, E, A, X, C);
+    /* C function return number of the outputs */
+    return 0;
+}
+
+int Encoding_(aggregateE_backward)(THCTensor *GA, THCTensor *GE, THCTensor *A,
+    THCTensor *X, THCTensor *C)
+/*
+ * Aggregate backward operation to A
+ * GA (dl/dA), GE (dl/dE), A (assignments)
+ */
+{
+    Encoding_(AggregateE_Backward)(state, GA, GE, A, X, C);
+    /* C function return number of the outputs */
+    return 0;
+}
+
 int Encoding_(aggregate_forward)(THCTensor *E, THCTensor *A, THCTensor *R)
 /*
...
@@ -35,6 +85,49 @@ int Encoding_(aggregate_backward)(THCTensor *GA, THCTensor *GR,
     return 0;
 }

+int Encoding_(residual_forward)(THCTensor *R, THCTensor *X, THCTensor *D)
+/*
+ * Residual operation
+ */
+{
+    Encoding_(Residual_Forward)(state, R, X, D);
+    /* C function return number of the outputs */
+    return 0;
+}
+
+int Encoding_(residual_backward)(THCTensor *GR, THCTensor *GX, THCTensor *GD)
+/*
+ * Residual operation
+ */
+{
+    Encoding_(Residual_Backward)(state, GR, GX, GD);
+    /* C function return number of the outputs */
+    return 0;
+}
+
+int Encoding_(squaresqueeze_forward)(THCTensor *L, THCTensor *R)
+/*
+ * SquareSqueeze operation
+ */
+{
+    Encoding_(SquareSqueeze_Forward)(state, L, R);
+    /* C function return number of the outputs */
+    return 0;
+}
+
+int Encoding_(squaresqueeze_backward)(THCTensor *GL, THCTensor *GR, THCTensor *R)
+/*
+ * SquareSqueeze operation
+ */
+{
+    Encoding_(SquareSqueeze_Backward)(state, GL, GR, R);
+    /* C function return number of the outputs */
+    return 0;
+}
+
 /*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
 int Encoding_(batchnorm_Forward)(THCTensor *output_, THCTensor *input_,
     THCTensor *mean_, THCTensor *invstd_, THCTensor *gamma_, THCTensor *beta_)
...
setup.py

@@ -10,19 +10,26 @@
 import os
 import sys
 import subprocess

 from setuptools import setup, find_packages

 import build
+from setuptools.command.develop import develop
+from setuptools.command.install import install

 this_file = os.path.dirname(__file__)

-extra_compile_args = ['-std=c++11', '-Wno-write-strings']
+# extra_compile_args = ['-std=c++11', '-Wno-write-strings']

 if os.getenv('PYTORCH_BINARY_BUILD') and platform.system() == 'Linux':
     print('PYTORCH_BINARY_BUILD found. Static linking libstdc++ on Linux')
     extra_compile_args += ['-static-libstdc++']
     extra_link_args += ['-static-libstdc++']

+class TestCommand(install):
+    """Post-installation mode."""
+    def run(self):
+        install.run(self)
+        subprocess.check_call("python test/test.py".split())

 setup(
     name="encoding",
     version="0.0.1",
...
@@ -35,11 +42,14 @@ setup(
     setup_requires=["cffi>=1.0.0"],
     # Exclude the build files.
     packages=find_packages(exclude=["build"]),
-    extra_compile_args=extra_compile_args,
+    # extra_compile_args=extra_compile_args,
     # Package where to put the extensions. Has to be a prefix of build.py.
     ext_package="",
     # Extensions to compile.
     cffi_modules=[os.path.join(this_file, "build.py:ffi")],
+    cmdclass={
+        'install': TestCommand,
+    },
 )
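One hazard worth noting in the lines shown: platform is not among the imports visible in this hunk, and extra_link_args is used before any visible definition, so the PYTORCH_BINARY_BUILD branch would raise NameError as shown (unless both are defined in the collapsed lines above the hunk). A hedged fix sketch:

import platform  # missing from the imports shown above

extra_compile_args, extra_link_args = [], []  # initialize before +=
if os.getenv('PYTORCH_BINARY_BUILD') and platform.system() == 'Linux':
    extra_compile_args += ['-static-libstdc++']
    extra_link_args += ['-static-libstdc++']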