OpenDAS / dlib · Commits

Commit 669a1e17, authored Jan 23, 2016 by Davis King

Added affine_transform_conv() and multiply_conv() as well as a CPU
implementation of assign_conv_bias_gradient().

Parent: e44b2aa2
Showing 6 changed files with 328 additions and 3 deletions.
dlib/dnn/cpu_dlib.cpp      +124  -1
dlib/dnn/cpu_dlib.h        +20   -0
dlib/dnn/cuda_dlib.cu      +88   -0
dlib/dnn/cuda_dlib.h       +15   -0
dlib/dnn/tensor_tools.cpp  +30   -2
dlib/dnn/tensor_tools.h    +51   -0
dlib/dnn/cpu_dlib.cpp
@@ -54,6 +54,57 @@ namespace dlib
    }
}

void multiply_conv (
    tensor& dest,
    const tensor& src1,
    const tensor& src2
)
{
    auto d = dest.host();
    auto s1 = src1.host();
    auto s2 = src2.host();
    if (have_same_dimensions(dest,src1))
    {
        DLIB_CASSERT(src2.num_samples() == 1 && src2.nr() == 1 &&
                     src2.nc() == 1 && src2.k() == src1.k(), "");
        for (long n = 0; n < dest.num_samples(); ++n)
        {
            for (long k = 0; k < dest.k(); ++k)
            {
                for (long r = 0; r < dest.nr(); ++r)
                {
                    for (long c = 0; c < dest.nc(); ++c)
                    {
                        *d++ = (*s1++)*s2[k];
                    }
                }
            }
        }
    }
    else
    {
        DLIB_CASSERT(have_same_dimensions(src1,src2), "");
        DLIB_CASSERT(dest.num_samples() == 1 && dest.nr() == 1 &&
                     dest.nc() == 1 && dest.k() == src1.k(), "");
        for (long k = 0; k < src1.k(); ++k)
            d[k] = 0;
        for (long n = 0; n < src1.num_samples(); ++n)
        {
            for (long k = 0; k < src1.k(); ++k)
            {
                for (long r = 0; r < src1.nr(); ++r)
                {
                    for (long c = 0; c < src1.nc(); ++c)
                    {
                        d[k] += (*s1++)*(*s2++);
                    }
                }
            }
        }
    }
}
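Both branches walk src1 in row-major (n,k,r,c) order, so the raw pointer increments visit each element exactly once. A minimal standalone sketch of the first branch's per-channel scaling, using plain arrays instead of dlib::tensor (the layout is assumed, not taken from this commit):

#include <cassert>

int main()
{
    // 1 sample, 2 channels, 1x2 image planes, laid out as (n,k,r,c).
    const float src1[] = {1, 2, 3, 4};  // channel 0: {1,2}, channel 1: {3,4}
    const float src2[] = {10, 100};     // one scale factor per channel
    float dest[4];
    float* d = dest;
    const float* s1 = src1;
    for (long k = 0; k < 2; ++k)        // channels
        for (long c = 0; c < 2; ++c)    // columns within the plane
            *d++ = (*s1++)*src2[k];
    assert(dest[0] == 10 && dest[1] == 20 && dest[2] == 300 && dest[3] == 400);
    return 0;
}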
void add (
    float beta,
    tensor& dest,
    ...
@@ -196,6 +247,44 @@ namespace dlib
    }
}

// ------------------------------------------------------------------------------------

void assign_conv_bias_gradient (
    tensor& grad,
    const tensor& gradient_input
)
{
    DLIB_CASSERT(
        grad.num_samples() == 1 &&
        grad.k() >= 1 &&
        grad.nr() == 1 &&
        grad.nc() == 1 &&
        gradient_input.k() == grad.k() &&
        gradient_input.size() > 0 &&
        is_same_object(grad, gradient_input) == false,
        "");
    auto g = grad.host();
    auto gi = gradient_input.host();
    for (long k = 0; k < gradient_input.k(); ++k)
        g[k] = 0;
    for (long n = 0; n < gradient_input.num_samples(); ++n)
    {
        for (long k = 0; k < gradient_input.k(); ++k)
        {
            for (long r = 0; r < gradient_input.nr(); ++r)
            {
                for (long c = 0; c < gradient_input.nc(); ++c)
                {
                    g[k] += (*gi++);
                }
            }
        }
    }
}
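In effect grad(k) ends up holding the sum of gradient_input(n,k,r,c) over every sample and spatial position, i.e. the gradient of a per-channel bias term. A tiny plain-array sketch of that reduction (same assumed layout as above):

#include <cassert>

int main()
{
    // 2 samples, 2 channels, 1x2 image planes, laid out as (n,k,r,c).
    const float gi[] = {1, 2,  3, 4,    // sample 0: channel 0, channel 1
                        5, 6,  7, 8};   // sample 1: channel 0, channel 1
    float g[2] = {0, 0};
    const float* p = gi;
    for (long n = 0; n < 2; ++n)
        for (long k = 0; k < 2; ++k)
            for (long c = 0; c < 2; ++c)
                g[k] += *p++;
    assert(g[0] == 1+2+5+6);  // == 14
    assert(g[1] == 3+4+7+8);  // == 22
    return 0;
}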
// -----------------------------------------------------------------------------------

void affine_transform (
    ...
@@ -293,6 +382,41 @@ namespace dlib
    }
}

// -----------------------------------------------------------------------------------

void affine_transform_conv (
    tensor& dest,
    const tensor& src,
    const tensor& A,
    const tensor& B
)
{
    DLIB_CASSERT(have_same_dimensions(dest, src), "");
    DLIB_CASSERT(have_same_dimensions(A, B), "");
    DLIB_CASSERT(A.num_samples() == 1 && A.nr() == 1 &&
                 A.nc() == 1 && A.k() == src.k(), "");
    auto d = dest.host();
    auto s = src.host();
    const auto a = A.host();
    const auto b = B.host();
    for (long n = 0; n < dest.num_samples(); ++n)
    {
        for (long k = 0; k < dest.k(); ++k)
        {
            for (long r = 0; r < dest.nr(); ++r)
            {
                for (long c = 0; c < dest.nc(); ++c)
                {
                    *d++ = a[k]*(*s++) + b[k];
                }
            }
        }
    }
}
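affine_transform_conv() applies one scale/offset pair per channel, which is exactly the shape of a batch-norm folded for inference. As a hedged illustration of one plausible use (the gamma/beta/mean/var folding shown is the standard formula, not code from this commit):

#include <cmath>

// Fold batch-norm statistics into per-channel A and B so that
// A[k]*x + B[k] == gamma[k]*(x - mean[k])/std::sqrt(var[k] + eps) + beta[k].
void fold_bn_into_affine(
    const float* gamma, const float* beta,
    const float* mean, const float* var,
    float eps, long num_channels,
    float* A, float* B)
{
    for (long k = 0; k < num_channels; ++k)
    {
        A[k] = gamma[k]/std::sqrt(var[k] + eps);
        B[k] = beta[k] - A[k]*mean[k];
    }
}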
// -----------------------------------------------------------------------------------

void batch_normalize_inference (
    ...
@@ -1238,7 +1362,6 @@ namespace dlib
}
// ------------------------------------------------------------------------------------
// ------------------------------------------------------------------------------------
// ------------------------------------------------------------------------------------
...

dlib/dnn/cpu_dlib.h
@@ -25,6 +25,12 @@ namespace dlib
    const tensor& src2
);

void multiply_conv (
    tensor& dest,
    const tensor& src1,
    const tensor& src2
);

void add (
    float beta,
    tensor& dest,
    ...
@@ -43,6 +49,11 @@ namespace dlib
    const tensor& src2
);

void assign_conv_bias_gradient (
    tensor& grad,
    const tensor& gradient_input
);

// -----------------------------------------------------------------------------------

void affine_transform (
    ...
@@ -81,6 +92,15 @@ namespace dlib
    const tensor& B
);

// -----------------------------------------------------------------------------------

void affine_transform_conv (
    tensor& dest,
    const tensor& src,
    const tensor& A,
    const tensor& B
);

// -----------------------------------------------------------------------------------

void batch_normalize_inference (
    ...

dlib/dnn/cuda_dlib.cu
@@ -94,6 +94,68 @@ namespace dlib
    }
}

// ------------------------------------------------------------------------------------

__global__ void _cuda_multiply_conv(
    float* d, const float* s1, size_t n, const float* s2, size_t bs, size_t ks
)
{
    for (auto i : grid_stride_range(0, n))
    {
        auto k = (i/bs)%ks;
        d[i] = s1[i]*s2[k];
    }
}
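The flat index arithmetic mirrors a row-major (n,k,r,c) tensor layout: with bs = nr()*nc() elements per image plane and ks = k() channels, element i belongs to channel (i/bs)%ks. A small host-side check of that mapping (the layout is assumed, not part of the commit):

#include <cassert>
#include <cstddef>

int main()
{
    const std::size_t ns = 2, ks = 3, nr = 4, nc = 5;
    const std::size_t bs = nr*nc;  // elements per image plane
    std::size_t i = 0;
    for (std::size_t n = 0; n < ns; ++n)
        for (std::size_t k = 0; k < ks; ++k)
            for (std::size_t r = 0; r < nr; ++r)
                for (std::size_t c = 0; c < nc; ++c, ++i)
                    assert((i/bs)%ks == k);  // channel recovered from flat offset
    return 0;
}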
__global__ void _cuda_multiply_conv2(
    float* d, const float* s1, size_t n, const float* s2, size_t bs, size_t ks
)
{
    // zero initialize d before we begin.
    for (auto i : grid_stride_range(0, ks))
        d[i] = 0;
    __syncthreads();
    // loop over all the image planes
    for (auto i : grid_stride_range_y(0, n))
    {
        // sum all the elements in the i-th image plane
        float temp = 0;
        for (auto j : grid_stride_range(i*bs, (i+1)*bs))
            temp += s1[j]*s2[j];
        auto k = i%ks;
        // and store the sum into d[k]
        warp_reduce_atomic_add(d[k], temp);
    }
}
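warp_reduce_atomic_add() itself is not shown in this diff; presumably it sums temp across the 32 x-threads of a warp and has a single lane issue one atomicAdd, which is why the launcher below insists on an x block size of 32. A hypothetical sketch of such a helper (modern shuffle intrinsics, not dlib's actual code):

// Hypothetical warp reduction + atomic add, assuming all 32 x-threads
// of a warp call it with their partial sums.
__device__ void warp_reduce_atomic_add_sketch(float& out, float val)
{
    for (int offset = 16; offset > 0; offset /= 2)
        val += __shfl_down_sync(0xffffffff, val, offset);
    if (threadIdx.x % 32 == 0)
        atomicAdd(&out, val);
}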
void multiply_conv (
    tensor& dest,
    const tensor& src1,
    const tensor& src2
)
{
    if (have_same_dimensions(dest,src1))
    {
        DLIB_CASSERT(src2.num_samples() == 1 && src2.nr() == 1 &&
                     src2.nc() == 1 && src2.k() == src1.k(), "");
        if (dest.size() == 0)
            return;
        launch_kernel(_cuda_multiply_conv, max_jobs(dest.size()),
                      dest.device(), src1.device(), src1.size(),
                      src2.device(), src1.nr()*src1.nc(), src1.k());
    }
    else
    {
        DLIB_CASSERT(have_same_dimensions(src1,src2), "");
        DLIB_CASSERT(dest.num_samples() == 1 && dest.nr() == 1 &&
                     dest.nc() == 1 && dest.k() == src1.k(), "");
        if (dest.size() == 0)
            return;

        dim3 blocks(10,1);
        // x size must be 32 because we are using warp_reduce_atomic_add() in the kernel.
        dim3 threads(32,32);
        _cuda_multiply_conv2<<<blocks,threads>>>(
            dest.device(), src1.device(), src1.num_samples()*src1.k(),
            src2.device(), src1.nr()*src1.nc(), src1.k());
    }
}
// ------------------------------------------------------------------------------------
__global__ void _cuda_add1(float* d, const float* s1, const float* s2, size_t n)
    ...
@@ -302,6 +364,32 @@ namespace dlib
    }
}

// -----------------------------------------------------------------------------------

__global__ void _cuda_affine_transform_conv(
    float* d, const float* s, size_t n,
    const float* A, const float* B, size_t bs, size_t ks
)
{
    for (auto i : grid_stride_range(0, n))
    {
        auto k = (i/bs)%ks;
        d[i] = A[k]*s[i] + B[k];
    }
}
void affine_transform_conv (
    tensor& dest,
    const tensor& src,
    const tensor& A,
    const tensor& B
)
{
    DLIB_CASSERT(have_same_dimensions(dest, src), "");
    DLIB_CASSERT(have_same_dimensions(A, B), "");
    DLIB_CASSERT(A.num_samples() == 1 && A.nr() == 1 &&
                 A.nc() == 1 && A.k() == src.k(), "");
    launch_kernel(_cuda_affine_transform_conv, max_jobs(dest.size()),
                  dest.device(), src.device(), src.size(),
                  A.device(), B.device(), src.nr()*src.nc(), src.k());
}
// -----------------------------------------------------------------------------------
__global__ void _add_bias_gradient(float* out, const float* in, size_t n, size_t total_n)
    ...
dlib/dnn/cuda_dlib.h
@@ -30,6 +30,12 @@ namespace dlib
    const tensor& src2
);

void multiply_conv (
    tensor& dest,
    const tensor& src1,
    const tensor& src2
);

void add (
    tensor& dest,
    const tensor& src1,
    ...
@@ -82,6 +88,15 @@ namespace dlib
    const tensor& B
);

// -----------------------------------------------------------------------------------

void affine_transform_conv (
    tensor& dest,
    const tensor& src,
    const tensor& A,
    const tensor& B
);

// -----------------------------------------------------------------------------------

void assign_bias_gradient (
    ...

dlib/dnn/tensor_tools.cpp
@@ -115,6 +115,19 @@ namespace dlib { namespace tt
}

void multiply_conv (
    tensor& dest,
    const tensor& src1,
    const tensor& src2
)
{
#ifdef DLIB_USE_CUDA
    cuda::multiply_conv(dest, src1, src2);
#else
    cpu::multiply_conv(dest, src1, src2);
#endif
}
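The tt:: wrappers keep the CUDA/CPU split behind a single compile-time switch, so callers never name cuda:: or cpu:: directly. A hedged usage sketch with dlib::resizable_tensor (shapes chosen arbitrarily, assuming the three-argument signature introduced by this commit):

#include <dlib/dnn.h>

int main()
{
    dlib::resizable_tensor dest(2,3,4,4), src1(2,3,4,4), src2(1,3,1,1);
    src1 = 1;  // fill every element with 1
    src2 = 2;  // per-channel scale of 2
    dlib::tt::multiply_conv(dest, src1, src2);
    // first mode: dest has src1's dimensions, so dest(n,k,r,c) == 1*2 == 2
    return 0;
}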
// ----------------------------------------------------------------------------------------

void affine_transform (
    ...
@@ -181,6 +194,22 @@ namespace dlib { namespace tt
#endif
}

// ----------------------------------------------------------------------------------------

void affine_transform_conv (
    tensor& dest,
    const tensor& src,
    const tensor& A,
    const tensor& B
)
{
#ifdef DLIB_USE_CUDA
    cuda::affine_transform_conv(dest, src, A, B);
#else
    cpu::affine_transform_conv(dest, src, A, B);
#endif
}
// ----------------------------------------------------------------------------------------

void batch_normalize_inference (
    ...
@@ -362,8 +391,7 @@ namespace dlib { namespace tt
 #ifdef DLIB_USE_CUDA
     cuda::assign_conv_bias_gradient(grad, gradient_input);
 #else
-    // TODO
-    DLIB_CASSERT(false, "");
+    cpu::assign_conv_bias_gradient(grad, gradient_input);
 #endif
 }
...
dlib/dnn/tensor_tools.h
@@ -118,6 +118,34 @@ namespace dlib { namespace tt
      with num_samples()==1 which is then assigned to #dest.
!*/

void multiply_conv (
    tensor& dest,
    const tensor& src1,
    const tensor& src2
);
/*!
    requires
        - if (have_same_dimensions(dest, src1) == true) then
            - src2.num_samples() == 1
            - src2.nr() == 1
            - src2.nc() == 1
            - src2.k() == src1.k()
        - else
            - have_same_dimensions(src1, src2) == true
            - dest.num_samples() == 1
            - dest.nr() == 1
            - dest.nc() == 1
            - dest.k() == src1.k()
    ensures
        - Performs #dest == src1*src2
          In particular, if the elements of dest, src1, and src2 were indexed by
          (n,k,r,c) then we would have:
            - if (have_same_dimensions(dest,src1)) then
                - #dest(n,k,r,c) == src1(n,k,r,c)*src2(k)
            - else
                - #dest(k) == sum over {n,r,c} of src1(n,k,r,c)*src2(n,k,r,c)
!*/
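A hedged sketch of the second mode, where dest acts as a per-channel accumulator (shapes arbitrary, same assumptions as the earlier usage sketch):

#include <dlib/dnn.h>

int main()
{
    dlib::resizable_tensor src1(2,3,4,4), src2(2,3,4,4), dest(1,3,1,1);
    src1 = 2;
    src2 = 3;
    dlib::tt::multiply_conv(dest, src1, src2);
    // second mode: dest(k) == sum over {n,r,c} of 2*3 == 2*4*4*6 == 192
    return 0;
}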
// ----------------------------------------------------------------------------------------

void affine_transform (
    ...
@@ -196,6 +224,29 @@ namespace dlib { namespace tt
        - #dest.host()[i] == A.host()[i]*src.host()[i] + B.host()[i]
!*/

// ----------------------------------------------------------------------------------------

void affine_transform_conv (
    tensor& dest,
    const tensor& src,
    const tensor& A,
    const tensor& B
);
/*!
    requires
        - have_same_dimensions(dest,src) == true
        - have_same_dimensions(A, B) == true
        - A.num_samples() == 1
        - A.nr() == 1
        - A.nc() == 1
        - A.k() == src.k()
    ensures
        - Performs #dest == A*src + B
          In particular, if the elements of dest and src were indexed by (n,k,r,c)
          then we would have:
            #dest(n,k,r,c) == A(k)*src(n,k,r,c) + B(k)
!*/
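And a matching hedged usage sketch for affine_transform_conv() (same assumptions as the earlier tensor sketches):

#include <dlib/dnn.h>

int main()
{
    dlib::resizable_tensor dest(2,3,5,5), src(2,3,5,5), A(1,3,1,1), B(1,3,1,1);
    src = 1;
    A = 2;
    B = 0.5;
    dlib::tt::affine_transform_conv(dest, src, A, B);
    // every element of dest is now 2*1 + 0.5 == 2.5
    return 0;
}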
// ----------------------------------------------------------------------------------------

void batch_normalize_inference (
    ...