OpenDAS / dlib
Commit 168574bd
authored Apr 19, 2016 by Davis King
Added visit_layer_parameter_gradients() and also fixed a silly synchronization
error in the multi-gpu training code.
parent d31723ff
Showing 3 changed files with 138 additions and 5 deletions (+138 -5)
dlib/dnn/core.h           +67  -0
dlib/dnn/core_abstract.h  +34  -0
dlib/dnn/trainer.h        +37  -5
dlib/dnn/core.h
...
...
@@ -3049,6 +3049,73 @@ namespace dlib
        impl::vlp_loop<0, net_type::num_layers>::visit(comp_i, net, v);
    }

// ----------------------------------------------------------------------------------------

    namespace impl
    {
        template <size_t i, size_t num>
        struct vlpg_loop
        {
            template <typename T, typename U>
            static typename std::enable_if<!is_add_layer<U>::value>::type invoke_functor(T&&, size_t&, U&&)
            {
                // intentionally left empty
            }

            template <typename T, typename U>
            static typename std::enable_if<is_add_layer<U>::value>::type invoke_functor(T&& v, size_t& comp_i, U&& l)
            {
                v(comp_i, l.get_parameter_gradient());
                ++comp_i;
            }

            template <
                typename net_type,
                typename visitor
                >
            static void visit(
                size_t comp_i,
                net_type& net,
                visitor&& v
            )
            {
                invoke_functor(v, comp_i, layer<i>(net));
                vlpg_loop<i+1, num>::visit(comp_i, net, v);
            }
        };

        template <size_t num>
        struct vlpg_loop<num,num>
        {
            template <
                typename net_type,
                typename visitor
                >
            static void visit(
                size_t,
                net_type&,
                visitor&&
            )
            {
                // Base case of recursion.  Don't do anything.
            }
        };
    }

    template <
        typename net_type,
        typename visitor
        >
    void visit_layer_parameter_gradients(
        net_type& net,
        visitor v
    )
    {
        size_t comp_i = 0;
        impl::vlpg_loop<0, net_type::num_layers>::visit(comp_i, net, v);
    }

// ----------------------------------------------------------------------------------------

}
...
...
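As a usage sketch for the new function (not part of this commit), the snippet below walks the parameter gradients of a small example network and prints how many gradient elements each computational layer currently holds. The network definition is an illustrative assumption; any dlib DNN type works.

// Minimal sketch, assuming a made-up example network.
#include <dlib/dnn.h>
#include <iostream>

int main()
{
    using namespace dlib;
    // illustrative network type, not from this commit
    using net_type = loss_multiclass_log<fc<10, relu<fc<32, input<matrix<float>>>>>>;

    net_type net;
    visit_layer_parameter_gradients(net, [](size_t idx, tensor& t)
    {
        // idx counts computational layers only; t is that layer's parameter
        // gradient tensor (it stays empty until a backward pass has run).
        std::cout << "computational layer " << idx
                  << " gradient elements: " << t.size() << std::endl;
    });
}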
dlib/dnn/core_abstract.h
...
...
@@ -1348,6 +1348,40 @@ namespace dlib
- When v() is called, the first argument is always < net_type::num_computational_layers.
!*/
// ----------------------------------------------------------------------------------------
    template <
        typename net_type,
        typename visitor
        >
    void visit_layer_parameter_gradients(
        net_type& net,
        visitor v
    );
    /*!
        requires
            - net_type is an object of type add_layer, add_loss_layer, add_skip_layer, or
              add_tag_layer.
            - v is a function object with a signature equivalent to:
                v(size_t idx, tensor& t)
        ensures
            - Loops over all the computational layers (i.e. layers with parameters, as
              opposed to loss, tag, or input layers) in net and passes their parameter
              gradients to v().  To be specific, this function essentially performs the
              following:
                size_t computational_layer_idx = 0;
                for (size_t i = 0; i < net_type::num_layers; ++i)
                {
                    if (layer<i>(net) is a computational layer)
                    {
                        v(computational_layer_idx, layer<i>(net).get_parameter_gradient());
                        ++computational_layer_idx;
                    }
                }
            - When v() is called, the first argument is always < net_type::num_computational_layers.
    !*/
// ----------------------------------------------------------------------------------------
    struct layer_test_results
...
...
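Since v() receives each parameter gradient as a non-const tensor&, a visitor can also rewrite the gradients in place under this contract. Below is a minimal global gradient-clipping sketch built on it; the function name clip_parameter_gradients and the threshold parameter are illustrative assumptions, not dlib API.

// Sketch of in-place gradient rescaling via the visitor contract.
#include <dlib/dnn.h>
#include <cmath>

using namespace dlib;

// Scale every parameter gradient in net by threshold/total_norm when the
// overall gradient norm exceeds threshold.  Illustrative helper only.
template <typename net_type>
void clip_parameter_gradients(net_type& net, double threshold)
{
    double sum_sq = 0;
    visit_layer_parameter_gradients(net, [&sum_sq](size_t, tensor& t)
    {
        if (t.size() != 0)
            sum_sq += sum(squared(mat(t)));
    });

    const double total_norm = std::sqrt(sum_sq);
    if (total_norm <= threshold)
        return;

    const float scale = threshold/total_norm;
    visit_layer_parameter_gradients(net, [scale](size_t, tensor& t)
    {
        if (t.size() != 0)
        {
            matrix<float> scaled = mat(t)*scale;  // copy out, then assign back
            t = scaled;
        }
    });
}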
dlib/dnn/trainer.h
...
...
@@ -501,9 +501,12 @@ namespace dlib
            std::vector<std::future<double>> losses(devices.size());
            std::vector<std::future<void>> update_futs(devices.size());

            std::vector<matrix<float>> param_buffer(net_type::num_computational_layers);
            std::vector<matrix<float>> param_grad_buffer(net_type::num_computational_layers);

            size_t iteration = 0;
            while(job_pipe.dequeue(next_job))
            {
                ++iteration;
                // Call compute_parameter_gradients() and update_parameters() but pick the
                // right version for unsupervised or supervised training based on the type
                // of label_type.
...
...
@@ -517,17 +520,45 @@ namespace dlib
                // gradient updates between devices.  So we do that now.
                if (devices.size() > 1)
                {
-                   for (auto&& p : param_buffer)
+                   for (auto&& p : param_grad_buffer)
                        p = 0;
                    // now average all the parameter gradients
                    for (size_t i = 0; i < devices.size(); ++i)
                    {
                        visit_layer_parameter_gradients(devices[i]->net, [&param_grad_buffer](size_t j, tensor& t) {
                            if (t.size() != 0)
                                param_grad_buffer[j] += mat(t);
                        });
                    }
                    // and then assign the parameter gradients back to all the networks
                    const float scale = 1.0f/devices.size();
                    for (size_t i = 0; i < devices.size(); ++i)
                    {
                        visit_layer_parameter_gradients(devices[i]->net, [scale,&param_grad_buffer](size_t j, tensor& t) {
                            if (t.size() != 0)
                            {
                                t = param_grad_buffer[j]*scale;
                                t.async_copy_to_device();
                            }
                        });
                    }

                    // Every now and then force all the parameters to be the same just to
                    // make sure they aren't drifting apart due to any non-deterministic
                    // behavior on the GPU.
                    if (iteration%5000 == 1)
                    {
                        for (auto&& p : param_buffer)
                            p = 0;
                        // now average all the parameters
                        for (size_t i = 0; i < devices.size(); ++i)
                        {
                            visit_layer_parameters(devices[i]->net, [&param_buffer](size_t j, tensor& t) {
                                if (t.size() != 0)
                                    param_buffer[j] += mat(t);
                            });
                        }
-                       // and then assign the parameter gradients back to all the networks
+                       // and then assign the parameters back to all the networks.
                        const float scale = 1.0f/devices.size();
                        for (size_t i = 0; i < devices.size(); ++i)
                        {
...
...
@@ -540,6 +571,7 @@ namespace dlib
                            });
                        }
                    }
                }
// Now apply all the updates to each device.
...
...
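The comment above worries about replicas "drifting apart" between the periodic re-syncs. As a companion illustration, here is a hypothetical diagnostic (not in dlib and not part of this commit) that measures the largest element-wise parameter difference between two replicas of the same network, using the existing visit_layer_parameters() visitor.

// Sketch only: compare the parameters of two replicas of the same net type.
#include <dlib/dnn.h>
#include <algorithm>
#include <vector>

using namespace dlib;

template <typename net_type>
double max_parameter_drift(net_type& a, net_type& b)
{
    // snapshot the parameters of the first replica
    std::vector<matrix<float>> params_a(net_type::num_computational_layers);
    visit_layer_parameters(a, [&params_a](size_t j, tensor& t)
    {
        if (t.size() != 0)
            params_a[j] = mat(t);
    });

    // compare against the second replica, layer by layer
    double worst = 0;
    visit_layer_parameters(b, [&params_a, &worst](size_t j, tensor& t)
    {
        if (t.size() != 0 && params_a[j].size() != 0)
            worst = std::max(worst, (double)max(abs(params_a[j] - mat(t))));
    });
    return worst;
}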