OpenDAS / dlib

Commit 428b0bb8, authored Apr 29, 2016 by Davis King
Made multi-gpu mode use GPUDirect rather than copying through the CPU.
parent 0d6e3f12
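For context, "GPUDirect" here refers to CUDA peer-to-peer (P2P) transfers, which let one GPU copy data straight to another GPU over the bus instead of staging it in host memory. The sketch below is not dlib code; it just contrasts the two transfer styles using the plain CUDA runtime API (device numbers and buffer names are illustrative):

    #include <cuda_runtime.h>
    #include <vector>

    // Copy n floats from a buffer on device 1 to a buffer on device 0.
    void copy_between_gpus(float* dst_on_dev0, const float* src_on_dev1, size_t n)
    {
        // Old style: bounce the data through a host buffer (two bus hops).
        std::vector<float> host(n);
        cudaSetDevice(1);
        cudaMemcpy(host.data(), src_on_dev1, n*sizeof(float), cudaMemcpyDeviceToHost);
        cudaSetDevice(0);
        cudaMemcpy(dst_on_dev0, host.data(), n*sizeof(float), cudaMemcpyHostToDevice);

        // New style: direct device-to-device copy when the hardware supports it.
        int can_access = 0;
        cudaDeviceCanAccessPeer(&can_access, 0, 1);
        if (can_access)
        {
            cudaSetDevice(0);
            cudaDeviceEnablePeerAccess(1, 0);  // second argument (flags) must be 0
            cudaMemcpyPeer(dst_on_dev0, 0, src_on_dev1, 1, n*sizeof(float));
        }
    }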
Showing 1 changed file with 37 additions and 24 deletions.

dlib/dnn/trainer.h (+37, -24)
@@ -500,8 +500,30 @@ namespace dlib
 std::vector<std::future<double>> losses(devices.size());
 std::vector<std::future<void>> update_futs(devices.size());

 std::vector<matrix<float>> param_buffer(net_type::num_computational_layers);
 std::vector<matrix<float>> param_grad_buffer(net_type::num_computational_layers);

+std::vector<tt::multi_device_tensor_averager> averagers(net_type::num_computational_layers);
+if (devices.size() > 1)
+{
+    // setup the averagers to point to the tensors in the networks.
+    std::vector<std::vector<tensor*>> all_tensors(devices.size());
+    for (size_t i = 0; i < all_tensors.size(); ++i)
+    {
+        all_tensors[i].resize(net_type::num_computational_layers);
+        visit_layer_parameter_gradients(devices[i]->net, [&](size_t j, tensor& t){
+            all_tensors[i][j] = &t;
+        });
+    }
+    // Now set each averager to average the tensors at the same layer in each
+    // network.
+    for (size_t i = 0; i < net_type::num_computational_layers; ++i)
+    {
+        std::vector<tensor*> temp(all_tensors.size());
+        for (size_t j = 0; j < all_tensors.size(); ++j)
+            temp[j] = all_tensors[j][i];
+        averagers[i].set(temp);
+    }
+}
 size_t iteration = 0;
 while(job_pipe.dequeue(next_job))
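The setup above first gathers gradient tensors per device (all_tensors[device][layer]) and then transposes that layout so each tt::multi_device_tensor_averager receives one tensor per device for a single layer. A minimal, self-contained sketch of that regrouping, with plain ints standing in for dlib tensors (names mirror the diff, but nothing here is dlib code):

    #include <iostream>
    #include <vector>

    int main()
    {
        const size_t num_devices = 3, num_layers = 4;
        std::vector<std::vector<int>> params(num_devices, std::vector<int>(num_layers));

        // Per-device view, as visit_layer_parameter_gradients builds it above:
        // all_tensors[device][layer] points at that device's copy of the layer.
        std::vector<std::vector<int*>> all_tensors(num_devices);
        for (size_t i = 0; i < num_devices; ++i)
            for (size_t j = 0; j < num_layers; ++j)
                all_tensors[i].push_back(&params[i][j]);

        // Per-layer regrouping: each averager gets one pointer per device.
        for (size_t i = 0; i < num_layers; ++i)
        {
            std::vector<int*> temp(num_devices);
            for (size_t j = 0; j < num_devices; ++j)
                temp[j] = all_tensors[j][i];
            std::cout << "layer " << i << ": " << temp.size() << " device tensors\n";
        }
    }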
@@ -522,32 +544,22 @@ namespace dlib
 // gradient updates between devices. So we do that now.
 if (devices.size() > 1)
 {
-    for (auto&& p : param_grad_buffer)
-        p = 0;
-    // now average all the parameter gradients
-    for (size_t i = 0; i < devices.size(); ++i)
-    {
-        visit_layer_parameter_gradients(devices[i]->net, [&param_grad_buffer](size_t j, tensor& t)
-        {
-            if (t.size() != 0)
-                param_grad_buffer[j] += mat(t);
-        });
-    }
-    // and then assign the parameter gradients back to all the networks
-    const float scale = 1.0f/devices.size();
-    for (size_t i = 0; i < devices.size(); ++i)
-    {
-        visit_layer_parameter_gradients(devices[i]->net, [scale,&param_grad_buffer](size_t j, tensor& t)
-        {
-            if (t.size() != 0)
-            {
-                t = param_grad_buffer[j]*scale;
-                t.async_copy_to_device();
-            }
-        });
-    }
+    for (auto&& d : devices)
+        cuda::device_synchronize(d->device_id);
+    for (auto&& avg : averagers)
+        avg.average();
+    /*
+    for (auto&& d : devices)
+        cuda::device_synchronize(d->device_id);
+    */
     // Every now and then force all the parameters to be the same just to
     // make sure they aren't drifting apart due to any non-deterministic
     // behavior on the GPU.
+    /*
     if (iteration%5000 == 1)
     {
         for (auto&& p : param_buffer)
@@ -573,6 +585,7 @@ namespace dlib
             });
         }
     }
+    */
 }
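Note the ordering in the new code: every device is synchronized before any averager runs, so the peer-to-peer averaging pass only reads gradient tensors that the asynchronous backward passes have finished writing. Assuming dlib's cuda::device_synchronize(id) is essentially a set-device-then-synchronize wrapper (an assumption, not verified against dlib's CUDA sources), the pattern in raw CUDA looks like:

    #include <cuda_runtime.h>

    // Drain all queued work on every device before touching their buffers.
    void synchronize_all_devices(int num_devices)
    {
        for (int id = 0; id < num_devices; ++id)
        {
            cudaSetDevice(id);        // make this device current
            cudaDeviceSynchronize();  // block until its queued kernels finish
        }
    }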