Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
dlib
Commits
49a5d39d
"examples/vscode:/vscode.git/clone" did not exist on "8aac1f99d7af5873db7d23c07fba370d0f5061a6"
Commit
49a5d39d
authored
Apr 04, 2018
by
Davis King
Browse files
Made loss_multiclass_log_per_pixel use CUDA.
parent
6c962dd9
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
139 additions
and
0 deletions
+139
-0
dlib/dnn/cuda_dlib.cu
dlib/dnn/cuda_dlib.cu
+66
-0
dlib/dnn/cuda_dlib.h
dlib/dnn/cuda_dlib.h
+61
-0
dlib/dnn/loss.h
dlib/dnn/loss.h
+12
-0
No files found.
dlib/dnn/cuda_dlib.cu
View file @
49a5d39d
...
...
@@ -3,6 +3,7 @@
#include "cuda_utils.h"
#include "cuda_dlib.h"
#include "cudnn_dlibapi.h"
namespace
dlib
...
...
@@ -1623,6 +1624,71 @@ namespace dlib
}
}
// ----------------------------------------------------------------------------------------
__device__
float
cuda_safe_log
(
float
x
,
float
epsilon
=
1e-10
)
{
// Prevent trying to calculate the logarithm of a very small number (let alone zero)
if
(
x
>=
epsilon
)
return
::
log
(
x
);
else
return
::
log
(
epsilon
);
}
    // Kernel computing the multiclass-log-per-pixel loss and its gradient in one
    // pass over the softmax output `g`.
    //
    // Layout assumptions (standard dlib tensor order: samples x k x nr x nc,
    // flattened row-major):
    //   - n           == total number of elements in g (num_samples*nk*plane_size)
    //   - plane_size  == nr*nc               (one channel's worth of pixels)
    //   - sample_size == nk*plane_size       (one sample's worth of elements)
    //   - nk          == number of classes (channels)
    //   - truth holds one uint16 label per pixel, num_samples planes of
    //     plane_size each (no class dimension).
    //
    // On exit, g is overwritten in place with the scaled gradient and the
    // per-thread loss partial sums are atomically accumulated into *loss_out
    // (which the caller must have zeroed beforehand).  The caller divides the
    // accumulated loss by the pixel count, so only -log(p) is summed here.
    __global__ void _cuda_compute_loss_multiclass_log_per_pixel(
        float* loss_out,
        float* g,
        const uint16_t* truth,
        size_t n,
        size_t plane_size,
        size_t sample_size,
        size_t nk,
        uint16_t label_to_ignore,
        const float scale
    )
    {
        float loss = 0;
        // Grid-stride loop: correct for any launch configuration chosen by
        // max_jobs(), including n not being a multiple of the grid size.
        for (auto i : grid_stride_range(0, n))
        {
            // Decompose the flat index i into:
            //   k   = which class channel this element belongs to
            //   idx = pixel index into the truth planes:
            //         (pixel within the plane) + plane_size * (sample number)
            const size_t k = (i/plane_size)%nk;
            const size_t idx = (i%plane_size) + plane_size*(i/sample_size);
            const size_t y = truth[idx];
            if (k == y)
            {
                // This element is the probability of the correct class:
                // contribute -log(p) to the loss and use the softmax-cross-
                // entropy gradient p-1 for the true class.
                loss -= cuda_safe_log(g[i]);
                g[i] = scale*(g[i]-1);
            }
            else if (y == label_to_ignore)
            {
                // Ignored pixel: contributes nothing to loss or gradient.
                // (k == y can never hold here since label_to_ignore is
                // uint16 max, which exceeds any valid class index < nk.)
                g[i] = 0.f;
            }
            else
            {
                // Wrong-class element: gradient is just the scaled probability.
                g[i] = scale*g[i];
            }
        }
        // Reduce the per-thread partial losses within each warp, then one
        // atomic add per warp into the scalar output — avoids hammering a
        // single address with one atomic per thread.
        warp_reduce_atomic_add(*loss_out, loss);
    }
    // Host-side driver: computes the softmax of the network output, launches the
    // loss/gradient kernel, and reads back the scalar loss.
    //
    //   loss_cuda_work_buffer - device pointer to a single float used to
    //                           accumulate the loss across all kernel threads
    //   truth_buffer          - device pointer to num_samples planes of
    //                           nr*nc uint16 labels (already copied to the GPU
    //                           by operator())
    //   subnetwork_output     - raw network output (one channel per class)
    //   gradient              - on exit, holds the loss gradient (same
    //                           dimensions as subnetwork_output)
    //   loss                  - on exit, the average loss over all pixels in
    //                           the mini-batch
    void compute_loss_multiclass_log_per_pixel::do_work(
        float* loss_cuda_work_buffer,
        const uint16_t* truth_buffer,
        const tensor& subnetwork_output,
        tensor& gradient,
        double& loss
    )
    {
        // The kernel accumulates into this scalar with atomics, so it must
        // start at zero.
        CHECK_CUDA(cudaMemset(loss_cuda_work_buffer, 0, sizeof(float)));
        // gradient temporarily holds the softmax probabilities; the kernel
        // then overwrites it in place with the actual gradient.
        softmax(gradient, subnetwork_output);
        static const uint16_t label_to_ignore = std::numeric_limits<uint16_t>::max();

        // The loss we output is the average loss over the mini-batch, and also over each element of the matrix output.
        const double scale = 1.0/(subnetwork_output.num_samples()*subnetwork_output.nr()*subnetwork_output.nc());

        // Kernel arguments, in order: loss scalar, softmax/gradient data, truth
        // labels, total element count n, plane_size (nr*nc), sample_size
        // (nr*nc*k), nk, ignore label, gradient scale.  This ordering must
        // match _cuda_compute_loss_multiclass_log_per_pixel's signature.
        launch_kernel(_cuda_compute_loss_multiclass_log_per_pixel, max_jobs(gradient.size()),
            loss_cuda_work_buffer, gradient.device(), truth_buffer, gradient.size(),
            gradient.nr()*gradient.nc(), gradient.nr()*gradient.nc()*gradient.k(),
            gradient.k(), label_to_ignore, scale);

        // cudaMemcpy here is synchronous, so it also serves as the barrier
        // that guarantees the kernel has finished before we read the result.
        float floss;
        CHECK_CUDA(cudaMemcpy(&floss, loss_cuda_work_buffer, sizeof(float), cudaMemcpyDefault));
        // The kernel sums raw -log(p) terms; apply the averaging scale here.
        loss = scale*floss;
    }
// ----------------------------------------------------------------------------------------
}
...
...
dlib/dnn/cuda_dlib.h
View file @
49a5d39d
...
...
@@ -409,6 +409,67 @@ namespace dlib
size_t
count_k
);
// ----------------------------------------------------------------------------------------
    class compute_loss_multiclass_log_per_pixel
    {
        /*!
            The point of this class is to compute the loss computed by
            loss_multiclass_log_per_pixel, but to do so with CUDA.
        !*/
    public:

        compute_loss_multiclass_log_per_pixel(
        )
        {
            // Shared, reusable device scratch buffer — avoids a cudaMalloc per
            // loss evaluation.
            work = device_global_buffer();
        }

        // Computes the loss and gradient for one mini-batch.
        //   truth             - iterator over num_samples matrix<uint16_t>
        //                       label images, each nr x nc (asserted below)
        //   subnetwork_output - raw network output, one channel per class
        //   gradient          - on exit, the loss gradient
        //   loss              - on exit, the average per-pixel loss
        template <typename const_label_iterator>
        void operator() (
            const_label_iterator truth,
            const tensor& subnetwork_output,
            tensor& gradient,
            double& loss
        ) const
        {
            const size_t bytes_per_plane = subnetwork_output.nr()*subnetwork_output.nc()*sizeof(uint16_t);
            // Allocate a cuda buffer to store all the truth images and also one float
            // for the scalar loss output.
            // Buffer layout: [ float loss | plane 0 | plane 1 | ... ]
            cuda_data_void_ptr buf = work->get(subnetwork_output.num_samples()*bytes_per_plane + sizeof(float));
            cuda_data_void_ptr loss_buf = buf;
            // Advance past the loss float so buf now points at the truth area.
            buf = buf+sizeof(float);

            // copy the truth data into a cuda buffer.
            for (long i = 0; i < subnetwork_output.num_samples(); ++i, ++truth)
            {
                const matrix<uint16_t>& t = *truth;
                // Each label image must match the network output's spatial size.
                DLIB_ASSERT(t.nr() == subnetwork_output.nr());
                DLIB_ASSERT(t.nc() == subnetwork_output.nc());
                // memcpy overload for cuda_data_void_ptr: host-to-device copy
                // of one label plane into its slot in the device buffer.
                memcpy(buf + i*bytes_per_plane, &t(0,0), bytes_per_plane);
            }

            do_work(static_cast<float*>(loss_buf.data()), static_cast<uint16_t*>(buf.data()), subnetwork_output, gradient, loss);
        }

    private:

        // Non-template implementation (defined in cuda_dlib.cu) that zeroes the
        // loss scalar, runs softmax, launches the loss kernel, and copies the
        // resulting loss back to the host.
        static void do_work(
            float* loss_cuda_work_buffer,
            const uint16_t* truth_buffer,
            const tensor& subnetwork_output,
            tensor& gradient,
            double& loss
        );

        // Reusable device memory for the truth planes + loss scalar.
        std::shared_ptr<resizable_cuda_buffer> work;
    };
// ------------------------------------------------------------------------------------
// ------------------------------------------------------------------------------------
// ------------------------------------------------------------------------------------
...
...
dlib/dnn/loss.h
View file @
49a5d39d
...
...
@@ -2407,6 +2407,13 @@ namespace dlib
"output size = "
<<
output_tensor
.
nr
()
<<
" x "
<<
output_tensor
.
nc
());
}
#ifdef DLIB_USE_CUDA
double
loss
;
cuda_compute
(
truth
,
output_tensor
,
grad
,
loss
);
return
loss
;
#else
tt
::
softmax
(
grad
,
output_tensor
);
// The loss we output is the average loss over the mini-batch, and also over each element of the matrix output.
...
...
@@ -2445,6 +2452,7 @@ namespace dlib
}
}
return
loss
;
#endif
}
friend
void
serialize
(
const
loss_multiclass_log_per_pixel_
&
,
std
::
ostream
&
out
)
...
...
@@ -2478,6 +2486,10 @@ namespace dlib
return
((
sample
*
t
.
k
()
+
k
)
*
t
.
nr
()
+
row
)
*
t
.
nc
()
+
column
;
}
#ifdef DLIB_USE_CUDA
cuda
::
compute_loss_multiclass_log_per_pixel
cuda_compute
;
#endif
};
template
<
typename
SUBNET
>
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment