OpenDAS / dlib · Commit bd6994cc

Authored Jan 20, 2020 by Juha Reunanen; committed by Davis E. King, Jan 20, 2020

Add new loss layer for binary loss per pixel (#1976)

* Add new loss layer for binary loss per pixel
parent 6bdd289f

Showing 8 changed files with 599 additions and 41 deletions (+599 -41):
* dlib/cuda/cuda_dlib.cu (+71 -6)
* dlib/cuda/cuda_dlib.h (+68 -6)
* dlib/dnn/loss.h (+158 -0)
* dlib/dnn/loss_abstract.h (+62 -0)
* dlib/test/dnn.cpp (+199 -0)
* examples/dnn_instance_segmentation_ex.cpp (+28 -16)
* examples/dnn_instance_segmentation_ex.h (+6 -6)
* examples/dnn_instance_segmentation_train_ex.cpp (+7 -7)
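For orientation before the diffs, here is a minimal, self-contained sketch of how the new loss layer is used. The two-pixel training data and the tiny con<1,1,1,1,1,...> network come straight from the new test added in dlib/test/dnn.cpp; the solver settings and epoch count are illustrative assumptions, not something this commit prescribes. Training labels are matrix<float> images holding +1 or -1 per pixel (0 marks a pixel to ignore), and running the net returns raw per-pixel scores where a positive score predicts the +1 class.

    // Usage sketch only -- mirrors test_loss_binary_log_per_pixel_learned_params_on_trivial_two_pixel_task.
    #include <dlib/dnn.h>
    #include <vector>
    using namespace dlib;

    int main()
    {
        // One training sample with two pixels; per-pixel labels are +1 / -1 (0 would mean "ignore this pixel").
        std::vector<matrix<float>> x({ matrix<float,2,1>({-1, 1}) });
        std::vector<matrix<float>> y({ matrix<float,2,1>({-1, 1}) });

        // A 1x1 convolution on top of the new per-pixel binary log loss.
        using net_type = loss_binary_log_per_pixel<con<1,1,1,1,1,input<matrix<float>>>>;
        net_type net;

        dnn_trainer<net_type> trainer(net, sgd(0, 0.9));  // solver settings are illustrative
        trainer.set_learning_rate(0.1);
        trainer.set_max_num_epochs(100);
        trainer.train(x, y);

        // Raw per-pixel scores; a score > 0 means the +1 class is predicted.
        const matrix<float> scores = net(x)[0];
        return scores(1,0) > 0 ? 0 : 1;
    }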
dlib/cuda/cuda_dlib.cu

@@ -1681,6 +1681,48 @@ namespace dlib
             }
         }

+    // ----------------------------------------------------------------------------------------
+
+        __device__ float cuda_log1pexp(float x)
+        {
+            if (x <= -18)
+                return std::exp(x);
+            else if (-18 < x && x <= 9)
+                return std::log1p(std::exp(x));
+            else if (9 < x && x <= 16)
+                return x + std::exp(-x);
+            else
+                return x;
+        }
+
+        __global__ void _cuda_compute_loss_binary_log_per_pixel(float* loss_out, float* g, const float* truth, const float* out_data, size_t n, const float scale)
+        {
+            float loss = 0;
+            for (auto i : grid_stride_range(0, n))
+            {
+                const float y = truth[i];
+
+                if (y > 0.f)
+                {
+                    const float temp = cuda_log1pexp(-out_data[i]);
+                    loss += y*temp;
+                    g[i] = y*scale*(g[i]-1);
+                }
+                else if (y < 0.f)
+                {
+                    const float temp = -(-out_data[i]-cuda_log1pexp(-out_data[i]));
+                    loss += -y*temp;
+                    g[i] = -y*scale*g[i];
+                }
+                else
+                {
+                    g[i] = 0.f;
+                }
+            }
+
+            warp_reduce_atomic_add(*loss_out, loss);
+        }
+
     // ----------------------------------------------------------------------------------------

         __device__ float cuda_safe_log(float x, float epsilon = 1e-10)
@@ -1720,29 +1762,52 @@ namespace dlib
             warp_reduce_atomic_add(*loss_out, loss);
         }

+    // ----------------------------------------------------------------------------------------
+
+        void compute_loss_binary_log_per_pixel::
+        do_work(
+            cuda_data_ptr<float> loss_work_buffer,
+            cuda_data_ptr<const float> truth_buffer,
+            const tensor& subnetwork_output,
+            tensor& gradient,
+            double& loss
+        )
+        {
+            CHECK_CUDA(cudaMemset(loss_work_buffer, 0, sizeof(float)));
+            sigmoid(gradient, subnetwork_output);
+
+            // The loss we output is the average loss over the mini-batch, and also over each element of the matrix output.
+            const double scale = 1.0/(subnetwork_output.num_samples()*subnetwork_output.nr()*subnetwork_output.nc());
+            launch_kernel(_cuda_compute_loss_binary_log_per_pixel, max_jobs(gradient.size()),
+                          loss_work_buffer.data(), gradient.device(), truth_buffer.data(), subnetwork_output.device(), gradient.size(), scale);
+
+            float floss;
+            dlib::cuda::memcpy(&floss, loss_work_buffer);
+            loss = scale*floss;
+        }
+
         void compute_loss_multiclass_log_per_pixel::
         do_work(
-            float* loss_cuda_work_buffer,
-            const uint16_t* truth_buffer,
+            cuda_data_ptr<float> loss_work_buffer,
+            cuda_data_ptr<const uint16_t> truth_buffer,
             const tensor& subnetwork_output,
             tensor& gradient,
             double& loss
         )
         {
-            CHECK_CUDA(cudaMemset(loss_cuda_work_buffer, 0, sizeof(float)));
+            CHECK_CUDA(cudaMemset(loss_work_buffer, 0, sizeof(float)));
             softmax(gradient, subnetwork_output);
             static const uint16_t label_to_ignore = std::numeric_limits<uint16_t>::max();

             // The loss we output is the average loss over the mini-batch, and also over each element of the matrix output.
             const double scale = 1.0/(subnetwork_output.num_samples()*subnetwork_output.nr()*subnetwork_output.nc());
             launch_kernel(_cuda_compute_loss_multiclass_log_per_pixel, max_jobs(gradient.size()),
-                          loss_cuda_work_buffer, gradient.device(), truth_buffer, gradient.size(), gradient.nr()*gradient.nc(), gradient.nr()*gradient.nc()*gradient.k(), gradient.k(), label_to_ignore, scale);
+                          loss_work_buffer.data(), gradient.device(), truth_buffer.data(), gradient.size(), gradient.nr()*gradient.nc(), gradient.nr()*gradient.nc()*gradient.k(), gradient.k(), label_to_ignore, scale);

             float floss;
-            CHECK_CUDA(cudaMemcpy(&floss, loss_cuda_work_buffer, sizeof(float), cudaMemcpyDefault));
+            dlib::cuda::memcpy(&floss, loss_work_buffer);
             loss = scale*floss;
         }
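A note on the math, not part of the diff: cuda_log1pexp above is just a numerically stable, piecewise evaluation of log(1 + e^x), mirroring the log1pexp used by the CPU path added to dlib/dnn/loss.h below. The kernel accumulates the standard binary log loss on the raw score s = out_data[i] with target y, weighting by |y|, skipping pixels with y = 0, and averaging over every sample and pixel via the scale factor:

\[
\ell(s, y) =
\begin{cases}
\;\; y \,\log\!\left(1 + e^{-s}\right) & y > 0 \\
 -y \,\log\!\left(1 + e^{\,s}\right)   & y < 0 \\
\;\; 0                                  & y = 0 \ \text{(pixel ignored)}
\end{cases}
\qquad
\frac{\partial \ell}{\partial s} = |y|\,\bigl(\sigma(s) - \mathbb{1}[y > 0]\bigr)
\]

which is exactly what the g[i] updates compute once the scale factor 1/(num_samples*nr*nc) is applied.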
dlib/cuda/cuda_dlib.h

@@ -424,11 +424,70 @@ namespace dlib
     // ----------------------------------------------------------------------------------------

+        class compute_loss_binary_log_per_pixel
+        {
+            /*!
+                The point of this class is to compute the loss computed by
+                loss_binary_log_per_pixel_, but to do so with CUDA.
+            !*/
+        public:
+
+            compute_loss_binary_log_per_pixel(
+            )
+            {
+            }
+
+            template <
+                typename const_label_iterator
+                >
+            void operator() (
+                const_label_iterator truth,
+                const tensor& subnetwork_output,
+                tensor& gradient,
+                double& loss
+            ) const
+            {
+                const auto image_size = subnetwork_output.nr()*subnetwork_output.nc();
+                const size_t bytes_per_plane = image_size*sizeof(float);
+                // Allocate a cuda buffer to store all the truth images and also one float
+                // for the scalar loss output.
+                buf = device_global_buffer(subnetwork_output.num_samples()*bytes_per_plane + sizeof(float));
+
+                cuda_data_ptr<float> loss_buf = static_pointer_cast<float>(buf, 1);
+                buf = buf+sizeof(float);
+
+                // copy the truth data into a cuda buffer.
+                for (long i = 0; i < subnetwork_output.num_samples(); ++i, ++truth)
+                {
+                    const matrix<float>& t = *truth;
+                    DLIB_ASSERT(t.nr() == subnetwork_output.nr());
+                    DLIB_ASSERT(t.nc() == subnetwork_output.nc());
+                    memcpy(buf + i*bytes_per_plane, &t(0,0), bytes_per_plane);
+                }
+
+                auto truth_buf = static_pointer_cast<const float>(buf, subnetwork_output.num_samples()*image_size);
+
+                do_work(loss_buf, truth_buf, subnetwork_output, gradient, loss);
+            }
+
+        private:
+
+            static void do_work(
+                cuda_data_ptr<float> loss_work_buffer,
+                cuda_data_ptr<const float> truth_buffer,
+                const tensor& subnetwork_output,
+                tensor& gradient,
+                double& loss
+            );
+
+            mutable cuda_data_void_ptr buf;
+        };
+
         class compute_loss_multiclass_log_per_pixel
         {
             /*!
                 The point of this class is to compute the loss computed by
-                loss_multiclass_log_per_pixel, but to do so with CUDA.
+                loss_multiclass_log_per_pixel_, but to do so with CUDA.
             !*/
         public:

@@ -447,12 +506,13 @@ namespace dlib
                 double& loss
             ) const
            {
-                const size_t bytes_per_plane = subnetwork_output.nr()*subnetwork_output.nc()*sizeof(uint16_t);
+                const auto image_size = subnetwork_output.nr()*subnetwork_output.nc();
+                const size_t bytes_per_plane = image_size*sizeof(uint16_t);
                 // Allocate a cuda buffer to store all the truth images and also one float
                 // for the scalar loss output.
                 buf = device_global_buffer(subnetwork_output.num_samples()*bytes_per_plane + sizeof(float));

-                cuda_data_void_ptr loss_buf = buf;
+                cuda_data_ptr<float> loss_buf = static_pointer_cast<float>(buf, 1);
                 buf = buf+sizeof(float);

                 // copy the truth data into a cuda buffer.

@@ -464,14 +524,16 @@ namespace dlib
                     memcpy(buf + i*bytes_per_plane, &t(0,0), bytes_per_plane);
                 }

-                do_work(static_cast<float*>(loss_buf.data()), static_cast<uint16_t*>(buf.data()), subnetwork_output, gradient, loss);
+                auto truth_buf = static_pointer_cast<const uint16_t>(buf, subnetwork_output.num_samples()*image_size);
+
+                do_work(loss_buf, truth_buf, subnetwork_output, gradient, loss);
             }

         private:

             static void do_work(
-                float* loss_cuda_work_buffer,
-                const uint16_t* truth_buffer,
+                cuda_data_ptr<float> loss_work_buffer,
+                cuda_data_ptr<const uint16_t> truth_buffer,
                 const tensor& subnetwork_output,
                 tensor& gradient,
                 double& loss
dlib/dnn/loss.h

@@ -2481,6 +2481,164 @@ namespace dlib
     template <typename SUBNET>
     using loss_mean_squared_multioutput = add_loss_layer<loss_mean_squared_multioutput_, SUBNET>;

+// ----------------------------------------------------------------------------------------
+
+    class loss_binary_log_per_pixel_
+    {
+    public:
+
+        typedef matrix<float> training_label_type;
+        typedef matrix<float> output_label_type;
+
+        template <
+            typename SUB_TYPE,
+            typename label_iterator
+            >
+        static void to_label (
+            const tensor& input_tensor,
+            const SUB_TYPE& sub,
+            label_iterator iter
+        )
+        {
+            DLIB_CASSERT(sub.sample_expansion_factor() == 1);
+
+            const tensor& output_tensor = sub.get_output();
+
+            DLIB_CASSERT(output_tensor.k() == 1);
+            DLIB_CASSERT(input_tensor.num_samples() == output_tensor.num_samples());
+
+            const float* const out_data = output_tensor.host();
+
+            for (long i = 0; i < output_tensor.num_samples(); ++i, ++iter)
+            {
+                iter->set_size(output_tensor.nr(), output_tensor.nc());
+                for (long r = 0; r < output_tensor.nr(); ++r)
+                {
+                    for (long c = 0; c < output_tensor.nc(); ++c)
+                    {
+                        iter->operator()(r, c) = out_data[tensor_index(output_tensor, i, r, c)];
+                    }
+                }
+            }
+        }
+
+        template <
+            typename const_label_iterator,
+            typename SUBNET
+            >
+        double compute_loss_value_and_gradient (
+            const tensor& input_tensor,
+            const_label_iterator truth,
+            SUBNET& sub
+        ) const
+        {
+            const tensor& output_tensor = sub.get_output();
+            tensor& grad = sub.get_gradient_input();
+
+            DLIB_CASSERT(sub.sample_expansion_factor() == 1);
+            DLIB_CASSERT(input_tensor.num_samples() != 0);
+            DLIB_CASSERT(input_tensor.num_samples()%sub.sample_expansion_factor() == 0);
+            DLIB_CASSERT(input_tensor.num_samples() == grad.num_samples());
+            DLIB_CASSERT(input_tensor.num_samples() == output_tensor.num_samples());
+            DLIB_CASSERT(output_tensor.k() == 1);
+            DLIB_CASSERT(output_tensor.nr() == grad.nr() &&
+                         output_tensor.nc() == grad.nc() &&
+                         output_tensor.k() == grad.k());
+            for (long idx = 0; idx < output_tensor.num_samples(); ++idx)
+            {
+                const_label_iterator truth_matrix_ptr = (truth + idx);
+                DLIB_CASSERT(truth_matrix_ptr->nr() == output_tensor.nr() &&
+                             truth_matrix_ptr->nc() == output_tensor.nc(),
+                             "truth size = " << truth_matrix_ptr->nr() << " x " << truth_matrix_ptr->nc() << ", "
+                             "output size = " << output_tensor.nr() << " x " << output_tensor.nc());
+            }
+
+#ifdef DLIB_USE_CUDA
+            double loss;
+            cuda_compute(truth, output_tensor, grad, loss);
+            return loss;
+#else
+            tt::sigmoid(grad, output_tensor);
+
+            // The loss we output is the average loss over the mini-batch, and also over each element of the matrix output.
+            const double scale = 1.0/(output_tensor.num_samples()*output_tensor.nr()*output_tensor.nc());
+            double loss = 0;
+            float* const g = grad.host();
+            const float* const out_data = output_tensor.host();
+            for (long i = 0; i < output_tensor.num_samples(); ++i, ++truth)
+            {
+                for (long r = 0; r < output_tensor.nr(); ++r)
+                {
+                    for (long c = 0; c < output_tensor.nc(); ++c)
+                    {
+                        const float y = truth->operator()(r, c);
+                        const size_t idx = tensor_index(output_tensor, i, r, c);
+
+                        if (y > 0.f)
+                        {
+                            const float temp = log1pexp(-out_data[idx]);
+                            loss += y*scale*temp;
+                            g[idx] = y*scale*(g[idx]-1);
+                        }
+                        else if (y < 0.f)
+                        {
+                            const float temp = -(-out_data[idx]-log1pexp(-out_data[idx]));
+                            loss += -y*scale*temp;
+                            g[idx] = -y*scale*g[idx];
+                        }
+                        else
+                        {
+                            g[idx] = 0.f;
+                        }
+                    }
+                }
+            }
+            return loss;
+#endif
+        }
+
+        friend void serialize(const loss_binary_log_per_pixel_&, std::ostream& out)
+        {
+            serialize("loss_binary_log_per_pixel_", out);
+        }
+
+        friend void deserialize(loss_binary_log_per_pixel_&, std::istream& in)
+        {
+            std::string version;
+            deserialize(version, in);
+            if (version != "loss_binary_log_per_pixel_")
+                throw serialization_error("Unexpected version found while deserializing dlib::loss_binary_log_per_pixel_.");
+        }
+
+        friend std::ostream& operator<<(std::ostream& out, const loss_binary_log_per_pixel_&)
+        {
+            out << "loss_binary_log_per_pixel";
+            return out;
+        }
+
+        friend void to_xml(const loss_binary_log_per_pixel_& /*item*/, std::ostream& out)
+        {
+            out << "<loss_binary_log_per_pixel/>";
+        }
+
+    private:
+
+        static size_t tensor_index(const tensor& t, long sample, long row, long column)
+        {
+            DLIB_ASSERT(t.k() == 1);
+            // See: https://github.com/davisking/dlib/blob/4dfeb7e186dd1bf6ac91273509f687293bd4230a/dlib/dnn/tensor_abstract.h#L38
+            return (sample*t.nr() + row)*t.nc() + column;
+        }
+
+#ifdef DLIB_USE_CUDA
+        cuda::compute_loss_binary_log_per_pixel cuda_compute;
+#endif
+    };
+
+    template <typename SUBNET>
+    using loss_binary_log_per_pixel = add_loss_layer<loss_binary_log_per_pixel_, SUBNET>;
+
 // ----------------------------------------------------------------------------------------

     class loss_multiclass_log_per_pixel_
dlib/dnn/loss_abstract.h

@@ -1283,6 +1283,68 @@ namespace dlib
     template <typename SUBNET>
     using loss_mean_squared_multioutput = add_loss_layer<loss_mean_squared_multioutput_, SUBNET>;

+// ----------------------------------------------------------------------------------------
+
+    class loss_binary_log_per_pixel_
+    {
+        /*!
+            WHAT THIS OBJECT REPRESENTS
+                This object implements the loss layer interface defined above by
+                EXAMPLE_LOSS_LAYER_.  In particular, it implements the log loss, which is
+                appropriate for binary classification problems.  It is basically just like
+                loss_binary_log_ except that it lets you define matrix outputs instead
+                of scalar outputs.  It should be useful, for example, in segmentation
+                where we want to classify each pixel of an image, and also get at least
+                some sort of confidence estimate for each pixel.
+        !*/
+    public:
+
+        typedef matrix<float> training_label_type;
+        typedef matrix<float> output_label_type;
+
+        template <
+            typename SUB_TYPE,
+            typename label_iterator
+            >
+        void to_label (
+            const tensor& input_tensor,
+            const SUB_TYPE& sub,
+            label_iterator iter
+        ) const;
+        /*!
+            This function has the same interface as EXAMPLE_LOSS_LAYER_::to_label() except
+            it has the additional calling requirements that:
+                - sub.get_output().num_samples() == input_tensor.num_samples()
+                - sub.sample_expansion_factor() == 1
+            and the output label is the raw score for each classified object.  If the score
+            is > 0 then the classifier is predicting the +1 class, otherwise it is
+            predicting the -1 class.
+        !*/
+
+        template <
+            typename const_label_iterator,
+            typename SUBNET
+            >
+        double compute_loss_value_and_gradient (
+            const tensor& input_tensor,
+            const_label_iterator truth,
+            SUBNET& sub
+        ) const;
+        /*!
+            This function has the same interface as EXAMPLE_LOSS_LAYER_::compute_loss_value_and_gradient()
+            except it has the additional calling requirements that:
+                - sub.get_output().num_samples() == input_tensor.num_samples()
+                - sub.sample_expansion_factor() == 1
+                - all pixel values pointed to by truth correspond to the desired target values.
+                  Nominally they should be +1 or -1, each indicating the desired class label,
+                  or 0 to indicate that the corresponding pixel is to be ignored.
+        !*/
+    };
+
+    template <typename SUBNET>
+    using loss_binary_log_per_pixel = add_loss_layer<loss_binary_log_per_pixel_, SUBNET>;
+
 // ----------------------------------------------------------------------------------------

     class loss_multiclass_log_per_pixel_
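To make the +1 / -1 / 0 label convention above concrete, here is a small illustrative helper, not part of this commit (the function name and the two input masks are assumptions), that builds a training_label_type image for loss_binary_log_per_pixel from a foreground mask and a "don't care" mask; the resulting matrices are what you would pass as the labels when training such a network.

    #include <dlib/matrix.h>

    // Illustrative only: map a boolean foreground mask plus an "unknown" mask to the
    // per-pixel labels expected by loss_binary_log_per_pixel_ (+1, -1, or 0 = ignore).
    dlib::matrix<float> make_binary_per_pixel_labels(
        const dlib::matrix<unsigned char>& foreground,  // non-zero where the object is
        const dlib::matrix<unsigned char>& unknown      // non-zero where training should skip the pixel
    )
    {
        dlib::matrix<float> labels(foreground.nr(), foreground.nc());
        for (long r = 0; r < labels.nr(); ++r)
        {
            for (long c = 0; c < labels.nc(); ++c)
            {
                if (unknown(r, c))
                    labels(r, c) = 0.f;                           // pixel is ignored by the loss
                else
                    labels(r, c) = foreground(r, c) ? 1.f : -1.f; // desired class label
            }
        }
        return labels;
    }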
dlib/test/dnn.cpp

@@ -2587,6 +2587,202 @@ namespace
         DLIB_TEST_MSG(error_after < error_before, "multi channel error increased after training");
     }

+// ----------------------------------------------------------------------------------------
+
+    void test_loss_binary_log_per_pixel_learned_params_on_trivial_two_pixel_task()
+    {
+        print_spinner();
+
+        ::std::vector<matrix<float>> x({ matrix<float,2,1>({ -1, 1 }) });
+        ::std::vector<matrix<float>> y({ matrix<float,2,1>({ -1, 1 }) });
+
+        using net_type = loss_binary_log_per_pixel<con<1,1,1,1,1,input<matrix<float>>>>;
+        net_type net;
+
+        dnn_trainer<net_type> trainer(net, sgd(0,0));
+        trainer.set_learning_rate(1e7);
+        trainer.set_max_num_epochs(1);
+        trainer.train(x, y);
+
+        const tensor& learned_params = layer<1>(net).layer_details().get_layer_params();
+        const float* learned_params_data = learned_params.host();
+
+        DLIB_TEST(learned_params_data[0] > 1e5);
+        DLIB_TEST(abs(learned_params_data[1]) < 1);
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    void test_loss_binary_log_per_pixel_outputs_on_trivial_task()
+    {
+        print_spinner();
+
+        constexpr int input_height = 7;
+        constexpr int input_width = 5;
+        constexpr int output_height = input_height;
+        constexpr int output_width = input_width;
+        constexpr int num_samples = 7;
+        constexpr int filter_height = 3;
+        constexpr int filter_width = 3;
+
+        ::std::vector<matrix<double>> x(num_samples);
+        ::std::vector<matrix<float>> y(num_samples);
+
+        matrix<double> xtmp(input_height, input_width);
+        matrix<float> ytmp(output_height, output_width);
+
+        ::std::default_random_engine generator(16);
+        ::std::normal_distribution<double> n01(0);
+
+        const auto z = 0.674490; // This should give us a 50/50 split between the classes
+
+        // Generate training data: random inputs x, and the corresponding target outputs y
+        for (int ii = 0; ii < num_samples; ++ii)
+        {
+            for (int jj = 0; jj < input_height; ++jj)
+            {
+                for (int kk = 0; kk < input_width; ++kk)
+                {
+                    xtmp(jj, kk) = n01(generator);
+                    ytmp(jj, kk) = std::abs(xtmp(jj, kk)) > z ? 1.f : -1.f;
+                }
+            }
+            x[ii] = xtmp;
+            y[ii] = ytmp;
+        }
+
+        using net_type = loss_binary_log_per_pixel<con<1,1,1,1,1,relu<con<10,1,1,1,1,input<matrix<double>>>>>>;
+        net_type net;
+
+        dnn_trainer<net_type> trainer(net, sgd(0,0.9));
+        trainer.set_learning_rate(1);
+        trainer.set_max_num_epochs(800);
+        trainer.train(x, y);
+
+        // The learning task is easy, so the net should have no problem
+        // getting all the outputs right.
+        const auto response = net(x);
+        for (int ii = 0; ii < num_samples; ++ii)
+            for (int jj = 0; jj < output_height; ++jj)
+                for (int kk = 0; kk < output_width; ++kk)
+                    DLIB_TEST((response[ii](jj, kk) > 0) == (y[ii](jj, kk) > 0));
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    void test_loss_binary_log_per_pixel_with_noise_and_pixels_to_ignore()
+    {
+        // Test learning when some pixels are to be ignored, etc.
+
+        print_spinner();
+
+        constexpr int input_height = 5;
+        constexpr int input_width = 7;
+        constexpr int output_height = input_height;
+        constexpr int output_width = input_width;
+        const int num_samples = 1000;
+        const double ignore_probability = 0.5;
+        const double noise_probability = 0.05;
+
+        ::std::default_random_engine generator(16);
+        ::std::bernoulli_distribution ignore(ignore_probability);
+        ::std::bernoulli_distribution noise_occurrence(noise_probability);
+        ::std::bernoulli_distribution noisy_label(0.5);
+
+        ::std::vector<matrix<double>> x(num_samples);
+        ::std::vector<matrix<float>> y(num_samples);
+
+        ::std::vector<int> truth_histogram(2);
+
+        matrix<double> xtmp(input_height, input_width);
+        matrix<float> ytmp(output_height, output_width);
+
+        // The function to be learned.
+        const auto ground_truth = [](const matrix<double>& x, int row, int column)
+        {
+            double sum = 0.0;
+            const int first_column = std::max(0, column - 1);
+            const int last_column = std::min(static_cast<int>(x.nc() - 1), column + 1);
+            for (int c = first_column; c <= last_column; ++c)
+            {
+                sum += x(row, c);
+            }
+            DLIB_TEST(sum < 2.0 * (last_column - first_column + 1));
+            return sum > (last_column - first_column + 1);
+        };
+
+        for (int ii = 0; ii < num_samples; ++ii)
+        {
+            for (int jj = 0; jj < input_height; ++jj)
+            {
+                for (int kk = 0; kk < input_width; ++kk)
+                {
+                    // Generate numbers between 0 and 2.
+                    double value = static_cast<double>(ii + jj + kk) / 10.0;
+                    value -= (static_cast<int>(value) / 2) * 2;
+                    DLIB_TEST(value >= 0.0 && value < 2.0);
+                    xtmp(jj, kk) = value;
+                }
+            }
+            x[ii] = xtmp;
+
+            for (int jj = 0; jj < output_height; ++jj)
+            {
+                for (int kk = 0; kk < output_width; ++kk)
+                {
+                    const bool truth = ground_truth(x[ii], jj, kk);
+                    ++truth_histogram[truth];
+                    if (ignore(generator))
+                    {
+                        ytmp(jj, kk) = 0.f;
+                    }
+                    else if (noise_occurrence(generator))
+                    {
+                        ytmp(jj, kk) = noisy_label(generator) ? 1.f : -1.f;
+                    }
+                    else
+                    {
+                        ytmp(jj, kk) = truth ? 1.f : -1.f;
+                    }
+                }
+            }
+
+            y[ii] = ytmp;
+        }
+
+        const int num_total_elements = num_samples * output_height * output_width;
+
+        {
+            // Require a reasonably balanced truth histogram in order to make sure that a trivial classifier is not enough
+            const int required_min_histogram_value = static_cast<int>(::std::ceil(num_total_elements / 2.0 * 0.375));
+            for (auto histogram_value : truth_histogram)
+            {
+                DLIB_TEST_MSG(histogram_value >= required_min_histogram_value,
+                              "Histogram value = " << histogram_value << ", required = " << required_min_histogram_value);
+            }
+        }
+
+        using net_type = loss_binary_log_per_pixel<con<1,1,input_width,1,1,input<matrix<double>>>>;
+        net_type net;
+        sgd defsolver(0, 0.9);
+        dnn_trainer<net_type> trainer(net, defsolver);
+        trainer.set_learning_rate(0.1);
+        trainer.set_min_learning_rate(0.01);
+        trainer.set_mini_batch_size(50);
+        trainer.set_max_num_epochs(170);
+        trainer.train(x, y);
+
+        const ::std::vector<matrix<float>> predictions = net(x);
+
+        int num_correct = 0;
+
+        for (int ii = 0; ii < num_samples; ++ii)
+        {
+            const matrix<float>& prediction = predictions[ii];
+            DLIB_TEST(prediction.nr() == output_height);
+            DLIB_TEST(prediction.nc() == output_width);
+            for (int jj = 0; jj < output_height; ++jj)
+                for (int kk = 0; kk < output_width; ++kk)
+                    if ((prediction(jj, kk) > 0.f) == ground_truth(x[ii], jj, kk))
+                        ++num_correct;
+        }
+
+        // First some sanity checks.
+        const int num_correct_max = num_total_elements;
+        DLIB_TEST(num_correct_max == ::std::accumulate(truth_histogram.begin(), truth_histogram.end(), 0));
+        DLIB_TEST_MSG(num_correct <= num_correct_max,
+                      "Number of correctly classified elements = " << num_correct << ", max = " << num_correct_max);
+
+        // This is the real test, verifying that we have actually learned something.
+        const int num_correct_required = static_cast<int>(::std::ceil(0.9 * num_correct_max));
+        DLIB_TEST_MSG(num_correct >= num_correct_required,
+                      "Number of correctly classified elements = " << num_correct << ", required = " << num_correct_required);
+    }
+
+// ----------------------------------------------------------------------------------------
+
 // ----------------------------------------------------------------------------------------

     void test_loss_multiclass_per_pixel_learned_params_on_trivial_single_pixel_task()

@@ -3429,6 +3625,9 @@ namespace
         test_multioutput_linear_regression();
         test_simple_autoencoder();
         test_loss_mean_squared_per_channel_and_pixel();
+        test_loss_binary_log_per_pixel_learned_params_on_trivial_two_pixel_task();
+        test_loss_binary_log_per_pixel_outputs_on_trivial_task();
+        test_loss_binary_log_per_pixel_with_noise_and_pixels_to_ignore();
         test_loss_multiclass_per_pixel_learned_params_on_trivial_single_pixel_task();
         test_loss_multiclass_per_pixel_activations_on_trivial_single_pixel_task();
         test_loss_multiclass_per_pixel_outputs_on_trivial_task();
examples/dnn_instance_segmentation_ex.cpp

@@ -16,7 +16,7 @@
     ./dnn_instance_segmentation_ex /path/to/VOC2012-or-other-images

     An alternative to steps 2-4 above is to download a pre-trained network
-    from here: http://dlib.net/files/instance_segmentation_voc2012net.dnn
+    from here: http://dlib.net/files/instance_segmentation_voc2012net_v2.dnn

     It would be a good idea to become familiar with dlib's DNN tooling before reading this
     example.  So you should read dnn_introduction_ex.cpp and dnn_introduction2_ex.cpp

@@ -71,26 +71,21 @@ int main(int argc, char** argv) try
     {
         // Load the input image.
         load_image(input_image, file.full_name());

-        // Draw largest objects last
-        const auto sort_instances = [](const std::vector<mmod_rect>& input)
-        {
-            auto output = input;
-            const auto compare_area = [](const mmod_rect& lhs, const mmod_rect& rhs)
-            {
-                return lhs.rect.area() < rhs.rect.area();
-            };
-            std::sort(output.begin(), output.end(), compare_area);
-            return output;
-        };
-
         // Find instances in the input image
-        const auto instances = sort_instances(det_net(input_image));
+        const auto instances = det_net(input_image);

         matrix<rgb_pixel> rgb_label_image;
+        matrix<float> label_image_confidence;
+
         matrix<rgb_pixel> input_chip;

         rgb_label_image.set_size(input_image.nr(), input_image.nc());
         rgb_label_image = rgb_pixel(0, 0, 0);

+        label_image_confidence.set_size(input_image.nr(), input_image.nc());
+        label_image_confidence = 0.0;
+
         bool found_something = false;

         for (const auto& instance : instances)

@@ -131,7 +126,7 @@ int main(int argc, char** argv) try
             rnd.get_random_8bit_number()
         );

-        dlib::matrix<uint16_t> resized_mask(
+        dlib::matrix<float> resized_mask(
             static_cast<int>(chip_details.rect.height()),
             static_cast<int>(chip_details.rect.width())
         );

@@ -142,12 +137,29 @@ int main(int argc, char** argv) try
         {
             for (int c = 0; c < resized_mask.nc(); ++c)
             {
-                if (resized_mask(r, c))
+                const auto new_confidence = resized_mask(r, c);
+                if (new_confidence > 0)
                 {
                     const auto y = chip_details.rect.top() + r;
                     const auto x = chip_details.rect.left() + c;
                     if (y >= 0 && y < rgb_label_image.nr() && x >= 0 && x < rgb_label_image.nc())
-                        rgb_label_image(y, x) = random_color;
+                    {
+                        auto& current_confidence = label_image_confidence(y, x);
+                        if (new_confidence > current_confidence)
+                        {
+                            auto rgb_label = random_color;
+                            const auto baseline_confidence = 5;
+                            if (new_confidence < baseline_confidence)
+                            {
+                                // Scale label intensity if confidence isn't high
+                                rgb_label.red   *= new_confidence / baseline_confidence;
+                                rgb_label.green *= new_confidence / baseline_confidence;
+                                rgb_label.blue  *= new_confidence / baseline_confidence;
+                            }
+                            rgb_label_image(y, x) = rgb_label;
+                            current_confidence = new_confidence;
+                        }
+                    }
                 }
             }
         }
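Restating the new drawing logic above in one line (not part of the diff): instead of sorting instances by area and letting later draws overwrite earlier ones, each output pixel now keeps the highest per-pixel score seen so far, and the instance color is dimmed when that score falls below the baseline of 5:

\[
\text{rgb\_label\_image}(y, x) = \text{random\_color} \cdot \min\!\left(1, \tfrac{s}{5}\right)
\quad \text{whenever} \quad s > \text{label\_image\_confidence}(y, x)
\]

where s = resized_mask(r, c) is the raw loss_binary_log_per_pixel score mapped to pixel (y, x).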
examples/dnn_instance_segmentation_ex.h

@@ -23,7 +23,7 @@
     ./dnn_instance_segmentation_ex /path/to/VOC2012-or-other-images

     An alternative to steps 2-4 above is to download a pre-trained network
-    from here: http://dlib.net/files/instance_segmentation_voc2012net.dnn
+    from here: http://dlib.net/files/instance_segmentation_voc2012net_v2.dnn

     It would be a good idea to become familiar with dlib's DNN tooling before reading this
     example.  So you should read dnn_introduction_ex.cpp and dnn_introduction2_ex.cpp

@@ -159,13 +159,13 @@ template <typename SUBNET> using concat_utag4 = resize_and_concat<utag4,utag4_,S
 // ----------------------------------------------------------------------------------------

-static const char* instance_segmentation_net_filename = "instance_segmentation_voc2012net.dnn";
+static const char* instance_segmentation_net_filename = "instance_segmentation_voc2012net_v2.dnn";

 // ----------------------------------------------------------------------------------------

 // training network type
-using seg_bnet_type = dlib::loss_multiclass_log_per_pixel<
-                            dlib::cont<2,1,1,1,1,
+using seg_bnet_type = dlib::loss_binary_log_per_pixel<
+                            dlib::cont<1,1,1,1,1,
                             dlib::relu<dlib::bn_con<dlib::cont<16,7,7,2,2,
                             concat_utag1<level1t<
                             concat_utag2<level2t<

@@ -180,8 +180,8 @@ using seg_bnet_type = dlib::loss_multiclass_log_per_pixel<
                             >>>>>>>>>>>>>>>>>>>>>>>>>;

 // testing network type (replaced batch normalization with fixed affine transforms)
-using seg_anet_type = dlib::loss_multiclass_log_per_pixel<
-                            dlib::cont<2,1,1,1,1,
+using seg_anet_type = dlib::loss_binary_log_per_pixel<
+                            dlib::cont<1,1,1,1,1,
                             dlib::relu<dlib::affine<dlib::cont<16,7,7,2,2,
                             concat_utag1<alevel1t<
                             concat_utag2<alevel2t<
examples/dnn_instance_segmentation_train_ex.cpp

@@ -49,7 +49,7 @@ struct det_training_sample
 struct seg_training_sample
 {
     matrix<rgb_pixel> input_image;
-    matrix<uint16_t> label_image; // The ground-truth label of each pixel.
+    matrix<float> label_image; // The ground-truth label of each pixel. (+1 or -1)
 };

 // ----------------------------------------------------------------------------------------

@@ -321,12 +321,12 @@ det_bnet_type train_detection_network(
 // ----------------------------------------------------------------------------------------

-matrix<uint16_t> keep_only_current_instance(const matrix<rgb_pixel>& rgb_label_image, const rgb_pixel rgb_label)
+matrix<float> keep_only_current_instance(const matrix<rgb_pixel>& rgb_label_image, const rgb_pixel rgb_label)
 {
     const auto nr = rgb_label_image.nr();
     const auto nc = rgb_label_image.nc();

-    matrix<uint16_t> result(nr, nc);
+    matrix<float> result(nr, nc);

     for (long r = 0; r < nr; ++r)
     {

@@ -334,11 +334,11 @@ matrix<uint16_t> keep_only_current_instance(const matrix<rgb_pixel>& rgb_label_i
         {
             const auto& index = rgb_label_image(r, c);
             if (index == rgb_label)
-                result(r, c) = 1;
+                result(r, c) = +1;
             else if (index == dlib::rgb_pixel(224, 224, 192))
-                result(r, c) = dlib::loss_multiclass_log_per_pixel_::label_to_ignore;
+                result(r, c) = 0;
             else
-                result(r, c) = 0;
+                result(r, c) = -1;
         }
     }

@@ -373,7 +373,7 @@ seg_bnet_type train_segmentation_network(
     cout << seg_trainer << endl;

     std::vector<matrix<rgb_pixel>> samples;
-    std::vector<matrix<uint16_t>> labels;
+    std::vector<matrix<float>> labels;

     // Start a bunch of threads that read images from disk and pull out random crops.  It's
     // important to be sure to feed the GPU fast enough to keep it busy.  Using multiple