OpenDAS / dlib · Commit ebdc064c

Commit ebdc064c, authored Jul 07, 2017 by Davis King. Merge commit; parents 917dcad3 and 0ed1ce61.

4 changed files with 337 additions and 7 deletions (+337 −7):

- dlib/dnn/loss.h: +145 −3
- dlib/dnn/loss_abstract.h: +88 −0
- dlib/test/dnn.cpp: +100 −4
- setup.py: +4 −0
dlib/dnn/loss.h

@@ -1549,11 +1549,11 @@ namespace dlib
             typename SUB_TYPE,
             typename label_iterator
             >
-        void to_label (
+        static void to_label (
             const tensor& input_tensor,
             const SUB_TYPE& sub,
             label_iterator iter
-        ) const
+        )
         {
             DLIB_CASSERT(sub.sample_expansion_factor() == 1);

@@ -1678,7 +1678,7 @@ namespace dlib
             std::string version;
             deserialize(version, in);
             if (version != "loss_multiclass_log_per_pixel_")
-                throw serialization_error("Unexpected version found while deserializing dlib::loss_multiclass_log_.");
+                throw serialization_error("Unexpected version found while deserializing dlib::loss_multiclass_log_per_pixel_.");
         }

         friend std::ostream& operator<<(std::ostream& out, const loss_multiclass_log_per_pixel_&)

@@ -1704,6 +1704,148 @@ namespace dlib
     template <typename SUBNET>
     using loss_multiclass_log_per_pixel = add_loss_layer<loss_multiclass_log_per_pixel_, SUBNET>;

+// ----------------------------------------------------------------------------------------
+
+    class loss_multiclass_log_per_pixel_weighted_
+    {
+    public:
+
+        struct weighted_label
+        {
+            weighted_label()
+            {}
+
+            weighted_label(uint16_t label, float weight = 1.f)
+                : label(label), weight(weight)
+            {}
+
+            // In semantic segmentation, 65536 classes ought to be enough for anybody.
+            uint16_t label = 0;
+            float weight = 1.f;
+        };
+
+        typedef matrix<weighted_label> training_label_type;
+        typedef matrix<uint16_t> output_label_type;
+
+        template <
+            typename SUB_TYPE,
+            typename label_iterator
+            >
+        static void to_label (
+            const tensor& input_tensor,
+            const SUB_TYPE& sub,
+            label_iterator iter
+        )
+        {
+            loss_multiclass_log_per_pixel_::to_label(input_tensor, sub, iter);
+        }
+
+        template <
+            typename const_label_iterator,
+            typename SUBNET
+            >
+        double compute_loss_value_and_gradient (
+            const tensor& input_tensor,
+            const_label_iterator truth,
+            SUBNET& sub
+        ) const
+        {
+            const tensor& output_tensor = sub.get_output();
+            tensor& grad = sub.get_gradient_input();
+
+            DLIB_CASSERT(sub.sample_expansion_factor() == 1);
+            DLIB_CASSERT(input_tensor.num_samples() != 0);
+            DLIB_CASSERT(input_tensor.num_samples() % sub.sample_expansion_factor() == 0);
+            DLIB_CASSERT(input_tensor.num_samples() == grad.num_samples());
+            DLIB_CASSERT(input_tensor.num_samples() == output_tensor.num_samples());
+            DLIB_CASSERT(output_tensor.k() >= 1);
+            DLIB_CASSERT(output_tensor.k() < std::numeric_limits<uint16_t>::max());
+            DLIB_CASSERT(output_tensor.nr() == grad.nr() &&
+                         output_tensor.nc() == grad.nc() &&
+                         output_tensor.k() == grad.k());
+            for (long idx = 0; idx < output_tensor.num_samples(); ++idx)
+            {
+                const_label_iterator truth_matrix_ptr = (truth + idx);
+                DLIB_CASSERT(truth_matrix_ptr->nr() == output_tensor.nr() &&
+                             truth_matrix_ptr->nc() == output_tensor.nc(),
+                             "truth size = " << truth_matrix_ptr->nr() << " x " << truth_matrix_ptr->nc() << ", "
+                             "output size = " << output_tensor.nr() << " x " << output_tensor.nc());
+            }
+
+            tt::softmax(grad, output_tensor);
+
+            // The loss we output is the weighted average loss over the mini-batch, and also
+            // over each element of the matrix output.
+            const double scale = 1.0 / (output_tensor.num_samples() * output_tensor.nr() * output_tensor.nc());
+            double loss = 0;
+            float* const g = grad.host();
+            for (long i = 0; i < output_tensor.num_samples(); ++i, ++truth)
+            {
+                for (long r = 0; r < output_tensor.nr(); ++r)
+                {
+                    for (long c = 0; c < output_tensor.nc(); ++c)
+                    {
+                        const weighted_label& weighted_label = truth->operator()(r, c);
+                        const uint16_t y = weighted_label.label;
+                        const float weight = weighted_label.weight;
+                        // The network must produce a number of outputs that is equal to the number
+                        // of labels when using this type of loss.
+                        DLIB_CASSERT(static_cast<long>(y) < output_tensor.k() || weight == 0.f,
+                                     "y: " << y << ", output_tensor.k(): " << output_tensor.k());
+                        for (long k = 0; k < output_tensor.k(); ++k)
+                        {
+                            const size_t idx = tensor_index(output_tensor, i, r, c, k);
+                            if (k == y)
+                            {
+                                loss += weight*scale*-std::log(g[idx]);
+                                g[idx] = weight*scale*(g[idx] - 1);
+                            }
+                            else
+                            {
+                                g[idx] = weight*scale*g[idx];
+                            }
+                        }
+                    }
+                }
+            }
+            return loss;
+        }
+
+        friend void serialize(const loss_multiclass_log_per_pixel_weighted_&, std::ostream& out)
+        {
+            serialize("loss_multiclass_log_per_pixel_weighted_", out);
+        }
+
+        friend void deserialize(loss_multiclass_log_per_pixel_weighted_&, std::istream& in)
+        {
+            std::string version;
+            deserialize(version, in);
+            if (version != "loss_multiclass_log_per_pixel_weighted_")
+                throw serialization_error("Unexpected version found while deserializing dlib::loss_multiclass_log_per_pixel_weighted_.");
+        }
+
+        friend std::ostream& operator<<(std::ostream& out, const loss_multiclass_log_per_pixel_weighted_&)
+        {
+            out << "loss_multiclass_log_per_pixel_weighted";
+            return out;
+        }
+
+        friend void to_xml(const loss_multiclass_log_per_pixel_weighted_& /*item*/, std::ostream& out)
+        {
+            out << "<loss_multiclass_log_per_pixel_weighted/>";
+        }
+
+    private:
+        static size_t tensor_index(const tensor& t, long sample, long row, long column, long k)
+        {
+            // See: https://github.com/davisking/dlib/blob/4dfeb7e186dd1bf6ac91273509f687293bd4230a/dlib/dnn/tensor_abstract.h#L38
+            return ((sample * t.k() + k) * t.nr() + row) * t.nc() + column;
+        }
+    };
+
+    template <typename SUBNET>
+    using loss_multiclass_log_per_pixel_weighted = add_loss_layer<loss_multiclass_log_per_pixel_weighted_, SUBNET>;

 // ----------------------------------------------------------------------------------------

 }
...
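Note: the arithmetic in compute_loss_value_and_gradient above is a per-pixel softmax log loss in which both the loss term and the gradient of each pixel are multiplied by that pixel's weight (and by the mini-batch/pixel averaging factor, scale). Below is a minimal, dlib-free sketch of that computation for a single pixel; the logit values, label, and weight are made up for illustration.

#include <cmath>
#include <cstdio>
#include <vector>

// Sketch of the per-pixel weighted log loss above, for one pixel.
// "logits" stands in for one column of sub.get_output(); y and weight come
// from that pixel's weighted_label; scale would be
// 1.0/(num_samples*nr*nc) inside the loss layer.
int main()
{
    const std::vector<double> logits = {0.5, 2.0, -1.0};  // output_tensor.k() == 3 classes
    const unsigned short y = 1;                           // ground-truth label
    const double weight = 1.1;                            // per-pixel weight
    const double scale = 1.0;                             // averaging factor

    // Softmax over the class dimension, as tt::softmax produces per pixel.
    std::vector<double> p(logits.size());
    double sum = 0;
    for (size_t k = 0; k < logits.size(); ++k) sum += std::exp(logits[k]);
    for (size_t k = 0; k < logits.size(); ++k) p[k] = std::exp(logits[k]) / sum;

    // Weighted negative log-likelihood and its gradient w.r.t. the logits:
    // g[k] = weight*scale*(p[k] - 1) for k == y, else weight*scale*p[k].
    const double loss = weight * scale * -std::log(p[y]);
    for (size_t k = 0; k < p.size(); ++k)
    {
        const double g = weight * scale * (p[k] - (k == y ? 1.0 : 0.0));
        std::printf("g[%zu] = %+.4f\n", k, g);
    }
    std::printf("loss = %.4f\n", loss);
}

Setting weight to 0 makes both the loss term and the gradient vanish for that pixel, which is exactly the special case the documentation below points out.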
dlib/dnn/loss_abstract.h

@@ -863,6 +863,94 @@ namespace dlib
     template <typename SUBNET>
     using loss_multiclass_log_per_pixel = add_loss_layer<loss_multiclass_log_per_pixel_, SUBNET>;

+// ----------------------------------------------------------------------------------------
+
+    class loss_multiclass_log_per_pixel_weighted_
+    {
+        /*!
+            WHAT THIS OBJECT REPRESENTS
+                This object implements the loss layer interface defined above by
+                EXAMPLE_LOSS_LAYER_.  In particular, it implements the multiclass logistic
+                regression loss (e.g. negative log-likelihood loss), which is appropriate
+                for multiclass classification problems.  It is basically just like
+                loss_multiclass_log_per_pixel_ except that it lets you define per-pixel
+                weights, which may be useful e.g. if you want to emphasize rare classes
+                while training.  (If the classification problem is difficult, a flat weight
+                structure may lead the network to always predict the most common label, in
+                particular if the degree of imbalance is high.  To emphasize a certain
+                class or classes, simply increase the weights of the corresponding pixels,
+                relative to the weights of the other pixels.)
+
+                Note that if you set the weight to 0 whenever a pixel's label is equal to
+                loss_multiclass_log_per_pixel_::label_to_ignore, and to 1 otherwise, then
+                you essentially get loss_multiclass_log_per_pixel_ as a special case.
+        !*/
+
+    public:
+
+        struct weighted_label
+        {
+            /*!
+                WHAT THIS OBJECT REPRESENTS
+                    This object represents the truth label of a single pixel, together with
+                    an associated weight (the higher the weight, the more emphasis the
+                    corresponding pixel is given during the training).
+            !*/
+
+            weighted_label();
+
+            weighted_label(uint16_t label, float weight = 1.f);
+
+            // The ground-truth label. In semantic segmentation, 65536 classes ought to be
+            // enough for anybody.
+            uint16_t label = 0;
+
+            // The weight of the corresponding pixel.
+            float weight = 1.f;
+        };
+
+        typedef matrix<weighted_label> training_label_type;
+        typedef matrix<uint16_t> output_label_type;
+
+        template <
+            typename SUB_TYPE,
+            typename label_iterator
+            >
+        void to_label (
+            const tensor& input_tensor,
+            const SUB_TYPE& sub,
+            label_iterator iter
+        ) const;
+        /*!
+            This function has the same interface as EXAMPLE_LOSS_LAYER_::to_label() except
+            it has the additional calling requirements that:
+                - sub.get_output().num_samples() == input_tensor.num_samples()
+                - sub.sample_expansion_factor() == 1
+            and the output label is the predicted class for each classified element.  The
+            number of possible output classes is sub.get_output().k().
+        !*/
+
+        template <
+            typename const_label_iterator,
+            typename SUBNET
+            >
+        double compute_loss_value_and_gradient (
+            const tensor& input_tensor,
+            const_label_iterator truth,
+            SUBNET& sub
+        ) const;
+        /*!
+            This function has the same interface as
+            EXAMPLE_LOSS_LAYER_::compute_loss_value_and_gradient() except it has the
+            additional calling requirements that:
+                - sub.get_output().num_samples() == input_tensor.num_samples()
+                - sub.sample_expansion_factor() == 1
+                - all labels pointed to by truth are < sub.get_output().k(), or the
+                  corresponding weight is zero.
+        !*/
+    };
+
+    template <typename SUBNET>
+    using loss_multiclass_log_per_pixel_weighted = add_loss_layer<loss_multiclass_log_per_pixel_weighted_, SUBNET>;

 // ----------------------------------------------------------------------------------------

 }
...
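Note: the "special case" remark above can be made concrete. The following usage sketch assumes dlib with this commit applied; the helper make_weighted and its weight values (0 for ignored pixels, 4 for an emphasized class, 1 otherwise) are illustrative choices, not part of the commit.

#include <dlib/dnn.h>
#include <iostream>

using namespace dlib;

// Illustrative helper (not part of this commit): convert an ordinary uint16_t
// label image into a weighted one. Pixels labeled label_to_ignore get weight 0,
// so they contribute nothing to the loss or gradient; class 1 stands in for a
// rare class worth emphasizing.
matrix<loss_multiclass_log_per_pixel_weighted_::weighted_label>
make_weighted(const matrix<uint16_t>& labels)
{
    using weighted_label = loss_multiclass_log_per_pixel_weighted_::weighted_label;
    matrix<weighted_label> out(labels.nr(), labels.nc());
    for (long r = 0; r < labels.nr(); ++r)
    {
        for (long c = 0; c < labels.nc(); ++c)
        {
            const uint16_t label = labels(r, c);
            if (label == loss_multiclass_log_per_pixel_::label_to_ignore)
                out(r, c) = weighted_label(label, 0.f); // ignored pixel
            else if (label == 1)
                out(r, c) = weighted_label(label, 4.f); // emphasized class
            else
                out(r, c) = weighted_label(label, 1.f); // ordinary pixel
        }
    }
    return out;
}

int main()
{
    matrix<uint16_t> labels(2, 2);
    labels = 0, 1,
             1, loss_multiclass_log_per_pixel_::label_to_ignore;
    const auto weighted = make_weighted(labels);
    std::cout << "weight at (1,1): " << weighted(1, 1).weight << std::endl; // prints 0
}

With all weights set to 1 except the zeros at ignored pixels, the weighted loss reduces to loss_multiclass_log_per_pixel_, as the documentation states.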
dlib/test/dnn.cpp

@@ -2331,7 +2331,102 @@ namespace
 // ----------------------------------------------------------------------------------------

-    void test_tensor_resize_bilienar(long samps, long k, long nr, long nc, long onr, long onc)
+    void test_loss_multiclass_per_pixel_weighted()
+    {
+        // Train with pixel-specific weights
+        print_spinner();
+
+        constexpr int input_height = 5;
+        constexpr int input_width = 7;
+        constexpr int output_height = input_height;
+        constexpr int output_width = input_width;
+        const int num_samples = 1000;
+        const int num_classes = 6;
+
+        ::std::default_random_engine generator(16);
+        ::std::uniform_real_distribution<double> u01(0.0, 1.0);
+        ::std::uniform_int_distribution<uint16_t> noisy_label(0, num_classes - 1);
+
+        ::std::vector<matrix<double>> x(num_samples);
+        ::std::vector<matrix<uint16_t>> y(num_samples);
+
+        matrix<double> xtmp(input_height, input_width);
+        matrix<uint16_t> ytmp(output_height, output_width);
+
+        // Generate input data
+        for (int ii = 0; ii < num_samples; ++ii)
+        {
+            for (int jj = 0; jj < input_height; ++jj)
+            {
+                for (int kk = 0; kk < input_width; ++kk)
+                {
+                    xtmp(jj, kk) = u01(generator);
+                    ytmp(jj, kk) = noisy_label(generator);
+                }
+            }
+            x[ii] = xtmp;
+            y[ii] = ytmp;
+        }
+
+        using net_type = loss_multiclass_log_per_pixel_weighted<con<num_classes,1,1,1,1,input<matrix<double>>>>;
+        using weighted_label = loss_multiclass_log_per_pixel_weighted_::weighted_label;
+        ::std::vector<matrix<weighted_label>> y_weighted(num_samples);
+
+        for (int weighted_class = 0; weighted_class < num_classes; ++weighted_class)
+        {
+            print_spinner();
+
+            // Assign weights
+            for (int ii = 0; ii < num_samples; ++ii)
+            {
+                if (weighted_class == 0)
+                {
+                    y_weighted[ii].set_size(input_height, input_width);
+                }
+                for (int jj = 0; jj < input_height; ++jj)
+                {
+                    for (int kk = 0; kk < input_width; ++kk)
+                    {
+                        const uint16_t label = y[ii](jj, kk);
+                        const float weight = label == weighted_class ? 1.1f : 0.9f;
+                        y_weighted[ii](jj, kk) = weighted_label(label, weight);
+                    }
+                }
+            }
+
+            net_type net;
+            sgd defsolver(0, 0.9);
+            dnn_trainer<net_type> trainer(net, defsolver);
+            trainer.set_learning_rate(0.1);
+            trainer.set_min_learning_rate(0.01);
+            trainer.set_mini_batch_size(10);
+            trainer.set_max_num_epochs(10);
+            trainer.train(x, y_weighted);
+
+            const ::std::vector<matrix<uint16_t>> predictions = net(x);
+
+            int num_weighted_class = 0;
+            int num_not_weighted_class = 0;
+
+            for (int ii = 0; ii < num_samples; ++ii)
+            {
+                const matrix<uint16_t>& prediction = predictions[ii];
+                DLIB_TEST(prediction.nr() == output_height);
+                DLIB_TEST(prediction.nc() == output_width);
+                for (int jj = 0; jj < output_height; ++jj)
+                    for (int kk = 0; kk < output_width; ++kk)
+                        if (prediction(jj, kk) == weighted_class)
+                            ++num_weighted_class;
+                        else
+                            ++num_not_weighted_class;
+            }
+
+            DLIB_TEST_MSG(num_weighted_class > num_not_weighted_class,
+                          "The weighted class (" << weighted_class << ") does not dominate: "
+                          << num_weighted_class << " <= " << num_not_weighted_class);
+        }
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    void test_tensor_resize_bilinear(long samps, long k, long nr, long nc, long onr, long onc)
     {
         resizable_tensor img(samps, k, nr, nc);
         resizable_tensor out(samps, k, onr, onc);

@@ -2426,9 +2521,9 @@ namespace
         compare_adam();
         test_copy_tensor_gpu();
 #endif
-        test_tensor_resize_bilienar(2, 3, 6, 6, 11, 11);
-        test_tensor_resize_bilienar(2, 3, 6, 6, 3, 4);
-        test_tensor_resize_bilienar(2, 3, 5, 6, 12, 21);
+        test_tensor_resize_bilinear(2, 3, 6, 6, 11, 11);
+        test_tensor_resize_bilinear(2, 3, 6, 6, 3, 4);
+        test_tensor_resize_bilinear(2, 3, 5, 6, 12, 21);
         test_max_pool(1, 1, 2, 3, 0, 0);
         test_max_pool(3, 3, 1, 1, 0, 0);
         test_max_pool(3, 3, 2, 2, 0, 0);

@@ -2469,6 +2564,7 @@ namespace
         test_loss_multiclass_per_pixel_activations_on_trivial_single_pixel_task();
         test_loss_multiclass_per_pixel_outputs_on_trivial_task();
         test_loss_multiclass_per_pixel_with_noise_and_pixels_to_ignore();
+        test_loss_multiclass_per_pixel_weighted();
     }

     void perform_test()
...
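Note: the final assertion (num_weighted_class > num_not_weighted_class) rests on a small piece of arithmetic. The labels are drawn uniformly at random, independently of the input, so the best a 1x1 convolution can do is learn an essentially constant class distribution p; its expected per-pixel loss is -(1/num_classes) * sum_y w_y * log(p_y), which is minimized at p_y = w_y / sum(w). That closed form is a standard constrained-minimization result stated here as background, not something taken from the commit. With weights 1.1 vs 0.9 the weighted class therefore carries the largest probability and wins every argmax. A standalone sketch of that calculation:

#include <algorithm>
#include <cstdio>
#include <vector>

// The test weights one of 6 uniformly distributed classes 1.1f and the rest
// 0.9f. The loss-minimizing constant prediction is p_y = w_y / sum(w), so the
// weighted class gets the largest probability and dominates the predictions.
int main()
{
    std::vector<double> w(6, 0.9);
    w[2] = 1.1;  // the currently weighted class (index arbitrary)

    double total = 0;
    for (double v : w) total += v;

    std::vector<double> p(w.size());
    for (size_t y = 0; y < w.size(); ++y) p[y] = w[y] / total;

    const size_t argmax =
        static_cast<size_t>(std::max_element(p.begin(), p.end()) - p.begin());
    for (size_t y = 0; y < p.size(); ++y)
        std::printf("p[%zu] = %.4f\n", y, p[y]);
    std::printf("argmax = %zu (the weighted class)\n", argmax);
}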
setup.py

@@ -526,7 +526,11 @@ class build(_build):
             # this checks the sysconfig and will correctly pick up a brewed python lib
             # e.g. in /usr/local/Cellar
             py_ver = get_python_version()
+            # check: in some virtual environments the libpython has the form "libpython_#m.dylib
             py_lib = os.path.join(get_config_var('LIBDIR'), 'libpython' + py_ver + '.dylib')
+            if not os.path.isfile(py_lib):
+                py_lib = os.path.join(get_config_var('LIBDIR'), 'libpython' + py_ver + 'm.dylib')
             cmake_extra_arch += ['-DPYTHON_LIBRARY={lib}'.format(lib=py_lib)]
         if sys.platform == "win32":
...