Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
dlib
Commits
4d121e7e
Commit
4d121e7e
authored
Dec 17, 2016
by
Davis King
Browse files
Added another metric learning example
parent
29047a22
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
262 additions
and
5 deletions
+262
-5
examples/CMakeLists.txt
examples/CMakeLists.txt
+1
-0
examples/dnn_metric_learning_ex.cpp
examples/dnn_metric_learning_ex.cpp
+10
-5
examples/dnn_metric_learning_on_images_ex.cpp
examples/dnn_metric_learning_on_images_ex.cpp
+251
-0
No files found.
examples/CMakeLists.txt
View file @
4d121e7e
...
...
@@ -47,6 +47,7 @@ if (NOT USING_OLD_VISUAL_STUDIO_COMPILER)
add_example
(
dnn_introduction2_ex
)
add_example
(
dnn_inception_ex
)
add_example
(
dnn_metric_learning_ex
)
add_example
(
dnn_metric_learning_on_images_ex
)
add_gui_example
(
dnn_imagenet_ex
)
add_gui_example
(
dnn_mmod_ex
)
add_gui_example
(
dnn_mmod_face_detection_ex
)
...
...
examples/dnn_metric_learning_ex.cpp
View file @
4d121e7e
...
...
@@ -47,12 +47,14 @@ int main() try
trainer
.
train
(
samples
,
labels
);
auto
embedded
=
net
(
samples
);
// Run all the images through the network to get their vector embeddings.
std
::
vector
<
matrix
<
float
,
0
,
1
>>
embedded
=
net
(
images
);
for
(
size_t
i
=
0
;
i
<
embedded
.
size
();
++
i
)
cout
<<
"label: "
<<
labels
[
i
]
<<
"
\t
"
<<
trans
(
embedded
[
i
]);
// now count how many pairs are correctly classified.
// Now, check if the embedding puts things with the same labels near each other and
// things with different labels far apart.
int
num_right
=
0
;
int
num_wrong
=
0
;
for
(
size_t
i
=
0
;
i
<
embedded
.
size
();
++
i
)
...
...
@@ -61,6 +63,9 @@ int main() try
{
if
(
labels
[
i
]
==
labels
[
j
])
{
// The loss_metric layer will cause things with the same label to be less
// than net.loss_details().get_distance_threshold() distance from each
// other. So we can use that distance value as our testing threshold.
if
(
length
(
embedded
[
i
]
-
embedded
[
j
])
<
net
.
loss_details
().
get_distance_threshold
())
++
num_right
;
else
...
...
@@ -68,10 +73,10 @@ int main() try
}
else
{
if
(
length
(
embedded
[
i
]
-
embedded
[
j
])
<
net
.
loss_details
().
get_distance_threshold
())
++
num_wrong
;
else
if
(
length
(
embedded
[
i
]
-
embedded
[
j
])
>=
net
.
loss_details
().
get_distance_threshold
())
++
num_right
;
else
++
num_wrong
;
}
}
}
...
...
examples/dnn_metric_learning_on_images_ex.cpp
0 → 100644
View file @
4d121e7e
#include <dlib/dnn.h>
#include <dlib/image_io.h>
#include <dlib/misc_api.h>
// Example-code convention: both namespaces are pulled into file scope to keep
// the sample short.  Avoid file-scope "using namespace" in headers/production.
using namespace dlib;
using namespace std;
std
::
vector
<
std
::
vector
<
string
>>
load_objects_list
(
const
string
&
dir
)
{
std
::
vector
<
std
::
vector
<
string
>>
objects
;
for
(
auto
subdir
:
directory
(
dir
).
get_dirs
())
{
std
::
vector
<
string
>
imgs
;
for
(
auto
img
:
subdir
.
get_files
())
imgs
.
push_back
(
img
);
objects
.
push_back
(
imgs
);
}
return
objects
;
}
void
load_mini_batch
(
const
size_t
num_ids
,
const
size_t
samples_per_id
,
dlib
::
rand
&
rnd
,
const
std
::
vector
<
std
::
vector
<
string
>>&
objs
,
std
::
vector
<
matrix
<
rgb_pixel
>>&
images
,
std
::
vector
<
unsigned
long
>&
labels
)
{
images
.
clear
();
labels
.
clear
();
matrix
<
rgb_pixel
>
image
;
for
(
size_t
i
=
0
;
i
<
num_ids
;
++
i
)
{
const
size_t
id
=
rnd
.
get_random_32bit_number
()
%
objs
.
size
();
for
(
size_t
j
=
0
;
j
<
samples_per_id
;
++
j
)
{
const
auto
&
obj
=
objs
[
id
][
rnd
.
get_random_32bit_number
()
%
objs
[
id
].
size
()];
load_image
(
image
,
obj
);
images
.
push_back
(
std
::
move
(
image
));
labels
.
push_back
(
id
);
}
}
// You might want to do some data augmentation at this point. Here we so some simple
// color augmentation.
for
(
auto
&&
crop
:
images
)
disturb_colors
(
crop
,
rnd
);
// All the images going into a mini-batch have to be the same size. And really, all
// the images in your entire training dataset should be the same size for what we are
// doing to make the most sense.
DLIB_CASSERT
(
images
.
size
()
>
0
);
for
(
auto
&&
img
:
images
)
{
DLIB_CASSERT
(
img
.
nr
()
==
images
[
0
].
nr
()
&&
img
.
nc
()
==
images
[
0
].
nc
(),
"All the images in a single mini-batch must be the same size."
);
}
}
// ----------------------------------------------------------------------------------------

// A residual unit: runs SUBNET through <block> at stride 1 and adds the result
// back onto the block's input (tag1 marks the input, add_prev1 does the sum).
template <template <int,template<typename>class,int,typename> class block, int N, template<typename>class BN, typename SUBNET>
using residual = add_prev1<block<N,BN,1,tag1<SUBNET>>>;

// A downsampling residual unit: the block runs at stride 2 while the skip
// connection is 2x2 average-pooled, so both branches have matching output
// dimensions before add_prev2 sums them.
template <template <int,template<typename>class,int,typename> class block, int N, template<typename>class BN, typename SUBNET>
using residual_down = add_prev2<avg_pool<2,2,2,2,skip1<tag2<block<N,BN,2,tag1<SUBNET>>>>>>;

// The basic building block used by the residual units above: two 3x3
// convolutions with N filters, each followed by the BN layer type.  The
// convolution nearest the input uses <stride>; the second always uses 1.
template <int N, template <typename> class BN, int stride, typename SUBNET>
using block  = BN<con<N,3,3,1,1,relu<BN<con<N,3,3,stride,stride,SUBNET>>>>>;

// Residual units instantiated with bn_con (batch normalization, used in the
// training network) ...
template <int N, typename SUBNET> using res       = relu<residual<block,N,bn_con,SUBNET>>;
// ... and with affine layers in place of batch norm (used in the testing
// network, anet_type below).
template <int N, typename SUBNET> using ares      = relu<residual<block,N,affine,SUBNET>>;
template <int N, typename SUBNET> using res_down  = relu<residual_down<block,N,bn_con,SUBNET>>;
template <int N, typename SUBNET> using ares_down = relu<residual_down<block,N,affine,SUBNET>>;
// ----------------------------------------------------------------------------------------

// The network levels, deepest first: each level is a run of residual units at
// one filter count, entered through a downsampling unit (res_down).  The
// "a" variants are identical except they use affine layers instead of batch
// normalization, for use at test time.
template <typename SUBNET> using level1 = res<512,res<512,res_down<512,SUBNET>>>;
template <typename SUBNET> using level2 = res<256,res<256,res<256,res<256,res<256,res_down<256,SUBNET>>>>>>;
template <typename SUBNET> using level3 = res<128,res<128,res<128,res_down<128,SUBNET>>>>;
template <typename SUBNET> using level4 = res<64,res<64,res<64,SUBNET>>>;

template <typename SUBNET> using alevel1 = ares<512,ares<512,ares_down<512,SUBNET>>>;
template <typename SUBNET> using alevel2 = ares<256,ares<256,ares<256,ares<256,ares<256,ares_down<256,SUBNET>>>>>>;
template <typename SUBNET> using alevel3 = ares<128,ares<128,ares<128,ares_down<128,SUBNET>>>>;
template <typename SUBNET> using alevel4 = ares<64,ares<64,ares<64,SUBNET>>>;

// Global average pooling collapses the spatial dimensions before the final
// fully connected layer.
template <typename SUBNET> using final_pooling  = avg_pool_everything<SUBNET>;
template <typename SUBNET> using afinal_pooling = avg_pool_everything<SUBNET>;

// training network type: loss_metric on top of a 128-dimensional embedding
// (fc_no_bias<128>) produced by the ResNet-style stack defined above.  The
// input stem is a 7x7 stride-2 convolution followed by 3x3 stride-2 max
// pooling.
using net_type = loss_metric<fc_no_bias<128,final_pooling<
                            level1<
                            level2<
                            level3<
                            level4<
                            max_pool<3,3,2,2,relu<bn_con<con<64,7,7,2,2,
                            input_rgb_image
                            >>>>>>>>>>>;

// testing network type (replaced batch normalization with fixed affine transforms)
using anet_type = loss_metric<fc_no_bias<128,afinal_pooling<
                            alevel1<
                            alevel2<
                            alevel3<
                            alevel4<
                            max_pool<3,3,2,2,relu<affine<con<64,7,7,2,2,
                            input_rgb_image
                            >>>>>>>>>>>;
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------

// Trains the metric-learning network on a folder of image sub-folders (one
// sub-folder per identity), saves the trained network to disk, and then
// reports how many same/different-label pairs from one training mini-batch
// the learned embedding classifies correctly.
int main(int argc, char** argv)
{
    if (argc != 2)
    {
        cout << "Give folder as input. It should contain sub-folders of images and we will " << endl;
        cout << "learn to distinguish these sub-folders with metric learning." << endl;
        return 1;
    }

    auto objs = load_objects_list(argv[1]);

    cout << "objs.size(): "<< objs.size() << endl;

    std::vector<matrix<rgb_pixel>> images;
    std::vector<unsigned long> labels;

    net_type net;

    // SGD with the given weight decay (0.0005) and momentum (0.9).
    dnn_trainer<net_type> trainer(net, sgd(0.0005, 0.9));
    trainer.set_learning_rate(0.1);
    trainer.be_verbose();
    // Checkpoint training state every 5 minutes so a killed run can resume.
    trainer.set_synchronization_file("face_metric_sync", std::chrono::minutes(5));
    // The learning rate is dropped once 300 steps pass without progress.
    trainer.set_iterations_without_progress_threshold(300);

    // It's important to feed the GPU fast enough to keep it occupied.  So here we create a
    // bunch of threads that are responsible for creating mini-batches of training data.
    // Each pipe buffers up to 4 mini-batches.
    dlib::pipe<std::vector<matrix<rgb_pixel>>> qimages(4);
    dlib::pipe<std::vector<unsigned long>> qlabels(4);
    auto data_loader = [&qimages, &qlabels, &objs](time_t seed)
    {
        // Seed each loader thread differently so they produce distinct batches.
        dlib::rand rnd(time(0)+seed);
        std::vector<matrix<rgb_pixel>> images;
        std::vector<unsigned long> labels;
        while(qimages.is_enabled())
        {
            try
            {
                // 15 identities x 15 samples each = 225 images per mini-batch.
                load_mini_batch(15, 15, rnd, objs, images, labels);
                qimages.enqueue(images);
                qlabels.enqueue(labels);
            }
            catch(std::exception& e)
            {
                // Best-effort: log and keep loading (e.g. an unreadable image
                // file should not kill the loader thread).
                cout << "EXCEPTION IN LOADING DATA" << endl;
                cout << e.what() << endl;
            }
        }
    };
    // Five loader threads feed the two pipes concurrently.
    std::thread data_loader1([data_loader](){ data_loader(1); });
    std::thread data_loader2([data_loader](){ data_loader(2); });
    std::thread data_loader3([data_loader](){ data_loader(3); });
    std::thread data_loader4([data_loader](){ data_loader(4); });
    std::thread data_loader5([data_loader](){ data_loader(5); });

    // Here we do the training.  We keep passing mini-batches to the trainer until the
    // learning rate has dropped low enough.
    while(trainer.get_learning_rate() >= 1e-4)
    {
        qimages.dequeue(images);
        qlabels.dequeue(labels);
        trainer.train_one_step(images, labels);
    }

    // wait for training threads to stop
    trainer.get_net();
    cout << "done training" << endl;

    // Save the network to disk
    net.clean();
    // NOTE(review): "renset" looks like a typo for "resnet" in the output
    // filename; left unchanged here since renaming it would change behavior
    // for anything that loads this file.
    serialize("metric_network_renset.dat") << net;

    // stop all the data loading threads and wait for them to terminate.
    qimages.disable();
    qlabels.disable();
    data_loader1.join();
    data_loader2.join();
    data_loader3.join();
    data_loader4.join();
    data_loader5.join();

    // Now, just to show an example of how you would use the network, lets check how well
    // it performs on the training data.
    dlib::rand rnd(time(0));
    load_mini_batch(15, 15, rnd, objs, images, labels);

    // Run all the images through the network to get their vector embeddings.
    std::vector<matrix<float,0,1>> embedded = net(images);

    // Now, check if the embedding puts things with the same labels near each other and
    // things with different labels far apart.
    int num_right = 0;
    int num_wrong = 0;
    for (size_t i = 0; i < embedded.size(); ++i)
    {
        for (size_t j = i+1; j < embedded.size(); ++j)
        {
            if (labels[i] == labels[j])
            {
                // The loss_metric layer will cause things with the same label to be less
                // than net.loss_details().get_distance_threshold() distance from each
                // other.  So we can use that distance value as our testing threshold.
                if (length(embedded[i]-embedded[j]) < net.loss_details().get_distance_threshold())
                    ++num_right;
                else
                    ++num_wrong;
            }
            else
            {
                // Different labels should be at least the threshold apart.
                if (length(embedded[i]-embedded[j]) >= net.loss_details().get_distance_threshold())
                    ++num_right;
                else
                    ++num_wrong;
            }
        }
    }

    cout << "num_right: "<< num_right << endl;
    cout << "num_wrong: "<< num_wrong << endl;
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment