Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
dlib
Commits
7b006f37
"docs/git@developer.sourcefind.cn:renzhc/diffusers_dcu.git" did not exist on "f55190b275ac22d494eacab6981bd52f3d1faffc"
Commit
7b006f37
authored
Sep 10, 2018
by
Davis King
Browse files
Added an option to do bounding box regression to the loss_mmod layer.
parent
16c96bc5
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
148 additions
and
12 deletions
+148
-12
dlib/dnn/loss.h
dlib/dnn/loss.h
+133
-12
dlib/dnn/loss_abstract.h
dlib/dnn/loss_abstract.h
+15
-0
No files found.
dlib/dnn/loss.h
View file @
7b006f37
...
@@ -702,6 +702,8 @@ namespace dlib
...
@@ -702,6 +702,8 @@ namespace dlib
double
truth_match_iou_threshold
=
0.5
;
double
truth_match_iou_threshold
=
0.5
;
test_box_overlap
overlaps_nms
=
test_box_overlap
(
0.4
);
test_box_overlap
overlaps_nms
=
test_box_overlap
(
0.4
);
test_box_overlap
overlaps_ignore
;
test_box_overlap
overlaps_ignore
;
bool
use_bounding_box_regression
=
false
;
double
bbr_lambda
=
100
;
use_image_pyramid
assume_image_pyramid
=
use_image_pyramid
::
yes
;
use_image_pyramid
assume_image_pyramid
=
use_image_pyramid
::
yes
;
...
@@ -937,7 +939,7 @@ namespace dlib
...
@@ -937,7 +939,7 @@ namespace dlib
inline
void
serialize
(
const
mmod_options
&
item
,
std
::
ostream
&
out
)
inline
void
serialize
(
const
mmod_options
&
item
,
std
::
ostream
&
out
)
{
{
int
version
=
3
;
int
version
=
4
;
serialize
(
version
,
out
);
serialize
(
version
,
out
);
serialize
(
item
.
detector_windows
,
out
);
serialize
(
item
.
detector_windows
,
out
);
...
@@ -947,13 +949,15 @@ namespace dlib
...
@@ -947,13 +949,15 @@ namespace dlib
serialize
(
item
.
overlaps_nms
,
out
);
serialize
(
item
.
overlaps_nms
,
out
);
serialize
(
item
.
overlaps_ignore
,
out
);
serialize
(
item
.
overlaps_ignore
,
out
);
serialize
(
static_cast
<
uint8_t
>
(
item
.
assume_image_pyramid
),
out
);
serialize
(
static_cast
<
uint8_t
>
(
item
.
assume_image_pyramid
),
out
);
serialize
(
item
.
use_bounding_box_regression
,
out
);
serialize
(
item
.
bbr_lambda
,
out
);
}
}
inline
void
deserialize
(
mmod_options
&
item
,
std
::
istream
&
in
)
inline
void
deserialize
(
mmod_options
&
item
,
std
::
istream
&
in
)
{
{
int
version
=
0
;
int
version
=
0
;
deserialize
(
version
,
in
);
deserialize
(
version
,
in
);
if
(
version
!=
3
&&
version
!=
2
&&
version
!
=
1
)
if
(
!
(
1
<=
version
&&
version
<
=
4
)
)
throw
serialization_error
(
"Unexpected version found while deserializing dlib::mmod_options"
);
throw
serialization_error
(
"Unexpected version found while deserializing dlib::mmod_options"
);
if
(
version
==
1
)
if
(
version
==
1
)
{
{
...
@@ -979,6 +983,13 @@ namespace dlib
...
@@ -979,6 +983,13 @@ namespace dlib
deserialize
(
assume_image_pyramid
,
in
);
deserialize
(
assume_image_pyramid
,
in
);
item
.
assume_image_pyramid
=
static_cast
<
use_image_pyramid
>
(
assume_image_pyramid
);
item
.
assume_image_pyramid
=
static_cast
<
use_image_pyramid
>
(
assume_image_pyramid
);
}
}
item
.
use_bounding_box_regression
=
mmod_options
().
use_bounding_box_regression
;
// use default value since this wasn't provided
item
.
bbr_lambda
=
mmod_options
().
bbr_lambda
;
// use default value since this wasn't provided
if
(
version
>=
4
)
{
deserialize
(
item
.
use_bounding_box_regression
,
in
);
deserialize
(
item
.
bbr_lambda
,
in
);
}
}
}
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
...
@@ -991,20 +1002,31 @@ namespace dlib
...
@@ -991,20 +1002,31 @@ namespace dlib
intermediate_detection
(
intermediate_detection
(
rectangle
rect_
rectangle
rect_
)
:
rect
(
rect_
)
{}
)
:
rect
(
rect_
)
,
rect_bbr
(
rect_
)
{}
intermediate_detection
(
intermediate_detection
(
rectangle
rect_
,
rectangle
rect_
,
double
detection_confidence_
,
double
detection_confidence_
,
size_t
tensor_offset_
,
size_t
tensor_offset_
,
long
channel
long
channel
)
:
rect
(
rect_
),
detection_confidence
(
detection_confidence_
),
tensor_offset
(
tensor_offset_
),
tensor_channel
(
channel
)
{}
)
:
rect
(
rect_
),
rect_bbr
(
rect_
),
detection_confidence
(
detection_confidence_
),
tensor_offset
(
tensor_offset_
),
tensor_channel
(
channel
)
{}
// rect is the rectangle you get without any bounding box regression. So it's
// the basic sliding window box (aka, the "anchor box").
rectangle
rect
;
rectangle
rect
;
double
detection_confidence
=
0
;
double
detection_confidence
=
0
;
size_t
tensor_offset
=
0
;
size_t
tensor_offset
=
0
;
long
tensor_channel
=
0
;
long
tensor_channel
=
0
;
// rect_bbr = rect + bounding box regression. So more accurate. Or if bbr is off then
// this is just rect. The important thing about rect_bbr is that its the
// rectangle we use for doing NMS.
drectangle
rect_bbr
;
size_t
tensor_offset_dx
=
0
;
size_t
tensor_offset_dy
=
0
;
size_t
tensor_offset_dw
=
0
;
size_t
tensor_offset_dh
=
0
;
bool
operator
<
(
const
intermediate_detection
&
item
)
const
{
return
detection_confidence
<
item
.
detection_confidence
;
}
bool
operator
<
(
const
intermediate_detection
&
item
)
const
{
return
detection_confidence
<
item
.
detection_confidence
;
}
};
};
...
@@ -1032,7 +1054,14 @@ namespace dlib
...
@@ -1032,7 +1054,14 @@ namespace dlib
)
const
)
const
{
{
const
tensor
&
output_tensor
=
sub
.
get_output
();
const
tensor
&
output_tensor
=
sub
.
get_output
();
DLIB_CASSERT
(
output_tensor
.
k
()
==
(
long
)
options
.
detector_windows
.
size
());
if
(
options
.
use_bounding_box_regression
)
{
DLIB_CASSERT
(
output_tensor
.
k
()
==
(
long
)
options
.
detector_windows
.
size
()
*
5
);
}
else
{
DLIB_CASSERT
(
output_tensor
.
k
()
==
(
long
)
options
.
detector_windows
.
size
());
}
DLIB_CASSERT
(
input_tensor
.
num_samples
()
==
output_tensor
.
num_samples
());
DLIB_CASSERT
(
input_tensor
.
num_samples
()
==
output_tensor
.
num_samples
());
DLIB_CASSERT
(
sub
.
sample_expansion_factor
()
==
1
,
sub
.
sample_expansion_factor
());
DLIB_CASSERT
(
sub
.
sample_expansion_factor
()
==
1
,
sub
.
sample_expansion_factor
());
...
@@ -1046,10 +1075,10 @@ namespace dlib
...
@@ -1046,10 +1075,10 @@ namespace dlib
final_dets
.
clear
();
final_dets
.
clear
();
for
(
unsigned
long
i
=
0
;
i
<
dets_accum
.
size
();
++
i
)
for
(
unsigned
long
i
=
0
;
i
<
dets_accum
.
size
();
++
i
)
{
{
if
(
overlaps_any_box_nms
(
final_dets
,
dets_accum
[
i
].
rect
))
if
(
overlaps_any_box_nms
(
final_dets
,
dets_accum
[
i
].
rect
_bbr
))
continue
;
continue
;
final_dets
.
push_back
(
mmod_rect
(
dets_accum
[
i
].
rect
,
final_dets
.
push_back
(
mmod_rect
(
dets_accum
[
i
].
rect
_bbr
,
dets_accum
[
i
].
detection_confidence
,
dets_accum
[
i
].
detection_confidence
,
options
.
detector_windows
[
dets_accum
[
i
].
tensor_channel
].
label
));
options
.
detector_windows
[
dets_accum
[
i
].
tensor_channel
].
label
));
}
}
...
@@ -1075,13 +1104,19 @@ namespace dlib
...
@@ -1075,13 +1104,19 @@ namespace dlib
DLIB_CASSERT
(
sub
.
sample_expansion_factor
()
==
1
);
DLIB_CASSERT
(
sub
.
sample_expansion_factor
()
==
1
);
DLIB_CASSERT
(
input_tensor
.
num_samples
()
==
grad
.
num_samples
());
DLIB_CASSERT
(
input_tensor
.
num_samples
()
==
grad
.
num_samples
());
DLIB_CASSERT
(
input_tensor
.
num_samples
()
==
output_tensor
.
num_samples
());
DLIB_CASSERT
(
input_tensor
.
num_samples
()
==
output_tensor
.
num_samples
());
DLIB_CASSERT
(
output_tensor
.
k
()
==
(
long
)
options
.
detector_windows
.
size
());
if
(
options
.
use_bounding_box_regression
)
{
DLIB_CASSERT
(
output_tensor
.
k
()
==
(
long
)
options
.
detector_windows
.
size
()
*
5
);
}
else
{
DLIB_CASSERT
(
output_tensor
.
k
()
==
(
long
)
options
.
detector_windows
.
size
());
}
double
det_thresh_speed_adjust
=
0
;
double
det_thresh_speed_adjust
=
0
;
// we will scale the loss so that it doesn't get really huge
// we will scale the loss so that it doesn't get really huge
const
double
scale
=
1.0
/
output_tensor
.
size
();
const
double
scale
=
1.0
/
(
output_tensor
.
nr
()
*
output_tensor
.
nc
()
*
output_tensor
.
num_samples
()
*
options
.
detector_windows
.
size
()
)
;
double
loss
=
0
;
double
loss
=
0
;
float
*
g
=
grad
.
host_write_only
();
float
*
g
=
grad
.
host_write_only
();
...
@@ -1230,6 +1265,59 @@ namespace dlib
...
@@ -1230,6 +1265,59 @@ namespace dlib
hit_truth_table
[
hittruth
.
second
]
=
true
;
hit_truth_table
[
hittruth
.
second
]
=
true
;
final_dets
.
push_back
(
dets
[
i
]);
final_dets
.
push_back
(
dets
[
i
]);
loss
-=
options
.
loss_per_missed_target
;
loss
-=
options
.
loss_per_missed_target
;
// Now account for BBR loss and gradient if appropriate.
if
(
options
.
use_bounding_box_regression
)
{
double
dx
=
out_data
[
dets
[
i
].
tensor_offset_dx
];
double
dy
=
out_data
[
dets
[
i
].
tensor_offset_dy
];
double
dw
=
out_data
[
dets
[
i
].
tensor_offset_dw
];
double
dh
=
out_data
[
dets
[
i
].
tensor_offset_dh
];
dpoint
p
=
dcenter
(
dets
[
i
].
rect_bbr
);
double
w
=
dets
[
i
].
rect_bbr
.
width
()
-
1
;
double
h
=
dets
[
i
].
rect_bbr
.
height
()
-
1
;
drectangle
truth_box
=
(
*
truth
)[
hittruth
.
second
].
rect
;
dpoint
p_truth
=
dcenter
(
truth_box
);
DLIB_CASSERT
(
w
>
0
);
DLIB_CASSERT
(
h
>
0
);
double
target_dx
=
(
p_truth
.
x
()
-
p
.
x
())
/
w
;
double
target_dy
=
(
p_truth
.
y
()
-
p
.
y
())
/
h
;
double
target_dw
=
std
::
log
((
truth_box
.
width
()
-
1
)
/
w
);
double
target_dh
=
std
::
log
((
truth_box
.
height
()
-
1
)
/
h
);
// compute smoothed L1 loss on BBR outputs. This loss
// is just the MSE loss when the loss is small and L1
// when large.
dx
=
dx
-
target_dx
;
dy
=
dy
-
target_dy
;
dw
=
dw
-
target_dw
;
dh
=
dh
-
target_dh
;
// use smoothed L1
double
ldx
=
std
::
abs
(
dx
)
<
1
?
0.5
*
dx
*
dx
:
std
::
abs
(
dx
)
-
0.5
;
double
ldy
=
std
::
abs
(
dy
)
<
1
?
0.5
*
dy
*
dy
:
std
::
abs
(
dy
)
-
0.5
;
double
ldw
=
std
::
abs
(
dw
)
<
1
?
0.5
*
dw
*
dw
:
std
::
abs
(
dw
)
-
0.5
;
double
ldh
=
std
::
abs
(
dh
)
<
1
?
0.5
*
dh
*
dh
:
std
::
abs
(
dh
)
-
0.5
;
loss
+=
options
.
bbr_lambda
*
(
ldx
+
ldy
+
ldw
+
ldh
);
// now compute the derivatives of the smoothed L1 loss
ldx
=
put_in_range
(
-
1
,
1
,
dx
);
ldy
=
put_in_range
(
-
1
,
1
,
dy
);
ldw
=
put_in_range
(
-
1
,
1
,
dw
);
ldh
=
put_in_range
(
-
1
,
1
,
dh
);
// also smoothed L1 gradient goes to gradient output
g
[
dets
[
i
].
tensor_offset_dx
]
+=
scale
*
options
.
bbr_lambda
*
ldx
;
g
[
dets
[
i
].
tensor_offset_dy
]
+=
scale
*
options
.
bbr_lambda
*
ldy
;
g
[
dets
[
i
].
tensor_offset_dw
]
+=
scale
*
options
.
bbr_lambda
*
ldw
;
g
[
dets
[
i
].
tensor_offset_dh
]
+=
scale
*
options
.
bbr_lambda
*
ldh
;
}
}
}
else
else
{
{
...
@@ -1299,6 +1387,9 @@ namespace dlib
...
@@ -1299,6 +1387,9 @@ namespace dlib
out
<<
", loss per FA:"
<<
opts
.
loss_per_false_alarm
;
out
<<
", loss per FA:"
<<
opts
.
loss_per_false_alarm
;
out
<<
", loss per miss:"
<<
opts
.
loss_per_missed_target
;
out
<<
", loss per miss:"
<<
opts
.
loss_per_missed_target
;
out
<<
", truth match IOU thresh:"
<<
opts
.
truth_match_iou_threshold
;
out
<<
", truth match IOU thresh:"
<<
opts
.
truth_match_iou_threshold
;
out
<<
", use_bounding_box_regression:"
<<
opts
.
use_bounding_box_regression
;
if
(
opts
.
use_bounding_box_regression
)
out
<<
", bbr_lambda:"
<<
opts
.
bbr_lambda
;
out
<<
", overlaps_nms:("
<<
opts
.
overlaps_nms
.
get_iou_thresh
()
<<
","
<<
opts
.
overlaps_nms
.
get_percent_covered_thresh
()
<<
")"
;
out
<<
", overlaps_nms:("
<<
opts
.
overlaps_nms
.
get_iou_thresh
()
<<
","
<<
opts
.
overlaps_nms
.
get_percent_covered_thresh
()
<<
")"
;
out
<<
", overlaps_ignore:("
<<
opts
.
overlaps_ignore
.
get_iou_thresh
()
<<
","
<<
opts
.
overlaps_ignore
.
get_percent_covered_thresh
()
<<
")"
;
out
<<
", overlaps_ignore:("
<<
opts
.
overlaps_ignore
.
get_iou_thresh
()
<<
","
<<
opts
.
overlaps_ignore
.
get_percent_covered_thresh
()
<<
")"
;
...
@@ -1325,11 +1416,19 @@ namespace dlib
...
@@ -1325,11 +1416,19 @@ namespace dlib
)
const
)
const
{
{
DLIB_CASSERT
(
net
.
sample_expansion_factor
()
==
1
,
net
.
sample_expansion_factor
());
DLIB_CASSERT
(
net
.
sample_expansion_factor
()
==
1
,
net
.
sample_expansion_factor
());
DLIB_CASSERT
(
output_tensor
.
k
()
==
(
long
)
options
.
detector_windows
.
size
());
if
(
options
.
use_bounding_box_regression
)
{
DLIB_CASSERT
(
output_tensor
.
k
()
==
(
long
)
options
.
detector_windows
.
size
()
*
5
);
}
else
{
DLIB_CASSERT
(
output_tensor
.
k
()
==
(
long
)
options
.
detector_windows
.
size
());
}
const
float
*
out_data
=
output_tensor
.
host
()
+
output_tensor
.
k
()
*
output_tensor
.
nr
()
*
output_tensor
.
nc
()
*
i
;
const
float
*
out_data
=
output_tensor
.
host
()
+
output_tensor
.
k
()
*
output_tensor
.
nr
()
*
output_tensor
.
nc
()
*
i
;
// scan the final layer and output the positive scoring locations
// scan the final layer and output the positive scoring locations
dets_accum
.
clear
();
dets_accum
.
clear
();
for
(
long
k
=
0
;
k
<
o
utput_tensor
.
k
();
++
k
)
for
(
long
k
=
0
;
k
<
o
ptions
.
detector_windows
.
size
();
++
k
)
{
{
for
(
long
r
=
0
;
r
<
output_tensor
.
nr
();
++
r
)
for
(
long
r
=
0
;
r
<
output_tensor
.
nr
();
++
r
)
{
{
...
@@ -1343,6 +1442,28 @@ namespace dlib
...
@@ -1343,6 +1442,28 @@ namespace dlib
rect
=
input_layer
(
net
).
tensor_space_to_image_space
(
input_tensor
,
rect
);
rect
=
input_layer
(
net
).
tensor_space_to_image_space
(
input_tensor
,
rect
);
dets_accum
.
push_back
(
intermediate_detection
(
rect
,
score
,
(
k
*
output_tensor
.
nr
()
+
r
)
*
output_tensor
.
nc
()
+
c
,
k
));
dets_accum
.
push_back
(
intermediate_detection
(
rect
,
score
,
(
k
*
output_tensor
.
nr
()
+
r
)
*
output_tensor
.
nc
()
+
c
,
k
));
if
(
options
.
use_bounding_box_regression
)
{
const
auto
offset
=
options
.
detector_windows
.
size
()
+
k
*
4
;
dets_accum
.
back
().
tensor_offset_dx
=
((
offset
+
0
)
*
output_tensor
.
nr
()
+
r
)
*
output_tensor
.
nc
()
+
c
;
dets_accum
.
back
().
tensor_offset_dy
=
((
offset
+
1
)
*
output_tensor
.
nr
()
+
r
)
*
output_tensor
.
nc
()
+
c
;
dets_accum
.
back
().
tensor_offset_dw
=
((
offset
+
2
)
*
output_tensor
.
nr
()
+
r
)
*
output_tensor
.
nc
()
+
c
;
dets_accum
.
back
().
tensor_offset_dh
=
((
offset
+
3
)
*
output_tensor
.
nr
()
+
r
)
*
output_tensor
.
nc
()
+
c
;
// apply BBR to dets_accum.back()
double
dx
=
out_data
[
dets_accum
.
back
().
tensor_offset_dx
];
double
dy
=
out_data
[
dets_accum
.
back
().
tensor_offset_dy
];
double
dw
=
out_data
[
dets_accum
.
back
().
tensor_offset_dw
];
double
dh
=
out_data
[
dets_accum
.
back
().
tensor_offset_dh
];
dw
=
std
::
exp
(
dw
);
dh
=
std
::
exp
(
dh
);
double
w
=
rect
.
width
()
-
1
;
double
h
=
rect
.
height
()
-
1
;
rect
=
translate_rect
(
rect
,
dpoint
(
dx
*
w
,
dy
*
h
));
rect
=
centered_drect
(
rect
,
w
*
dw
+
1
,
h
*
dh
+
1
);
dets_accum
.
back
().
rect_bbr
=
rect
;
}
}
}
}
}
}
}
...
...
dlib/dnn/loss_abstract.h
View file @
7b006f37
...
@@ -652,6 +652,21 @@ namespace dlib
...
@@ -652,6 +652,21 @@ namespace dlib
// However, sometimes scale-invariance may not be desired.
// However, sometimes scale-invariance may not be desired.
use_image_pyramid
assume_image_pyramid
=
use_image_pyramid
::
yes
;
use_image_pyramid
assume_image_pyramid
=
use_image_pyramid
::
yes
;
// By default, the mmod loss doesn't train any bounding box regression model. But
// if you set use_bounding_box_regression == true then it expects the network to
// output a tensor with detector_windows.size()*5 channels rather than just
// detector_windows.size() channels. The 4 extra channels per window are trained
// to give a bounding box regression output that improves the positioning of the
// output detection box.
bool
use_bounding_box_regression
=
false
;
// When using bounding box regression, bbr_lambda determines how much you care
// about getting the bounding box shape correct vs just getting the detector to
// find objects. That is, the objective function being optimized is
// basic_mmod_loss + bbr_lambda*bounding_box_regression_loss. So setting
// bbr_lambda to a larger value will cause the overall loss to care more about
// getting the bounding box shape correct.
double
bbr_lambda
=
100
;
mmod_options
(
mmod_options
(
const
std
::
vector
<
std
::
vector
<
mmod_rect
>>&
boxes
,
const
std
::
vector
<
std
::
vector
<
mmod_rect
>>&
boxes
,
const
unsigned
long
target_size
,
const
unsigned
long
target_size
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment