OpenDAS / dlib · Commits

Commit 50012d2c
Authored Sep 23, 2013 by Davis King
Merge commit; parents 28da9a42 and 3e559e42.
Showing 3 changed files with 127 additions and 9 deletions (+127 -9):

    dlib/optimization/optimization.h                          +27  -2
    dlib/optimization/optimization_line_search.h              +78  -7
    dlib/optimization/optimization_line_search_abstract.h     +22  -0
dlib/optimization/optimization.h
@@ -456,6 +456,11 @@ namespace dlib
         const matrix_exp<EXP2>& x_upper
     )
     {
+        /*
+            The implementation of this function is more or less based on the discussion in
+            the paper Projected Newton-type Methods in Machine Learning by Mark Schmidt, et al.
+        */
+
         // make sure the requires clause is not violated
         COMPILE_TIME_ASSERT(is_matrix<T>::value);
         DLIB_ASSERT(
@@ -490,6 +495,7 @@ namespace dlib
         // active constraint.
         const double gap_eps = 1e-8;

+        double last_alpha = 1;
         while (stop_strategy.should_continue_search(x, f_value, g))
         {
             s = search_strategy.get_next_direction(x, f_value, zero_bounded_variables(gap_eps, g, x, g, x_lower, x_upper));
@@ -499,10 +505,19 @@ namespace dlib
                               make_line_search_function(clamp_function(f,x_lower,x_upper), x, s, f_value),
                               f_value,
                               dot(g,s), // compute gradient for the line search
-                              1,
+                              last_alpha,
                               search_strategy.get_wolfe_rho(),
                               search_strategy.get_max_line_search_iterations());

+            // Do a trust region style thing for alpha. The idea is that if we take a
+            // small step then we are likely to take another small step. So we reuse the
+            // alpha from the last iteration unless the line search didn't shrink alpha at
+            // all, in that case, we start with a bigger alpha next time.
+            if (alpha == last_alpha)
+                last_alpha = std::min(last_alpha*10, 1.0);
+            else
+                last_alpha = alpha;
+
             // Take the search step indicated by the above line search
             x = clamp(x + alpha*s, x_lower, x_upper);
             g = der(x);
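The "trust region style" reuse of alpha is the heart of this hunk. Below is a
minimal standalone sketch of just that heuristic; everything in it is
illustrative, and fake_line_search is a hypothetical stand-in for the real
Wolfe-condition line search, not dlib API:

    #include <algorithm>
    #include <iostream>

    // Hypothetical stand-in for the real line search: it accepts the
    // suggested starting alpha for a few iterations, then starts shrinking.
    double fake_line_search(double starting_alpha, int iter)
    {
        return (iter < 3) ? starting_alpha : starting_alpha/2;
    }

    int main()
    {
        double last_alpha = 1;
        for (int iter = 0; iter < 6; ++iter)
        {
            const double alpha = fake_line_search(last_alpha, iter);
            // Same update rule as the patch: if the line search didn't shrink
            // alpha at all, start with a bigger alpha next time (capped at 1);
            // otherwise reuse the shrunken step as the next starting point.
            if (alpha == last_alpha)
                last_alpha = std::min(last_alpha*10, 1.0);
            else
                last_alpha = alpha;
            std::cout << "iter " << iter << ": alpha = " << alpha
                      << ", next start = " << last_alpha << "\n";
        }
    }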
@@ -601,6 +616,7 @@ namespace dlib
         // active constraint.
         const double gap_eps = 1e-8;

+        double last_alpha = 1;
         while (stop_strategy.should_continue_search(x, f_value, g))
         {
             s = search_strategy.get_next_direction(x, f_value, zero_bounded_variables(gap_eps, g, x, g, x_lower, x_upper));
@@ -610,10 +626,19 @@ namespace dlib
                               negate_function(make_line_search_function(clamp_function(f,x_lower,x_upper), x, s, f_value)),
                               f_value,
                               dot(g,s), // compute gradient for the line search
-                              1,
+                              last_alpha,
                               search_strategy.get_wolfe_rho(),
                               search_strategy.get_max_line_search_iterations());

+            // Do a trust region style thing for alpha. The idea is that if we take a
+            // small step then we are likely to take another small step. So we reuse the
+            // alpha from the last iteration unless the line search didn't shrink alpha at
+            // all, in that case, we start with a bigger alpha next time.
+            if (alpha == last_alpha)
+                last_alpha = std::min(last_alpha*10, 1.0);
+            else
+                last_alpha = alpha;
+
             // Take the search step indicated by the above line search
             x = clamp(x + alpha*s, x_lower, x_upper);
             g = -der(x);
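For context, the two functions patched above are dlib's box-constrained
minimizer and maximizer. A usage sketch of the minimizer follows; the
Rosenbrock objective and the strategy parameters are illustrative, and the
find_min_box_constrained call shape is assumed from the surrounding code
rather than taken from documentation:

    #include <dlib/optimization.h>
    #include <iostream>
    using namespace dlib;

    typedef matrix<double,0,1> column_vector;

    // Rosenbrock function; its unconstrained minimum (1,1) lies outside the
    // box used below, so the constraints are active at the solution.
    double rosen(const column_vector& m)
    {
        const double x = m(0), y = m(1);
        return 100.0*(y - x*x)*(y - x*x) + (1 - x)*(1 - x);
    }

    column_vector rosen_derivative(const column_vector& m)
    {
        const double x = m(0), y = m(1);
        column_vector res(2);
        res(0) = -400.0*x*(y - x*x) - 2*(1 - x);
        res(1) =  200.0*(y - x*x);
        return res;
    }

    int main()
    {
        column_vector starting_point(2), x_lower(2), x_upper(2);
        starting_point = 0.1, 0.1;
        x_lower = 0.0, 0.0;
        x_upper = 0.5, 0.5;
        // Each solver iteration runs the warm-started line search patched above.
        find_min_box_constrained(lbfgs_search_strategy(10),
                                 objective_delta_stop_strategy(1e-9),
                                 rosen, rosen_derivative,
                                 starting_point, x_lower, x_upper);
        std::cout << "solution:\n" << starting_point << std::endl;
    }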
dlib/optimization/optimization_line_search.h
@@ -183,6 +183,57 @@ namespace dlib
             return put_in_range(0,1,alpha);
         }

+    // ----------------------------------------------------------------------------------------
+
+        inline double poly_min_extrap (
+            double f0,
+            double d0,
+            double x1,
+            double f_x1,
+            double x2,
+            double f_x2
+        )
+        {
+            DLIB_ASSERT(0 < x1 && x1 < x2, "Invalid inputs were given to this function");
+            // The contents of this function follow the equations described on page 58 of the
+            // book Numerical Optimization by Nocedal and Wright, second edition.
+            matrix<double,2,2> m;
+            matrix<double,2,1> v;
+
+            const double aa2 = x2*x2;
+            const double aa1 = x1*x1;
+            m =  aa2,       -aa1,
+                -aa2*x2,     aa1*x1;
+            v = f_x1 - f0 - d0*x1,
+                f_x2 - f0 - d0*x2;
+
+            double temp = aa2*aa1*(x1-x2);
+
+            // just take a guess if this happens
+            if (temp == 0)
+            {
+                return x1/2.0;
+            }
+
+            matrix<double,2,1> temp2;
+            temp2 = m*v/temp;
+            const double a = temp2(0);
+            const double b = temp2(1);
+
+            temp = b*b - 3*a*d0;
+            if (temp < 0 || a == 0)
+            {
+                // This is probably a line so just pick the lowest point
+                if (f0 < f_x2)
+                    return 0;
+                else
+                    return x2;
+            }
+            temp = (-b + std::sqrt(temp))/(3*a);
+            return put_in_range(0, x2, temp);
+        }
+
     // ----------------------------------------------------------------------------------------

         inline double lagrange_poly_min_extrap (
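The Nocedal and Wright reference in the comment can be made explicit.
Transcribing what this code solves (using the code's own variable names), the
fitted cubic is

    f(x) = a x^3 + b x^2 + d_0 x + f_0,

with a and b recovered from the two samples by

    \begin{bmatrix} a \\ b \end{bmatrix}
      = \frac{1}{x_1^2 x_2^2 (x_1 - x_2)}
        \begin{bmatrix} x_2^2 & -x_1^2 \\ -x_2^3 & x_1^3 \end{bmatrix}
        \begin{bmatrix} f_{x_1} - f_0 - d_0 x_1 \\ f_{x_2} - f_0 - d_0 x_2 \end{bmatrix},

and the returned point is the critical point of f, from f'(x) = 3ax^2 + 2bx + d_0 = 0,

    x^{*} = \frac{-b + \sqrt{b^2 - 3 a d_0}}{3a},

clamped to [0, x_2]. The early returns cover the degenerate cases: a singular
2x2 system (temp == 0), and a fit that is effectively a line or has no real
critical point (a == 0 or b^2 - 3 a d_0 < 0).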
@@ -447,11 +498,17 @@ namespace dlib
                 << "\n\t max_iter: " << max_iter
             );

-            // If the gradient is telling us we need to search backwards then that is what we
-            // will do.
-            if (d0 > 0 && alpha > 0)
+            // make sure alpha is going in the right direction. That is, it should be opposite
+            // the direction of the gradient.
+            if ((d0 > 0 && alpha > 0) || (d0 < 0 && alpha < 0))
             {
                 alpha *= -1;
             }

+            bool have_prev_alpha = false;
+            double prev_alpha = 0;
+            double prev_val = 0;
+
             unsigned long iter = 0;
             while (true)
             {
@@ -466,12 +523,26 @@ namespace dlib
                     // Interpolate a new alpha. We also make sure the step by which we
                     // reduce alpha is not super small.
                     double step;
-                    if (d0 < 0)
-                        step = put_in_range(0.1, 0.9, poly_min_extrap(f0, d0, val));
-                    else
-                        step = put_in_range(0.1, 0.9, poly_min_extrap(f0, -d0, val));
+                    if (!have_prev_alpha)
+                    {
+                        if (d0 < 0)
+                            step = alpha*put_in_range(0.1, 0.9, poly_min_extrap(f0, d0, val));
+                        else
+                            step = alpha*put_in_range(0.1, 0.9, poly_min_extrap(f0, -d0, val));
+                        have_prev_alpha = true;
+                    }
+                    else
+                    {
+                        if (d0 < 0)
+                            step = put_in_range(0.1*alpha, 0.9*alpha, poly_min_extrap(f0, d0, alpha, val, prev_alpha, prev_val));
+                        else
+                            step = put_in_range(0.1*alpha, 0.9*alpha, -poly_min_extrap(f0, -d0, -alpha, val, -prev_alpha, prev_val));
+                    }

-                    alpha *= step;
+                    prev_alpha = alpha;
+                    prev_val = val;
+
+                    alpha = step;
                 }
             }
         }
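One subtlety worth spelling out: poly_min_extrap requires 0 < x1 < x2, so when
the search direction calls for a negative alpha (the d0 > 0 branch) the code
fits the mirrored function instead. Writing g(t) = f(-t) gives

    g(0) = f_0, \quad g'(0) = -d_0, \quad g(-\alpha) = \mathrm{val}, \quad g(-\mathrm{prev\_alpha}) = \mathrm{prev\_val},

so all sample points become positive, and the minimizer of the mirrored fit is
negated on the way out. That is exactly the
-poly_min_extrap(f0, -d0, -alpha, val, -prev_alpha, prev_val) call above.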
dlib/optimization/optimization_line_search_abstract.h
@@ -119,6 +119,28 @@ namespace dlib
             - returns the point in the range [0,1] that minimizes the polynomial c(x)
     !*/

+// ----------------------------------------------------------------------------------------
+
+    inline double poly_min_extrap (
+        double f0,
+        double d0,
+        double x1,
+        double f_x1,
+        double x2,
+        double f_x2
+    );
+    /*!
+        requires
+            - 0 < x1 < x2
+        ensures
+            - let f(x) be a 3rd degree polynomial such that:
+                - f(0) == f0
+                - derivative of f(x) at x==0 is d0
+                - f(x1) == f_x1
+                - f(x2) == f_x2
+            - returns the point in the range [0,x2] that minimizes the polynomial f(x)
+    !*/
+
 // ----------------------------------------------------------------------------------------

     inline double lagrange_poly_min_extrap (
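A quick numeric check of this contract (assuming poly_min_extrap is reachable
in namespace dlib through <dlib/optimization.h>, as it is at this commit; the
test polynomial is illustrative):

    #include <dlib/optimization.h>
    #include <iostream>

    int main()
    {
        // f(x) = x^3 - 3x is itself a 3rd degree polynomial, so the fit is
        // exact: f(0) == 0, f'(0) == -3, and its minimum on [0,2] is at x == 1.
        const double f0 = 0, d0 = -3;
        const double x1 = 0.5, f_x1 = x1*x1*x1 - 3*x1;  // -1.375
        const double x2 = 2.0, f_x2 = x2*x2*x2 - 3*x2;  //  2
        const double xmin = dlib::poly_min_extrap(f0, d0, x1, f_x1, x2, f_x2);
        std::cout << "minimizer: " << xmin << "  (expected 1)" << std::endl;
    }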