Commit c290381b authored by Davis King's avatar Davis King
Browse files

Made the solver use an SMO type iteration in the beginning before

switching to projected gradient steps.
parent 1a494a91
...@@ -118,6 +118,7 @@ namespace dlib ...@@ -118,6 +118,7 @@ namespace dlib
for (unsigned long c = 0; c < horizon; ++c) for (unsigned long c = 0; c < horizon; ++c)
{ {
lambda += trace(trans(B)*temp*B); lambda += trace(trans(B)*temp*B);
Q_diag[horizon-c-1] = diag(trans(B)*temp*B);
temp = trans(A)*temp*A + diagm(Q); temp = trans(A)*temp*A + diagm(Q);
} }
...@@ -264,9 +265,6 @@ namespace dlib ...@@ -264,9 +265,6 @@ namespace dlib
for (unsigned long i = 0; i < horizon; ++i) for (unsigned long i = 0; i < horizon; ++i)
MM[i] = trans(B)*M[i]; MM[i] = trans(B)*M[i];
for (unsigned long i = 0; i < horizon; ++i)
v[i] = controls[i];
unsigned long iter = 0; unsigned long iter = 0;
...@@ -289,6 +287,8 @@ namespace dlib ...@@ -289,6 +287,8 @@ namespace dlib
// Check the stopping condition, which is the magnitude of the largest element // Check the stopping condition, which is the magnitude of the largest element
// of the gradient. // of the gradient.
double max_df = 0; double max_df = 0;
unsigned long max_t = 0;
long max_v = 0;
for (unsigned long i = 0; i < horizon; ++i) for (unsigned long i = 0; i < horizon; ++i)
{ {
for (long j = 0; j < controls[i].size(); ++j) for (long j = 0; j < controls[i].size(); ++j)
...@@ -298,7 +298,12 @@ namespace dlib ...@@ -298,7 +298,12 @@ namespace dlib
if (!((controls[i](j) <= lower(j) && df[i](j) > 0) || if (!((controls[i](j) <= lower(j) && df[i](j) > 0) ||
(controls[i](j) >= upper(j) && df[i](j) < 0))) (controls[i](j) >= upper(j) && df[i](j) < 0)))
{ {
max_df = std::max(max_df, std::abs(df[i](j))); if (std::abs(df[i](j)) > max_df)
{
max_df = std::abs(df[i](j));
max_t = i;
max_v = j;
}
} }
} }
} }
...@@ -306,7 +311,31 @@ namespace dlib ...@@ -306,7 +311,31 @@ namespace dlib
break; break;
// take a step based on the gradient
// We will start out by doing a little bit of coordinate descent because it
// allows us to optimize individual variables exactly. Since we are warm
// starting each iteration with a really good solution this helps speed
// things up a lot.
const unsigned long smo_iters = 50;
if (iter < smo_iters)
{
if (Q_diag[max_t](max_v) == 0) continue;
// Take the optimal step but just for one variable.
controls[max_t](max_v) = -(df[max_t](max_v)-Q_diag[max_t](max_v)*controls[max_t](max_v))/Q_diag[max_t](max_v);
controls[max_t](max_v) = put_in_range(lower(max_v), upper(max_v), controls[max_t](max_v));
// If this is the last SMO iteration then don't forget to initialize v
// for the gradient steps.
if (iter+1 == smo_iters)
{
for (unsigned long i = 0; i < horizon; ++i)
v[i] = controls[i];
}
}
else
{
// Take a projected gradient step.
for (unsigned long i = 0; i < horizon; ++i) for (unsigned long i = 0; i < horizon; ++i)
{ {
v_old[i] = v[i]; v_old[i] = v[i];
...@@ -315,6 +344,7 @@ namespace dlib ...@@ -315,6 +344,7 @@ namespace dlib
} }
} }
} }
}
unsigned long max_iterations; unsigned long max_iterations;
double eps; double eps;
...@@ -329,6 +359,7 @@ namespace dlib ...@@ -329,6 +359,7 @@ namespace dlib
matrix<double,S,1> target[horizon]; matrix<double,S,1> target[horizon];
double lambda; // abound on the largest eigenvalue of the hessian matrix. double lambda; // abound on the largest eigenvalue of the hessian matrix.
matrix<double,I,1> Q_diag[horizon];
matrix<double,I,1> controls[horizon]; matrix<double,I,1> controls[horizon];
}; };
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment