optimization_oca.h 9.65 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
// Copyright (C) 2010  Davis E. King (davis@dlib.net)
// License: Boost Software License   See LICENSE.txt for the full license.
#ifndef DLIB_OPTIMIZATIoN_OCA_H__
#define DLIB_OPTIMIZATIoN_OCA_H__

#include "optimization_oca_abstract.h"

#include "../matrix.h"
#include "optimization_solve_qp_using_smo.h"
#include <list>

// ----------------------------------------------------------------------------------------

namespace dlib
{
    template <typename matrix_type>
    class oca_problem
    {
    public:
        typedef typename matrix_type::type scalar_type;

        virtual ~oca_problem() {}

        virtual void optimization_status (
            scalar_type ,
            scalar_type ,
            unsigned long 
        ) const {}

Davis King's avatar
Davis King committed
30
        virtual bool r_has_lower_bound (
31
32
33
            scalar_type& 
        ) const { return false; }

Davis King's avatar
Davis King committed
34
        virtual scalar_type get_c (
35
36
        ) const = 0;

Davis King's avatar
Davis King committed
37
        virtual long get_num_dimensions (
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
        ) const = 0;

        virtual void get_risk (
            matrix_type& current_solution,
            scalar_type& risk_value,
            matrix_type& risk_subgradient
        ) const = 0;

    };

// ----------------------------------------------------------------------------------------

    class oca
    {
    public:

        oca () 
        {
            eps = 0.001;
            max_iter = 1000000;

            sub_eps = 1e-5;
            sub_max_iter = 20000;

            inactive_thresh = 15;
        }

        void set_epsilon (
            double eps_
        ) 
        { 
            // make sure requires clause is not broken
            DLIB_ASSERT(eps_ > 0,
                "\t void oca::set_epsilon"
                << "\n\t epsilon must be greater than 0"
                << "\n\t eps_: " << eps_ 
                << "\n\t this: " << this
                );

            eps = eps_; 
        }

        double get_epsilon (
        ) const { return eps; }

        void set_max_iterations (
            unsigned long max_iter_
        ) 
        { 
            // make sure requires clause is not broken
            DLIB_ASSERT(max_iter_ > 0,
                "\t void oca::set_max_iterations"
                << "\n\t max iterations must be greater than 0"
                << "\n\t max_iter_: " << max_iter_
                << "\n\t this: " << this
                );

            max_iter = max_iter_; 
        }

        unsigned long get_max_iterations (
        ) const { return max_iter; }

        void set_subproblem_epsilon (
            double eps_
        ) { sub_eps = eps_; }

        double get_subproblem_epsilon (
        ) const { return sub_eps; }

        void set_subproblem_max_iterations (
            unsigned long sub_max_iter_
        ) 
        { 
            // make sure requires clause is not broken
            DLIB_ASSERT(sub_max_iter_ > 0,
                "\t void oca::set_subproblem_max_iterations"
                << "\n\t max iterations must be greater than 0"
                << "\n\t sub_max_iter_: " << sub_max_iter_
                << "\n\t this: " << this
                );

            sub_max_iter = sub_max_iter_; 
        }

        unsigned long get_subproblem_max_iterations (
        ) const { return sub_max_iter; }

        void set_inactive_plane_threshold (
            unsigned long inactive_thresh_
        ) 
        { 
            // make sure requires clause is not broken
            DLIB_ASSERT(inactive_thresh_ > 0,
                "\t void oca::set_inactive_plane_threshold"
                << "\n\t inactive threshold must be greater than 0"
                << "\n\t inactive_thresh_: " << inactive_thresh_
                << "\n\t this: " << this
                );

            inactive_thresh = inactive_thresh_; 
        }

        unsigned long get_inactive_plane_threshold (
        ) const { return inactive_thresh; }

        template <
            typename matrix_type
            >
Davis King's avatar
Davis King committed
147
        typename matrix_type::type operator() (
148
149
150
151
            const oca_problem<matrix_type>& problem,
            matrix_type& w
        ) const
        {
Davis King's avatar
Davis King committed
152
            // make sure requires clause is not broken
Davis King's avatar
Davis King committed
153
            DLIB_ASSERT(problem.get_c() > 0 &&
Davis King's avatar
Davis King committed
154
155
156
                        problem.get_num_dimensions() > 0,
                "\t void oca::operator()"
                << "\n\t The oca_problem is invalid"
Davis King's avatar
Davis King committed
157
                << "\n\t problem.get_c():              " << problem.get_c() 
Davis King's avatar
Davis King committed
158
159
160
161
                << "\n\t problem.get_num_dimensions(): " << problem.get_num_dimensions() 
                << "\n\t this: " << this
                );

162
163
164
            typedef typename matrix_type::type scalar_type;
            typedef typename matrix_type::layout_type layout_type;
            typedef typename matrix_type::mem_manager_type mem_manager_type;
Davis King's avatar
Davis King committed
165
            typedef matrix_type vect_type;
166

Davis King's avatar
Davis King committed
167
            const scalar_type C = problem.get_c();
168
169
170
171
172
173

            std::list<vect_type> planes;
            std::vector<scalar_type> bs, miss_count;

            vect_type temp, alpha, w_cur;

Davis King's avatar
Davis King committed
174
            w.set_size(problem.get_num_dimensions(), 1);
175
176
177
178
179
180
181
182
183
184
185
186
            w = 0;
            w_cur = w;

            // The best objective value seen so far.   Note also
            // that w always contains the best solution seen so far.
            scalar_type best_obj = std::numeric_limits<scalar_type>::max();

            // This will hold the cutting plane objective value.  This value is
            // a lower bound on the true optimal objective value.
            scalar_type cp_obj = 0;

            scalar_type R_lower_bound;
Davis King's avatar
Davis King committed
187
            if (problem.r_has_lower_bound(R_lower_bound))
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
            {
                // The flat lower bounding plane is always good to have if we know
                // what it is.
                bs.push_back(R_lower_bound);
                planes.push_back(zeros_matrix<scalar_type>(w.size(),1));
                miss_count.push_back(0);
            }

            matrix<scalar_type,0,0,mem_manager_type, layout_type> K;

            for (unsigned long iter = 0; iter < max_iter; ++iter)
            {

                // add the next cutting plane
                scalar_type cur_risk;
                planes.resize(planes.size()+1);
                problem.get_risk(w_cur, cur_risk, planes.back());
                bs.push_back(cur_risk - dot(w_cur,planes.back()));
                miss_count.push_back(0);

                // Check the objective value at w_cur and see if it is better than
                // the best seen so far.
                const scalar_type cur_obj = 0.5*trans(w_cur)*w_cur + C*cur_risk;
                if (cur_obj < best_obj)
                {
                    best_obj = cur_obj;
                    w = w_cur;
                }

                // check stopping condition and stop if we can
                if (best_obj - cp_obj <= eps)
                    break;


                // compute kernel matrix for all the planes
                K.set_size(planes.size(), planes.size());
                long rr = 0;
                for (typename std::list<vect_type>::iterator r = planes.begin(); r != planes.end(); ++r)
                {
                    long cc = rr;
                    for (typename std::list<vect_type>::iterator c = r; c != planes.end(); ++c)
                    {
                        K(rr,cc) = dot(*r, *c);
                        K(cc,rr) = K(rr,cc);
                        ++cc;
                    }
                    ++rr;
                }

                alpha = uniform_matrix<scalar_type>(planes.size(),1, C/planes.size());

                // solve the cutting plane subproblem for the next w_cur
                solve_qp_using_smo(K, vector_to_matrix(bs), alpha, static_cast<scalar_type>(sub_eps), sub_max_iter); 

                // construct the w_cur that minimized the subproblem.
                w_cur = 0;
                rr = 0;
                for (typename std::list<vect_type>::iterator i = planes.begin(); i != planes.end(); ++i)
                {
                    if (alpha(rr) != 0)
                    {
                        w_cur -= alpha(rr)*(*i);
                        miss_count[rr] = 0;
                    }
                    else
                    {
                        miss_count[rr] += 1;
                    }
                    ++rr;
                }

                // Compute the lower bound on the true objective given to us by the cutting 
                // plane subproblem.
                cp_obj = -0.5*trans(w_cur)*w_cur + trans(alpha)*vector_to_matrix(bs);

                // check stopping condition and stop if we can
                if (best_obj - cp_obj <= eps)
                    break;

                // report current status
                problem.optimization_status(best_obj, best_obj - cp_obj, planes.size());

                // If it has been a while since a cutting plane was an active constraint then
                // we should throw it away.
                while (max(vector_to_matrix(miss_count)) >= inactive_thresh)
                {
                    long idx = index_of_max(vector_to_matrix(miss_count));
                    typename std::list<vect_type>::iterator i0 = planes.begin();
                    advance(i0, idx);
                    planes.erase(i0);
                    bs.erase(bs.begin()+idx);
                    miss_count.erase(miss_count.begin()+idx);
                }

            }

            // report current status
            problem.optimization_status(best_obj, best_obj - cp_obj, planes.size());

Davis King's avatar
Davis King committed
287
            return best_obj;
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
        }

    private:

        double eps;
        double sub_eps;

        unsigned long max_iter;
        unsigned long sub_max_iter;

        unsigned long inactive_thresh;
    };
}

// ----------------------------------------------------------------------------------------

#endif // DLIB_OPTIMIZATIoN_OCA_H__