empirical_kernel_map_abstract.h 18.4 KB
Newer Older
1
// Copyright (C) 2009  Davis E. King (davis@dlib.net)
Davis King's avatar
Davis King committed
2
3
4
5
6
7
8
// License: Boost Software License   See LICENSE.txt for the full license.
#undef DLIB_EMPIRICAL_KERNEl_MAP_ABSTRACT_H_
#ifdef DLIB_EMPIRICAL_KERNEl_MAP_ABSTRACT_H_

#include <vector>
#include "../matrix.h"
#include "kernel_abstract.h"
Davis King's avatar
Davis King committed
9
#include "function_abstract.h"
10
#include "linearly_independent_subset_finder_abstract.h"
Davis King's avatar
Davis King committed
11
12
13
14
15
#include <vector>

namespace dlib
{

16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
// ----------------------------------------------------------------------------------------

    template <
        typename kernel_type, 
        typename EXP
        >
    const decision_function<kernel_type> convert_to_decision_function (
        const projection_function<kernel_type>& project_funct,
        const matrix_exp<EXP>& vect
    );
    /*!
        requires
            - is_vector(vect) == true
            - vect.size() == project_funct.out_vector_size()
            - project_funct.out_vector_size() > 0
            - project_funct.weights.nc() == project_funct.basis_vectors.size()
        ensures
            - This function interprets the given vector as a point in the kernel feature space defined 
              by the given projection function.  The return value of this function is a decision 
              function, DF, that represents the given vector in the following sense:
                - for all possible sample_type objects, S, it is the case that DF(S) == dot(project_funct(S), vect)
                  (i.e. the returned decision function computes dot products, in kernel feature space, 
                  between vect and any argument you give it.  Note also that this equality is exact, even
                  for sample_type objects not in the span of the basis_vectors.)
                - DF.kernel_function == project_funct.kernel_function
                - DF.b == 0
                - DF.basis_vectors == project_funct.basis_vectors.  
    !*/

Davis King's avatar
Davis King committed
45
46
47
48
49
50
51
52
53
// ----------------------------------------------------------------------------------------

    template <
        typename kern_type
        >
    class empirical_kernel_map
    {
        /*!
            REQUIREMENTS ON kern_type
Davis King's avatar
Davis King committed
54
                - must be a kernel function object as defined in dlib/svm/kernel_abstract.h
Davis King's avatar
Davis King committed
55
56
57

            INITIAL VALUE
                - out_vector_size() == 0
58
                - basis_size() == 0
Davis King's avatar
Davis King committed
59
60

            WHAT THIS OBJECT REPRESENTS
61
                This object represents a map from objects of sample_type (the kind of object 
Davis King's avatar
Davis King committed
62
                a kernel function operates on) to finite dimensional column vectors which 
63
64
65
                represent points in the kernel feature space defined by whatever kernel 
                is used with this object. 

Davis King's avatar
Davis King committed
66
67
68
                To use the empirical_kernel_map you supply it with a particular kernel and a set of 
                basis samples.  After that you can present it with new samples and it will project 
                them into the part of kernel feature space spanned by your basis samples.   
69
70
71
72
73
74
75
76
77
78
79
                
                This means the empirical_kernel_map is a tool you can use to very easily kernelize 
                any algorithm that operates on column vectors.  All you have to do is select a 
                set of basis samples and then use the empirical_kernel_map to project all your 
                data points into the part of kernel feature space spanned by those basis samples.
                Then just run your normal algorithm on the output vectors and it will be effectively 
                kernelized.  

                Regarding methods to select a set of basis samples, if you are working with only a 
                few thousand samples then you can just use all of them as basis samples.  
                Alternatively, the linearly_independent_subset_finder often works well for 
Davis King's avatar
Davis King committed
80
81
                selecting a basis set.  I also find that picking a random subset typically works 
                well.
82
83
84
85
86
87
88
89
90


                The empirical kernel map is something that has been around in the kernel methods
                literature for a long time but is seemingly not well known.  Anyway, one of the
                best books on the subject is the following:
                    Learning with Kernels: Support Vector Machines, Regularization, Optimization, 
                    and Beyond by Bernhard Schlkopf, Alexander J. Smola
                The authors discuss the empirical kernel map as well as many other interesting 
                topics.
Davis King's avatar
Davis King committed
91
        !*/
92

Davis King's avatar
Davis King committed
93
94
95
96
97
98
99
100
101
102
103
104
105
    public:

        typedef kern_type kernel_type;
        typedef typename kernel_type::sample_type sample_type;
        typedef typename kernel_type::scalar_type scalar_type;
        typedef typename kernel_type::mem_manager_type mem_manager_type;

        struct empirical_kernel_map_error : public error;
        /*!
            This is an exception class used to indicate a failure to create a 
            kernel map from data given by the user.
        !*/

Davis King's avatar
Davis King committed
106
107
108
109
110
111
112
        empirical_kernel_map (
        );
        /*!
            ensures
                - this object is properly initialized
        !*/

113
114
115
116
117
118
119
        void clear (
        );
        /*!
            ensures
                - this object has its initial value
        !*/

120
        template <typename T>
Davis King's avatar
Davis King committed
121
122
        void load(
            const kernel_type& kernel,
123
            const T& basis_samples
Davis King's avatar
Davis King committed
124
125
126
        );
        /*!
            requires
Davis King's avatar
Davis King committed
127
                - T must be a dlib::matrix type or something convertible to a matrix via vector_to_matrix()
128
                  (e.g. a std::vector)
129
130
131
132
                - is_vector(basis_samples) == true
                - basis_samples.size() > 0
                - kernel must be capable of operating on the elements of basis_samples.  That is,
                  expressions such as kernel(basis_samples(0), basis_samples(0)) should make sense.
Davis King's avatar
Davis King committed
133
            ensures
134
                - 0 < #out_vector_size() <= basis_samples.size()
135
                - #basis_size() == basis_samples.size()
Davis King's avatar
Davis King committed
136
                - #get_kernel() == kernel
137
138
139
140
                - This function constructs a map between normal sample_type objects and the 
                  subspace of the kernel feature space defined by the given kernel and the
                  given set of basis samples.  So after this function has been called you
                  will be able to project sample_type objects into kernel feature space
Davis King's avatar
Davis King committed
141
                  and obtain the resulting vector as a regular column matrix.
142
143
144
                - The basis samples are loaded into this object in the order in which they
                  are stored in basis_samples.  That is:
                    - for all valid i: (*this)[i] == basis_samples(i)
Davis King's avatar
Davis King committed
145
146
147
148
149
150
            throws
                - empirical_kernel_map_error
                    This exception is thrown if we are unable to create a kernel map.
                    If this happens then this object will revert back to its initial value.
        !*/

151
152
153
154
155
156
157
158
        void load(
            const linearly_independent_subset_finder<kernel_type>& lisf
        );
        /*!
            requires
                - lisf.dictionary_size() > 0
            ensures
                - #out_vector_size() == lisf.dictionary_size() 
159
                - #basis_size() == lisf.dictionary_size()
160
161
162
163
164
165
166
                - #get_kernel() == lisf.get_kernel()
                - Uses the dictionary vectors from lisf as a basis set.  Thus, this function 
                  constructs a map between normal sample_type objects and the subspace of 
                  the kernel feature space defined by the given kernel and the given set 
                  of basis samples.  So after this function has been called you will be 
                  able to project sample_type objects into kernel feature space and obtain 
                  the resulting vector as a regular column matrix.
167
168
169
                - The basis samples are loaded into this object in the order in which they
                  are stored in lisf.  That is:
                    - for all valid i: (*this)[i] == lisf[i]
170
171
172
173
174
175
            throws
                - empirical_kernel_map_error
                    This exception is thrown if we are unable to create a kernel map.
                    If this happens then this object will revert back to its initial value.
        !*/

Davis King's avatar
Davis King committed
176
177
178
179
180
181
182
183
184
185
186
187
188
        const kernel_type get_kernel (
        ) const;
        /*!
            requires
                - out_vector_size() != 0
            ensures
                - returns a copy of the kernel used by this object
        !*/

        long out_vector_size (
        ) const;
        /*!
            ensures
189
190
                - if (this object has been loaded with basis samples) then
                    - returns the dimensionality of the vectors output by the project() function.
Davis King's avatar
Davis King committed
191
192
193
194
                - else
                    - returns 0
        !*/

195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
        unsigned long basis_size (
        ) const;
        /*!
            ensures
                - returns the number of basis vectors in projection_functions created
                  by this obect.  This is also equal to the number of basis vectors
                  given to the load() function.
        !*/

        const sample_type& operator[] (
            unsigned long idx
        ) const;
        /*!
            requires
                - idx < basis_size()
            ensures
                - returns a const reference to the idx'th basis vector contained inside 
                  this object.
        !*/

Davis King's avatar
Davis King committed
215
216
217
218
219
220
221
        const matrix<scalar_type,0,1,mem_manager_type>& project (
            const sample_type& sample 
        ) const;
        /*!
            requires
                - out_vector_size() != 0
            ensures
222
                - takes the given sample and projects it into the kernel feature space
Davis King's avatar
Davis King committed
223
224
                  of out_vector_size() dimensions defined by this kernel map and 
                  returns the resulting vector.
Davis King's avatar
Davis King committed
225
226
                - in more precise terms, this function returns a vector such that:
                    - The returned vector will contain out_vector_size() elements.
227
                    - for any sample_type object S, the following equality is approximately true:
Davis King's avatar
Davis King committed
228
                        - get_kernel()(sample,S) == dot(project(sample), project(S)).  
229
230
231
232
233
234
235
236
                    - The approximation error in the above equality will be zero (within rounding error)
                      if both sample_type objects involved are within the span of the set of basis 
                      samples given to the load() function.  If they are not then there will be some 
                      approximation error.  Note that all the basis samples are always within their
                      own span.  So the equality is always exact for the samples given to the load() 
                      function.
        !*/

237
        const matrix<scalar_type,0,1,mem_manager_type>& project (
238
            const sample_type& samp,
239
240
241
242
243
244
            scalar_type& projection_error
        ) const;
        /*!
            requires
                - out_vector_size() != 0
            ensures
245
                - This function returns project(samp)
246
                  (i.e. it returns the same thing as the above project() function)
247
248
249
250
                - #projection_error == the square of the distance between the point samp 
                  gets projected onto and samp's true image in kernel feature space.  
                  That is, this value is equal to: 
                    pow(convert_to_distance_function(project(samp))(samp),2)
251
252
        !*/

253
254
        template <typename EXP>
        const decision_function<kernel_type> convert_to_decision_function (
255
            const matrix_exp<EXP>& vect
256
257
258
259
260
261
262
263
264
265
266
267
        ) const;
        /*!
            requires
                - is_vector(vect) == true
                - vect.size() == out_vector_size()
                - out_vector_size() != 0
            ensures
                - This function interprets the given vector as a point in the kernel feature space defined 
                  by this empirical_kernel_map.  The return value of this function is a decision 
                  function, DF, that represents the given vector in the following sense:
                    - for all possible sample_type objects, S, it is the case that DF(S) == dot(project(S), vect)
                      (i.e. the returned decision function computes dot products, in kernel feature space, 
268
269
                      between vect and any argument you give it.  Note also that this equality is exact, even
                      for sample_type objects not in the span of the basis samples.)
Davis King's avatar
Davis King committed
270
271
272
273
                    - DF.kernel_function == get_kernel()
                    - DF.b == 0
                    - DF.basis_vectors == these will be the basis samples given to the previous call to load().  Note
                      that it is possible for there to be fewer basis_vectors than basis samples given to load().  
274
                    - DF.basis_vectors.size() == basis_size()
275
276
277
278
        !*/

        template <typename EXP>
        const distance_function<kernel_type> convert_to_distance_function (
279
            const matrix_exp<EXP>& vect
280
281
282
283
284
285
286
287
288
289
        ) const
        /*!
            requires
                - is_vector(vect) == true
                - vect.size() == out_vector_size()
                - out_vector_size() != 0
            ensures
                - This function interprets the given vector as a point in the kernel feature space defined 
                  by this empirical_kernel_map.  The return value of this function is a distance 
                  function, DF, that represents the given vector in the following sense:
290
291
292
293
294
                    - for any sample_type object S, the following equality is approximately true: 
                        - DF(S) == length(project(S) - vect)
                          (i.e. the returned distance function computes distances, in kernel feature space, 
                          between vect and any argument you give it. )
                    - The approximation error in the above equality will be zero (within rounding error)
Davis King's avatar
Davis King committed
295
296
297
                      if S is within the span of the set of basis samples given to the load() function.  
                      If it is not then there will be some approximation error.  Note that all the basis 
                      samples are always within their own span.  So the equality is always exact for the 
298
299
300
                      samples given to the load() function.  Note further that the distance computed
                      by DF(S) is always the correct distance in kernel feature space between vect and
                      the true projection of S.  That is, the above equality is approximate only because 
Davis King's avatar
Davis King committed
301
                      of potential error in the project() function, not in DF(S).
Davis King's avatar
Davis King committed
302
303
304
305
                    - DF.kernel_function == get_kernel()
                    - DF.b == dot(vect,vect) 
                    - DF.basis_vectors == these will be the basis samples given to the previous call to load().  Note
                      that it is possible for there to be fewer basis_vectors than basis samples given to load().  
306
                    - DF.basis_vectors.size() == basis_size()
Davis King's avatar
Davis King committed
307
308
        !*/

309
310
311
312
313
314
315
316
317
        const projection_function<kernel_type> get_projection_function (
        ) const;
        /*!
            requires
                - out_vector_size() != 0
            ensures
                - returns a projection_function, PF, that computes the same projection as project().
                  That is, calling PF() on any sample will produce the same output vector as calling
                  this->project() on that sample.
318
                - PF.basis_vectors.size() == basis_size()
319
320
        !*/

321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
        const matrix<scalar_type,0,0,mem_manager_type> get_transformation_to (
            const empirical_kernel_map& target
        ) const;
        /*!
            requires
                - get_kernel() == target.get_kernel()
                - out_vector_size() != 0
                - target.out_vector_size() != 0
            ensures
                - A point in the kernel feature space defined by the kernel get_kernel() typically
                  has different representations with respect to different empirical_kernel_maps.
                  This function lets you obtain a transformation matrix that will allow you
                  to map between these different representations. That is, this function returns 
                  a matrix M with the following properties:    
                    - M maps vectors represented according to *this into the representation used by target. 
                    - M.nr() == target.out_vector_size()
                    - M.nc() == this->out_vector_size()
                    - Let V be a vector of this->out_vector_size() length.  Then define two distance_functions
                      DF1 = this->convert_to_distance_function(V)
                      DF2 = target.convert_to_distance_function(M*V)

                      Then DF1(DF2) == 0 // i.e. the distance between these two points should be 0

                      That is, DF1 and DF2 both represent the same point in kernel feature space.  Note
                      that the above equality is only approximate.  If the vector V represents a point in
                      kernel space that isn't in the span of the basis samples used by target then the 
                      equality is approximate.  However, if it is in their span then the equality will
                      be exact.  For example, if target's basis samples are a superset of the basis samples
                      used by *this then the equality will always be exact (within rounding error).
        !*/

Davis King's avatar
Davis King committed
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
        void swap (
            empirical_kernel_map& item
        );
        /*!
            ensures
                - swaps the state of *this and item
        !*/

    };

// ----------------------------------------------------------------------------------------

    template <
        typename kernel_type
        >
    void swap (
        empirical_kernel_map<kernel_type>& a,
        empirical_kernel_map<kernel_type>& b
    ) { a.swap(b); }
    /*!
        provides a global swap function
    !*/

    template <
        typename kernel_type
        >
    void serialize (
        const empirical_kernel_map<kernel_type>& item,
        std::ostream& out
    );
    /*!
        provides serialization support for empirical_kernel_map objects
    !*/

    template <
        typename kernel_type
        >
    void deserialize (
        empirical_kernel_map<kernel_type>& item,
        std::istream& in 
    );
    /*!
        provides serialization support for empirical_kernel_map objects
    !*/

// ----------------------------------------------------------------------------------------

}

#endif // DLIB_EMPIRICAL_KERNEl_MAP_ABSTRACT_H_