/*

    This is an example illustrating the use of the support vector machine
    utilities from the dlib C++ Library.  

    This example creates a simple set of data to train on and then shows
    you how to use the cross validation and svm training functions
    to find a good decision function that can classify examples in our
    data set.


    The data used in this example will be 2 dimensional data and will
    come from a distribution where points with a distance of 10 or less
    from the origin are labeled +1 and all other points are labeled
    as -1.
        
*/


#include <iostream>
#include "dlib/svm.h"

using namespace std;
using namespace dlib;


int main()
{
    // The svm functions use column vectors to contain a lot of the data on which they 
    // operate.  So the first thing we do here is declare a convenient typedef.  

    // This typedef declares a matrix with 2 rows and 1 column.  It will be the
    // object that contains each of our 2 dimensional samples.   (Note that if you wanted 
    // more than 2 features in this vector you can simply change the 2 to something else.
    // Or if you don't know how many features you want until runtime then you can put a 0
    // here and use the matrix.set_size() member function)
    typedef matrix<double, 2, 1> sample_type;

    // This is a typedef for the type of kernel we are going to use in this example.
    // In this case I have selected the radial basis kernel that can operate on our
    // 2D sample_type objects
    typedef radial_basis_kernel<sample_type> kernel_type;
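    // (For reference, dlib's radial_basis_kernel computes exp(-gamma * ||a-b||^2) for two
    // sample vectors a and b, so larger gamma values make the kernel more local.)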


    // Now we make objects to contain our samples and their respective labels.
    std::vector<sample_type> samples;
    std::vector<double> labels;

    // Now let's put some data into our samples and labels objects.  We do this
    // by looping over a bunch of points and labeling them according to their
    // distance from the origin.
    for (int r = -20; r <= 20; ++r)
    {
        for (int c = -20; c <= 20; ++c)
        {
            sample_type samp;
            samp(0) = r;
            samp(1) = c;
            samples.push_back(samp);

            // if this point's distance from the origin is 10 or less
            if (sqrt((double)r*r + c*c) <= 10)
                labels.push_back(+1);
            else
                labels.push_back(-1);

        }
    }


    // Here we normalize all the samples by subtracting their mean and dividing by their standard deviation.
    // This is generally a good idea since it often heads off numerical stability problems and also 
    // prevents one large feature from smothering others.  Doing this doesn't matter much in this example,
    // so I'm just doing it here so you can see an easy way to accomplish it with 
    // the library.  
    const sample_type m(mean(vector_to_matrix(samples)));  // compute a mean vector
    const sample_type sd(reciprocal(sqrt(variance(vector_to_matrix(samples))))); // compute a standard deviation vector
    // now normalize each sample
    for (unsigned long i = 0; i < samples.size(); ++i)
        samples[i] = pointwise_multiply(samples[i] - m, sd); 
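
    // (As an aside, dlib also provides a vector_normalizer object that packages up this same
    // mean/standard deviation normalization.  The following commented-out sketch shows roughly
    // how it would be used; it isn't needed for the rest of this example:
    //     vector_normalizer<sample_type> normalizer;
    //     normalizer.train(samples);               // learn the mean and standard deviation
    //     for (unsigned long i = 0; i < samples.size(); ++i)
    //         samples[i] = normalizer(samples[i]); // apply the normalization
    // )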



    // Now that we have some data we want to train on it.  However, there are two parameters to the 
    // training.  These are the nu and gamma parameters.  Our choice for these parameters will 
    // influence how good the resulting decision function is.  To test how good a particular choice 
    // of these parameters is, we can use the cross_validate_trainer() function to perform n-fold cross
    // validation on our training data.  However, there is a problem with the way we have sampled 
    // our distribution above.  The problem is that there is a definite ordering to the samples.  
    // That is, the first half of the samples look like they are from a different distribution 
    // than the second half do.  This would screw up the cross validation process but we can 
    // fix it by randomizing the order of the samples with the following function call.
    randomize_samples(samples, labels);


    // The nu parameter has a maximum value that is dependent on the ratio of the +1 to -1 
    // labels in the training data.  This function finds that value.
    const double max_nu = maximum_nu(labels);

    // here we make an instance of the svm_nu_trainer object that uses our kernel type.
    svm_nu_trainer<kernel_type> trainer;

    // Now we loop over some different nu and gamma values to see how good they are.  Note
    // that this is just a simple brute force way to try out a few possible parameter 
    // choices.  You may want to investigate more sophisticated strategies for determining 
    // good parameter choices.
    cout << "doing cross validation" << endl;
    for (double gamma = 0.00001; gamma <= 1; gamma += 0.1)
    {
        for (double nu = 0.00001; nu < max_nu; nu += 0.1)
        {
            // tell the trainer the parameters we want to use
            trainer.set_kernel(kernel_type(gamma));
            trainer.set_nu(nu);

            cout << "gamma: " << gamma << "    nu: " << nu;
            // Print out the cross validation accuracy for 3-fold cross validation using the current gamma and nu.  
            // cross_validate_trainer() returns a row vector.  The first element of the vector is the fraction
            // of +1 training examples correctly classified and the second number is the fraction of -1 training 
            // examples correctly classified.
            cout << "     cross validation accuracy: " << cross_validate_trainer(trainer, samples, labels, 3);
        }
    }


    // From looking at the output of the above loop it turns out that good values for 
    // nu and gamma for this problem are 0.1 for both.  So that is what we will use.

    // Now we train on the full set of data and obtain the resulting decision function.  We use the
    // value of 0.1 for nu and gamma.  The decision function will return values >= 0 for samples it predicts
    // are in the +1 class and numbers < 0 for samples it predicts to be in the -1 class.
    trainer.set_kernel(kernel_type(0.1));
    trainer.set_nu(0.1);
    decision_function<kernel_type> learned_decision_function = trainer.train(samples, labels);

    // print out the number of support vectors in the resulting decision function
    cout << "\nnumber of support vectors in our learned_decision_function is " 
         << learned_decision_function.support_vectors.nr() << endl;

    // now let's try this decision_function on some samples we haven't seen before 
    sample_type sample;

    sample(0) = 3.123;
    sample(1) = 2;
    // don't forget that we have to normalize each new sample the same way we did for the training samples.
    sample = pointwise_multiply(sample-m, sd);

    cout << "This sample should be >= 0 and it is classified as a " << learned_decision_function(sample) << endl;

    sample(0) = 3.123;
    sample(1) = 9.3545;
    sample = pointwise_multiply(sample-m, sd);
    cout << "This sample should be >= 0 and it is classified as a " << learned_decision_function(sample) << endl;

    sample(0) = 13.123;
    sample(1) = 9.3545;
    sample = pointwise_multiply(sample-m, sd);
    cout << "This sample should be < 0 and it is classified as a " << learned_decision_function(sample) << endl;

    sample(0) = 13.123;
    sample(1) = 0;
    sample = pointwise_multiply(sample-m, sd);
    cout << "This sample should be < 0 and it is classified as a " << learned_decision_function(sample) << endl;


    // We can also train a decision function that reports a well conditioned probability 
    // instead of just a number > 0 for the +1 class and < 0 for the -1 class.  An example 
    // of doing that follows:
    probabilistic_decision_function<kernel_type> learned_probabilistic_decision_function;  
    learned_probabilistic_decision_function = train_probabilistic_decision_function(trainer, samples, labels, 3);
    // Now we have a function that returns the probability that a given sample is of the +1 class.  
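    // (So a natural classification rule is to predict +1 whenever this probability is >= 0.5
    // and -1 otherwise.)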

    // print out the number of support vectors in the resulting decision function.  
    // (it should be the same as in the one above)
    cout << "\nnumber of support vectors in our learned_probabilistic_decision_function is " 
         << learned_probabilistic_decision_function.decision_funct.support_vectors.nr() << endl;

    sample(0) = 3.123;
    sample(1) = 2;
    sample = pointwise_multiply(sample-m, sd);
    cout << "This +1 example should have high probability.  It's probability is: " 
         << learned_probabilistic_decision_function(sample) << endl;

    sample(0) = 3.123;
    sample(1) = 9.3545;
    sample = pointwise_multiply(sample-m, sd);
    cout << "This +1 example should have high probability.  It's probability is: " 
         << learned_probabilistic_decision_function(sample) << endl;

    sample(0) = 13.123;
    sample(1) = 9.3545;
    sample = pointwise_multiply(sample-m, sd);
    cout << "This -1 example should have low probability.  It's probability is: " 
         << learned_probabilistic_decision_function(sample) << endl;

    sample(0) = 13.123;
    sample(1) = 0;
    sample = pointwise_multiply(sample-m, sd);
    cout << "This -1 example should have low probability.  It's probability is: " 
         << learned_probabilistic_decision_function(sample) << endl;




    // Lastly, note that the decision functions we trained above involved well over 100 
    // support vectors.  Support vector machines in general tend to find decision functions
    // that involve a lot of support vectors.  This is significant because the more 
    // support vectors in a decision function, the longer it takes to classify new examples.
    // So dlib provides the ability to find an approximation to the normal output of a
    // support vector machine using fewer support vectors.  

    // Here we determine the cross validation accuracy when we approximate the output
    // using only 10 support vectors.  To do this we use the reduced2() function.  It
    // takes a trainer object and the number of support vectors to use and returns 
    // a new trainer object that applies the necessary post processing during the creation
    // of decision function objects.
    cout << "\ncross validation accuracy with only 10 support vectors: " 
         << cross_validate_trainer(reduced2(trainer,10), samples, labels, 3);

    // Let's print out the original cross validation score too for comparison.
    cout << "cross validation accuracy with all the original support vectors: " 
         << cross_validate_trainer(trainer, samples, labels, 3);

    // When you run this program you should see that, for this problem, you can reduce 
    // the number of support vectors down to 10 without hurting the cross validation
    // accuracy. 


    // To get the reduced decision function out we would just do this:
    learned_decision_function = reduced2(trainer,10).train(samples, labels);
    // And similarly for the probabilistic_decision_function: 
    learned_probabilistic_decision_function = train_probabilistic_decision_function(reduced2(trainer,10), samples, labels, 3);
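
    // (As a quick sanity check, you could print learned_decision_function.support_vectors.nr()
    // again at this point; after the reduced2() training above it should report at most 10
    // support vectors.)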
}