// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
/*

    This is an example illustrating the use of the feature ranking
    tools from the dlib C++ Library.

    This example creates a simple set of data and then shows you how
    to use feature ranking to find a good set of features (where
    "good" means the feature set will probably work well with a
    classification algorithm).


    The data used in this example will be 4 dimensional data and will
    come from a distribution where points with a distance of 15 or less
    from the origin are labeled +1 and all other points are labeled
    as -1.  Note that this data is conceptually 2 dimensional but we
    will add two extra features for the purpose of showing what
    feature ranking does.
*/


#include <iostream>
#include "dlib/svm.h"
#include "dlib/rand.h"
#include <vector>

using namespace std;
using namespace dlib;


int main()
{

    // This first typedef declares a matrix with 4 rows and 1 column.  It will be the
    // object that contains each of our 4 dimensional samples.  
    typedef matrix<double, 4, 1> sample_type;



    // Now let's make some vector objects that can hold our samples 
    std::vector<sample_type> samples;
    std::vector<double> labels;

    dlib::rand::float_1a rnd;
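    // (Note: dlib::rand::float_1a is the older kernel-style name for dlib's random
    // number generator; newer dlib releases spell this type simply dlib::rand.)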

    for (int x = -30; x <= 30; ++x)
    {
        for (int y = -30; y <= 30; ++y)
        {
            sample_type samp;

            // the first two features are just the (x,y) position of our points and so
            // we expect them to be good features since our two classes here are points
            // close to the origin and points far away from the origin.
            samp(0) = x;
            samp(1) = y;

            // This is a worthless feature since it is just random noise.  It should
            // be indicated as worthless by the feature ranking below.
            samp(2) = rnd.get_random_double();
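            // (get_random_double() returns a value uniformly distributed in [0,1))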

            // This is a version of the y feature that is corrupted by random noise.  It
            // should be ranked as less useful than features 0 and 1, but more useful
            // than the above feature.
            samp(3) = y*0.2 + (rnd.get_random_double()-0.5)*10;
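            // (Note that y*0.2 ranges over [-6,6] while the noise term ranges over
            // [-5,5), so the noise is comparable in magnitude to the remaining signal.)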

            // add this sample into our vector of samples.
            samples.push_back(samp);

            // if this point is within a distance of 15 from the origin then label it as a +1 class point.  
            // otherwise it is a -1 class point
            if (sqrt((double)(x*x + y*y)) <= 15)
                labels.push_back(+1);
            else
                labels.push_back(-1);
        }
    }
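
    // At this point we have 61*61 = 3721 labeled samples sitting in our two vectors.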


    // Here we normalize all the samples by subtracting their mean and dividing by their standard deviation.
    // This is generally a good idea since it often heads off numerical stability problems and also 
    // prevents one large feature from smothering others.
    const sample_type m(mean(vector_to_matrix(samples)));  // compute a mean vector
    const sample_type sd(reciprocal(sqrt(variance(vector_to_matrix(samples))))); // compute the reciprocal of the standard deviation vector
    // now normalize each sample
    for (unsigned long i = 0; i < samples.size(); ++i)
        samples[i] = pointwise_multiply(samples[i] - m, sd); 
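
    // As an aside, dlib also wraps this same mean/standard-deviation normalization in a
    // vector_normalizer object (assuming your dlib version includes it).  A minimal
    // sketch of the equivalent usage, instead of the three lines above, would be:
    //
    //    vector_normalizer<sample_type> normalizer;
    //    normalizer.train(samples);                // learn the mean and standard deviation
    //    for (unsigned long i = 0; i < samples.size(); ++i)
    //        samples[i] = normalizer(samples[i]);  // apply the normalization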


    // Here we also randomize the order of the samples.  This is often a good idea since
    // many machine learning tools behave poorly when the samples arrive in a highly
    // ordered way (ours were generated by sweeping over a grid).  In our case it doesn't
    // change the ranking result.  It's just here to show you how to do it.
    randomize_samples(samples,labels);



    // Finally we get to the feature ranking. Here we call verbose_rank_features_rbf() with
    // the samples and labels we made above.  The 20 is a measure of how much memory and CPU
    // resources the algorithm should use.  Generally bigger values give better results but 
    // take longer to run.
    cout << verbose_rank_features_rbf(samples, labels, 20) << endl;

    // The output is:
    /*
        0 0.810087 
        1        1 
        3 0.873991 
        2 0.668913 
    */

    // The first column is a list of the features in order of decreasing goodness.  So the feature ranking function
    // is telling us that the samples[i](0) and samples[i](1) (i.e. the x and y) features are the best two.  Then
    // after that the next best feature is the samples[i](3) (i.e. the y corrupted by noise) and finally the worst
    // feature is the one that is just random noise.  So in this case the feature ranking did exactly what we would
    // intuitively expect.


    // The second column of the matrix is a number that indicates how much the features up to that point
    // contribute to the separation of the two classes.  So bigger numbers are better since they
    // indicate a larger separation.

    // So to break it down a little more.
    //    0 0.810087   <-- class separation of feature 0 all by itself
    //    1        1   <-- class separation of features 0 and 1
    //    3 0.873991   <-- class separation of features 0, 1, and 3
    //    2 0.668913   <-- class separation of features 0, 1, 3, and 2
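
    // Having obtained the ranking, you might want to act on it by keeping only the top
    // ranked features.  The code below is a minimal sketch of one way to do that: it
    // copies the two best features (indices 0 and 1, per the output above) into new,
    // lower dimensional samples.  (reduced_sample_type is just a name made up for this
    // illustration.)
    typedef matrix<double, 2, 1> reduced_sample_type;
    std::vector<reduced_sample_type> reduced_samples;
    for (unsigned long i = 0; i < samples.size(); ++i)
    {
        reduced_sample_type rs;
        rs(0) = samples[i](0);  // keep the x feature
        rs(1) = samples[i](1);  // keep the y feature
        reduced_samples.push_back(rs);
    }
    // reduced_samples, together with the original labels, could now be fed to a
    // classifier such as the dlib svm tools.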

}