// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt /* This is an example illustrating the use of the feature ranking tools from the dlib C++ Library. This example creates a simple set of data and then shows you how to use feature ranking to find a good set of features (where "good" means the feature set will probably work well with a classification algorithm). The data used in this example will be 4 dimensional data and will come from a distribution where points with a distance less than 10 from the origin are labeled +1 and all other points are labeled as -1. Note that this data is conceptually 2 dimensional but we will add two extra features for the purpose of showing what feature ranking does. */ #include #include "dlib/svm.h" #include "dlib/rand.h" #include using namespace std; using namespace dlib; int main() { // This first typedef declares a matrix with 4 rows and 1 column. It will be the // object that contains each of our 4 dimensional samples. typedef matrix sample_type; // Now lets make some vector objects that can hold our samples std::vector samples; std::vector labels; dlib::rand::float_1a rnd; for (int x = -30; x <= 30; ++x) { for (int y = -30; y <= 30; ++y) { sample_type samp; // the first two features are just the (x,y) position of our points and so // we expect them to be good features since our two classes here are points // close to the origin and points far away from the origin. samp(0) = x; samp(1) = y; // This is a worthless feature since it is just random noise. It should // be indicated as worthless by the feature ranking below. samp(2) = rnd.get_random_double(); // This is a version of the y feature that is corrupted by random noise. It // should be ranked as less useful than features 0, and 1, but more useful // than the above feature. samp(3) = y*0.2 + (rnd.get_random_double()-0.5)*10; // add this sample into our vector of samples. samples.push_back(samp); // if this point is less than 15 from the origin then label it as a +1 class point. // otherwise it is a -1 class point if (sqrt((double)x*x + y*y) <= 15) labels.push_back(+1); else labels.push_back(-1); } } // Here we normalize all the samples by subtracting their mean and dividing by their standard deviation. // This is generally a good idea since it often heads off numerical stability problems and also // prevents one large feature from smothering others. const sample_type m(mean(vector_to_matrix(samples))); // compute a mean vector const sample_type sd(reciprocal(sqrt(variance(vector_to_matrix(samples))))); // compute a standard deviation vector // now normalize each sample for (unsigned long i = 0; i < samples.size(); ++i) samples[i] = pointwise_multiply(samples[i] - m, sd); // This is another thing that is often good to do from a numerical stability point of view. // However, in our case it doesn't matter. It's just here to show you how to do it. randomize_samples(samples,labels); // Finally we get to the feature ranking. Here we call verbose_rank_features_rbf() with // the samples and labels we made above. The 20 is a measure of how much memory and CPU // resources the algorithm should use. Generally bigger values give better results but // take longer to run. cout << verbose_rank_features_rbf(samples, labels, 20) << endl; // The output is: /* 0 0.810087 1 1 3 0.873991 2 0.668913 */ // The first column is a list of the features in order of decreasing goodness. So the feature ranking function // is telling us that the samples[i](0) and samples[i](1) (i.e. the x and y) features are the best two. Then // after that the next best feature is the samples[i](3) (i.e. the y corrupted by noise) and finally the worst // feature is the one that is just random noise. So in this case the feature ranking did exactly what we would // intuitively expect. // The second column of the matrix is a number that indicates how much the features up to that point // contribute to the separation of the two classes. So bigger numbers are better since they // indicate a larger separation. // So to break it down a little more. // 1 0.810087 <-- class separation of feature 1 all by itself // 0 1 <-- class separation of feature 1 and 0 // 3 0.873991 <-- class separation of feature 1, 0, and 3 // 2 0.668913 <-- class separation of feature 1, 0, 3, and 2 }