// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
/*
    This is an example illustrating the use of the kcentroid object 
    from the dlib C++ Library.

    The kcentroid object is an implementation of an algorithm that recursively
    computes the centroid (i.e. average) of a set of points.  The interesting
    thing about dlib::kcentroid is that it does so in a kernel induced feature
    space.  This means that you can use it as a non-linear one-class classifier.
    So you might use it to perform online novelty detection (although it has
    other uses; see the svm_pegasos or kkmeans examples).
    
    This example will train an instance of it on points from the sinc function.

*/

#include <iostream>
#include <vector>

#include "dlib/svm.h"
#include "dlib/statistics.h"

using namespace std;
using namespace dlib;

// The (unnormalized) sinc function, sin(x)/x.  The kcentroid object below is
// trained on points sampled from this curve.  The x == 0 case is handled
// explicitly because sin(0)/0 would otherwise evaluate to 0/0.
double sinc(double x)
{
    return (x != 0) ? sin(x)/x : 1;
}

int main()
{
    // Here we declare that our samples will be 2 dimensional column vectors.  
38
39
    // (Note that if you don't know the dimensionality of your vectors at compile time
    // you can change the 2 to a 0 and then set the size at runtime)
Davis King's avatar
Davis King committed
40
41
42
43
44
45
46
    typedef matrix<double,2,1> sample_type;

    // Now we are making a typedef for the kind of kernel we want to use.  I picked the
    // radial basis kernel because it only has one parameter and generally gives good
    // results without much fiddling.
    typedef radial_basis_kernel<sample_type> kernel_type;

47
48
49
50
    // Here we declare an instance of the kcentroid object.  The kcentroid has 3 parameters 
    // you need to set.  The first argument to the constructor is the kernel we wish to 
    // use.  The second is a parameter that determines the numerical accuracy with which 
    // the object will perform the centroid estimation.  Generally, smaller values 
51
    // give better results but cause the algorithm to attempt to use more dictionary vectors 
52
    // (and thus run slower and use more memory).  The third argument, however, is the 
53
    // maximum number of dictionary vectors a kcentroid is allowed to use.  So you can use
54
55
    // it to control the runtime complexity.  
    kcentroid<kernel_type> test(kernel_type(0.1),0.01, 15);
Davis King's avatar
Davis King committed
56

57

Davis King's avatar
Davis King committed
58
59
60
61
62
63
64
65
66
    // now we train our object on a few samples of the sinc function.
    sample_type m;
    for (double x = -15; x <= 8; x += 1)
    {
        m(0) = x;
        m(1) = sinc(x);
        test.train(m);
    }

Davis King's avatar
Davis King committed
67
    running_stats<double> rs;
Davis King's avatar
Davis King committed
68

Davis King's avatar
Davis King committed
69
70
71
72
    // Now lets output the distance from the centroid to some points that are from the sinc function.
    // These numbers should all be similar.  We will also calculate the statistics of these numbers
    // by accumulating them into the running_stats object called rs.  This will let us easily
    // find the mean and standard deviation of the distances for use below.
Davis King's avatar
Davis King committed
73
    cout << "Points that are on the sinc function:\n";
Davis King's avatar
Davis King committed
74
75
76
77
78
79
80
81
82
83
84
85
86
87
    m(0) = -1.5; m(1) = sinc(m(0)); cout << "   " << test(m) << endl;  rs.add(test(m));
    m(0) = -1.5; m(1) = sinc(m(0)); cout << "   " << test(m) << endl;  rs.add(test(m));
    m(0) = -0;   m(1) = sinc(m(0)); cout << "   " << test(m) << endl;  rs.add(test(m));
    m(0) = -0.5; m(1) = sinc(m(0)); cout << "   " << test(m) << endl;  rs.add(test(m));
    m(0) = -4.1; m(1) = sinc(m(0)); cout << "   " << test(m) << endl;  rs.add(test(m));
    m(0) = -1.5; m(1) = sinc(m(0)); cout << "   " << test(m) << endl;  rs.add(test(m));
    m(0) = -0.5; m(1) = sinc(m(0)); cout << "   " << test(m) << endl;  rs.add(test(m));

    cout << endl;
    // Lets output the distance from the centroid to some points that are NOT from the sinc function.
    // These numbers should all be significantly bigger than previous set of numbers.  We will also
    // use the rs.scale() function to find out how many standard deviations they are away from the 
    // mean of the test points from the sinc function.  So in this case our criterion for "significantly bigger"
    // is > 3 or 4 standard deviations away from the above points that actually are on the sinc function.
Davis King's avatar
Davis King committed
88
    cout << "Points that are NOT on the sinc function:\n";
89
90
91
92
93
94
95
    m(0) = -1.5; m(1) = sinc(m(0))+4;   cout << "   " << test(m) << " is " << rs.scale(test(m)) << " standard deviations from sinc." << endl;
    m(0) = -1.5; m(1) = sinc(m(0))+3;   cout << "   " << test(m) << " is " << rs.scale(test(m)) << " standard deviations from sinc." << endl;
    m(0) = -0;   m(1) = -sinc(m(0));    cout << "   " << test(m) << " is " << rs.scale(test(m)) << " standard deviations from sinc." << endl;
    m(0) = -0.5; m(1) = -sinc(m(0));    cout << "   " << test(m) << " is " << rs.scale(test(m)) << " standard deviations from sinc." << endl;
    m(0) = -4.1; m(1) = sinc(m(0))+2;   cout << "   " << test(m) << " is " << rs.scale(test(m)) << " standard deviations from sinc." << endl;
    m(0) = -1.5; m(1) = sinc(m(0))+0.9; cout << "   " << test(m) << " is " << rs.scale(test(m)) << " standard deviations from sinc." << endl;
    m(0) = -0.5; m(1) = sinc(m(0))+1;   cout << "   " << test(m) << " is " << rs.scale(test(m)) << " standard deviations from sinc." << endl;
Davis King's avatar
Davis King committed
96

97
    // And finally print out the mean and standard deviation of points that are actually from sinc().  
98
    cout << "\nmean: " << rs.mean() << endl;
99
    cout << "standard deviation: " << rs.stddev() << endl;
100

Davis King's avatar
Davis King committed
101
102
103
    // The output is as follows:
    /*
        Points that are on the sinc function:
104
105
106
107
108
109
110
            0.869913
            0.869913
            0.873408
            0.872807
            0.870432
            0.869913
            0.872807
Davis King's avatar
Davis King committed
111
112

        Points that are NOT on the sinc function:
113
114
115
116
117
118
119
120
121
122
            1.06366 is 119.65 standard deviations from sinc.
            1.02212 is 93.8106 standard deviations from sinc.
            0.921382 is 31.1458 standard deviations from sinc.
            0.918439 is 29.3147 standard deviations from sinc.
            0.931428 is 37.3949 standard deviations from sinc.
            0.898018 is 16.6121 standard deviations from sinc.
            0.914425 is 26.8183 standard deviations from sinc.

            mean: 0.871313
            standard deviation: 0.00160756
Davis King's avatar
Davis King committed
123
124
125
126
    */

    // So we can see that in this example the kcentroid object correctly indicates that 
    // the non-sinc points are definitely not points from the sinc function.
Davis King's avatar
Davis King committed
127
128
129
}