"torchvision/vscode:/vscode.git/clone" did not exist on "cb4413a3be28fc07ca749f2cef30aaff1439e582"
kcentroid_ex.cpp 6.51 KB
Newer Older
Davis King's avatar
Davis King committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
/*
    This is an example illustrating the use of the kcentroid object 
    from the dlib C++ Library.

    The kcentroid object is an implementation of an algorithm that recursively
    computes the centroid (i.e. average) of a set of points.  The interesting
    thing about dlib::kcentroid is that it does so in a kernel induced feature
    space.  This means that you can use it as a non-linear one-class classifier.
    So you might use it to perform online novelty detection.  
    
    This example will train an instance of it on points from the sinc function.

*/

#include <iostream>
#include <vector>

#include "dlib/svm.h"
Davis King's avatar
Davis King committed
19
#include "dlib/statistics.h"
Davis King's avatar
Davis King committed
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48

using namespace std;
using namespace dlib;

// The sinc function, sin(x)/x.  Points taken from its graph are what the
// kcentroid object in main() is trained and tested on.  At x == 0 the
// quotient is undefined, so the limit value 1 is returned there instead.
double sinc(double x)
{
    return (x != 0) ? sin(x)/x : 1;
}

int main()
{
    // Here we declare that our samples will be 2 dimensional column vectors.  
    typedef matrix<double,2,1> sample_type;

    // Now we are making a typedef for the kind of kernel we want to use.  I picked the
    // radial basis kernel because it only has one parameter and generally gives good
    // results without much fiddling.
    typedef radial_basis_kernel<sample_type> kernel_type;

    // Here we declare an instance of the kcentroid object.  The first argument to the constructor
    // is the kernel we wish to use.  The second is a parameter that determines the numerical 
    // accuracy with which the object will perform part of the learning algorithm.  Generally
    // smaller values give better results but cause the algorithm to run slower.  You just have
    // to play with it to decide what balance of speed and accuracy is right for your problem.
    // Here we have set it to 0.01.
49
50
51
52
53
54
55
56
57
58
59
60
61
62
    //
    // Also, since we are using the radial basis kernel we have to pick the RBF width parameter.
    // Here we have it set to 0.1.  But in general, a reasonable way of picking this value is
    // to start with some initial guess and to just run the algorithm.  Then print out 
    // test.dictionary_size() to see how many support vectors the kcentroid object is using.
    // And a good rule of thumb is that you should have somewhere in the range of 10-100 
    // support vectors.  So if you aren't in that range then you can change the RBF parameter.
    // Making it smaller will decrease the dictionary size and making it bigger will increase
    // the dictionary size.   
    //
    // So what I often do is I set the kcentroid's second parameter to 0.01 or 0.001.  Then
    // I find an RBF kernel parameter that gives me the number of support vectors that I 
    // feel is appropriate for the problem I'm trying to solve.  Again, this just comes down
    // to playing with it and getting a feel for how things work.  
Davis King's avatar
Davis King committed
63
64
    kcentroid<kernel_type> test(kernel_type(0.1),0.01);

65

Davis King's avatar
Davis King committed
66
67
68
69
70
71
72
73
74
    // now we train our object on a few samples of the sinc function.
    sample_type m;
    for (double x = -15; x <= 8; x += 1)
    {
        m(0) = x;
        m(1) = sinc(x);
        test.train(m);
    }

Davis King's avatar
Davis King committed
75
    running_stats<double> rs;
Davis King's avatar
Davis King committed
76

Davis King's avatar
Davis King committed
77
78
79
80
    // Now lets output the distance from the centroid to some points that are from the sinc function.
    // These numbers should all be similar.  We will also calculate the statistics of these numbers
    // by accumulating them into the running_stats object called rs.  This will let us easily
    // find the mean and standard deviation of the distances for use below.
Davis King's avatar
Davis King committed
81
    cout << "Points that are on the sinc function:\n";
Davis King's avatar
Davis King committed
82
83
84
85
86
87
88
89
90
91
92
93
94
95
    m(0) = -1.5; m(1) = sinc(m(0)); cout << "   " << test(m) << endl;  rs.add(test(m));
    m(0) = -1.5; m(1) = sinc(m(0)); cout << "   " << test(m) << endl;  rs.add(test(m));
    m(0) = -0;   m(1) = sinc(m(0)); cout << "   " << test(m) << endl;  rs.add(test(m));
    m(0) = -0.5; m(1) = sinc(m(0)); cout << "   " << test(m) << endl;  rs.add(test(m));
    m(0) = -4.1; m(1) = sinc(m(0)); cout << "   " << test(m) << endl;  rs.add(test(m));
    m(0) = -1.5; m(1) = sinc(m(0)); cout << "   " << test(m) << endl;  rs.add(test(m));
    m(0) = -0.5; m(1) = sinc(m(0)); cout << "   " << test(m) << endl;  rs.add(test(m));

    cout << endl;
    // Lets output the distance from the centroid to some points that are NOT from the sinc function.
    // These numbers should all be significantly bigger than previous set of numbers.  We will also
    // use the rs.scale() function to find out how many standard deviations they are away from the 
    // mean of the test points from the sinc function.  So in this case our criterion for "significantly bigger"
    // is > 3 or 4 standard deviations away from the above points that actually are on the sinc function.
Davis King's avatar
Davis King committed
96
    cout << "Points that are NOT on the sinc function:\n";
97
98
99
100
101
102
103
    m(0) = -1.5; m(1) = sinc(m(0))+4;   cout << "   " << test(m) << " is " << rs.scale(test(m)) << " standard deviations from sinc." << endl;
    m(0) = -1.5; m(1) = sinc(m(0))+3;   cout << "   " << test(m) << " is " << rs.scale(test(m)) << " standard deviations from sinc." << endl;
    m(0) = -0;   m(1) = -sinc(m(0));    cout << "   " << test(m) << " is " << rs.scale(test(m)) << " standard deviations from sinc." << endl;
    m(0) = -0.5; m(1) = -sinc(m(0));    cout << "   " << test(m) << " is " << rs.scale(test(m)) << " standard deviations from sinc." << endl;
    m(0) = -4.1; m(1) = sinc(m(0))+2;   cout << "   " << test(m) << " is " << rs.scale(test(m)) << " standard deviations from sinc." << endl;
    m(0) = -1.5; m(1) = sinc(m(0))+0.9; cout << "   " << test(m) << " is " << rs.scale(test(m)) << " standard deviations from sinc." << endl;
    m(0) = -0.5; m(1) = sinc(m(0))+1;   cout << "   " << test(m) << " is " << rs.scale(test(m)) << " standard deviations from sinc." << endl;
Davis King's avatar
Davis King committed
104
105
106
107
108
109
110
111
112
113
114
115
116

    // The output is as follows:
    /*
        Points that are on the sinc function:
            0.869861
            0.869861
            0.873182
            0.872628
            0.870352
            0.869861
            0.872628

        Points that are NOT on the sinc function:
117
118
119
120
121
122
123
            1.06306 is 125.137 standard deviations from sinc.
            1.0215 is 98.0313 standard deviations from sinc.
            0.92136 is 32.717 standard deviations from sinc.
            0.918282 is 30.7096 standard deviations from sinc.
            0.930931 is 38.9595 standard deviations from sinc.
            0.897916 is 17.4264 standard deviations from sinc.
            0.913855 is 27.822 standard deviations from sinc.
Davis King's avatar
Davis King committed
124
125
126
127
    */

    // So we can see that in this example the kcentroid object correctly indicates that 
    // the non-sinc points are definitely not points from the sinc function.
Davis King's avatar
Davis King committed
128
129
130
}