sequence_labeler_ex.cpp 9.68 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
/*

    This is an example illustrating the use of the sequence labeling
    utilities from the dlib C++ Library.  

    This example creates a simple set of data to train on by randomly
    sampling sequences from a Hidden Markov Model (HMM).  It then shows
    you how to use the structural_sequence_labeling_trainer to learn a
    sequence_labeler that predicts the hidden states of a sequence from
    its observed states, how to cross-validate the trainer, and how to
    save the learned labeler to disk.


    The data used in this example comes from an HMM with 3 hidden states
    and 3 observed states.  Each training sample is a sequence of observed
    states and its label is the sequence of hidden states that generated
    it.
        
*/


#include <fstream>
#include <iostream>
#include <vector>

#include "dlib/svm_threaded.h"
#include "dlib/rand.h"

using namespace std;
using namespace dlib;


// Number of distinct hidden states (i.e. possible label values).  Used as the
// size of the transition matrix and the row count of the emission matrix.
const unsigned long num_label_states = 3; // the "hidden" states
// Number of distinct observed values a sequence element can take.  Used as the
// column count of the emission matrix.
const unsigned long num_sample_states = 3;

// ----------------------------------------------------------------------------------------

/*
    Feature extractor handed to dlib's sequence labeling tools.  It describes
    a sequence labeling model with two groups of indicator features:
        - one feature per (previous label, current label) pair, occupying
          indices [0, num_label_states*num_label_states)
        - one feature per (current label, observed value) pair, occupying the
          num_label_states*num_sample_states indices that follow.

    NOTE(review): the meaning of order(), num_labels() and of y(0)/y(1) is
    taken from dlib's feature extractor interface for sequence_labeler --
    confirm against the dlib documentation.
*/
class feature_extractor
{
public:
    // Each element of an input sequence is a single observed state index.
    typedef unsigned long sample_type; 

    // Total dimensionality of the feature vector (transition features
    // followed by emission features).
    unsigned long num_features() const
    {
        const unsigned long num_transition_features = num_label_states*num_label_states;
        const unsigned long num_emission_features   = num_label_states*num_sample_states;
        return num_transition_features + num_emission_features;
    }

    // Features look at the current label and at most one previous label.
    unsigned long order() const 
    { 
        return 1; 
    }

    // Number of distinct label values that can be assigned.
    unsigned long num_labels() const 
    { 
        return num_label_states; 
    }

    /*
        Reports, via set_feature(index), which features are active at the
        given position of the sequence x.
            - y(0) is used as the label at position and, when present, y(1)
              as the label that came before it.
            - y.size() <= 1 means there is no previous label (start of the
              sequence), so no transition feature is emitted.
    */
    template <typename feature_setter, typename EXP>
    void get_features (
        feature_setter& set_feature,
        const std::vector<sample_type>& x,
        const matrix_exp<EXP>& y,
        unsigned long position
    ) const
    {
        // The emission features start right after the transition features.
        const unsigned long emission_offset = num_label_states*num_label_states;

        // Transition feature: indexed row-major by (previous label, current label).
        if (y.size() > 1)
        {
            set_feature(y(1)*num_label_states + y(0));
        }

        // Emission feature: indexed row-major by (current label, observed value).
        set_feature(emission_offset + y(0)*num_sample_states + x[position]);
    }
};

Davis King's avatar
Davis King committed
70
71
72
73
74
75
76
// feature_extractor has no data members, so there is no state to save or
// restore and both functions are intentionally empty.
// NOTE(review): presumably these overloads are what allow a
// sequence_labeler<feature_extractor> to be serialized at the end of main() --
// confirm against the dlib documentation.
void serialize(const feature_extractor&, std::ostream&) {}
void deserialize(feature_extractor&, std::istream&) {}

// ----------------------------------------------------------------------------------------

void make_dataset (
    const matrix<double>& transition_probabilities,
    const matrix<double>& emission_probabilities,
    std::vector<std::vector<unsigned long> >& samples,
    std::vector<std::vector<unsigned long> >& labels,
    unsigned long dataset_size
);
/*!
    requires
        - transition_probabilities.nr() == transition_probabilities.nc()
        - transition_probabilities.nr() == emission_probabilities.nr()
        - The rows of transition_probabilities and emission_probabilities must sum to 1.
          (i.e. sum_cols(transition_probabilities) and sum_cols(emission_probabilities)
          must evaluate to vectors of all 1s.)
    ensures
        - This function randomly samples a bunch of sequences from the HMM defined by 
          transition_probabilities and emission_probabilities. 
        - The HMM is defined by:
            - The probability of transitioning from hidden state H1 to H2 
              is given by transition_probabilities(H1,H2).
            - The probability of a hidden state H producing an observed state
              O is given by emission_probabilities(H,O).
        - #samples.size() == #labels.size() == dataset_size
        - for all valid i:
            - #labels[i] is a randomly sampled sequence of hidden states from the
              given HMM.  #samples[i] is its corresponding randomly sampled sequence
              of observed states.
!*/

104
105
// ----------------------------------------------------------------------------------------

Davis King's avatar
Davis King committed
106
107
int main()
{
Davis King's avatar
Davis King committed
108
109
110
111
    matrix<double> transition_probabilities(num_label_states, num_label_states);
    transition_probabilities = 0.05, 0.90, 0.05,
                               0.05, 0.05, 0.90,
                               0.90, 0.05, 0.05;
Davis King's avatar
Davis King committed
112
113
114
115
116
117
118
119
120
121
122
123

    // set this up so emission_probabilities(L,X) == The probability of a state with label L 
    // emitting an X.
    matrix<double> emission_probabilities(num_label_states,num_sample_states);
    emission_probabilities = 0.5, 0.5, 0.0,
                             0.0, 0.5, 0.5,
                             0.5, 0.0, 0.5;



    std::vector<std::vector<unsigned long> > samples;
    std::vector<std::vector<unsigned long> > labels;
Davis King's avatar
Davis King committed
124
    make_dataset(transition_probabilities,emission_probabilities, 
Davis King's avatar
Davis King committed
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
                 samples, labels, 1000);

    cout << "samples.size(): "<< samples.size() << endl;

    // print out some of the randomly sampled sequences
    for (int i = 0; i < 10; ++i)
    {
        cout << "hidden states:   " << trans(vector_to_matrix(labels[i]));
        cout << "observed states: " << trans(vector_to_matrix(samples[i]));
        cout << "******************************" << endl;
    }

    structural_sequence_labeling_trainer<feature_extractor> trainer;
    trainer.set_c(4);
    trainer.set_num_threads(4);



    // Learn to do sequence labeling from the dataset
    sequence_labeler<feature_extractor> labeler = trainer.train(samples, labels);
Davis King's avatar
Davis King committed
145
146
147
148
149
150

    std::vector<unsigned long> predicted_labels = labeler(samples[0]);
    cout << "true hidden states:      "<< trans(vector_to_matrix(labels[0]));
    cout << "predicted hidden states: "<< trans(vector_to_matrix(predicted_labels));


Davis King's avatar
Davis King committed
151
152
153


    // We can also do cross-validation 
Davis King's avatar
Davis King committed
154
    matrix<double> confusion_matrix;
Davis King's avatar
Davis King committed
155
156
157
158
159
160
161
162
163
164
    confusion_matrix = cross_validate_sequence_labeler(trainer, samples, labels, 4);
    cout << "\ncross-validation: " << endl;
    cout << confusion_matrix;
    cout << "label accuracy: "<< sum(diag(confusion_matrix))/sum(confusion_matrix) << endl;



    matrix<double,0,1> true_hmm_model_weights = log(join_cols(reshape_to_column_vector(transition_probabilities),
                                                              reshape_to_column_vector(emission_probabilities)));

165
    sequence_labeler<feature_extractor> labeler_true(true_hmm_model_weights); 
Davis King's avatar
Davis King committed
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192

    confusion_matrix = test_sequence_labeler(labeler_true, samples, labels);
    cout << "\nTrue HMM model: " << endl;
    cout << confusion_matrix;
    cout << "label accuracy: "<< sum(diag(confusion_matrix))/sum(confusion_matrix) << endl;







    // Finally, the labeler can be serialized to disk just like most dlib objects.
    ofstream fout("labeler.dat", ios::binary);
    serialize(labeler, fout);
    fout.close();

    // recall from disk
    ifstream fin("labeler.dat", ios::binary);
    deserialize(labeler, fin);
}

// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
//              Code for creating a bunch of random samples from our HMM.
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
193
194
195
196
197
198
199
200
201

void sample_hmm (
    dlib::rand& rnd,
    const matrix<double>& transition_probabilities,
    const matrix<double>& emission_probabilities,
    unsigned long previous_label,
    unsigned long& next_label,
    unsigned long& next_sample
)
Davis King's avatar
Davis King committed
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
/*!
    requires
        - previous_label < transition_probabilities.nr()
        - transition_probabilities.nr() == transition_probabilities.nc()
        - transition_probabilities.nr() == emission_probabilities.nr()
        - The rows of transition_probabilities and emission_probabilities must sum to 1.
          (i.e. sum_cols(transition_probabilities) and sum_cols(emission_probabilities)
          must evaluate to vectors of all 1s.)
    ensures
        - This function randomly samples the HMM defined by transition_probabilities
          and emission_probabilities assuming that the previous hidden state
          was previous_label. 
        - The HMM is defined by:
            - P(next_label |previous_label) == transition_probabilities(previous_label, next_label)
            - P(next_sample|next_label)     == emission_probabilities  (next_label,     next_sample)
        - #next_label == the sampled value of the hidden state
        - #next_sample == the sampled value of the observed state
!*/
220
{
Davis King's avatar
Davis King committed
221
    // sample next_label
222
223
224
225
226
227
228
    double p = rnd.get_random_double();
    for (long c = 0; p >= 0 && c < transition_probabilities.nc(); ++c)
    {
        next_label = c;
        p -= transition_probabilities(previous_label, c);
    }

Davis King's avatar
Davis King committed
229
    // now sample next_sample
230
231
232
233
234
235
236
237
238
239
240
241
    p = rnd.get_random_double();
    for (long c = 0; p >= 0 && c < emission_probabilities.nc(); ++c)
    {
        next_sample = c;
        p -= emission_probabilities(next_label, c);
    }
}

// ----------------------------------------------------------------------------------------

// Fills samples and labels with dataset_size sequences randomly drawn from the
// HMM described by transition_probabilities and emission_probabilities.  Any
// previous contents of samples and labels are discarded.  Each sequence has
// between 3 and 22 elements.  Requires transition_probabilities.nr() > 0 when
// dataset_size > 0.
void make_dataset (
    const matrix<double>& transition_probabilities,
    const matrix<double>& emission_probabilities,
    std::vector<std::vector<unsigned long> >& samples,
    std::vector<std::vector<unsigned long> >& labels,
    unsigned long dataset_size
)
{
    samples.clear();
    labels.clear();
    samples.reserve(dataset_size);
    labels.reserve(dataset_size);

    // Take the number of hidden states from the model we were given rather
    // than from a global constant, so this function works for any HMM that
    // satisfies its contract.
    const unsigned long num_hidden_states = 
        static_cast<unsigned long>(transition_probabilities.nr());

    dlib::rand rnd;

    // now randomly sample some labeled sequences from our Hidden Markov Model
    for (unsigned long iter = 0; iter < dataset_size; ++iter)
    {
        const unsigned long sequence_size = rnd.get_random_32bit_number()%20+3;
        std::vector<unsigned long> sample(sequence_size);
        std::vector<unsigned long> label(sequence_size);

        // Pick the hidden state that "precedes" the sequence uniformly at
        // random.  It only seeds the chain and is not stored in label.
        unsigned long previous_label = rnd.get_random_32bit_number()%num_hidden_states;
        for (unsigned long i = 0; i < sample.size(); ++i)
        {
            unsigned long next_label = 0;
            unsigned long next_sample = 0;
            sample_hmm(rnd, transition_probabilities, emission_probabilities, 
                       previous_label, next_label, next_sample);

            label[i] = next_label;
            sample[i] = next_sample;

            previous_label = next_label;
        }

        samples.push_back(sample);
        labels.push_back(label);
    }
}

// ----------------------------------------------------------------------------------------