sequence_labeler_abstract.h 13.4 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
// Copyright (C) 2011  Davis E. King (davis@dlib.net)
// License: Boost Software License   See LICENSE.txt for the full license.
#undef DLIB_SEQUENCE_LAbELER_ABSTRACT_H___
#ifdef DLIB_SEQUENCE_LAbELER_ABSTRACT_H___

#include "../matrix.h"
#include <vector>
#include "../optimization/find_max_factor_graph_viterbi_abstract.h"

namespace dlib
{

13
14
15
16
17
18
// ----------------------------------------------------------------------------------------

    class example_feature_extractor
    {
        /*!
            WHAT THIS OBJECT REPRESENTS
Davis King's avatar
Davis King committed
19
20
21
22
23
24
25
                This object defines the interface a feature extractor must implement
                if it is to be used with the sequence_labeler defined at the bottom
                of this file.  
                
                The model used by sequence_labeler objects is the following.  
                Given an input sequence x, predict an output label sequence y
                such that:
Davis King's avatar
Davis King committed
26
                    y == argmax_Y dot(w, PSI(x,Y))
Davis King's avatar
Davis King committed
27
28
29
30
31
32
                    Where w is a parameter vector.

                Therefore, a feature extractor defines how the PSI(x,y) feature vector 
                is calculated.  It also defines how many output labels there are as 
                well as the order of the model.  

Davis King's avatar
Davis King committed
33
34
35
36
                Finally, note that PSI(x,y) is a sum of feature vectors, each derived 
                from the entire input sequence x but only part of the label sequence y.
                Each of these constituent feature vectors is defined by the get_features() 
                method of this class.
Davis King's avatar
Davis King committed
37
38
39
40
41

            THREAD SAFETY
                Instances of this object should be thread safe, that is, it should 
                be safe for multiple threads to make concurrent calls to the member 
                functions of this object.
42
43
44
        !*/

    public:
45
46
47
        // This should be the type used to represent an input sequence.  It can be
        // anything so long as it has a .size() which returns the length of the sequence.
        typedef the_type_used_to_represent_a_sequence sequence_type;
48
49
50

        example_feature_extractor (
        ); 
Davis King's avatar
Davis King committed
51
52
53
54
        /*!
            ensures
                - this object is properly initialized
        !*/
55
56
57

        unsigned long num_features (
        ) const;
Davis King's avatar
Davis King committed
58
59
60
61
        /*!
            ensures
                - returns the dimensionality of the PSI() feature vector.  
        !*/
62
63
64

        unsigned long order(
        ) const; 
Davis King's avatar
Davis King committed
65
66
67
68
69
70
71
72
73
74
        /*!
            ensures
                - This object represents a Markov model on the output labels.
                  This parameter defines the order of the model.  That is, this 
                  value controls how many previous label values get to be taken 
                  into consideration when performing feature extraction for a
                  particular element of the input sequence.  Note that the runtime
                  of the algorithm is exponential in the order.  So don't make order
                  very large.
        !*/
75
76
77

        unsigned long num_labels(
        ) const; 
Davis King's avatar
Davis King committed
78
79
80
81
        /*!
            ensures
                - returns the number of possible output labels.
        !*/
82
83
84

        template <typename EXP>
        bool reject_labeling (
85
            const sequence_type& x,
Davis King's avatar
Davis King committed
86
            const matrix_exp<EXP>& y,
87
88
89
90
91
            unsigned long position
        ) const;
        /*!
            requires
                - EXP::type == unsigned long
Davis King's avatar
Davis King committed
92
93
                  (i.e. y contains unsigned longs)
                - position < x.size()
Davis King's avatar
Davis King committed
94
                - y.size() == min(position, order()) + 1
Davis King's avatar
Davis King committed
95
96
                - is_vector(y) == true
                - max(y) < num_labels() 
97
            ensures
Davis King's avatar
Davis King committed
98
99
100
                - for all valid i:
                    - interprets y(i) as the label corresponding to x[position-i]
                - if (the labeling in y for x[position] is always the wrong labeling) then
101
                    - returns true
Davis King's avatar
Davis King committed
102
103
                      (note that reject_labeling() is just an optional tool to allow you 
                      to overrule the normal labeling algorithm.  You don't have to use
104
105
106
                      it.  So if you don't include a reject_labeling() method in your
                      feature extractor it is the same as including one that always
                      returns false.)
107
108
109
110
111
112
113
                - else
                    - returns false
        !*/

        template <typename feature_setter, typename EXP>
        void get_features (
            feature_setter& set_feature,
114
            const sequence_type& x,
Davis King's avatar
Davis King committed
115
            const matrix_exp<EXP>& y,
116
117
118
119
120
            unsigned long position
        ) const;
        /*!
            requires
                - EXP::type == unsigned long
Davis King's avatar
Davis King committed
121
                  (i.e. y contains unsigned longs)
Davis King's avatar
Davis King committed
122
                - reject_labeling(x,y,position) == false
Davis King's avatar
Davis King committed
123
                - position < x.size()
Davis King's avatar
Davis King committed
124
                - y.size() == min(position, order()) + 1
Davis King's avatar
Davis King committed
125
126
                - is_vector(y) == true
                - max(y) < num_labels() 
127
128
129
130
                - set_feature is a function object which allows expressions of the form:
                    - set_features((unsigned long)feature_index, (double)feature_value);
                    - set_features((unsigned long)feature_index);
            ensures
Davis King's avatar
Davis King committed
131
132
133
                - for all valid i:
                    - interprets y(i) as the label corresponding to x[position-i]
                - This function computes the part of PSI() corresponding to the x[position]
Davis King's avatar
Davis King committed
134
135
136
                  element of the input sequence.  Moreover, this part of PSI() is returned as 
                  a sparse vector by invoking set_feature().  For example, to set the feature 
                  with an index of 55 to the value of 1 this method would call:
Davis King's avatar
Davis King committed
137
138
139
140
141
                    set_feature(55);
                  Or equivalently:
                    set_feature(55,1);
                  Therefore, the first argument to set_feature is the index of the feature 
                  to be set while the second argument is the value the feature should take.
142
143
144
145
                  Additionally, note that calling set_feature() multiple times with the same 
                  feature index does NOT overwrite the old value, it adds to the previous 
                  value.  For example, if you call set_feature(55) 3 times then it will
                  result in feature 55 having a value of 3.
Davis King's avatar
Davis King committed
146
                - This function only calls set_feature() with feature_index values < num_features()
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
        !*/

    };

// ----------------------------------------------------------------------------------------

    void serialize(
        const example_feature_extractor& item,
        std::ostream& out
    );
    /*!
        provides serialization support 
    !*/

    void deserialize(
        example_feature_extractor& item, 
        std::istream& in
    );
    /*!
        provides deserialization support 
    !*/

169
170
171
172
173
174
175
176
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------

    template <
        typename feature_extractor 
        >
    bool contains_invalid_labeling (
        const feature_extractor& fe,
177
        const typename feature_extractor::sequence_type& x,
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
        const std::vector<unsigned long>& y
    );
    /*!
        requires
            - feature_extractor must be an object that implements an interface compatible 
              with the example_feature_extractor discussed above.
        ensures
            - if (x.size() != y.size() ||
                fe.reject_labeling() rejects any of the labels in y) then
                - returns true
            - else
                - returns false
    !*/

// ----------------------------------------------------------------------------------------

    template <
        typename feature_extractor 
        >
    bool contains_invalid_labeling (
        const feature_extractor& fe,
199
        const std::vector<typename feature_extractor::sequence_type>& x,
200
201
202
203
204
205
206
207
208
209
210
211
212
213
        const std::vector<std::vector<unsigned long> >& y
    );
    /*!
        requires
            - feature_extractor must be an object that implements an interface compatible 
              with the example_feature_extractor discussed above.
        ensures
            - if (x.size() != y.size() ||
                contains_invalid_labeling(fe,x[i],y[i]) == true for some i ) then
                - returns true
            - else
                - returns false
    !*/

214
// ----------------------------------------------------------------------------------------
215
216
217
218
219
220
221
222
// ----------------------------------------------------------------------------------------

    template <
        typename feature_extractor
        >
    class sequence_labeler
    {
        /*!
223
224
225
226
            REQUIREMENTS ON feature_extractor
                It must be an object that implements an interface compatible with 
                the example_feature_extractor discussed above.

227
            WHAT THIS OBJECT REPRESENTS
Davis King's avatar
Davis King committed
228
229
230
231
232
233
234
235
236
237
238
239
240
                This object is a tool for doing sequence labeling.  In particular,
                it is capable of representing sequence labeling models such as
                those produced by Hidden Markov SVMs or Conditional Random fields.
                See the following papers for an introduction to these techniques:
                    - Hidden Markov Support Vector Machines by 
                      Y. Altun, I. Tsochantaridis, T. Hofmann
                    - Shallow Parsing with Conditional Random Fields by 
                      Fei Sha and Fernando Pereira


                The model used by this object is the following.  Given
                an input sequence x, predict an output label sequence y
                such that:
Davis King's avatar
Davis King committed
241
                    y == argmax_Y dot(get_weights(), PSI(x,Y))
Davis King's avatar
Davis King committed
242
243
                    Where PSI() is defined by the feature_extractor template
                    argument.  
244
245
246
        !*/

    public:
247
        typedef typename feature_extractor::sequence_type sample_sequence_type;
248
249
        typedef std::vector<unsigned long> labeled_sequence_type;

Davis King's avatar
Davis King committed
250
251
252
253
        sequence_labeler(
        );
        /*!
            ensures
254
255
256
                - #get_feature_extractor() == feature_extractor() 
                  (i.e. it will have its default value)
                - #get_weights().size() == #get_feature_extractor().num_features()
Davis King's avatar
Davis King committed
257
                - #get_weights() == 0
258
259
260
261
262
263
264
265
266
267
268
269
        !*/

        explicit sequence_labeler(
            const matrix<double,0,1>& weights
        ); 
        /*!
            requires
                - feature_extractor().num_features() == weights.size()
            ensures
                - #get_feature_extractor() == feature_extractor() 
                  (i.e. it will have its default value)
                - #get_weights() == weights
Davis King's avatar
Davis King committed
270
        !*/
271
272

        sequence_labeler(
273
274
            const matrix<double,0,1>& weights,
            const feature_extractor& fe
275
        ); 
276
277
278
279
280
281
282
        /*!
            requires
                - fe.num_features() == weights.size()
            ensures
                - #get_feature_extractor() == fe
                - #get_weights() == weights
        !*/
283
284
285

        const feature_extractor& get_feature_extractor (
        ) const; 
Davis King's avatar
Davis King committed
286
287
288
289
        /*!
            ensures
                - returns the feature extractor used by this object
        !*/
290
291
292

        const matrix<double,0,1>& get_weights (
        ) const;
293
294
        /*!
            ensures
Davis King's avatar
Davis King committed
295
296
                - returns the parameter vector associated with this sequence labeler. 
                  The length of the vector is get_feature_extractor().num_features().  
297
        !*/
298
299

        unsigned long num_labels (
Davis King's avatar
Davis King committed
300
301
302
303
304
305
306
        ) const;
        /*!
            ensures
                - returns get_feature_extractor().num_labels()
                  (i.e. returns the number of possible output labels for each 
                  element of a sequence)
        !*/
307
308
309
310

        labeled_sequence_type operator() (
            const sample_sequence_type& x
        ) const;
Davis King's avatar
Davis King committed
311
312
313
314
315
316
317
318
319
320
        /*!
            requires
                - num_labels() > 0
            ensures
                - returns a vector Y of label values such that:
                    - Y.size() == x.size()
                    - for all valid i: 
                        - Y[i] == the predicted label for x[i]
                        - 0 <= Y[i] < num_labels()
        !*/
321
322
323
324
325

        void label_sequence (
            const sample_sequence_type& x,
            labeled_sequence_type& y
        ) const;
Davis King's avatar
Davis King committed
326
327
328
329
330
331
332
333
334
        /*!
            requires
                - num_labels() > 0
            ensures
                - #y == (*this)(x)
                  (i.e. This is just another interface to the operator() routine
                  above.  This one avoids returning the results by value and therefore
                  might be a little faster in some cases)
        !*/
335
336
337

    };

338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
// ----------------------------------------------------------------------------------------

    template <
        typename feature_extractor
        >
    void serialize (
        const sequence_labeler<feature_extractor>& item,
        std::ostream& out
    );
    /*!
        provides serialization support 
    !*/

// ----------------------------------------------------------------------------------------

    template <
        typename feature_extractor
        >
    void deserialize (
        sequence_labeler<feature_extractor>& item,
        std::istream& in 
    );
    /*!
        provides deserialization support 
    !*/

364
365
366
367
368
369
370
// ----------------------------------------------------------------------------------------

}

#endif // DLIB_SEQUENCE_LAbELER_ABSTRACT_H___