You need to sign in or sign up before continuing.
Commit 9d81a1ef authored by Davis King's avatar Davis King
Browse files

Added initial version of the assignment problem learning code.

parent 94ae09f5
...@@ -35,6 +35,7 @@ ...@@ -35,6 +35,7 @@
#include "svm/cross_validate_regression_trainer.h" #include "svm/cross_validate_regression_trainer.h"
#include "svm/cross_validate_object_detection_trainer.h" #include "svm/cross_validate_object_detection_trainer.h"
#include "svm/cross_validate_sequence_labeler.h" #include "svm/cross_validate_sequence_labeler.h"
#include "svm/cross_validate_assignment_trainer.h"
#include "svm/one_vs_all_decision_function.h" #include "svm/one_vs_all_decision_function.h"
#include "svm/one_vs_all_trainer.h" #include "svm/one_vs_all_trainer.h"
...@@ -42,6 +43,7 @@ ...@@ -42,6 +43,7 @@
#include "svm/structural_svm_problem.h" #include "svm/structural_svm_problem.h"
#include "svm/svm_multiclass_linear_trainer.h" #include "svm/svm_multiclass_linear_trainer.h"
#include "svm/sequence_labeler.h" #include "svm/sequence_labeler.h"
#include "svm/assignment_function.h"
#endif // DLIB_SVm_HEADER #endif // DLIB_SVm_HEADER
......
// Copyright (C) 2011 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_ASSIGNMENT_FuNCTION_H__
#define DLIB_ASSIGNMENT_FuNCTION_H__
#include "assignment_function_abstract.h"
#include "../matrix.h"
#include <vector>
#include "../optimization/max_cost_assignment.h"
namespace dlib
{
// ----------------------------------------------------------------------------------------
template <
    typename feature_extractor
    >
class assignment_function
{
    /*!
        WHAT THIS OBJECT REPRESENTS
            This object is a tool for solving the optimal assignment problem
            given a user defined feature_extractor and a learned linear weight
            vector.  It maps a pair of object lists (lhs, rhs) to a vector
            indicating which rhs element, if any, each lhs element associates
            to.
    !*/

public:

    typedef typename feature_extractor::lhs_type lhs_type;
    typedef typename feature_extractor::rhs_type rhs_type;

    typedef std::pair<std::vector<lhs_type>, std::vector<rhs_type> > sample_type;
    typedef std::vector<long> label_type;
    typedef label_type result_type;

    assignment_function()
    /*!
        ensures
            - the weight vector is sized to fe.num_features() and zeroed
            - lhs elements are allowed to go unassigned (force_assignment == false)
    !*/
    {
        weights.set_size(fe.num_features());
        weights = 0;
        force_assignment = false;
    }

    explicit assignment_function(
        const matrix<double,0,1>& weights_
    ) :
        weights(weights_),
        force_assignment(false)
    {
        // make sure requires clause is not broken
        DLIB_ASSERT(fe.num_features() == static_cast<unsigned long>(weights_.size()),
            "\t assignment_function::assignment_function(weights_)"
            << "\n\t These sizes should match"
            << "\n\t fe.num_features(): " << fe.num_features()
            << "\n\t weights_.size(): " << weights_.size()
            << "\n\t this: " << this
            );
    }

    assignment_function(
        const feature_extractor& fe_,
        const matrix<double,0,1>& weights_
    ) :
        fe(fe_),
        weights(weights_),
        force_assignment(false)
    {
        // make sure requires clause is not broken
        DLIB_ASSERT(fe_.num_features() == static_cast<unsigned long>(weights_.size()),
            "\t assignment_function::assignment_function(fe_,weights_)"
            << "\n\t These sizes should match"
            << "\n\t fe_.num_features(): " << fe_.num_features()
            << "\n\t weights_.size(): " << weights_.size()
            << "\n\t this: " << this
            );
    }

    assignment_function(
        const feature_extractor& fe_,
        const matrix<double,0,1>& weights_,
        bool force_assignment_
    ) :
        fe(fe_),
        weights(weights_),
        force_assignment(force_assignment_)
    {
        // make sure requires clause is not broken
        DLIB_ASSERT(fe_.num_features() == static_cast<unsigned long>(weights_.size()),
            "\t assignment_function::assignment_function(fe_,weights_,force_assignment_)"
            << "\n\t These sizes should match"
            << "\n\t fe_.num_features(): " << fe_.num_features()
            << "\n\t weights_.size(): " << weights_.size()
            << "\n\t this: " << this
            );
    }

    result_type operator()(
        const std::vector<lhs_type>& lhs,
        const std::vector<rhs_type>& rhs
    ) const
    /*!
        ensures
            - returns a vector A such that:
                - A.size() == lhs.size()
                - if (A[i] != -1) then
                    - lhs[i] is predicted to associate to rhs[A[i]]
    !*/
    {
        using dlib::sparse_vector::dot;
        using dlib::dot;

        matrix<double> cost;
        unsigned long size;
        if (force_assignment)
        {
            // every lhs element must get a real rhs match, so only pad the
            // matrix up to the larger of the two list sizes
            size = std::max(lhs.size(), rhs.size());
        }
        else
        {
            // add one "dummy" column per lhs element so that any lhs element
            // is free to remain unassigned
            size = rhs.size() + lhs.size();
        }
        cost.set_size(size, size);

        // now fill out the cost assignment matrix
        for (long r = 0; r < cost.nr(); ++r)
        {
            for (long c = 0; c < cost.nc(); ++c)
            {
                if (r < (long)lhs.size() && c < (long)rhs.size())
                {
                    cost(r,c) = dot(weights, fe(lhs[r], rhs[c]));
                }
                else
                {
                    // padding rows/columns correspond to non-assignment and
                    // carry no cost
                    cost(r,c) = 0;
                }
            }
        }

        std::vector<long> assignment;

        if (cost.size() != 0)
        {
            // max_cost_assignment() only works with integer matrices, so convert from
            // double to integer.
            const double max_abs_cost = max(abs(cost));
            // Guard against an all-zero cost matrix (e.g. a default constructed
            // assignment_function with zero weights).  Without this check the
            // division below yields inf, cost*scale becomes 0*inf == NaN, and
            // casting NaN to int64 is undefined behavior.  When all costs are
            // equal any assignment is optimal, so scale == 1 is fine.
            const double scale = (max_abs_cost != 0) ? (std::numeric_limits<dlib::int64>::max()/1000)/max_abs_cost : 1;
            matrix<dlib::int64> int_cost = matrix_cast<dlib::int64>(round(cost*scale));
            assignment = max_cost_assignment(int_cost);
            // only the first lhs.size() rows correspond to real lhs elements
            assignment.resize(lhs.size());
        }

        // adjust assignment so that non-assignments have a value of -1
        for (unsigned long i = 0; i < assignment.size(); ++i)
        {
            if (assignment[i] >= (long)rhs.size())
                assignment[i] = -1;
        }

        return assignment;
    }

    result_type operator() (
        const sample_type& item
    ) const
    /*!
        ensures
            - returns (*this)(item.first, item.second)
    !*/
    {
        return (*this)(item.first, item.second);
    }

private:

    feature_extractor fe;       // produces the feature vector for a (lhs,rhs) pair
    matrix<double,0,1> weights; // learned linear weights, one per feature
    bool force_assignment;      // if true, no lhs element may go unassigned
};
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_ASSIGNMENT_FuNCTION_H__
// Copyright (C) 2011 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_CROSS_VALIDATE_ASSiGNEMNT_TRAINER_H__
#define DLIB_CROSS_VALIDATE_ASSiGNEMNT_TRAINER_H__
#include "cross_validate_assignment_trainer_abstract.h"
#include <vector>
#include "../matrix.h"
#include "svm.h"
namespace dlib
{
// ----------------------------------------------------------------------------------------
template <
    typename assignment_function
    >
double test_assignment_function (
    const assignment_function& assigner,
    const std::vector<typename assignment_function::sample_type>& samples,
    const std::vector<typename assignment_function::label_type>& labels
)
/*!
    ensures
        - runs assigner over every element of samples and returns the fraction
          of individual lhs assignments that agree with the corresponding
          entry of labels.
        - returns 1 when no assignments were scored at all.
!*/
{
    double num_correct = 0;
    double num_scored = 0;

    for (unsigned long i = 0; i < samples.size(); ++i)
    {
        const std::vector<long>& predictions = assigner(samples[i]);
        for (unsigned long j = 0; j < predictions.size(); ++j)
        {
            if (predictions[j] == labels[i][j])
                ++num_correct;
            ++num_scored;
        }
    }

    // avoid dividing by zero when there was nothing to score
    return (num_scored != 0) ? num_correct/num_scored : 1;
}
// ----------------------------------------------------------------------------------------
template <
typename trainer_type
>
double cross_validate_assignment_trainer (
const trainer_type& trainer,
const std::vector<typename trainer_type::sample_type>& samples,
const std::vector<typename trainer_type::label_type>& labels,
const long folds
)
{
typedef typename trainer_type::sample_type sample_type;
typedef typename trainer_type::label_type label_type;
const long num_in_test = samples.size()/folds;
const long num_in_train = samples.size() - num_in_test;
running_stats<double> rs;
std::vector<sample_type> samples_test, samples_train;
std::vector<label_type> labels_test, labels_train;
long next_test_idx = 0;
for (long i = 0; i < folds; ++i)
{
samples_test.clear();
labels_test.clear();
samples_train.clear();
labels_train.clear();
// load up the test samples
for (long cnt = 0; cnt < num_in_test; ++cnt)
{
samples_test.push_back(samples[next_test_idx]);
labels_test.push_back(labels[next_test_idx]);
next_test_idx = (next_test_idx + 1)%samples.size();
}
// load up the training samples
long next = next_test_idx;
for (long cnt = 0; cnt < num_in_train; ++cnt)
{
samples_train.push_back(samples[next]);
labels_train.push_back(labels[next]);
next = (next + 1)%samples.size();
}
rs.add(test_assignment_function(trainer.train(samples_train,labels_train),
samples_test,
labels_test));
} // for (long i = 0; i < folds; ++i)
return rs.mean();
}
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_CROSS_VALIDATE_ASSiGNEMNT_TRAINER_H__
// Copyright (C) 2011 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_STRUCTURAL_ASSiGNMENT_TRAINER_H__
#define DLIB_STRUCTURAL_ASSiGNMENT_TRAINER_H__
#include "structural_assignment_trainer_abstract.h"
#include "../algs.h"
#include "../optimization.h"
#include "structural_svm_assignment_problem.h"
namespace dlib
{
// ----------------------------------------------------------------------------------------
template <
    typename feature_extractor
    >
class structural_assignment_trainer
{
    /*!
        WHAT THIS OBJECT REPRESENTS
            This object is a tool for learning the weight vector needed to use
            an assignment_function object.  It formulates the learning problem
            as a structural SVM (see structural_svm_assignment_problem) and
            solves it with the OCA solver.
    !*/

public:
    typedef typename feature_extractor::lhs_type lhs_type;
    typedef typename feature_extractor::rhs_type rhs_type;
    typedef std::pair<std::vector<lhs_type>, std::vector<rhs_type> > sample_type;
    typedef std::vector<long> label_type;
    typedef assignment_function<feature_extractor> trained_function_type;

    const assignment_function<feature_extractor> train (
        const std::vector<sample_type>& x,
        const std::vector<label_type>& y
    ) const
    /*!
        requires
            - is_assignment_problem(x,y) == true
            - if (force assignment) then
                - is_forced_assignment_problem(x,y) == true
        ensures
            - returns an assignment_function whose weight vector was learned
              from the given training samples.
    !*/
    {
        // make sure requires clause is not broken
        DLIB_CASSERT(is_assignment_problem(x,y), "");

        feature_extractor fe;
        bool force_assignment = false;
        // TODO(review): the solver configuration below is hard coded for this
        // initial version.  C, epsilon, the thread count, and the
        // force_assignment flag should all be exposed via setter methods.
        unsigned long num_threads = 1;
        structural_svm_assignment_problem<feature_extractor> prob(x, y, fe, force_assignment, num_threads);

        prob.set_c(50);
        prob.set_epsilon(1e-10);

        oca solver;
        matrix<double,0,1> weights;
        // run the OCA solver; on return, weights holds the learned vector
        solver(prob, weights);

        return assignment_function<feature_extractor>(fe, weights, force_assignment);
    }

};
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_STRUCTURAL_ASSiGNMENT_TRAINER_H__
// Copyright (C) 2011 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_STRUCTURAL_SVM_ASSiGNMENT_PROBLEM_H__
#define DLIB_STRUCTURAL_SVM_ASSiGNMENT_PROBLEM_H__
#include "structural_svm_assignment_problem_abstract.h"
#include "../matrix.h"
#include "assignment_function.h"
#include <vector>
#include "structural_svm_problem_threaded.h"
// ----------------------------------------------------------------------------------------
namespace dlib
{
template <
typename feature_extractor
>
class structural_svm_assignment_problem : noncopyable,
public structural_svm_problem_threaded<matrix<double,0,1>, typename feature_extractor::feature_vector_type >
/*!
    WHAT THIS OBJECT REPRESENTS
        This object defines the structural SVM training problem used to learn
        the weight vector of an assignment_function.  It supplies the truth
        joint feature vectors and the loss augmented separation oracle
        required by the structural_svm_problem_threaded solver it inherits
        from.
!*/
{
public:
typedef matrix<double,0,1> matrix_type;
typedef typename feature_extractor::feature_vector_type feature_vector_type;
typedef typename feature_extractor::lhs_type lhs_type;
typedef typename feature_extractor::rhs_type rhs_type;
// A sample is a pair of object lists.  A label gives, for each lhs element,
// the index of its matching rhs element, or -1 for "unassigned".
typedef std::pair<std::vector<lhs_type>, std::vector<rhs_type> > sample_type;
typedef std::vector<long> label_type;
structural_svm_assignment_problem(
const std::vector<sample_type>& samples_,
const std::vector<label_type>& labels_,
const feature_extractor& fe_,
bool force_assignment_,
unsigned long num_threads = 2
) :
structural_svm_problem_threaded<matrix_type,feature_vector_type>(num_threads),
samples(samples_),
labels(labels_),
fe(fe_),
force_assignment(force_assignment_)
{
// NOTE(review): only references to samples_, labels_, and fe_ are stored,
// so the caller must keep them alive for the lifetime of this object.
}
private:
virtual long get_num_dimensions (
) const
{
// one weight per feature produced by the feature extractor
return fe.num_features();
}
virtual long get_num_samples (
) const
{
return samples.size();
}
// Computes psi = sum over all assigned pairs (i, label[i]) of
// fe(sample.first[i], sample.second[label[i]]).  This overload handles
// dense (dlib matrix) feature vectors.
template <typename psi_type>
typename enable_if<is_matrix<psi_type> >::type get_joint_feature_vector (
const sample_type& sample,
const label_type& label,
psi_type& psi
) const
{
psi = 0;
for (unsigned long i = 0; i < sample.first.size(); ++i)
{
if (label[i] != -1)
{
psi += fe(sample.first[i], sample.second[label[i]]);
}
}
}
// Appends the entries of vect onto psi.  Used by the sparse overload of
// get_joint_feature_vector() below.
template <typename T>
void append_to_sparse_vect (
T& psi,
const T& vect
) const
{
std::copy(vect.begin(), vect.end(), std::back_inserter(psi));
}
// Sparse-vector overload of get_joint_feature_vector(): concatenates the
// per-pair feature vectors rather than summing dense matrices.
template <typename psi_type>
typename disable_if<is_matrix<psi_type> >::type get_joint_feature_vector (
const sample_type& sample,
const label_type& label,
psi_type& psi
) const
{
psi.clear();
for (unsigned long i = 0; i < sample.first.size(); ++i)
{
if (label[i] != -1)
{
append_to_sparse_vect(psi, fe(sample.first[i], sample.second[label[i]]));
}
}
}
virtual void get_truth_joint_feature_vector (
long idx,
feature_vector_type& psi
) const
{
// the ground truth feature vector is just the joint feature vector of the
// sample under its given label
get_joint_feature_vector(samples[idx], labels[idx], psi);
}
// Loss augmented inference: finds the assignment that maximizes
// (score under current_solution) + (number of disagreements with the truth
// label), then reports that assignment's loss and feature vector.
virtual void separation_oracle (
const long idx,
const matrix_type& current_solution,
double& loss,
feature_vector_type& psi
) const
{
using dlib::sparse_vector::dot;
using dlib::dot;
matrix<double> cost;
unsigned long size;
if (force_assignment)
{
// every lhs element must get a real rhs match, so only pad up to the
// larger list size
unsigned long lhs_size = samples[idx].first.size();
unsigned long rhs_size = samples[idx].second.size();
size = std::max(lhs_size, rhs_size);
}
else
{
// add one "dummy" column per lhs element so any lhs element may remain
// unassigned
unsigned long rhs_size = samples[idx].second.size() + samples[idx].first.size();
size = rhs_size;
}
cost.set_size(size, size);
// now fill out the cost assignment matrix
for (long r = 0; r < cost.nr(); ++r)
{
for (long c = 0; c < cost.nc(); ++c)
{
if (r < (long)samples[idx].first.size())
{
if (c < (long)samples[idx].second.size())
{
cost(r,c) = dot(current_solution, fe(samples[idx].first[r], samples[idx].second[c]));
// add in the loss since this corresponds to an incorrect prediction.
if (c != labels[idx][r])
{
cost(r,c) += 1;
}
}
else
{
// c indexes a dummy (non-assignment) column for a real lhs element
if (labels[idx][r] == -1)
cost(r,c) = 0;
else
cost(r,c) = 1; // 1 for the loss
}
}
else
{
// padding row: does not correspond to a real lhs element
cost(r,c) = 0;
}
}
}
std::vector<long> assignment;
if (cost.size() != 0)
{
// max_cost_assignment() only works with integer matrices, so convert from
// double to integer.
// NOTE(review): if max(abs(cost)) were 0 this division would produce inf
// and cost*scale would be NaN -- presumably the loss terms keep the matrix
// nonzero in practice, but worth confirming.
const double scale = (std::numeric_limits<dlib::int64>::max()/1000)/max(abs(cost));
matrix<dlib::int64> int_cost = matrix_cast<dlib::int64>(round(cost*scale));
assignment = max_cost_assignment(int_cost);
// only the first lhs.size() rows correspond to real lhs elements
assignment.resize(samples[idx].first.size());
}
loss = 0;
// adjust assignment so that non-assignments have a value of -1. Also compute loss.
for (unsigned long i = 0; i < assignment.size(); ++i)
{
if (assignment[i] >= (long)samples[idx].second.size())
assignment[i] = -1;
if (assignment[i] != labels[idx][i])
loss += 1;
}
get_joint_feature_vector(samples[idx], assignment, psi);
}
const std::vector<sample_type>& samples; // training samples (not owned)
const std::vector<label_type>& labels;   // truth labels (not owned)
const feature_extractor& fe;             // feature extractor (not owned)
bool force_assignment;                   // if true, every lhs element must be matched
};
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_STRUCTURAL_SVM_ASSiGNMENT_PROBLEM_H__
...@@ -12,6 +12,9 @@ ...@@ -12,6 +12,9 @@
#include "svm/structural_svm_sequence_labeling_problem.h" #include "svm/structural_svm_sequence_labeling_problem.h"
#include "svm/structural_sequence_labeling_trainer.h" #include "svm/structural_sequence_labeling_trainer.h"
#include "svm/structural_svm_assignment_problem.h"
#include "svm/structural_assignment_trainer.h"
#endif // DLIB_SVm_THREADED_HEADER #endif // DLIB_SVm_THREADED_HEADER
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment