Added upper_bound_function object.

1e90fc6d · Davis King · 1bfc31de · 1e90fc6d · 1e90fc6d · 1e90fc6d
Commit 1e90fc6d authored Nov 12, 2017 by Davis King
5 changed files
--- a/dlib/global_optimization.h
+++ b/dlib/global_optimization.h
+// Copyright (C) 2017  Davis E. King (davis@dlib.net)
+// License: Boost Software License   See LICENSE.txt for the full license.
+#ifndef DLIB_GLOBAL_OPTIMIZATIOn_HEADER
+#define DLIB_GLOBAL_OPTIMIZATIOn_HEADER
+#include "global_optimization/upper_bound_function.h"
+#endif // DLIB_GLOBAL_OPTIMIZATIOn_HEADER
--- a/dlib/global_optimization/upper_bound_function.h
+++ b/dlib/global_optimization/upper_bound_function.h
+// Copyright (C) 2017  Davis E. King (davis@dlib.net)
+// License: Boost Software License   See LICENSE.txt for the full license.
+#ifndef DLIB_UPPER_bOUND_FUNCTION_Hh_
+#define DLIB_UPPER_bOUND_FUNCTION_Hh_
+#include "upper_bound_function_abstract.h"
+#include "../svm/svm_c_linear_dcd_trainer.h"
+#include "../statistics.h"
+#include <limits>
+#include <utility>
+namespace dlib
+{
+// ----------------------------------------------------------------------------------------
+    struct function_evaluation
+    {
+        function_evaluation() = default;
+        function_evaluation(const matrix<double,0,1>& x, double y) :x(x), y(y) {}
+        matrix<double,0,1> x;
+        double y = std::numeric_limits<double>::quiet_NaN();
+    };
+// ----------------------------------------------------------------------------------------
+    class upper_bound_function
+    {
+    public:
+        upper_bound_function(
+        ) = default;
+        explicit upper_bound_function(
+            const std::vector<function_evaluation>& _points,
+            const double relative_noise_magnitude = 0.001,
+            const double solver_eps = 0.0001
+        ) : points(_points)
+        {
+            DLIB_CASSERT(points.size() > 1);
+            DLIB_CASSERT(points[0].x.size() > 0, "The vectors can't be empty.");
+            DLIB_CASSERT(relative_noise_magnitude >= 0);
+            DLIB_CASSERT(solver_eps > 0);
+            const long dims = points[0].x.size();
+            for (auto& p : points)
+                DLIB_CASSERT(p.x.size() == dims, "All the vectors given to upper_bound_function must have the same dimensionality.");
+            using sample_type = std::vector<std::pair<size_t,double>>;
+            using kernel_type = sparse_linear_kernel<sample_type>;
+            std::vector<sample_type> x;
+            std::vector<double> y;
+            // normalize the data so the values aren't extreme
+            std::vector<matrix<double,0,1>> temp;
+            for (auto& v : points)
+                temp.push_back(v.x);
+            xnormalizer.train(temp);
+            for (auto& v : points)
+                v.x = xnormalizer(v.x);
+            x.reserve(points.size()*(points.size()-1)/2);
+            y.reserve(points.size()*(points.size()-1)/2);
+            sample_type samp;
+            for (size_t i = 0; i < points.size(); ++i)
+            {
+                for (size_t j = i+1; j < points.size(); ++j)
+                {
+                    samp.clear();
+                    for (long k = 0; k < dims; ++k)
+                    {
+                        double temp = points[i].x(k) - points[j].x(k);
+                        samp.push_back(std::make_pair(k, temp*temp));
+                    }
+                    if (points[i].y > points[j].y)
+                        samp.push_back(std::make_pair(dims + j, relative_noise_magnitude));
+                    else
+                        samp.push_back(std::make_pair(dims + i, relative_noise_magnitude));
+                    double diff = points[i].y - points[j].y;
+                    samp.push_back(std::make_pair(dims + points.size(), 1-diff*diff));
+                    x.push_back(samp);
+                    y.push_back(1);
+                }
+            }
+            svm_c_linear_dcd_trainer<kernel_type> trainer;
+            trainer.set_c(std::numeric_limits<double>::infinity());
+            //trainer.be_verbose();
+            trainer.force_last_weight_to_1(true);
+            trainer.set_epsilon(solver_eps);
+            auto df = trainer.train(x,y);
+            const auto& bv = df.basis_vectors(0);
+            slopes.set_size(dims);
+            for (long i = 0; i < dims; ++i)
+                slopes(i) = bv[i].second;
+            //cout << "slopes:" << trans(slopes);
+            offsets.resize(points.size());
+            auto s = x.begin();
+            for (size_t i = 0; i < points.size(); ++i)
+            {
+                for (size_t j = i+1; j < points.size(); ++j)
+                {
+                    double val = df(*s);
+                    // If the constraint wasn't exactly satisfied then we need to adjust
+                    // the offsets so that it is satisfied.  So we check for that here
+                    if (points[i].y > points[j].y)
+                    {
+                        if (val + offsets[j] < 1)
+                            offsets[j] = 1-val;
+                    }
+                    else
+                    {
+                        if (val + offsets[i] < 1)
+                            offsets[i] = 1-val;
+                    }
+                    ++s;
+                }
+            }
+            for (size_t i = 0; i < points.size(); ++i)
+            {
+                offsets[i] += bv[slopes.size()+i].second*relative_noise_magnitude;
+            }
+        }
+        long num_points(
+        ) const 
+        { 
+            return points.size(); 
+        }
+        long dimensionality(
+        ) const
+        { 
+            if (points.size() == 0)
+                return 0;
+            else
+                return points[0].x.size();
+        }
+        double operator() (
+            matrix<double,0,1> x
+        ) const
+        {
+            DLIB_CASSERT(num_points() > 0);
+            DLIB_CASSERT(x.size() == dimensionality());
+            x = xnormalizer(x);
+            double upper_bound = std::numeric_limits<double>::infinity();
+            for (size_t i = 0; i < points.size(); ++i)
+            {
+                const double local_bound = points[i].y + std::sqrt(offsets[i] + dot(slopes, squared(x-points[i].x)));
+                upper_bound = std::min(upper_bound, local_bound);
+            }
+            return upper_bound;
+        }
+    private:
+        vector_normalizer<matrix<double,0,1>> xnormalizer;
+        std::vector<function_evaluation> points;
+        std::vector<double> offsets; // offsets.size() == points.size()
+        matrix<double,0,1> slopes; // slopes.size() == points[0].first.size()
+    };
+// ----------------------------------------------------------------------------------------
+}
+#endif // DLIB_UPPER_bOUND_FUNCTION_Hh_
--- a/dlib/global_optimization/upper_bound_function_abstract.h
+++ b/dlib/global_optimization/upper_bound_function_abstract.h
+// Copyright (C) 2017  Davis E. King (davis@dlib.net)
+// License: Boost Software License   See LICENSE.txt for the full license.
+#undef DLIB_UPPER_bOUND_FUNCTION_ABSTRACT_Hh_
+#ifdef DLIB_UPPER_bOUND_FUNCTION_ABSTRACT_Hh_
+#include "../matrix.h"
+#include <limits>
+namespace dlib
+{
+// ----------------------------------------------------------------------------------------
+    struct function_evaluation
+    {
+        /*!
+            WHAT THIS OBJECT REPRESENTS
+                This object records the output of a real valued function in response to
+                some input. 
+                In particular, if you have a function F(x) then the function_evaluation is
+                simply a struct that records x and the scalar value F(x).
+        !*/
+        function_evaluation() = default;
+        function_evaluation(const matrix<double,0,1>& x, double y) :x(x), y(y) {}
+        matrix<double,0,1> x;
+        double y = std::numeric_limits<double>::quiet_NaN();
+    };
+// ----------------------------------------------------------------------------------------
+    class upper_bound_function
+    {
+        /*!
+            WHAT THIS OBJECT REPRESENTS
+                This object represents a non-parametric function that can be used to define
+                an upper bound on some more complex and unknown function.  To describe this
+                precisely, lets assume there is a function F(x) which you are capable of
+                sampling from but otherwise know nothing about, and that you would like to
+                find an upper bounding function U(x) such that U(x) >= F(x) for any x.  It
+                would also be good if U(x)-F(x) was minimal.  I.e. we would like U(x) to be
+                a tight upper bound, not something vacuous like U(x) = infinity.
+                The upper_bound_function class is a tool for creating this kind of upper
+                bounding function from a set of function_evaluations of F(x).  We do this
+                by considering only U(x) of the form:
+                    U(x) = {
+                       double min_ub = infinity;
+                       for (size_t i = 0; i < POINTS.size(); ++i) {
+                            function_evaluation p = POINTS[i]
+                            double local_bound = p.y + sqrt(noise_terms[i] + sqrt(trans(p.x-x)*M*(p.x-x)))
+                            min_ub = min(min_ub, local_bound)
+                        }
+                    }
+                Where POINTS is an array of function_evaluation instances drawn from F(x),
+                M is a diagonal matrix, and noise_terms is an array of scalars.
+                To create an upper bound U(x), the upper_bound_function takes a POINTS array
+                containing evaluations of F(x) as input and solves the following quadratic
+                program to find the parameters of U(x):
+                    min_{M,noise_terms}:  sum(squared(M)) + sum(squared(noise_terms/relative_noise_magnitude))
+                    s.t.   U(POINTS[i].x) >= POINTS[i].y,  for all i 
+                           noise_terms[i] >= 0
+                           diag(M) >= 0
+                Therefore, the quadratic program finds the U(x) that always upper bounds
+                F(x) on the supplied POINTS, but is otherwise as small as possible.
+                The inspiration for the upper_bound_function object came from this
+                excellent paper:
+                    Global optimization of Lipschitz functions 
+                    Malherbe, Cédric and Vayatis, Nicolas 
+                    International Conference on Machine Learning - 2017
+                In that paper, they propose to use a simpler U(x) where noise_terms is
+                always 0 and M is a diagonal matrix where each diagonal element is the same
+                value.  Therefore, there is only a single scalar parameter for U(x) in
+                their formulation of the problem.  This causes difficulties if F(x) is
+                stochastic or has discontinuities since, without the noise term, M will
+                become really huge and the upper bound becomes vacuously large.  It is also
+                problematic if the gradient of F(x) with respect to x contains elements of
+                widely varying magnitude since the simpler formulation of U(x) assumes a
+                uniform rate of change regardless of which dimension is varying. 
+        !*/
+    public:
+        upper_bound_function(
+        );
+        /*!
+            ensures
+                - #num_points() == 0
+                - #dimensionality() == 0
+        !*/
+        explicit upper_bound_function(
+            const std::vector<function_evaluation>& points,
+            const double relative_noise_magnitude = 0.001,
+            const double solver_eps = 0.0001
+        );
+        /*!
+            requires
+                - points.size() > 1
+                - all the x vectors in points must have the same non-zero dimensionality.
+                - solver_eps > 0
+            ensures
+                - Creates an upper bounding function U(x), as described above, assuming that
+                  the given points are drawn from F(x).
+                - Uses the provided relative_noise_magnitude when solving the QP, as
+                  described above.  Note that relative_noise_magnitude can be set to 0.  If
+                  you do this then all the noise terms are constrained to 0.  You should
+                  only do this if you know F(x) is non-stochastic and continuous
+                  everywhere.
+                - When solving the QP used to find the parameters of U(x), the upper
+                  bounding function, we solve the QP to solver_eps accuracy.
+                - #num_points() == points.size()
+                - #dimensionality() == points[0].x.size()
+        !*/
+        long num_points(
+        ) const;
+        /*!
+            ensures
+                - returns the number of points used to define the upper bounding function.
+        !*/
+        long dimensionality(
+        ) const;
+        /*!
+            ensures
+                - returns the dimensionality of the input vectors to the upper bounding function. 
+        !*/
+        double operator() (
+            matrix<double,0,1> x
+        ) const;
+        /*!
+            requires
+                - num_points() > 0
+                - x.size() == dimensionality()
+            ensures
+                - return U(x)
+                  (i.e. returns the upper bound on F(x) at x given by our upper bounding function)
+        !*/
+    };
+// ----------------------------------------------------------------------------------------
+}
+#endif // DLIB_UPPER_bOUND_FUNCTION_ABSTRACT_Hh_
--- a/dlib/test/CMakeLists.txt
+++ b/dlib/test/CMakeLists.txt
@@ -103,6 +103,7 @@ set (tests
   one_vs_one_trainer.cpp
   optimization.cpp
   optimization_test_functions.cpp
+   global_optimization.cpp
   opt_qp_solver.cpp
   parallel_for.cpp
   parse.cpp

--- a/dlib/test/global_optimization.cpp
+++ b/dlib/test/global_optimization.cpp
+// Copyright (C) 2017  Davis E. King (davis@dlib.net)
+// License: Boost Software License   See LICENSE.txt for the full license.
+#include <dlib/global_optimization.h>
+#include <dlib/statistics.h>
+#include <sstream>
+#include <string>
+#include <cstdlib>
+#include <ctime>
+#include <vector>
+#include <dlib/rand.h>
+#include "tester.h"
+namespace  
+{
+    using namespace test;
+    using namespace dlib;
+    using namespace std;
+    logger dlog("test.global_optimization");
+// ----------------------------------------------------------------------------------------
+    void test_upper_bound_function(double relative_noise_magnitude, double solver_eps)
+    {
+        print_spinner();
+        dlog << LINFO << "test_upper_bound_function,  relative_noise_magnitude="<< relative_noise_magnitude  << ", solver_eps=" << solver_eps;
+        auto rosen = [](const matrix<double,0,1>& x) { return -1*( 100*std::pow(x(1) - x(0)*x(0),2.0) + std::pow(1 - x(0),2)); };
+        dlib::rand rnd;
+        auto make_rnd = [&rnd]() { matrix<double,0,1> x(2); x = 2*rnd.get_random_double(), 2*rnd.get_random_double(); return x; };
+        std::vector<function_evaluation> evals;
+        for (int i = 0; i < 200; ++i)
+        {
+            auto x = make_rnd();
+            evals.emplace_back(x,rosen(x));
+        }
+        upper_bound_function ub(evals, relative_noise_magnitude, solver_eps);
+        DLIB_TEST(ub.num_points() == (long)evals.size());
+        DLIB_TEST(ub.dimensionality() == 2);
+        for (auto& ev : evals)
+        {
+            dlog << LINFO << ub(ev.x) - ev.y;
+            DLIB_TEST_MSG(ub(ev.x) - ev.y > -1e10, ub(ev.x) - ev.y);
+        }
+        if (solver_eps < 0.001)
+        {
+            dlog << LINFO << "out of sample points: ";
+            for (int i = 0; i < 10; ++i)
+            {
+                auto x = make_rnd();
+                dlog << LINFO << ub(x) - rosen(x);
+                DLIB_TEST_MSG(ub(x) - rosen(x) > 1e-10, ub(x) - rosen(x));
+            }
+        }
+    }
+// ----------------------------------------------------------------------------------------
+    class global_optimization_tester : public tester
+    {
+    public:
+        global_optimization_tester (
+        ) :
+            tester ("test_global_optimization",
+                    "Runs tests on the global optimization components.")
+        {}
+        void perform_test (
+        )
+        {
+            test_upper_bound_function(0.1, 1e-6);
+            test_upper_bound_function(0.0, 1e-6);
+            test_upper_bound_function(0.0, 1e-1);
+        }
+    } a;
+}