Commit 40998905 authored by Davis King's avatar Davis King
Browse files

Added a random forest regression tool.

parent 8b419e43
// Copyright (C) 2018 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_RANDOM_FOReST_H_
#define DLIB_RANDOM_FOReST_H_

// Convenience header: including this file pulls in all of dlib's random forest
// regression tools.
#include "random_forest/random_forest_regression.h"

namespace dlib
{
    // Everything is declared in the header included above; this namespace block
    // is intentionally empty.
}

#endif // DLIB_RANDOM_FOReST_H_
This diff is collapsed.
// Copyright (C) 2018 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#undef DLIB_RANdOM_FOREST_REGRESION_ABSTRACT_H_
#ifdef DLIB_RANdOM_FOREST_REGRESION_ABSTRACT_H_
#include <vector>
#include "../matrix.h"
namespace dlib
{
// ----------------------------------------------------------------------------------------
class dense_feature_extractor
{
    /*!
        WHAT THIS OBJECT REPRESENTS
            This object is a tool for extracting features from objects.  In particular,
            it is designed to be used with the random forest regression tools discussed
            below.

            This particular feature extractor does almost nothing since it works on
            vectors in R^n and simply selects elements from each vector.  However, the
            tools below are templated and allow you to design your own feature extractors
            that operate on whatever object types you create.  So for example, maybe
            you want to perform regression on images rather than vectors.  Moreover,
            your feature extraction could be more complex.  Maybe you are selecting
            differences between pairs of pixels in an image or doing something
            involving geometric transforms during feature extraction.  Any of these
            kinds of more complex feature extraction patterns can be realized with the
            random forest tools by implementing your own feature extractor object and
            using it with the random forest objects.

            Therefore, you should consider this dense_feature_extractor as an example
            that documents the interface as well as the simple default extractor for
            use with dense vectors.

        THREAD SAFETY
            It is safe to call const members of this object from multiple threads.
    !*/

public:
    typedef uint32_t feature;
    typedef matrix<double,0,1> sample_type;

    dense_feature_extractor(
    );
    /*!
        ensures
            - #max_num_feats() == 0
    !*/

    void setup (
        const std::vector<sample_type>& x,
        const std::vector<double>& y
    );
    /*!
        requires
            - x.size() == y.size()
            - x.size() > 0
            - x[0].size() > 0
            - all the vectors in x have the same dimensionality.
        ensures
            - Configures this feature extractor to work on the given training data.
              For dense feature extractors all we do is record the dimensionality of
              the training vectors.
            - #max_num_feats() == x[0].size()
              (In general, setup() sets max_num_feats() to some non-zero value so that
              the other methods of this object can then be called.)
    !*/

    void get_random_features (
        dlib::rand& rnd,
        size_t num,
        std::vector<feature>& feats
    ) const;
    /*!
        requires
            - max_num_feats() != 0
        ensures
            - This function pulls out num randomly selected features from sample_type.  If
              sample_type was a simple object like a dense vector then the features could just
              be integer indices into the vector.  But for other objects it might be
              something more complex.
            - #feats.size() == min(num, max_num_feats())
    !*/

    double extract_feature_value (
        const sample_type& item,
        const feature& f
    ) const;
    /*!
        requires
            - max_num_feats() != 0
            - f was produced from a call to get_random_features().
        ensures
            - Extracts the feature value corresponding to f.  For this simple dense
              feature extractor this simply means returning item(f).  But in general
              you can design feature extractors that do something more complex.
    !*/

    size_t max_num_feats (
    ) const;
    /*!
        ensures
            - returns the number of distinct features this object might extract.
    !*/
};
void serialize(const dense_feature_extractor& item, std::ostream& out);
void serialize(dense_feature_extractor& item, std::istream& in);
/*!
provides serialization support
!*/
// ----------------------------------------------------------------------------------------
template <
    typename feature_extractor
    >
struct internal_tree_node
{
    /*!
        WHAT THIS OBJECT REPRESENTS
            This object is an internal node in a regression tree.  See the code of
            random_forest_regression_function to see how it is used to create a tree.
    !*/

    // Indices of this node's two children.  Given the constructor requirements of
    // random_forest_regression_function (every left/right index is less than
    // trees[i].size()+leaves[i].size()), an index presumably selects either another
    // internal node or a leaf value -- confirm the exact encoding in the
    // implementation.
    uint32_t left;
    uint32_t right;
    // Threshold compared against the value of split_feature to decide which child a
    // sample is routed to (whether the comparison is < or <= is not visible here;
    // see the implementation).
    float split_threshold;
    // The feature, as produced by feature_extractor::get_random_features(), that
    // this node tests.
    typename feature_extractor::feature split_feature;
};

template <typename feature_extractor>
void serialize(const internal_tree_node<feature_extractor>& item, std::ostream& out);
template <typename feature_extractor>
void deserialize(internal_tree_node<feature_extractor>& item, std::istream& in);
/*!
    provides serialization support
!*/
// ----------------------------------------------------------------------------------------
template <
    typename feature_extractor = dense_feature_extractor
    >
class random_forest_regression_function
{
    /*!
        WHAT THIS OBJECT REPRESENTS
            This object represents a regression forest.  This is a collection of
            decision trees that take an object as input and each vote on a real value
            to associate with the object.  The final real value output is the average
            of all the votes from each of the trees.
    !*/

public:
    typedef feature_extractor feature_extractor_type;
    typedef typename feature_extractor::sample_type sample_type;

    random_forest_regression_function(
    );
    /*!
        ensures
            - #get_num_trees() == 0
    !*/

    random_forest_regression_function (
        feature_extractor_type&& fe_,
        std::vector<std::vector<internal_tree_node<feature_extractor>>>&& trees_,
        std::vector<std::vector<float>>&& leaves_
    );
    /*!
        requires
            - trees_.size() > 0
            - trees_.size() == leaves_.size()
            - for all valid i:
                - leaves_[i].size() > 0
                - trees_[i].size()+leaves_[i].size() > the maximal left or right index
                  values in trees_[i].
        ensures
            - #get_internal_tree_nodes() == trees_
            - #get_tree_leaves() == leaves_
            - #get_feature_extractor() == fe_
    !*/

    size_t get_num_trees(
    ) const;
    /*!
        ensures
            - returns the number of trees in this regression forest.
    !*/

    const std::vector<std::vector<internal_tree_node<feature_extractor>>>& get_internal_tree_nodes (
    ) const;
    /*!
        ensures
            - returns the internal tree nodes that define the regression trees.
            - get_internal_tree_nodes().size() == get_num_trees()
    !*/

    const std::vector<std::vector<float>>& get_tree_leaves (
    ) const;
    /*!
        ensures
            - returns the tree leaves that define the regression trees.
            - get_tree_leaves().size() == get_num_trees()
    !*/

    const feature_extractor_type& get_feature_extractor (
    ) const;
    /*!
        ensures
            - returns the feature extractor used by the trees.
    !*/

    double operator() (
        const sample_type& x
    ) const;
    /*!
        requires
            - get_num_trees() > 0
        ensures
            - Maps x to a real value and returns the value.  To do this, we find the
              get_num_trees() leaf values associated with x and then return the average
              of these leaf values.
    !*/
};
void serialize(const random_forest_regression_function& item, std::ostream& out);
void deserialize(random_forest_regression_function& item, std::istream& in);
/*!
provides serialization support
!*/
// ----------------------------------------------------------------------------------------
template <
    typename feature_extractor = dense_feature_extractor
    >
class random_forest_regression_trainer
{
    /*!
        WHAT THIS OBJECT REPRESENTS
            This object is a tool for fitting a regression forest (i.e. a
            random_forest_regression_function) to a set of training data.  The
            training process is controlled by the parameters documented below:
            the number of trees in the forest, the minimum number of training
            samples a tree leaf may contain, and the fraction of features
            randomly considered when splitting each tree node.
    !*/

public:
    typedef feature_extractor feature_extractor_type;
    typedef random_forest_regression_function<feature_extractor> trained_function_type;
    typedef typename feature_extractor::sample_type sample_type;

    random_forest_regression_trainer (
    );
    /*!
        ensures
            - #get_min_samples_per_leaf() == 5
            - #get_num_trees() == 1000
            - #get_feature_subsampling_frac() == 1.0/3.0
            - #get_feature_extractor() == a default initialized feature extractor.
            - #get_random_seed() == ""
            - this object is not verbose.
    !*/

    const feature_extractor_type& get_feature_extractor (
    ) const;
    /*!
        ensures
            - returns the feature extractor that will be used when train() is called.
    !*/

    void set_feature_extractor (
        const feature_extractor_type& feat_extractor
    );
    /*!
        ensures
            - #get_feature_extractor() == feat_extractor
    !*/

    void set_seed (
        const std::string& seed
    );
    /*!
        ensures
            - #get_random_seed() == seed
              (NOTE(review): presumably the seed makes training deterministic for a
              fixed seed and dataset -- confirm against the implementation.)
    !*/

    const std::string& get_random_seed (
    ) const;
    /*!
        ensures
            - returns the seed string used to initialize the random number generation
              performed by train().
    !*/

    size_t get_num_trees (
    ) const;
    /*!
        ensures
            - Random forests built by this object will contain get_num_trees() trees.
    !*/

    void set_num_trees (
        size_t num
    );
    /*!
        requires
            - num > 0
        ensures
            - #get_num_trees() == num
    !*/

    void set_feature_subsampling_fraction (
        double frac
    );
    /*!
        requires
            - 0 < frac <= 1
        ensures
            - #get_feature_subsampling_frac() == frac
    !*/

    double get_feature_subsampling_frac(
    ) const;
    /*!
        ensures
            - When we build trees, at each node we don't look at all the available
              features.  We consider only get_feature_subsampling_frac() fraction of
              them at random.
    !*/

    void set_min_samples_per_leaf (
        size_t num
    );
    /*!
        requires
            - num > 0
        ensures
            - #get_min_samples_per_leaf() == num
    !*/

    size_t get_min_samples_per_leaf(
    ) const;
    /*!
        ensures
            - When building trees, each leaf node in a tree will contain at least
              get_min_samples_per_leaf() samples.
    !*/

    void be_verbose (
    );
    /*!
        ensures
            - This object will print status messages to standard out so that the
              progress of training can be tracked.
    !*/

    void be_quiet (
    );
    /*!
        ensures
            - this object will not print anything to standard out
    !*/

    trained_function_type train (
        const std::vector<sample_type>& x,
        const std::vector<double>& y
    ) const;
    /*!
        requires
            - x.size() == y.size()
            - x.size() > 0
              (NOTE(review): preconditions inferred from the feature extractor's
              setup() requirements above -- confirm against the implementation.)
        ensures
            - Fits a random forest to the given (x,y) training data, i.e. builds a
              forest that attempts to map each x[i] to y[i], and returns the
              resulting random_forest_regression_function.
            - The returned forest contains get_num_trees() trees.
    !*/

    trained_function_type train (
        const std::vector<sample_type>& x,
        const std::vector<double>& y,
        std::vector<double>& oob_values // predicted y, basically like LOO-CV
    ) const;
    /*!
        requires
            - x.size() == y.size()
            - x.size() > 0
              (NOTE(review): preconditions inferred as in the overload above --
              confirm against the implementation.)
        ensures
            - Identical to the two argument train() above, except that it also
              outputs per-sample predicted values into oob_values.
            - #oob_values.size() == y.size()
              (NOTE(review): per the inline comment, #oob_values[i] is an
              out-of-bag prediction for x[i], giving a cheap LOO-CV style
              estimate of generalization error -- confirm the exact semantics in
              the implementation.)
    !*/
};
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_RANdOM_FOREST_REGRESION_ABSTRACT_H_
......@@ -119,6 +119,7 @@ set (tests
read_write_mutex.cpp
reference_counter.cpp
rls.cpp
random_forest.cpp
sammon.cpp
scan_image.cpp
sequence.cpp
......
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment