Commit 0b235fe5 authored by Davis King's avatar Davis King
Browse files

Added the repeat layer and generally optimized the code for very deep

networks.  This mostly involved removing deep template recursions, since
those upset the compiler when you build very deep networks.
parent 7991275e
This diff is collapsed.
...@@ -69,48 +69,38 @@ namespace dlib ...@@ -69,48 +69,38 @@ namespace dlib
// ---------------------------------------------------------------------------------------- // ----------------------------------------------------------------------------------------
template < template <
typename T, typename T
size_t N
> >
class sstack class sstack
{ {
/*! /*!
REQUIREMENTS ON T
- T is default and copy constructable.
REQUIREMENTS ON N
- N > 0
WHAT THIS OBJECT REPRESENTS WHAT THIS OBJECT REPRESENTS
This is a basic stack of T objects. It holds N of the objects and is This is a basic stack of T objects. It contains no data itself but simply
entirely allocated on the stack rather than on the heap. points to a memory range of T object and allows you to access that block of
T objects as a stack.
!*/ !*/
public: public:
typedef T value_type; typedef T value_type;
const static size_t num_elements = N;
sstack( sstack() = delete;
);
/*!
ensures
- #size() == N
- All elements of this stack are default constructed.
!*/
sstack( sstack (
const T& item T* data,
size_t s
); );
/*! /*!
ensures ensures
- #size() == N - #size() == s
- Initializes all N elements in this stack with the given item. E.g. - #top() == *data
top()==item, pop().top()==item, pop().pop().top()==item, etc. - #pop(i).top() == data[i]
!*/ !*/
const T& top( const T& top(
) const; ) const;
/*! /*!
requires
- size() != 0
ensures ensures
- returns the top element of the stack. - returns the top element of the stack.
!*/ !*/
...@@ -118,46 +108,41 @@ namespace dlib ...@@ -118,46 +108,41 @@ namespace dlib
T& top( T& top(
); );
/*! /*!
requires
- size() != 0
ensures ensures
- returns the top element of the stack. - returns the top element of the stack.
!*/ !*/
size_t size( size_t size(
) const; ) const;
/*! /*!
ensures ensures
- returns the number of elements in this stack. In particular, the number - returns the number of elements in this stack.
returned is always N.
!*/
const sstack<T,N-1>& pop(
) const;
/*!
requires
- size() > 1
ensures
- returns a reference to the sub-stack S such that:
- S.size() == size()-1.
- S.top() is the next element in the stack.
!*/ !*/
sstack<T,N-1>& pop( sstack pop(
size_t num = 1
); );
/*! /*!
requires requires
- size() > 1 - num < size()
ensures ensures
- returns a reference to the sub-stack S such that: - returns a reference to the sub-stack S such that:
- S.size() == size()-1. - S.size() == size()-num.
- S.top() is the next element in the stack. - S.top() is num elements down the stack.
!*/ !*/
}; };
void serialize(const sstack& item, std::ostream& out); template <
void deserialize(sstack& item, std::istream& in); typename T
>
sstack<T> make_sstack(
std::vector<T>& item
) { return sstack<T>(item.data(), item.size()); }
/*! /*!
provides serialization support ensures
- returns a sstack that sits on top of the given std::vector.
!*/ !*/
// ---------------------------------------------------------------------------------------- // ----------------------------------------------------------------------------------------
...@@ -180,6 +165,7 @@ namespace dlib ...@@ -180,6 +165,7 @@ namespace dlib
- SUBNET is an add_layer object. - SUBNET is an add_layer object.
- SUBNET is an add_tag_layer object. - SUBNET is an add_tag_layer object.
- SUBNET is an add_skip_layer object. - SUBNET is an add_skip_layer object.
- SUBNET is a repeat object.
WHAT THIS OBJECT REPRESENTS WHAT THIS OBJECT REPRESENTS
This object represents a deep neural network. In particular, it is a tool This object represents a deep neural network. In particular, it is a tool
...@@ -406,7 +392,7 @@ namespace dlib ...@@ -406,7 +392,7 @@ namespace dlib
template <typename solver_type> template <typename solver_type>
void update( void update(
const tensor& x, const tensor& x,
sstack<solver_type,num_layers>& solvers sstack<solver_type> solvers
); );
/*! /*!
requires requires
...@@ -415,9 +401,10 @@ namespace dlib ...@@ -415,9 +401,10 @@ namespace dlib
subsequently modified in any way. subsequently modified in any way.
- get_gradient_input() has been set equal to the gradient of this network's - get_gradient_input() has been set equal to the gradient of this network's
output with respect to some loss function. output with respect to some loss function.
- This instance of solvers has only ever been used with this network. That - The given solvers have only ever been used with this network. That
is, if you want to call update() on some other neural network object then is, if you want to call update() on some other neural network object then
you must not reuse the same solvers object. you must NOT reuse the same solvers object.
- solvers.size() >= num_layers
ensures ensures
- Back propagates the error gradient, get_gradient_input(), through this - Back propagates the error gradient, get_gradient_input(), through this
network and uses the provided solvers to update the network parameters. network and uses the provided solvers to update the network parameters.
...@@ -431,7 +418,7 @@ namespace dlib ...@@ -431,7 +418,7 @@ namespace dlib
void update( void update(
const tensor& x, const tensor& x,
const tensor& gradient_input, const tensor& gradient_input,
sstack<solver_type,num_layers>& solvers sstack<solver_type> solvers
); );
/*! /*!
requires requires
...@@ -439,9 +426,10 @@ namespace dlib ...@@ -439,9 +426,10 @@ namespace dlib
Moreover, this was the most recent call to forward() and x has not been Moreover, this was the most recent call to forward() and x has not been
subsequently modified in any way. subsequently modified in any way.
- have_same_dimensions(gradient_input, get_output()) == true - have_same_dimensions(gradient_input, get_output()) == true
- This instance of solvers has only ever been used with this network. That - The given solvers have only ever been used with this network. That
is, if you want to call update() on some other neural network object then is, if you want to call update() on some other neural network object then
you must not reuse the same solvers object. you must NOT reuse the same solvers object.
- solvers.size() >= num_layers
ensures ensures
- This function is identical to the version of update() defined immediately - This function is identical to the version of update() defined immediately
above except that it back-propagates gradient_input through the network above except that it back-propagates gradient_input through the network
...@@ -504,6 +492,7 @@ namespace dlib ...@@ -504,6 +492,7 @@ namespace dlib
- SUBNET is an add_layer object. - SUBNET is an add_layer object.
- SUBNET is an add_tag_layer object. - SUBNET is an add_tag_layer object.
- SUBNET is an add_skip_layer object. - SUBNET is an add_skip_layer object.
- SUBNET is a repeat object.
WHAT THIS OBJECT REPRESENTS WHAT THIS OBJECT REPRESENTS
This object represents a deep neural network. In particular, it is a tool This object represents a deep neural network. In particular, it is a tool
...@@ -766,7 +755,7 @@ namespace dlib ...@@ -766,7 +755,7 @@ namespace dlib
double update ( double update (
const tensor& x, const tensor& x,
label_iterator lbegin, label_iterator lbegin,
sstack<solver_type,num_layers>& solvers sstack<solver_type> solvers
); );
/*! /*!
requires requires
...@@ -774,9 +763,10 @@ namespace dlib ...@@ -774,9 +763,10 @@ namespace dlib
- x.num_samples() > 0 - x.num_samples() > 0
- lbegin == iterator pointing to the start of a range of - lbegin == iterator pointing to the start of a range of
x.num_samples()/sample_expansion_factor label_type elements. x.num_samples()/sample_expansion_factor label_type elements.
- This instance of solvers has only ever been used with this network. That - The given solvers have only ever been used with this network. That
is, if you want to call update() on some other neural network object then is, if you want to call update() on some other neural network object then
you must not reuse the same solvers object. you must NOT reuse the same solvers object.
- solvers.size() >= num_layers
ensures ensures
- runs x through the network, compares the output to the expected output - runs x through the network, compares the output to the expected output
pointed to by lbegin, and updates the network parameters via pointed to by lbegin, and updates the network parameters via
...@@ -793,7 +783,7 @@ namespace dlib ...@@ -793,7 +783,7 @@ namespace dlib
input_iterator ibegin, input_iterator ibegin,
input_iterator iend, input_iterator iend,
label_iterator lbegin, label_iterator lbegin,
sstack<solver_type,num_layers>& solvers sstack<solver_type> solvers
); );
/*! /*!
requires requires
...@@ -801,9 +791,10 @@ namespace dlib ...@@ -801,9 +791,10 @@ namespace dlib
- std::distance(ibegin,iend) > 0 - std::distance(ibegin,iend) > 0
- lbegin == iterator pointing to the start of a range of - lbegin == iterator pointing to the start of a range of
std::distance(ibegin,iend) label_type elements. std::distance(ibegin,iend) label_type elements.
- This instance of solvers has only ever been used with this network. That - The given solvers have only ever been used with this network. That
is, if you want to call update() on some other neural network object then is, if you want to call update() on some other neural network object then
you must not reuse the same solvers object. you must NOT reuse the same solvers object.
- solvers.size() >= num_layers
ensures ensures
- runs [ibegin,iend) through the network, compares the output to the - runs [ibegin,iend) through the network, compares the output to the
expected output pointed to by lbegin, and updates the network parameters expected output pointed to by lbegin, and updates the network parameters
...@@ -820,16 +811,17 @@ namespace dlib ...@@ -820,16 +811,17 @@ namespace dlib
template <typename solver_type> template <typename solver_type>
double update ( double update (
const tensor& x, const tensor& x,
sstack<solver_type,num_layers>& solvers sstack<solver_type> solvers
); );
/*! /*!
requires requires
- LOSS_DETAILS is an unsupervised loss. i.e. label_type==no_label_type. - LOSS_DETAILS is an unsupervised loss. i.e. label_type==no_label_type.
- x.num_samples()%sample_expansion_factor == 0 - x.num_samples()%sample_expansion_factor == 0
- x.num_samples() > 0 - x.num_samples() > 0
- This instance of solvers has only ever been used with this network. That - The given solvers have only ever been used with this network. That
is, if you want to call update() on some other neural network object then is, if you want to call update() on some other neural network object then
you must not reuse the same solvers object. you must NOT reuse the same solvers object.
- solvers.size() >= num_layers
ensures ensures
- runs x through the network and updates the network parameters by - runs x through the network and updates the network parameters by
back-propagating the loss gradient through the network. back-propagating the loss gradient through the network.
...@@ -842,16 +834,17 @@ namespace dlib ...@@ -842,16 +834,17 @@ namespace dlib
double update ( double update (
input_iterator ibegin, input_iterator ibegin,
input_iterator iend, input_iterator iend,
sstack<solver_type,num_layers>& solvers sstack<solver_type> solvers
); );
/*! /*!
requires requires
- LOSS_DETAILS is an unsupervised loss. i.e. label_type==no_label_type. - LOSS_DETAILS is an unsupervised loss. i.e. label_type==no_label_type.
- [ibegin, iend) is an iterator range over input_type objects. - [ibegin, iend) is an iterator range over input_type objects.
- std::distance(ibegin,iend) > 0 - std::distance(ibegin,iend) > 0
- This instance of solvers has only ever been used with this network. That - The given solvers have only ever been used with this network. That
is, if you want to call update() on some other neural network object then is, if you want to call update() on some other neural network object then
you must not reuse the same solvers object. you must NOT reuse the same solvers object.
- solvers.size() >= num_layers
ensures ensures
- runs [ibegin,iend) through the network and updates the network parameters - runs [ibegin,iend) through the network and updates the network parameters
by back-propagating the loss gradient through the network. by back-propagating the loss gradient through the network.
...@@ -881,6 +874,115 @@ namespace dlib ...@@ -881,6 +874,115 @@ namespace dlib
// ---------------------------------------------------------------------------------------- // ----------------------------------------------------------------------------------------
// ---------------------------------------------------------------------------------------- // ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
template <
size_t num,
template<typename> class LAYER,
typename SUBNET
>
class repeat
{
/*!
REQUIREMENTS ON num
- num > 0
REQUIREMENTS ON LAYER
- LAYER must be a template that stacks more layers onto a deep neural
network. For example, if net_type were a network without a loss layer,
then it should be legal to create a deeper network with a type of
LAYER<net_type>.
REQUIREMENTS ON SUBNET
- One of the following must be true:
- SUBNET is an add_layer object.
- SUBNET is an add_tag_layer object.
- SUBNET is an add_skip_layer object.
- SUBNET is a repeat object.
WHAT THIS OBJECT REPRESENTS
This object adds more layers to a deep neural network. In particular, it
adds LAYER on top of SUBNET num times. So for example, if num were 2 then
repeat<2,LAYER,SUBNET> would create a network equivalent to LAYER<LAYER<SUBNET>>.
Also, this object provides an interface identical to the one defined by the
add_layer object except that we add the num_repetitions() and
get_repeated_layer() methods. These additions are shown below along with
some additional explanatory comments.
!*/
public:
typedef SUBNET subnet_type;
typedef typename SUBNET::input_type input_type;
const static size_t num_layers = (LAYER<SUBNET>::num_layers-SUBNET::num_layers)*num + SUBNET::num_layers;
const static unsigned int sample_expansion_factor = SUBNET::sample_expansion_factor;
typedef LAYER<an_unspecified_input_type> repeated_layer_type;
template <typename T, typename ...U>
repeat(
T arg1,
U ...args2
);
/*!
ensures
- arg1 is used to initialize the num_repetitions() copies of LAYER inside
this object. That is, all the LAYER elements are initialized identically
by being given copies of arg1.
- The rest of the arguments to the constructor, i.e. args2, are passed to
SUBNET's constructor.
!*/
size_t num_repetitions (
) const;
/*!
ensures
- returns num (i.e. the number of times LAYER was stacked on top of SUBNET)
!*/
const repeated_layer_type& get_repeated_layer (
size_t i
) const;
/*!
requires
- i < num_repetitions()
ensures
- returns a reference to the i-th instance of LAYER. For example,
get_repeated_layer(0) returns the instance of LAYER that is on the top of
the network while get_repeated_layer(num_repetitions()-1) returns the
instance of LAYER that is stacked immediately on top of SUBNET.
!*/
repeated_layer_type& get_repeated_layer (
size_t i
);
/*!
requires
- i < num_repetitions()
ensures
- returns a reference to the i-th instance of LAYER. For example,
get_repeated_layer(0) returns the instance of LAYER that is on the top of
the network while get_repeated_layer(num_repetitions()-1) returns the
instance of LAYER that is stacked immediately on top of SUBNET.
!*/
const subnet_type& subnet(
) const;
/*!
ensures
- returns the SUBNET base network that repeat sits on top of. If you want
to access the LAYER components then you must use get_repeated_layer().
!*/
subnet_type& subnet(
);
/*!
ensures
- returns the SUBNET base network that repeat sits on top of. If you want
to access the LAYER components then you must use get_repeated_layer().
!*/
};
// ---------------------------------------------------------------------------------------- // ----------------------------------------------------------------------------------------
template < template <
...@@ -897,6 +999,7 @@ namespace dlib ...@@ -897,6 +999,7 @@ namespace dlib
- SUBNET is an add_layer object. - SUBNET is an add_layer object.
- SUBNET is an add_tag_layer object. - SUBNET is an add_tag_layer object.
- SUBNET is an add_skip_layer object. - SUBNET is an add_skip_layer object.
- SUBNET is a repeat object.
WHAT THIS OBJECT REPRESENTS WHAT THIS OBJECT REPRESENTS
This object adds a new layer to a deep neural network. However, this layer This object adds a new layer to a deep neural network. However, this layer
...@@ -942,6 +1045,7 @@ namespace dlib ...@@ -942,6 +1045,7 @@ namespace dlib
- SUBNET is an add_layer object. - SUBNET is an add_layer object.
- SUBNET is an add_tag_layer object. - SUBNET is an add_tag_layer object.
- SUBNET is an add_skip_layer object. - SUBNET is an add_skip_layer object.
- SUBNET is a repeat object.
WHAT THIS OBJECT REPRESENTS WHAT THIS OBJECT REPRESENTS
This object adds a new layer to a deep neural network which draws its This object adds a new layer to a deep neural network which draws its
......
...@@ -48,7 +48,7 @@ namespace dlib ...@@ -48,7 +48,7 @@ namespace dlib
dnn_trainer( dnn_trainer(
const net_type& net_, const net_type& net_,
const solver_type& solver_ const solver_type& solver_
) : job_pipe(0), net(net_), solvers(solver_) ) : job_pipe(0), net(net_), solvers(net_type::num_layers, solver_)
{ {
init(); init();
} }
...@@ -81,7 +81,7 @@ namespace dlib ...@@ -81,7 +81,7 @@ namespace dlib
) )
{ {
wait_for_thread_to_pause(); wait_for_thread_to_pause();
solvers = solver_; solvers = std::vector<solver_type>(net_type::num_layers, solver_);
} }
unsigned long get_mini_batch_size ( unsigned long get_mini_batch_size (
...@@ -119,14 +119,14 @@ namespace dlib ...@@ -119,14 +119,14 @@ namespace dlib
} }
const sstack<solver_type,net_type::num_layers>& get_solvers ( const std::vector<solver_type>& get_solvers (
) const ) const
{ {
wait_for_thread_to_pause(); wait_for_thread_to_pause();
return solvers; return solvers;
} }
sstack<solver_type,net_type::num_layers>& get_solvers ( std::vector<solver_type>& get_solvers (
) )
{ {
wait_for_thread_to_pause(); wait_for_thread_to_pause();
...@@ -260,7 +260,7 @@ namespace dlib ...@@ -260,7 +260,7 @@ namespace dlib
friend void serialize(const dnn_trainer& item, std::ostream& out) friend void serialize(const dnn_trainer& item, std::ostream& out)
{ {
item.wait_for_thread_to_pause(); item.wait_for_thread_to_pause();
int version = 1; int version = 2;
serialize(version, out); serialize(version, out);
serialize(item.rs, out); serialize(item.rs, out);
serialize(item.num_epochs, out); serialize(item.num_epochs, out);
...@@ -275,7 +275,7 @@ namespace dlib ...@@ -275,7 +275,7 @@ namespace dlib
item.wait_for_thread_to_pause(); item.wait_for_thread_to_pause();
int version = 0; int version = 0;
deserialize(version, in); deserialize(version, in);
if (version != 1) if (version != 2)
throw serialization_error("Unexpected version found while deserializing dlib::dnn_trainer."); throw serialization_error("Unexpected version found while deserializing dlib::dnn_trainer.");
deserialize(item.rs, in); deserialize(item.rs, in);
deserialize(item.num_epochs, in); deserialize(item.num_epochs, in);
...@@ -309,13 +309,13 @@ namespace dlib ...@@ -309,13 +309,13 @@ namespace dlib
template <typename T> template <typename T>
void run_update(job_t& next_job, const T&) void run_update(job_t& next_job, const T&)
{ {
rs.add(net.update(next_job.t, next_job.labels.begin(), solvers)); rs.add(net.update(next_job.t, next_job.labels.begin(), make_sstack(solvers)));
} }
void run_update(job_t& next_job, const no_label_type&) void run_update(job_t& next_job, const no_label_type&)
{ {
no_label_type pick_wich_run_update; no_label_type pick_wich_run_update;
rs.add(net.update(next_job.t, solvers)); rs.add(net.update(next_job.t, make_sstack(solvers)));
} }
void thread() void thread()
...@@ -361,7 +361,7 @@ namespace dlib ...@@ -361,7 +361,7 @@ namespace dlib
int cuda_device_id; int cuda_device_id;
net_type net; net_type net;
sstack<solver_type,net_type::num_layers> solvers; std::vector<solver_type> solvers;
}; };
// ---------------------------------------------------------------------------------------- // ----------------------------------------------------------------------------------------
......
...@@ -93,24 +93,30 @@ namespace dlib ...@@ -93,24 +93,30 @@ namespace dlib
assigned to each element in get_solvers(). assigned to each element in get_solvers().
!*/ !*/
const sstack<solver_type,net_type::num_layers>& get_solvers ( const std::vector<solver_type>& get_solvers (
) const; ) const;
/*! /*!
ensures ensures
- returns the solvers used to optimize each layer of the neural network - returns the solvers used to optimize each layer of the neural network
get_net(). In particular, the first layer's solver is get_net(). In particular, the first layer's solver is
get_solvers().top(), the second layer's solver is get_solvers()[0], the second layer's solver is
get_solvers().pop().top(), and so on. get_solvers()[1], and so on.
!*/ !*/
sstack<solver_type,net_type::num_layers>& get_solvers ( std::vector<solver_type>& get_solvers (
); );
/*! /*!
ensures ensures
- returns the solvers used to optimize each layer of the neural network - returns the solvers used to optimize each layer of the neural network
get_net(). In particular, the first layer's solver is get_net(). In particular, the first layer's solver is
get_solvers().top(), the second layer's solver is get_solvers()[0], the second layer's solver is
get_solvers().pop().top(), and so on. get_solvers()[1], and so on.
- It should be noted that you should never change the number of elements in
the vector returned by get_solvers() (i.e. don't do something that
changes get_solvers().size()). It will be set to net_type::num_layers by
this object and you should leave it at that. The non-const version of
get_solvers() is provided only so you can tweak the parameters of a
particular solver.
!*/ !*/
unsigned long get_mini_batch_size ( unsigned long get_mini_batch_size (
......
...@@ -974,8 +974,8 @@ namespace ...@@ -974,8 +974,8 @@ namespace
rcon_(6) rcon_(6)
); );
DLIB_TEST(layer<tag1>(net).num_layers == 9); DLIB_TEST(layer<tag1>(net).num_layers == 8);
DLIB_TEST(layer<skip1>(net).num_layers == 9+3+3+1); DLIB_TEST(layer<skip1>(net).num_layers == 8+3+3);
DLIB_TEST(&layer<skip1>(net).get_output() == &layer<tag1>(net).get_output()); DLIB_TEST(&layer<skip1>(net).get_output() == &layer<tag1>(net).get_output());
DLIB_TEST(&layer<skip1>(net).get_output() != &layer<tag1>(net).subnet().subnet().get_output()); DLIB_TEST(&layer<skip1>(net).get_output() != &layer<tag1>(net).subnet().subnet().get_output());
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment