"vscode:/vscode.git/clone" did not exist on "067e36b505ec229b684a0c6daa7960a0ed7b0fca"
Unverified Commit e7ec6b77 authored by Adrià Arrufat's avatar Adrià Arrufat Committed by GitHub
Browse files

Add visitor to remove bias from bn_ layer inputs (closes #2155) (#2156)

* add visitor to remove bias from bn_ inputs (closes #2155)

* remove unused parameter and make documentation more clear

* remove bias from bn_ layers too and use better name

* let the batch norm keep their bias, use even better name

* be more consistent with impl naming

* remove default constructor

* do not use method to prevent some errors

* add disable bias method to pertinent layers

* update dcgan example

- grammar
- print number of network parameters to be able to check bias is not allocated
- at the end, give feedback to the user about what the discriminator thinks about each generated sample

* fix fc_ logic

* add documentation

* add bias_is_disabled methods and update to_xml

* print use_bias=false when bias is disabled
parent ed22f040
......@@ -183,6 +183,28 @@ namespace dlib
impl::set_bias_weight_decay_multiplier(obj, special_(), bias_weight_decay_multiplier);
}
// ----------------------------------------------------------------------------------------
// Generic disable_bias(): calls obj.disable_bias() when T provides such a
// member function and does nothing otherwise.  Dispatch follows the same
// special_/general_ tag pattern used by set_bias_weight_decay_multiplier above.
namespace impl
{
// This overload participates in overload resolution only when
// &T::disable_bias is well-formed (expression SFINAE), i.e. only when T
// actually has a disable_bias() member function.
template <typename T, typename int_<decltype(&T::disable_bias)>::type = 0>
void disable_bias(
T& obj,
special_
) { obj.disable_bias(); }
// Fallback for layer types without a disable_bias() member: a no-op.
template <typename T>
void disable_bias( const T& , general_) { }
}
// Public entry point.  Passing special_() prefers the SFINAE overload when it
// is viable (special_ is presumably convertible to general_, as in the other
// dlib layer-trait helpers — the general_ overload is the fallback).
template <typename T>
void disable_bias(
T& obj
)
{
impl::disable_bias(obj, special_());
}
// ----------------------------------------------------------------------------------------
namespace impl
......
......@@ -157,6 +157,20 @@ namespace dlib
- does nothing
!*/
// ----------------------------------------------------------------------------------------
// Best-effort bias disabling: forwards to obj.disable_bias() when the layer
// type supports it, and is a harmless no-op otherwise.  This lets generic
// code (e.g. network visitors) call it on any layer type uniformly.
template <typename T>
void disable_bias(
T& obj
);
/*!
ensures
- if (obj has a disable_bias() member function) then
- calls obj.disable_bias()
- else
- does nothing
!*/
// ----------------------------------------------------------------------------------------
bool dnn_prefer_fastest_algorithms(
......
This diff is collapsed.
......@@ -573,6 +573,22 @@ namespace dlib
- #get_bias_weight_decay_multiplier() == val
!*/
void disable_bias(
);
/*!
ensures
- #bias_is_disabled() == true
!*/
bool bias_is_disabled(
) const;
/*!
ensures
- returns true if bias learning is disabled for this layer. This means the biases will
not be learned during training and they will not be used in the forward or backward
methods either.
!*/
alias_tensor_const_instance get_weights(
) const;
/*!
......@@ -903,6 +919,22 @@ namespace dlib
- #get_bias_weight_decay_multiplier() == val
!*/
void disable_bias(
);
/*!
ensures
- #bias_is_disabled() == true
!*/
bool bias_is_disabled(
) const;
/*!
ensures
- returns true if bias learning is disabled for this layer. This means the biases will
not be learned during training and they will not be used in the forward or backward
methods either.
!*/
template <typename SUBNET> void setup (const SUBNET& sub);
template <typename SUBNET> void forward(const SUBNET& sub, resizable_tensor& output);
template <typename SUBNET> void backward(const tensor& gradient_input, SUBNET& sub, tensor& params_grad);
......@@ -1147,6 +1179,22 @@ namespace dlib
- #get_bias_weight_decay_multiplier() == val
!*/
void disable_bias(
);
/*!
ensures
- #bias_is_disabled() == true
!*/
bool bias_is_disabled(
) const;
/*!
ensures
- returns true if bias learning is disabled for this layer. This means the biases will
not be learned during training and they will not be used in the forward or backward
methods either.
!*/
template <typename SUBNET> void setup (const SUBNET& sub);
template <typename SUBNET> void forward(const SUBNET& sub, resizable_tensor& output);
template <typename SUBNET> void backward(const tensor& gradient_input, SUBNET& sub, tensor& params_grad);
......@@ -1616,6 +1664,22 @@ namespace dlib
new_window_size.
!*/
// ----------------------------------------------------------------------------------------
// Disables the (redundant) bias term of every layer that feeds directly into
// a bn_ layer.  Batch normalization layers keep their own bias parameters
// (see the commit notes), so a bias in the preceding layer adds parameters
// without adding expressive power.
template <typename net_type>
void set_all_bn_inputs_no_bias (
const net_type& net
);
/*!
requires
- net_type is an object of type add_layer, add_loss_layer, add_skip_layer, or
add_tag_layer.
ensures
- Disables bias for all bn_ layer inputs.
- Sets get_bias_learning_rate_multiplier() and get_bias_weight_decay_multiplier()
to zero for all bn_ layer inputs.
!*/
// ----------------------------------------------------------------------------------------
class affine_
......
......@@ -10,7 +10,7 @@
by Alec Radford, Luke Metz, Soumith Chintala.
The main idea is that there are two neural networks training at the same time:
- the generator is in charge of generating images that look as close as possible as the
- the generator is in charge of generating images that look as close as possible to the
ones from the dataset.
- the discriminator will decide whether an image is fake (created by the generator) or real
(selected from the dataset).
......@@ -35,25 +35,6 @@
using namespace std;
using namespace dlib;
// We start by defining a simple visitor to disable bias learning in a network. By default,
// biases are initialized to 0, so setting the multipliers to 0 disables bias learning.
class visitor_no_bias
{
public:
template <typename input_layer_type>
void operator()(size_t , input_layer_type& ) const
{
// ignore other layers
}
template <typename T, typename U, typename E>
void operator()(size_t , add_layer<T, U, E>& l) const
{
set_bias_learning_rate_multiplier(l.layer_details(), 0);
set_bias_weight_decay_multiplier(l.layer_details(), 0);
}
};
// Some helper definitions for the noise generation
const size_t noise_size = 100;
using noise_t = std::array<matrix<float, 1, 1>, noise_size>;
......@@ -149,16 +130,15 @@ int main(int argc, char** argv) try
// Instantiate both generator and discriminator
generator_type generator;
discriminator_type discriminator(
leaky_relu_(0.2), leaky_relu_(0.2), leaky_relu_(0.2));
// Remove the bias learning from the networks
visit_layers(generator, visitor_no_bias());
visit_layers(discriminator, visitor_no_bias());
discriminator_type discriminator(leaky_relu_(0.2), leaky_relu_(0.2), leaky_relu_(0.2));
// Remove the bias learning from all bn_ inputs in both networks
set_all_bn_inputs_no_bias(generator);
set_all_bn_inputs_no_bias(discriminator);
// Forward random noise so that we see the tensor size at each layer
discriminator(generate_image(generator, make_noise(rnd)));
cout << "generator" << endl;
cout << "generator (" << count_parameters(generator) << " parameters)" << endl;
cout << generator << endl;
cout << "discriminator" << endl;
cout << "discriminator (" << count_parameters(discriminator) << " parameters)" << endl;
cout << discriminator << endl;
// The solvers for the generator and discriminator networks. In this example, we are going to
......@@ -257,8 +237,11 @@ int main(int argc, char** argv) try
// output.
while (!win.is_closed())
{
win.set_image(generate_image(generator, make_noise(rnd)));
cout << "Hit enter to generate a new image";
const auto image = generate_image(generator, make_noise(rnd));
const auto real = discriminator(image) > 0;
win.set_image(image);
cout << "The discriminator thinks it's " << (real ? "real" : "fake");
cout << ". Hit enter to generate a new image";
cin.get();
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment