"vscode:/vscode.git/clone" did not exist on "b45969a7d53b5b6a15a931f4514d4f392ef218b6"
Commit 6acddf99 authored by Davis King

Just renamed variables to reflect the new meaning of the batch normalization running variance output.
parent 538de238
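
For context (not part of this commit): after the parent commit, the statistic these parameters hold is a running variance, so inference divides by sqrt(variance + eps) instead of multiplying by a stored inverse standard deviation. A minimal sketch of that per-element transform, using plain std::vector rather than dlib tensors and an illustrative epsilon value:

    #include <cmath>
    #include <cstddef>
    #include <vector>

    // Illustrative epsilon; dlib defines its own tt::BATCH_NORM_EPS.
    const float EPS = 1e-5f;

    // Inference-time batch norm for one channel: the stored statistic is a
    // variance, so we divide by sqrt(variance + EPS) rather than multiplying
    // by a stored 1/std.
    std::vector<float> bn_inference(const std::vector<float>& x,
                                    float gamma, float beta,
                                    float running_mean, float running_variance)
    {
        const float invstd = 1.0f/std::sqrt(running_variance + EPS);
        std::vector<float> y(x.size());
        for (std::size_t i = 0; i < x.size(); ++i)
            y[i] = gamma*(x[i] - running_mean)*invstd + beta;
        return y;
    }
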
......@@ -466,7 +466,7 @@ namespace dlib
const tensor& gamma,
const tensor& beta,
const tensor& running_means,
const tensor& running_invstds
const tensor& running_variances
)
{
DLIB_CASSERT(
......@@ -476,7 +476,7 @@ namespace dlib
gamma.k() == src.k() &&
have_same_dimensions(gamma, beta) &&
have_same_dimensions(gamma, running_means) &&
have_same_dimensions(gamma, running_invstds),
have_same_dimensions(gamma, running_variances),
"\ngamma.num_samples(): " << gamma.num_samples() <<
"\ngamma.k(): " << gamma.k() <<
"\ngamma.nr(): " << gamma.nr() <<
......@@ -489,10 +489,10 @@ namespace dlib
"\nrunning_means.k(): " << running_means.k() <<
"\nrunning_means.nr(): " << running_means.nr() <<
"\nrunning_means.nc(): " << running_means.nc() <<
"\nrunning_invstds.num_samples(): " << running_invstds.num_samples() <<
"\nrunning_invstds.k(): " << running_invstds.k() <<
"\nrunning_invstds.nr(): " << running_invstds.nr() <<
"\nrunning_invstds.nc(): " << running_invstds.nc() <<
"\nrunning_variances.num_samples(): " << running_variances.num_samples() <<
"\nrunning_variances.k(): " << running_variances.k() <<
"\nrunning_variances.nr(): " << running_variances.nr() <<
"\nrunning_variances.nc(): " << running_variances.nc() <<
"\nsrc.k(): " << src.k() <<
"\nsrc.nr(): " << src.nr() <<
"\nsrc.nc(): " << src.nc()
......@@ -504,14 +504,14 @@ namespace dlib
auto g = gamma.host();
auto b = beta.host();
auto m = running_means.host();
auto i = running_invstds.host();
auto v = running_variances.host();
const long num = src.k()*src.nr()*src.nc();
for (long n = 0; n < src.num_samples(); ++n)
{
for (long k = 0; k < num; ++k)
{
*d = g[k]*(*s - m[k])/std::sqrt(i[k]+dlib::tt::BATCH_NORM_EPS) + b[k];
*d = g[k]*(*s - m[k])/std::sqrt(v[k]+dlib::tt::BATCH_NORM_EPS) + b[k];
++d;
++s;
}
......@@ -524,7 +524,7 @@ namespace dlib
resizable_tensor& invstds,
const double averaging_factor,
resizable_tensor& running_means,
resizable_tensor& running_invstds,
resizable_tensor& running_variances,
const tensor& src,
const tensor& gamma,
const tensor& beta
......@@ -532,7 +532,7 @@ namespace dlib
{
DLIB_CASSERT(0 <= averaging_factor && averaging_factor <= 1, "averaging_factor: " << averaging_factor);
DLIB_CASSERT(averaging_factor==1 || have_same_dimensions(running_means,means),"");
DLIB_CASSERT(averaging_factor==1 || have_same_dimensions(running_invstds,invstds),"");
DLIB_CASSERT(averaging_factor==1 || have_same_dimensions(running_variances,invstds),"");
DLIB_CASSERT(
src.num_samples() > 1 &&
gamma.num_samples() == 1 &&
......@@ -580,8 +580,9 @@ namespace dlib
invstds.host(); means.host();
// compute variances
running_invstds.copy_size(invstds);
auto rvar = running_invstds.host();
running_variances.copy_size(invstds);
auto rvar = running_variances.host();
// This scale makes the running variances unbiased.
const double scale = (src.num_samples())/(src.num_samples()-1.0);
for (long i = 0; i < num; ++i)
{
......@@ -718,7 +719,7 @@ namespace dlib
const tensor& gamma,
const tensor& beta,
const tensor& running_means,
const tensor& running_invstds
const tensor& running_variances
)
{
DLIB_CASSERT(
......@@ -728,7 +729,7 @@ namespace dlib
gamma.k() == src.k() &&
have_same_dimensions(gamma, beta) &&
have_same_dimensions(gamma, running_means) &&
have_same_dimensions(gamma, running_invstds),
have_same_dimensions(gamma, running_variances),
"\ngamma.num_samples(): " << gamma.num_samples() <<
"\ngamma.k(): " << gamma.k() <<
"\ngamma.nr(): " << gamma.nr() <<
......@@ -741,10 +742,10 @@ namespace dlib
"\nrunning_means.k(): " << running_means.k() <<
"\nrunning_means.nr(): " << running_means.nr() <<
"\nrunning_means.nc(): " << running_means.nc() <<
"\nrunning_invstds.num_samples(): " << running_invstds.num_samples() <<
"\nrunning_invstds.k(): " << running_invstds.k() <<
"\nrunning_invstds.nr(): " << running_invstds.nr() <<
"\nrunning_invstds.nc(): " << running_invstds.nc() <<
"\nrunning_variances.num_samples(): " << running_variances.num_samples() <<
"\nrunning_variances.k(): " << running_variances.k() <<
"\nrunning_variances.nr(): " << running_variances.nr() <<
"\nrunning_variances.nc(): " << running_variances.nc() <<
"\nsrc.k(): " << src.k() <<
"\nsrc.nr(): " << src.nr() <<
"\nsrc.nc(): " << src.nc()
......@@ -756,14 +757,14 @@ namespace dlib
auto g = gamma.host();
auto b = beta.host();
auto m = running_means.host();
auto i = running_invstds.host();
auto v = running_variances.host();
const long num = src.nr()*src.nc();
for (long n = 0; n < src.num_samples(); ++n)
{
for (long k = 0; k < src.k(); ++k)
{
const float invstd = 1.0f/std::sqrt(i[k] + dlib::tt::BATCH_NORM_EPS);
const float invstd = 1.0f/std::sqrt(v[k] + dlib::tt::BATCH_NORM_EPS);
for (long j = 0; j < num; ++j)
{
*d = g[k]*(*s - m[k])*invstd + b[k];
......@@ -780,7 +781,7 @@ namespace dlib
resizable_tensor& invstds,
const double averaging_factor,
resizable_tensor& running_means,
resizable_tensor& running_invstds,
resizable_tensor& running_variances,
const tensor& src,
const tensor& gamma,
const tensor& beta
......@@ -788,7 +789,7 @@ namespace dlib
{
DLIB_CASSERT(0 <= averaging_factor && averaging_factor <= 1, "averaging_factor: " << averaging_factor);
DLIB_CASSERT(averaging_factor==1 || have_same_dimensions(running_means,means),"");
DLIB_CASSERT(averaging_factor==1 || have_same_dimensions(running_invstds,invstds),"");
DLIB_CASSERT(averaging_factor==1 || have_same_dimensions(running_variances,invstds),"");
DLIB_CASSERT(
src.num_samples() > 1 &&
gamma.num_samples() == 1 &&
......@@ -844,8 +845,9 @@ namespace dlib
p_src = src.host();
// compute variances
running_invstds.copy_size(invstds);
auto rvar = running_invstds.host();
running_variances.copy_size(invstds);
auto rvar = running_variances.host();
// This scale makes the running variances unbiased.
const double scale = (src.num_samples()*num)/(src.num_samples()*num-1.0);
for (long k = 0; k < src.k(); ++k)
{
......
......@@ -120,7 +120,7 @@ namespace dlib
const tensor& gamma,
const tensor& beta,
const tensor& running_means,
const tensor& running_invstds
const tensor& running_variances
);
void batch_normalize (
......@@ -129,7 +129,7 @@ namespace dlib
resizable_tensor& invstds,
const double averaging_factor,
resizable_tensor& running_means,
resizable_tensor& running_invstds,
resizable_tensor& running_variances,
const tensor& src,
const tensor& gamma,
const tensor& beta
......@@ -152,7 +152,7 @@ namespace dlib
const tensor& gamma,
const tensor& beta,
const tensor& running_means,
const tensor& running_invstds
const tensor& running_variances
);
void batch_normalize_conv (
......@@ -161,7 +161,7 @@ namespace dlib
resizable_tensor& invstds,
const double averaging_factor,
resizable_tensor& running_means,
resizable_tensor& running_invstds,
resizable_tensor& running_variances,
const tensor& src,
const tensor& gamma,
const tensor& beta
......
......@@ -343,7 +343,7 @@ namespace dlib
const tensor& gamma,
const tensor& beta,
const tensor& running_means,
const tensor& running_invstds
const tensor& running_variances
)
{
DLIB_CASSERT(
......@@ -353,7 +353,7 @@ namespace dlib
gamma.k() == src.k() &&
have_same_dimensions(gamma, beta) &&
have_same_dimensions(gamma, running_means) &&
have_same_dimensions(gamma, running_invstds),
have_same_dimensions(gamma, running_variances),
"\ngamma.num_samples(): " << gamma.num_samples() <<
"\ngamma.k(): " << gamma.k() <<
"\ngamma.nr(): " << gamma.nr() <<
......@@ -366,10 +366,10 @@ namespace dlib
"\nrunning_means.k(): " << running_means.k() <<
"\nrunning_means.nr(): " << running_means.nr() <<
"\nrunning_means.nc(): " << running_means.nc() <<
"\nrunning_invstds.num_samples(): " << running_invstds.num_samples() <<
"\nrunning_invstds.k(): " << running_invstds.k() <<
"\nrunning_invstds.nr(): " << running_invstds.nr() <<
"\nrunning_invstds.nc(): " << running_invstds.nc() <<
"\nrunning_variances.num_samples(): " << running_variances.num_samples() <<
"\nrunning_variances.k(): " << running_variances.k() <<
"\nrunning_variances.nr(): " << running_variances.nr() <<
"\nrunning_variances.nc(): " << running_variances.nc() <<
"\nsrc.k(): " << src.k() <<
"\nsrc.nr(): " << src.nr() <<
"\nsrc.nc(): " << src.nc()
......@@ -392,7 +392,7 @@ namespace dlib
gamma.device(),
beta.device(),
running_means.device(),
running_invstds.device(),
running_variances.device(),
dlib::tt::BATCH_NORM_EPS));
}
......@@ -402,7 +402,7 @@ namespace dlib
resizable_tensor& invstds,
const double averaging_factor,
resizable_tensor& running_means,
resizable_tensor& running_invstds,
resizable_tensor& running_variances,
const tensor& src,
const tensor& gamma,
const tensor& beta
......@@ -410,7 +410,7 @@ namespace dlib
{
DLIB_CASSERT(0 <= averaging_factor && averaging_factor <= 1, "averaging_factor: " << averaging_factor);
DLIB_CASSERT(averaging_factor==1 || have_same_dimensions(running_means,means),"");
DLIB_CASSERT(averaging_factor==1 || have_same_dimensions(running_invstds,invstds),"");
DLIB_CASSERT(averaging_factor==1 || have_same_dimensions(running_variances,invstds),"");
DLIB_CASSERT(
src.num_samples() > 1 &&
gamma.num_samples() == 1 &&
......@@ -438,7 +438,7 @@ namespace dlib
means.set_size(1, src.k(), src.nr(), src.nc());
invstds.copy_size(means);
running_means.copy_size(means);
running_invstds.copy_size(means);
running_variances.copy_size(means);
CHECK_CUDNN(cudnnBatchNormalizationForwardTraining(
context(),
......@@ -454,7 +454,7 @@ namespace dlib
beta.device(),
averaging_factor,
running_means.device(),
running_invstds.device(),
running_variances.device(),
dlib::tt::BATCH_NORM_EPS,
means.device(),
invstds.device()));
......@@ -516,7 +516,7 @@ namespace dlib
const tensor& gamma,
const tensor& beta,
const tensor& running_means,
const tensor& running_invstds
const tensor& running_variances
)
{
DLIB_CASSERT(
......@@ -526,7 +526,7 @@ namespace dlib
gamma.k() == src.k() &&
have_same_dimensions(gamma, beta) &&
have_same_dimensions(gamma, running_means) &&
have_same_dimensions(gamma, running_invstds),
have_same_dimensions(gamma, running_variances),
"\ngamma.num_samples(): " << gamma.num_samples() <<
"\ngamma.k(): " << gamma.k() <<
"\ngamma.nr(): " << gamma.nr() <<
......@@ -539,10 +539,10 @@ namespace dlib
"\nrunning_means.k(): " << running_means.k() <<
"\nrunning_means.nr(): " << running_means.nr() <<
"\nrunning_means.nc(): " << running_means.nc() <<
"\nrunning_invstds.num_samples(): " << running_invstds.num_samples() <<
"\nrunning_invstds.k(): " << running_invstds.k() <<
"\nrunning_invstds.nr(): " << running_invstds.nr() <<
"\nrunning_invstds.nc(): " << running_invstds.nc() <<
"\nrunning_variances.num_samples(): " << running_variances.num_samples() <<
"\nrunning_variances.k(): " << running_variances.k() <<
"\nrunning_variances.nr(): " << running_variances.nr() <<
"\nrunning_variances.nc(): " << running_variances.nc() <<
"\nsrc.k(): " << src.k() <<
"\nsrc.nr(): " << src.nr() <<
"\nsrc.nc(): " << src.nc()
......@@ -565,7 +565,7 @@ namespace dlib
gamma.device(),
beta.device(),
running_means.device(),
running_invstds.device(),
running_variances.device(),
dlib::tt::BATCH_NORM_EPS));
}
......@@ -575,7 +575,7 @@ namespace dlib
resizable_tensor& invstds,
const double averaging_factor,
resizable_tensor& running_means,
resizable_tensor& running_invstds,
resizable_tensor& running_variances,
const tensor& src,
const tensor& gamma,
const tensor& beta
......@@ -583,7 +583,7 @@ namespace dlib
{
DLIB_CASSERT(0 <= averaging_factor && averaging_factor <= 1, "averaging_factor: " << averaging_factor);
DLIB_CASSERT(averaging_factor==1 || have_same_dimensions(running_means,means),"");
DLIB_CASSERT(averaging_factor==1 || have_same_dimensions(running_invstds,invstds),"");
DLIB_CASSERT(averaging_factor==1 || have_same_dimensions(running_variances,invstds),"");
DLIB_CASSERT(
src.num_samples() > 1 &&
gamma.num_samples() == 1 &&
......@@ -612,7 +612,7 @@ namespace dlib
means.set_size(1, src.k());
invstds.copy_size(means);
running_means.copy_size(means);
running_invstds.copy_size(means);
running_variances.copy_size(means);
CHECK_CUDNN(cudnnBatchNormalizationForwardTraining(
context(),
......@@ -628,7 +628,7 @@ namespace dlib
beta.device(),
averaging_factor,
running_means.device(),
running_invstds.device(),
running_variances.device(),
dlib::tt::BATCH_NORM_EPS,
means.device(),
invstds.device()));
......
......@@ -140,7 +140,7 @@ namespace dlib
const tensor& gamma,
const tensor& beta,
const tensor& running_means,
const tensor& running_invstds
const tensor& running_variances
);
void batch_normalize (
......@@ -149,7 +149,7 @@ namespace dlib
resizable_tensor& invstds,
const double averaging_factor,
resizable_tensor& running_means,
resizable_tensor& running_invstds,
resizable_tensor& running_variances,
const tensor& src,
const tensor& gamma,
const tensor& beta
......@@ -174,7 +174,7 @@ namespace dlib
const tensor& gamma,
const tensor& beta,
const tensor& running_means,
const tensor& running_invstds
const tensor& running_variances
);
void batch_normalize_conv (
......@@ -183,7 +183,7 @@ namespace dlib
resizable_tensor& invstds,
const double averaging_factor,
resizable_tensor& running_means,
resizable_tensor& running_invstds,
resizable_tensor& running_variances,
const tensor& src,
const tensor& gamma,
const tensor& beta
......
......@@ -453,9 +453,9 @@ namespace dlib
beta(params,gamma.size()) = 0;
running_means.copy_size(gamma(params,0));
running_invstds.copy_size(gamma(params,0));
running_variances.copy_size(gamma(params,0));
running_means = 0;
running_invstds = 1;
running_variances = 1;
num_updates = 0;
}
......@@ -470,16 +470,16 @@ namespace dlib
if (num_updates <running_stats_window_size)
++num_updates;
if (mode == FC_MODE)
tt::batch_normalize(output, means, invstds, decay, running_means, running_invstds, sub.get_output(), g, b);
tt::batch_normalize(output, means, invstds, decay, running_means, running_variances, sub.get_output(), g, b);
else
tt::batch_normalize_conv(output, means, invstds, decay, running_means, running_invstds, sub.get_output(), g, b);
tt::batch_normalize_conv(output, means, invstds, decay, running_means, running_variances, sub.get_output(), g, b);
}
else // we are running in testing mode so we just linearly scale the input tensor.
{
if (mode == FC_MODE)
tt::batch_normalize_inference(output, sub.get_output(), g, b, running_means, running_invstds);
tt::batch_normalize_inference(output, sub.get_output(), g, b, running_means, running_variances);
else
tt::batch_normalize_conv_inference(output, sub.get_output(), g, b, running_means, running_invstds);
tt::batch_normalize_conv_inference(output, sub.get_output(), g, b, running_means, running_variances);
}
}
......@@ -510,7 +510,7 @@ namespace dlib
serialize(item.means, out);
serialize(item.invstds, out);
serialize(item.running_means, out);
serialize(item.running_invstds, out);
serialize(item.running_variances, out);
serialize(item.num_updates, out);
serialize(item.running_stats_window_size, out);
}
......@@ -539,7 +539,7 @@ namespace dlib
deserialize(item.means, in);
deserialize(item.invstds, in);
deserialize(item.running_means, in);
deserialize(item.running_invstds, in);
deserialize(item.running_variances, in);
deserialize(item.num_updates, in);
deserialize(item.running_stats_window_size, in);
......@@ -551,9 +551,9 @@ namespace dlib
deserialize(_mode, in);
if (mode != (layer_mode)_mode) throw serialization_error("Wrong mode found while deserializing dlib::bn_");
// We also need to flip the running_invstds around since the previous
// We also need to flip the running_variances around since the previous
// format saved the inverse standard deviations instead of variances.
item.running_invstds = 1.0f/squared(mat(item.running_invstds)) - tt::BATCH_NORM_EPS;
item.running_variances = 1.0f/squared(mat(item.running_variances)) - tt::BATCH_NORM_EPS;
}
}
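
The conversion applied in deserialize() above follows from how the old format defined its statistic: the previously stored value was an inverse standard deviation s = 1/\sqrt{v + \varepsilon}, so solving for the variance gives

    v = \frac{1}{s^2} - \varepsilon

which is exactly 1.0f/squared(mat(item.running_variances)) - tt::BATCH_NORM_EPS.
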
......@@ -564,7 +564,7 @@ namespace dlib
resizable_tensor params;
alias_tensor gamma, beta;
resizable_tensor means, running_means;
resizable_tensor invstds, running_invstds;
resizable_tensor invstds, running_variances;
unsigned long num_updates;
unsigned long running_stats_window_size;
};
......@@ -911,7 +911,7 @@ namespace dlib
auto sg = gamma(temp,0);
auto sb = beta(temp,gamma.size());
g = pointwise_multiply(mat(sg), 1.0f/sqrt(mat(item.running_invstds)+tt::BATCH_NORM_EPS));
g = pointwise_multiply(mat(sg), 1.0f/sqrt(mat(item.running_variances)+tt::BATCH_NORM_EPS));
b = mat(sb) - pointwise_multiply(mat(g), mat(item.running_means));
}
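
The two assignments above fold the frozen batch norm into a single affine map; rearranging the inference formula shows where g and b come from:

    \gamma\,\frac{x - \mu}{\sqrt{v + \varepsilon}} + \beta
      = \underbrace{\frac{\gamma}{\sqrt{v + \varepsilon}}}_{g}\,x
      + \underbrace{\beta - g\,\mu}_{b}
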
......
......@@ -274,13 +274,13 @@ namespace dlib { namespace tt
const tensor& gamma,
const tensor& beta,
const tensor& running_means,
const tensor& running_invstds
const tensor& running_variances
)
{
#ifdef DLIB_USE_CUDA
cuda::batch_normalize_inference(dest,src,gamma,beta,running_means,running_invstds);
cuda::batch_normalize_inference(dest,src,gamma,beta,running_means,running_variances);
#else
cpu::batch_normalize_inference(dest,src,gamma,beta,running_means,running_invstds);
cpu::batch_normalize_inference(dest,src,gamma,beta,running_means,running_variances);
#endif
}
......@@ -290,16 +290,16 @@ namespace dlib { namespace tt
resizable_tensor& vars,
const double averaging_factor,
resizable_tensor& running_means,
resizable_tensor& running_invstds,
resizable_tensor& running_variances,
const tensor& src,
const tensor& gamma,
const tensor& beta
)
{
#ifdef DLIB_USE_CUDA
cuda::batch_normalize(dest,means,vars,averaging_factor,running_means,running_invstds,src,gamma,beta);
cuda::batch_normalize(dest,means,vars,averaging_factor,running_means,running_variances,src,gamma,beta);
#else
cpu::batch_normalize(dest,means,vars,averaging_factor,running_means,running_invstds,src,gamma,beta);
cpu::batch_normalize(dest,means,vars,averaging_factor,running_means,running_variances,src,gamma,beta);
#endif
}
......@@ -330,13 +330,13 @@ namespace dlib { namespace tt
const tensor& gamma,
const tensor& beta,
const tensor& running_means,
const tensor& running_invstds
const tensor& running_variances
)
{
#ifdef DLIB_USE_CUDA
cuda::batch_normalize_conv_inference(dest,src,gamma,beta,running_means,running_invstds);
cuda::batch_normalize_conv_inference(dest,src,gamma,beta,running_means,running_variances);
#else
cpu::batch_normalize_conv_inference(dest,src,gamma,beta,running_means,running_invstds);
cpu::batch_normalize_conv_inference(dest,src,gamma,beta,running_means,running_variances);
#endif
}
......@@ -346,16 +346,16 @@ namespace dlib { namespace tt
resizable_tensor& vars,
const double averaging_factor,
resizable_tensor& running_means,
resizable_tensor& running_invstds,
resizable_tensor& running_variances,
const tensor& src,
const tensor& gamma,
const tensor& beta
)
{
#ifdef DLIB_USE_CUDA
cuda::batch_normalize_conv(dest,means,vars,averaging_factor,running_means,running_invstds,src,gamma,beta);
cuda::batch_normalize_conv(dest,means,vars,averaging_factor,running_means,running_variances,src,gamma,beta);
#else
cpu::batch_normalize_conv(dest,means,vars,averaging_factor,running_means,running_invstds,src,gamma,beta);
cpu::batch_normalize_conv(dest,means,vars,averaging_factor,running_means,running_variances,src,gamma,beta);
#endif
}
......
......@@ -294,7 +294,7 @@ namespace dlib { namespace tt
const tensor& gamma,
const tensor& beta,
const tensor& running_means,
const tensor& running_invstds
const tensor& running_variances
);
/*!
requires
......@@ -304,12 +304,12 @@ namespace dlib { namespace tt
- gamma.k() == src.k()
- have_same_dimensions(gamma, beta)
- have_same_dimensions(gamma, running_means)
- have_same_dimensions(gamma, running_invstds)
- have_same_dimensions(gamma, running_variances)
ensures
- Just linearly transforms src as a call to batch_normalize() would if the resulting
means and invstds were running_means and running_invstds. That is, this function
performs:
dest = gamma*(src-running_means)*running_invstds + beta
- Linearly transforms src as a call to batch_normalize() would if src had means
and variances as given by running_means and running_variances. That is, this
function performs:
dest = gamma*(src-running_means)/sqrt(running_variances+BATCH_NORM_EPS) + beta
Note that it does it in a pointwise fashion over the samples in src.
!*/
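
A short usage sketch of this call under the contract above (shapes follow the FC-mode requires clause; the include and the literal values are assumptions, not taken from this commit):

    #include <dlib/dnn.h>   // assumed umbrella header providing tt::batch_normalize_inference
    using namespace dlib;

    int main()
    {
        resizable_tensor src(10, 3, 4, 4);              // num_samples, k, nr, nc
        resizable_tensor gamma(1, 3, 4, 4), beta(1, 3, 4, 4);
        resizable_tensor running_means(1, 3, 4, 4), running_variances(1, 3, 4, 4);
        tt::tensor_rand rnd;
        rnd.fill_uniform(src);
        gamma = 1;  beta = 0;
        running_means = 0;  running_variances = 1;      // identity-like statistics

        resizable_tensor dest;
        tt::batch_normalize_inference(dest, src, gamma, beta, running_means, running_variances);
        // dest now holds gamma*(src - running_means)/sqrt(running_variances + BATCH_NORM_EPS) + beta
    }
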
......@@ -319,7 +319,7 @@ namespace dlib { namespace tt
resizable_tensor& invstds,
const double averaging_factor,
resizable_tensor& running_means,
resizable_tensor& running_invstds,
resizable_tensor& running_variances,
const tensor& src,
const tensor& gamma,
const tensor& beta
......@@ -335,7 +335,7 @@ namespace dlib { namespace tt
- 0 <= averaging_factor <= 1
- if (averaging_factor != 1)
- have_same_dimensions(running_means, means) == true
- have_same_dimensions(running_invstds, invstds) == true
- have_same_dimensions(running_variances, invstds) == true
ensures
- have_same_dimensions(#dest, src) == true
- #means.num_samples() == 1
......@@ -347,7 +347,7 @@ namespace dlib { namespace tt
- #means == the mean values of the contents of src.
- #invstds == 1/(the standard deviation values of the contents of src).
- #running_means = (1-averaging_factor)*mat(#running_means) + averaging_factor*mat(#means);
- #running_invstds = (1-averaging_factor)*mat(#running_invstds) + averaging_factor*mat(#invstds);
- #running_variances = (1-averaging_factor)*mat(#running_variances) + averaging_factor*(variance of contents of src);
!*/
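
A plain-C++ sketch (illustrative names, no dlib types) of the running-statistics update described by the last two ensures clauses, including the n/(n-1) rescaling that makes the stored running variance unbiased:

    #include <cstddef>
    #include <vector>

    // Per-element exponential moving average of batch statistics.
    // batch_vars holds the biased (1/n) variances used for normalization; the
    // running value stores the unbiased estimate, hence the n/(n-1) scale.
    void update_running_stats(std::vector<double>& running_means,
                              std::vector<double>& running_variances,
                              const std::vector<double>& batch_means,
                              const std::vector<double>& batch_vars,
                              std::size_t n,                 // samples averaged over
                              double averaging_factor)
    {
        const double scale = n/(n - 1.0);
        for (std::size_t i = 0; i < batch_means.size(); ++i)
        {
            running_means[i]     = (1 - averaging_factor)*running_means[i]
                                   + averaging_factor*batch_means[i];
            running_variances[i] = (1 - averaging_factor)*running_variances[i]
                                   + averaging_factor*scale*batch_vars[i];
        }
    }
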
void batch_normalize_gradient (
......@@ -391,7 +391,7 @@ namespace dlib { namespace tt
const tensor& gamma,
const tensor& beta,
const tensor& running_means,
const tensor& running_invstds
const tensor& running_variances
);
/*!
requires
......@@ -401,13 +401,13 @@ namespace dlib { namespace tt
- gamma.k() == src.k()
- have_same_dimensions(gamma, beta)
- have_same_dimensions(gamma, running_means)
- have_same_dimensions(gamma, running_invstds)
- have_same_dimensions(gamma, running_variances)
ensures
- Just linearly transforms src as a call to batch_normalize_conv() would if the resulting
means and invstds were running_means and running_invstds. That is, this function
performs:
dest = gamma*(src-running_means)*running_invstds + beta
Note that it does it in a pointwise fashion over the samples, rows, and
- Linearly transforms src as a call to batch_normalize_conv() would if src had
means and variances as given by running_means and running_variances. That
is, this function performs:
dest = gamma*(src-running_means)/sqrt(running_variances+BATCH_NORM_EPS) + beta
Note that it does this in a pointwise fashion over the samples, rows, and
columns in src.
!*/
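
A sketch of the conv-mode broadcast this contract describes: one mean, variance, gamma, and beta per channel k, applied to every row and column of every sample (plain arrays, illustrative epsilon):

    #include <cmath>
    #include <vector>

    const float EPS = 1e-5f;  // illustrative; dlib uses tt::BATCH_NORM_EPS

    // x is laid out as [n][k][r][c]; gamma, beta, mean, var have one entry per channel.
    void bn_conv_inference(std::vector<float>& x, long n, long k, long nr, long nc,
                           const std::vector<float>& gamma, const std::vector<float>& beta,
                           const std::vector<float>& mean,  const std::vector<float>& var)
    {
        std::size_t idx = 0;
        for (long s = 0; s < n; ++s)
            for (long ch = 0; ch < k; ++ch)
            {
                const float invstd = 1.0f/std::sqrt(var[ch] + EPS);
                for (long j = 0; j < nr*nc; ++j, ++idx)
                    x[idx] = gamma[ch]*(x[idx] - mean[ch])*invstd + beta[ch];
            }
    }
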
......@@ -417,7 +417,7 @@ namespace dlib { namespace tt
resizable_tensor& invstds,
const double averaging_factor,
resizable_tensor& running_means,
resizable_tensor& running_invstds,
resizable_tensor& running_variances,
const tensor& src,
const tensor& gamma,
const tensor& beta
......@@ -431,7 +431,7 @@ namespace dlib { namespace tt
- 0 <= averaging_factor <= 1
- if (averaging_factor != 1)
- have_same_dimensions(running_means, means) == true
- have_same_dimensions(running_invstds, invstds) == true
- have_same_dimensions(running_variances, invstds) == true
ensures
- have_same_dimensions(#dest, src) == true
- #means.num_samples()==means.nr()==means.nc() == 1
......@@ -441,7 +441,7 @@ namespace dlib { namespace tt
- #means == the mean values of the contents of src.
- #invstds == 1/(the standard deviation values of the contents of src).
- #running_means = (1-averaging_factor)*mat(#running_means) + averaging_factor*mat(#means);
- #running_invstds = (1-averaging_factor)*mat(#running_invstds) + averaging_factor*mat(#invstds);
- #running_variances = (1-averaging_factor)*mat(#running_variances) + averaging_factor*(variance of contents of src);
!*/
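
In conv mode the per-channel statistics pool over every sample, row, and column, so the element count behind the unbiased correction is m = N*nr*nc rather than N; the last ensures clause amounts to

    \text{running\_variances}_k \leftarrow (1-\alpha)\,\text{running\_variances}_k
      + \alpha\,\frac{m}{m-1}\,\hat\sigma^2_k

where \hat\sigma^2_k is the biased per-channel variance of src and \alpha is averaging_factor (this matches the scale used in the CPU implementation above).
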
void batch_normalize_conv_gradient (
......
......@@ -164,12 +164,12 @@ namespace
beta = 0;
resizable_tensor running_means;
resizable_tensor running_invstds;
batch_normalize(dest, means, vars, 1, running_means, running_invstds, src, gamma, beta);
resizable_tensor running_variances;
batch_normalize(dest, means, vars, 1, running_means, running_variances, src, gamma, beta);
const double scale = (src.num_samples())/(src.num_samples()-1.0);
// Turn back into biased variance estimate because that's how batch_normalize() works, so if we want to match it this is necessary.
running_invstds = mat(running_invstds)/scale;
batch_normalize_inference(dest2, src, gamma, beta, running_means, running_invstds);
running_variances = mat(running_variances)/scale;
batch_normalize_inference(dest2, src, gamma, beta, running_means, running_variances);
DLIB_TEST_MSG(max(abs(mat(dest2)-mat(dest))) < 1e-5, max(abs(mat(dest2)-mat(dest))));
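
The division by scale above works because, with averaging_factor set to 1, the stored running variance is the unbiased estimate, while batch_normalize() normalized with the biased one:

    \sigma^2_{\text{biased}} = \frac{n-1}{n}\,\sigma^2_{\text{unbiased}}
      = \frac{\sigma^2_{\text{unbiased}}}{\text{scale}},\qquad
    \text{scale} = \frac{n}{n-1}
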
......@@ -177,7 +177,7 @@ namespace
auto f = [&](float eps) {
const float old = src.host()[idx];
src.host()[idx] += eps;
batch_normalize(dest, means, vars, 1, running_means, running_invstds, src, gamma, beta);
batch_normalize(dest, means, vars, 1, running_means, running_variances, src, gamma, beta);
float result = dot(gradient_input, dest);
src.host()[idx] = old;
return result;
......@@ -189,7 +189,7 @@ namespace
auto f = [&](float eps) {
const float old = gamma.host()[idx];
gamma.host()[idx] += eps;
batch_normalize(dest, means, vars, 1, running_means, running_invstds, src, gamma, beta);
batch_normalize(dest, means, vars, 1, running_means, running_variances, src, gamma, beta);
float result = dot(gradient_input, dest);
gamma.host()[idx] = old;
return result;
......@@ -201,7 +201,7 @@ namespace
auto f = [&](float eps) {
const float old = beta.host()[idx];
beta.host()[idx] += eps;
batch_normalize(dest, means, vars, 1, running_means, running_invstds, src, gamma, beta);
batch_normalize(dest, means, vars, 1, running_means, running_variances, src, gamma, beta);
float result = dot(gradient_input, dest);
beta.host()[idx] = old;
return result;
......@@ -247,13 +247,13 @@ namespace
beta = 0;
resizable_tensor running_means;
resizable_tensor running_invstds;
batch_normalize_conv(dest, means, vars, 1, running_means, running_invstds, src, gamma, beta);
resizable_tensor running_variances;
batch_normalize_conv(dest, means, vars, 1, running_means, running_variances, src, gamma, beta);
const double scale = (src.num_samples()*src.nr()*src.nc())/(src.num_samples()*src.nr()*src.nc()-1.0);
// Turn back into biased variance estimate because that's how
// batch_normalize_conv() works, so if we want to match it this is necessary.
running_invstds = mat(running_invstds)/scale;
batch_normalize_conv_inference(dest2, src, gamma, beta, running_means, running_invstds);
running_variances = mat(running_variances)/scale;
batch_normalize_conv_inference(dest2, src, gamma, beta, running_means, running_variances);
DLIB_TEST(max(abs(mat(dest2)-mat(dest))) < 1e-5);
......@@ -261,7 +261,7 @@ namespace
auto f = [&](float eps) {
const float old = src.host()[idx];
src.host()[idx] += eps;
batch_normalize_conv(dest, means, vars, 1, running_means, running_invstds, src, gamma, beta);
batch_normalize_conv(dest, means, vars, 1, running_means, running_variances, src, gamma, beta);
float result = dot(gradient_input, dest);
src.host()[idx] = old;
return result;
......@@ -273,7 +273,7 @@ namespace
auto f = [&](float eps) {
const float old = gamma.host()[idx];
gamma.host()[idx] += eps;
batch_normalize_conv(dest, means, vars, 1, running_means, running_invstds, src, gamma, beta);
batch_normalize_conv(dest, means, vars, 1, running_means, running_variances, src, gamma, beta);
float result = dot(gradient_input, dest);
gamma.host()[idx] = old;
return result;
......@@ -285,7 +285,7 @@ namespace
auto f = [&](float eps) {
const float old = beta.host()[idx];
beta.host()[idx] += eps;
batch_normalize_conv(dest, means, vars, 1, running_means, running_invstds, src, gamma, beta);
batch_normalize_conv(dest, means, vars, 1, running_means, running_variances, src, gamma, beta);
float result = dot(gradient_input, dest);
beta.host()[idx] = old;
return result;
......@@ -775,7 +775,7 @@ namespace
resizable_tensor means, means2;
resizable_tensor invstds, invstds2;
resizable_tensor running_means, running_means2;
resizable_tensor running_invstds, running_invstds2;
resizable_tensor running_variances, running_variances2;
resizable_tensor src(64,20,100,100);
resizable_tensor gamma(1,20,100,100);
resizable_tensor beta(1,20,100,100);
......@@ -785,20 +785,20 @@ namespace
rnd.fill_uniform(src);
cpu::batch_normalize(dest, means, invstds, 1, running_means, running_invstds, src, gamma, beta);
cuda::batch_normalize(dest2,means2,invstds2, 1, running_means2, running_invstds2, src, gamma, beta);
cpu::batch_normalize(dest, means, invstds, 1, running_means, running_variances, src, gamma, beta);
cuda::batch_normalize(dest2,means2,invstds2, 1, running_means2, running_variances2, src, gamma, beta);
dlog << LINFO << "dest error: "<< max(abs(mat(dest) -mat(dest2)));
dlog << LINFO << "means error: "<< max(abs(mat(means) -mat(means2)));
dlog << LINFO << "invstds error: "<< max(abs(mat(invstds) -mat(invstds2)));
dlog << LINFO << "running_means error: "<< max(abs(mat(running_means) -mat(running_means2)));
dlog << LINFO << "running_invstds error: "<< max(abs(mat(running_invstds) -mat(running_invstds2)));
dlog << LINFO << "running_variances error: "<< max(abs(mat(running_variances) -mat(running_variances2)));
DLIB_TEST(max(abs(mat(dest) -mat(dest2))) < 1e-4);
DLIB_TEST(max(abs(mat(means) -mat(means2))) < 1e-4);
DLIB_TEST(max(abs(mat(invstds) -mat(invstds2))) < 1e-4);
DLIB_TEST(max(abs(mat(running_means) -mat(running_means2))) < 1e-4);
DLIB_TEST(max(abs(mat(running_invstds) -mat(running_invstds2))) < 1e-4);
DLIB_TEST(max(abs(mat(running_variances) -mat(running_variances2))) < 1e-4);
// now check that the gradients match as well
......@@ -830,7 +830,7 @@ namespace
resizable_tensor means, means2;
resizable_tensor invstds, invstds2;
resizable_tensor running_means, running_means2;
resizable_tensor running_invstds, running_invstds2;
resizable_tensor running_variances, running_variances2;
resizable_tensor src(2,8,10,9);
resizable_tensor gamma(1,8);
resizable_tensor beta(1,8);
......@@ -839,20 +839,20 @@ namespace
tt::tensor_rand rnd;
rnd.fill_uniform(src);
cpu::batch_normalize_conv(dest,means,invstds,1,running_means,running_invstds, src, gamma, beta);
cuda::batch_normalize_conv(dest2,means2,invstds2,1,running_means2,running_invstds2, src, gamma, beta);
cpu::batch_normalize_conv(dest,means,invstds,1,running_means,running_variances, src, gamma, beta);
cuda::batch_normalize_conv(dest2,means2,invstds2,1,running_means2,running_variances2, src, gamma, beta);
dlog << LINFO << "dest error: "<< max(abs(mat(dest) -mat(dest2)));
dlog << LINFO << "means error: "<< max(abs(mat(means) -mat(means2)));
dlog << LINFO << "invstds error: "<< max(abs(mat(invstds) -mat(invstds2)));
dlog << LINFO << "running_means error: "<< max(abs(mat(running_means) -mat(running_means2)));
dlog << LINFO << "running_invstds error: "<< max(abs(mat(running_invstds) -mat(running_invstds2)));
dlog << LINFO << "running_variances error: "<< max(abs(mat(running_variances) -mat(running_variances2)));
DLIB_TEST(max(abs(mat(dest) -mat(dest2))) < 1e-4);
DLIB_TEST(max(abs(mat(means) -mat(means2))) < 1e-4);
DLIB_TEST(max(abs(mat(invstds) -mat(invstds2))) < 1e-4);
DLIB_TEST(max(abs(mat(running_means) -mat(running_means2))) < 1e-4);
DLIB_TEST(max(abs(mat(running_invstds) -mat(running_invstds2))) < 1e-4);
DLIB_TEST(max(abs(mat(running_variances) -mat(running_variances2))) < 1e-4);
resizable_tensor gradient_input;
resizable_tensor src_grad, gamma_grad, beta_grad;
......