"docs/git@developer.sourcefind.cn:change/sglang.git" did not exist on "b32ab0705edb8401d93b47e28dc5fc4d37b2e39b"
Commit 6acddf99 authored by Davis King

Just renamed variables to reflect the new meaning of the batch normalization running variance output.
parent 538de238
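
The rename tracks a semantic change made in the parent commit: the running statistics tensors now hold variances rather than inverse standard deviations. As a rough standalone C++ sketch of the two conventions (illustrative helpers, not the dlib API itself):

    #include <cmath>

    // Old convention: the stored statistic was an inverse stddev, applied directly.
    float bn_old(float x, float gamma, float beta, float mean, float invstd)
    {
        return gamma*(x - mean)*invstd + beta;
    }

    // New convention: the stored statistic is the variance, so the inverse
    // stddev is recomputed with an epsilon guard at inference time.
    float bn_new(float x, float gamma, float beta, float mean, float var, float eps)
    {
        return gamma*(x - mean)/std::sqrt(var + eps) + beta;
    }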
@@ -466,7 +466,7 @@ namespace dlib
             const tensor& gamma,
             const tensor& beta,
             const tensor& running_means,
-            const tensor& running_invstds
+            const tensor& running_variances
         )
         {
             DLIB_CASSERT(
@@ -476,7 +476,7 @@ namespace dlib
                 gamma.k() == src.k() &&
                 have_same_dimensions(gamma, beta) &&
                 have_same_dimensions(gamma, running_means) &&
-                have_same_dimensions(gamma, running_invstds),
+                have_same_dimensions(gamma, running_variances),
                 "\ngamma.num_samples(): " << gamma.num_samples() <<
                 "\ngamma.k(): " << gamma.k() <<
                 "\ngamma.nr(): " << gamma.nr() <<
@@ -489,10 +489,10 @@ namespace dlib
                 "\nrunning_means.k(): " << running_means.k() <<
                 "\nrunning_means.nr(): " << running_means.nr() <<
                 "\nrunning_means.nc(): " << running_means.nc() <<
-                "\nrunning_invstds.num_samples(): " << running_invstds.num_samples() <<
-                "\nrunning_invstds.k(): " << running_invstds.k() <<
-                "\nrunning_invstds.nr(): " << running_invstds.nr() <<
-                "\nrunning_invstds.nc(): " << running_invstds.nc() <<
+                "\nrunning_variances.num_samples(): " << running_variances.num_samples() <<
+                "\nrunning_variances.k(): " << running_variances.k() <<
+                "\nrunning_variances.nr(): " << running_variances.nr() <<
+                "\nrunning_variances.nc(): " << running_variances.nc() <<
                 "\nsrc.k(): " << src.k() <<
                 "\nsrc.nr(): " << src.nr() <<
                 "\nsrc.nc(): " << src.nc()
@@ -504,14 +504,14 @@ namespace dlib
             auto g = gamma.host();
             auto b = beta.host();
             auto m = running_means.host();
-            auto i = running_invstds.host();
+            auto v = running_variances.host();
             const long num = src.k()*src.nr()*src.nc();
             for (long n = 0; n < src.num_samples(); ++n)
             {
                 for (long k = 0; k < num; ++k)
                 {
-                    *d = g[k]*(*s - m[k])/std::sqrt(i[k]+dlib::tt::BATCH_NORM_EPS) + b[k];
+                    *d = g[k]*(*s - m[k])/std::sqrt(v[k]+dlib::tt::BATCH_NORM_EPS) + b[k];
                     ++d;
                     ++s;
                 }
@@ -524,7 +524,7 @@ namespace dlib
             resizable_tensor& invstds,
             const double averaging_factor,
             resizable_tensor& running_means,
-            resizable_tensor& running_invstds,
+            resizable_tensor& running_variances,
             const tensor& src,
             const tensor& gamma,
             const tensor& beta
@@ -532,7 +532,7 @@ namespace dlib
         {
             DLIB_CASSERT(0 <= averaging_factor && averaging_factor <= 1, "averaging_factor: " << averaging_factor);
             DLIB_CASSERT(averaging_factor==1 || have_same_dimensions(running_means,means),"");
-            DLIB_CASSERT(averaging_factor==1 || have_same_dimensions(running_invstds,invstds),"");
+            DLIB_CASSERT(averaging_factor==1 || have_same_dimensions(running_variances,invstds),"");
             DLIB_CASSERT(
                 src.num_samples() > 1 &&
                 gamma.num_samples() == 1 &&
@@ -580,8 +580,9 @@ namespace dlib
             invstds.host(); means.host();
             // compute variances
-            running_invstds.copy_size(invstds);
-            auto rvar = running_invstds.host();
+            running_variances.copy_size(invstds);
+            auto rvar = running_variances.host();
+            // This scale makes the running variances unbiased.
             const double scale = (src.num_samples())/(src.num_samples()-1.0);
             for (long i = 0; i < num; ++i)
             {
@@ -718,7 +719,7 @@ namespace dlib
             const tensor& gamma,
             const tensor& beta,
             const tensor& running_means,
-            const tensor& running_invstds
+            const tensor& running_variances
         )
         {
             DLIB_CASSERT(
@@ -728,7 +729,7 @@ namespace dlib
                 gamma.k() == src.k() &&
                 have_same_dimensions(gamma, beta) &&
                 have_same_dimensions(gamma, running_means) &&
-                have_same_dimensions(gamma, running_invstds),
+                have_same_dimensions(gamma, running_variances),
                 "\ngamma.num_samples(): " << gamma.num_samples() <<
                 "\ngamma.k(): " << gamma.k() <<
                 "\ngamma.nr(): " << gamma.nr() <<
@@ -741,10 +742,10 @@ namespace dlib
                 "\nrunning_means.k(): " << running_means.k() <<
                 "\nrunning_means.nr(): " << running_means.nr() <<
                 "\nrunning_means.nc(): " << running_means.nc() <<
-                "\nrunning_invstds.num_samples(): " << running_invstds.num_samples() <<
-                "\nrunning_invstds.k(): " << running_invstds.k() <<
-                "\nrunning_invstds.nr(): " << running_invstds.nr() <<
-                "\nrunning_invstds.nc(): " << running_invstds.nc() <<
+                "\nrunning_variances.num_samples(): " << running_variances.num_samples() <<
+                "\nrunning_variances.k(): " << running_variances.k() <<
+                "\nrunning_variances.nr(): " << running_variances.nr() <<
+                "\nrunning_variances.nc(): " << running_variances.nc() <<
                 "\nsrc.k(): " << src.k() <<
                 "\nsrc.nr(): " << src.nr() <<
                 "\nsrc.nc(): " << src.nc()
@@ -756,14 +757,14 @@ namespace dlib
             auto g = gamma.host();
             auto b = beta.host();
             auto m = running_means.host();
-            auto i = running_invstds.host();
+            auto v = running_variances.host();
             const long num = src.nr()*src.nc();
             for (long n = 0; n < src.num_samples(); ++n)
             {
                 for (long k = 0; k < src.k(); ++k)
                 {
-                    const float invstd = 1.0f/std::sqrt(i[k] + dlib::tt::BATCH_NORM_EPS);
+                    const float invstd = 1.0f/std::sqrt(v[k] + dlib::tt::BATCH_NORM_EPS);
                     for (long j = 0; j < num; ++j)
                     {
                         *d = g[k]*(*s - m[k])*invstd + b[k];
@@ -780,7 +781,7 @@ namespace dlib
             resizable_tensor& invstds,
             const double averaging_factor,
             resizable_tensor& running_means,
-            resizable_tensor& running_invstds,
+            resizable_tensor& running_variances,
             const tensor& src,
             const tensor& gamma,
             const tensor& beta
@@ -788,7 +789,7 @@ namespace dlib
         {
             DLIB_CASSERT(0 <= averaging_factor && averaging_factor <= 1, "averaging_factor: " << averaging_factor);
             DLIB_CASSERT(averaging_factor==1 || have_same_dimensions(running_means,means),"");
-            DLIB_CASSERT(averaging_factor==1 || have_same_dimensions(running_invstds,invstds),"");
+            DLIB_CASSERT(averaging_factor==1 || have_same_dimensions(running_variances,invstds),"");
             DLIB_CASSERT(
                 src.num_samples() > 1 &&
                 gamma.num_samples() == 1 &&
@@ -844,8 +845,9 @@ namespace dlib
             p_src = src.host();
             // compute variances
-            running_invstds.copy_size(invstds);
-            auto rvar = running_invstds.host();
+            running_variances.copy_size(invstds);
+            auto rvar = running_variances.host();
+            // This scale makes the running variances unbiased.
             const double scale = (src.num_samples()*num)/(src.num_samples()*num-1.0);
             for (long k = 0; k < src.k(); ++k)
             {
...
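
The new comment about unbiased running variances refers to Bessel's correction: batch_normalize() normalizes with the biased (divide-by-N) batch variance, but scales by N/(N-1) before folding the value into running_variances. A self-contained illustration of the two estimates (hypothetical helpers, not dlib code):

    #include <numeric>
    #include <vector>

    // Biased (divide-by-N) variance, as used for the normalization itself.
    double biased_variance(const std::vector<double>& x)
    {
        const double n = x.size();
        const double mean = std::accumulate(x.begin(), x.end(), 0.0)/n;
        double ss = 0;
        for (double v : x)
            ss += (v - mean)*(v - mean);
        return ss/n;
    }

    // What gets accumulated into running_variances: the unbiased estimate,
    // obtained by applying the N/(N-1) scale seen in the hunks above.
    double unbiased_variance(const std::vector<double>& x)
    {
        const double n = x.size();
        return biased_variance(x)*n/(n - 1.0);
    }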
@@ -120,7 +120,7 @@ namespace dlib
             const tensor& gamma,
             const tensor& beta,
             const tensor& running_means,
-            const tensor& running_invstds
+            const tensor& running_variances
         );

         void batch_normalize (
@@ -129,7 +129,7 @@ namespace dlib
             resizable_tensor& invstds,
             const double averaging_factor,
             resizable_tensor& running_means,
-            resizable_tensor& running_invstds,
+            resizable_tensor& running_variances,
             const tensor& src,
             const tensor& gamma,
             const tensor& beta
@@ -152,7 +152,7 @@ namespace dlib
             const tensor& gamma,
             const tensor& beta,
             const tensor& running_means,
-            const tensor& running_invstds
+            const tensor& running_variances
         );

         void batch_normalize_conv (
@@ -161,7 +161,7 @@ namespace dlib
             resizable_tensor& invstds,
             const double averaging_factor,
             resizable_tensor& running_means,
-            resizable_tensor& running_invstds,
+            resizable_tensor& running_variances,
             const tensor& src,
             const tensor& gamma,
             const tensor& beta
...
@@ -343,7 +343,7 @@ namespace dlib
             const tensor& gamma,
             const tensor& beta,
             const tensor& running_means,
-            const tensor& running_invstds
+            const tensor& running_variances
         )
         {
             DLIB_CASSERT(
@@ -353,7 +353,7 @@ namespace dlib
                 gamma.k() == src.k() &&
                 have_same_dimensions(gamma, beta) &&
                 have_same_dimensions(gamma, running_means) &&
-                have_same_dimensions(gamma, running_invstds),
+                have_same_dimensions(gamma, running_variances),
                 "\ngamma.num_samples(): " << gamma.num_samples() <<
                 "\ngamma.k(): " << gamma.k() <<
                 "\ngamma.nr(): " << gamma.nr() <<
@@ -366,10 +366,10 @@ namespace dlib
                 "\nrunning_means.k(): " << running_means.k() <<
                 "\nrunning_means.nr(): " << running_means.nr() <<
                 "\nrunning_means.nc(): " << running_means.nc() <<
-                "\nrunning_invstds.num_samples(): " << running_invstds.num_samples() <<
-                "\nrunning_invstds.k(): " << running_invstds.k() <<
-                "\nrunning_invstds.nr(): " << running_invstds.nr() <<
-                "\nrunning_invstds.nc(): " << running_invstds.nc() <<
+                "\nrunning_variances.num_samples(): " << running_variances.num_samples() <<
+                "\nrunning_variances.k(): " << running_variances.k() <<
+                "\nrunning_variances.nr(): " << running_variances.nr() <<
+                "\nrunning_variances.nc(): " << running_variances.nc() <<
                 "\nsrc.k(): " << src.k() <<
                 "\nsrc.nr(): " << src.nr() <<
                 "\nsrc.nc(): " << src.nc()
@@ -392,7 +392,7 @@ namespace dlib
                 gamma.device(),
                 beta.device(),
                 running_means.device(),
-                running_invstds.device(),
+                running_variances.device(),
                 dlib::tt::BATCH_NORM_EPS));
         }
@@ -402,7 +402,7 @@ namespace dlib
             resizable_tensor& invstds,
             const double averaging_factor,
             resizable_tensor& running_means,
-            resizable_tensor& running_invstds,
+            resizable_tensor& running_variances,
             const tensor& src,
             const tensor& gamma,
             const tensor& beta
@@ -410,7 +410,7 @@ namespace dlib
         {
             DLIB_CASSERT(0 <= averaging_factor && averaging_factor <= 1, "averaging_factor: " << averaging_factor);
             DLIB_CASSERT(averaging_factor==1 || have_same_dimensions(running_means,means),"");
-            DLIB_CASSERT(averaging_factor==1 || have_same_dimensions(running_invstds,invstds),"");
+            DLIB_CASSERT(averaging_factor==1 || have_same_dimensions(running_variances,invstds),"");
             DLIB_CASSERT(
                 src.num_samples() > 1 &&
                 gamma.num_samples() == 1 &&
@@ -438,7 +438,7 @@ namespace dlib
             means.set_size(1, src.k(), src.nr(), src.nc());
             invstds.copy_size(means);
             running_means.copy_size(means);
-            running_invstds.copy_size(means);
+            running_variances.copy_size(means);

             CHECK_CUDNN(cudnnBatchNormalizationForwardTraining(
                 context(),
@@ -454,7 +454,7 @@ namespace dlib
                 beta.device(),
                 averaging_factor,
                 running_means.device(),
-                running_invstds.device(),
+                running_variances.device(),
                 dlib::tt::BATCH_NORM_EPS,
                 means.device(),
                 invstds.device()));
@@ -516,7 +516,7 @@ namespace dlib
             const tensor& gamma,
             const tensor& beta,
             const tensor& running_means,
-            const tensor& running_invstds
+            const tensor& running_variances
         )
         {
             DLIB_CASSERT(
@@ -526,7 +526,7 @@ namespace dlib
                 gamma.k() == src.k() &&
                 have_same_dimensions(gamma, beta) &&
                 have_same_dimensions(gamma, running_means) &&
-                have_same_dimensions(gamma, running_invstds),
+                have_same_dimensions(gamma, running_variances),
                 "\ngamma.num_samples(): " << gamma.num_samples() <<
                 "\ngamma.k(): " << gamma.k() <<
                 "\ngamma.nr(): " << gamma.nr() <<
@@ -539,10 +539,10 @@ namespace dlib
                 "\nrunning_means.k(): " << running_means.k() <<
                 "\nrunning_means.nr(): " << running_means.nr() <<
                 "\nrunning_means.nc(): " << running_means.nc() <<
-                "\nrunning_invstds.num_samples(): " << running_invstds.num_samples() <<
-                "\nrunning_invstds.k(): " << running_invstds.k() <<
-                "\nrunning_invstds.nr(): " << running_invstds.nr() <<
-                "\nrunning_invstds.nc(): " << running_invstds.nc() <<
+                "\nrunning_variances.num_samples(): " << running_variances.num_samples() <<
+                "\nrunning_variances.k(): " << running_variances.k() <<
+                "\nrunning_variances.nr(): " << running_variances.nr() <<
+                "\nrunning_variances.nc(): " << running_variances.nc() <<
                 "\nsrc.k(): " << src.k() <<
                 "\nsrc.nr(): " << src.nr() <<
                 "\nsrc.nc(): " << src.nc()
@@ -565,7 +565,7 @@ namespace dlib
                 gamma.device(),
                 beta.device(),
                 running_means.device(),
-                running_invstds.device(),
+                running_variances.device(),
                 dlib::tt::BATCH_NORM_EPS));
         }
@@ -575,7 +575,7 @@ namespace dlib
             resizable_tensor& invstds,
             const double averaging_factor,
             resizable_tensor& running_means,
-            resizable_tensor& running_invstds,
+            resizable_tensor& running_variances,
             const tensor& src,
             const tensor& gamma,
             const tensor& beta
@@ -583,7 +583,7 @@ namespace dlib
         {
             DLIB_CASSERT(0 <= averaging_factor && averaging_factor <= 1, "averaging_factor: " << averaging_factor);
             DLIB_CASSERT(averaging_factor==1 || have_same_dimensions(running_means,means),"");
-            DLIB_CASSERT(averaging_factor==1 || have_same_dimensions(running_invstds,invstds),"");
+            DLIB_CASSERT(averaging_factor==1 || have_same_dimensions(running_variances,invstds),"");
             DLIB_CASSERT(
                 src.num_samples() > 1 &&
                 gamma.num_samples() == 1 &&
@@ -612,7 +612,7 @@ namespace dlib
             means.set_size(1, src.k());
             invstds.copy_size(means);
             running_means.copy_size(means);
-            running_invstds.copy_size(means);
+            running_variances.copy_size(means);

             CHECK_CUDNN(cudnnBatchNormalizationForwardTraining(
                 context(),
@@ -628,7 +628,7 @@ namespace dlib
                 beta.device(),
                 averaging_factor,
                 running_means.device(),
-                running_invstds.device(),
+                running_variances.device(),
                 dlib::tt::BATCH_NORM_EPS,
                 means.device(),
                 invstds.device()));
...
@@ -140,7 +140,7 @@ namespace dlib
             const tensor& gamma,
             const tensor& beta,
             const tensor& running_means,
-            const tensor& running_invstds
+            const tensor& running_variances
         );

         void batch_normalize (
@@ -149,7 +149,7 @@ namespace dlib
             resizable_tensor& invstds,
             const double averaging_factor,
             resizable_tensor& running_means,
-            resizable_tensor& running_invstds,
+            resizable_tensor& running_variances,
             const tensor& src,
             const tensor& gamma,
             const tensor& beta
@@ -174,7 +174,7 @@ namespace dlib
             const tensor& gamma,
             const tensor& beta,
             const tensor& running_means,
-            const tensor& running_invstds
+            const tensor& running_variances
         );

         void batch_normalize_conv (
@@ -183,7 +183,7 @@ namespace dlib
             resizable_tensor& invstds,
             const double averaging_factor,
             resizable_tensor& running_means,
-            resizable_tensor& running_invstds,
+            resizable_tensor& running_variances,
             const tensor& src,
             const tensor& gamma,
             const tensor& beta
...
@@ -453,9 +453,9 @@ namespace dlib
             beta(params,gamma.size()) = 0;

             running_means.copy_size(gamma(params,0));
-            running_invstds.copy_size(gamma(params,0));
+            running_variances.copy_size(gamma(params,0));
             running_means = 0;
-            running_invstds = 1;
+            running_variances = 1;
             num_updates = 0;
         }
@@ -470,16 +470,16 @@ namespace dlib
                 if (num_updates < running_stats_window_size)
                     ++num_updates;
                 if (mode == FC_MODE)
-                    tt::batch_normalize(output, means, invstds, decay, running_means, running_invstds, sub.get_output(), g, b);
+                    tt::batch_normalize(output, means, invstds, decay, running_means, running_variances, sub.get_output(), g, b);
                 else
-                    tt::batch_normalize_conv(output, means, invstds, decay, running_means, running_invstds, sub.get_output(), g, b);
+                    tt::batch_normalize_conv(output, means, invstds, decay, running_means, running_variances, sub.get_output(), g, b);
             }
             else // we are running in testing mode so we just linearly scale the input tensor.
             {
                 if (mode == FC_MODE)
-                    tt::batch_normalize_inference(output, sub.get_output(), g, b, running_means, running_invstds);
+                    tt::batch_normalize_inference(output, sub.get_output(), g, b, running_means, running_variances);
                 else
-                    tt::batch_normalize_conv_inference(output, sub.get_output(), g, b, running_means, running_invstds);
+                    tt::batch_normalize_conv_inference(output, sub.get_output(), g, b, running_means, running_variances);
             }
         }
@@ -510,7 +510,7 @@ namespace dlib
             serialize(item.means, out);
             serialize(item.invstds, out);
             serialize(item.running_means, out);
-            serialize(item.running_invstds, out);
+            serialize(item.running_variances, out);
             serialize(item.num_updates, out);
             serialize(item.running_stats_window_size, out);
         }
@@ -539,7 +539,7 @@ namespace dlib
             deserialize(item.means, in);
             deserialize(item.invstds, in);
             deserialize(item.running_means, in);
-            deserialize(item.running_invstds, in);
+            deserialize(item.running_variances, in);
             deserialize(item.num_updates, in);
             deserialize(item.running_stats_window_size, in);
@@ -551,9 +551,9 @@ namespace dlib
                 deserialize(_mode, in);
                 if (mode != (layer_mode)_mode) throw serialization_error("Wrong mode found while deserializing dlib::bn_");
-                // We also need to flip the running_invstds around since the previous
+                // We also need to flip the running_variances around since the previous
                 // format saved the inverse standard deviations instead of variances.
-                item.running_invstds = 1.0f/squared(mat(item.running_invstds)) - tt::BATCH_NORM_EPS;
+                item.running_variances = 1.0f/squared(mat(item.running_variances)) - tt::BATCH_NORM_EPS;
             }
         }
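
For older serialized models, the deserializer above recovers variances from the stored inverse standard deviations. Since the old format held invstd = 1/sqrt(var + eps), the inversion is var = 1/invstd^2 - eps, which is exactly what the squared()/subtraction line computes. A scalar round-trip of that conversion (illustrative helper names, eps standing in for tt::BATCH_NORM_EPS):

    #include <cmath>

    // Forward: what the old format stored on disk.
    float invstd_from_variance(float var, float eps)
    {
        return 1.0f/std::sqrt(var + eps);
    }

    // Inverse: what deserialization now computes to migrate old models.
    float variance_from_invstd(float invstd, float eps)
    {
        return 1.0f/(invstd*invstd) - eps;
    }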
@@ -564,7 +564,7 @@ namespace dlib
         resizable_tensor params;
         alias_tensor gamma, beta;
         resizable_tensor means, running_means;
-        resizable_tensor invstds, running_invstds;
+        resizable_tensor invstds, running_variances;
         unsigned long num_updates;
         unsigned long running_stats_window_size;
     };
@@ -911,7 +911,7 @@ namespace dlib
             auto sg = gamma(temp,0);
             auto sb = beta(temp,gamma.size());
-            g = pointwise_multiply(mat(sg), 1.0f/sqrt(mat(item.running_invstds)+tt::BATCH_NORM_EPS));
+            g = pointwise_multiply(mat(sg), 1.0f/sqrt(mat(item.running_variances)+tt::BATCH_NORM_EPS));
             b = mat(sb) - pointwise_multiply(mat(g), mat(item.running_means));
         }
...
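
The last hunk in this file is the bn-to-affine conversion: gamma and the running statistics collapse into a single per-channel scale and offset, so inference needs no normalization at all. In scalar form (a sketch of the same formula, not the dlib implementation):

    #include <cmath>

    // Fold batch norm parameters into one affine transform:
    //   g = gamma/sqrt(running_variance + eps)
    //   b = beta - g*running_mean
    // so that g*x + b == gamma*(x - running_mean)/sqrt(running_variance + eps) + beta.
    void fold_bn_into_affine(float gamma, float beta, float mean, float var,
                             float eps, float& g, float& b)
    {
        g = gamma/std::sqrt(var + eps);
        b = beta - g*mean;
    }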
@@ -274,13 +274,13 @@ namespace dlib { namespace tt
         const tensor& gamma,
         const tensor& beta,
         const tensor& running_means,
-        const tensor& running_invstds
+        const tensor& running_variances
     )
     {
 #ifdef DLIB_USE_CUDA
-        cuda::batch_normalize_inference(dest,src,gamma,beta,running_means,running_invstds);
+        cuda::batch_normalize_inference(dest,src,gamma,beta,running_means,running_variances);
 #else
-        cpu::batch_normalize_inference(dest,src,gamma,beta,running_means,running_invstds);
+        cpu::batch_normalize_inference(dest,src,gamma,beta,running_means,running_variances);
 #endif
     }
@@ -290,16 +290,16 @@ namespace dlib { namespace tt
         resizable_tensor& vars,
         const double averaging_factor,
         resizable_tensor& running_means,
-        resizable_tensor& running_invstds,
+        resizable_tensor& running_variances,
         const tensor& src,
         const tensor& gamma,
         const tensor& beta
     )
     {
 #ifdef DLIB_USE_CUDA
-        cuda::batch_normalize(dest,means,vars,averaging_factor,running_means,running_invstds,src,gamma,beta);
+        cuda::batch_normalize(dest,means,vars,averaging_factor,running_means,running_variances,src,gamma,beta);
 #else
-        cpu::batch_normalize(dest,means,vars,averaging_factor,running_means,running_invstds,src,gamma,beta);
+        cpu::batch_normalize(dest,means,vars,averaging_factor,running_means,running_variances,src,gamma,beta);
 #endif
     }
@@ -330,13 +330,13 @@ namespace dlib { namespace tt
         const tensor& gamma,
         const tensor& beta,
         const tensor& running_means,
-        const tensor& running_invstds
+        const tensor& running_variances
     )
     {
 #ifdef DLIB_USE_CUDA
-        cuda::batch_normalize_conv_inference(dest,src,gamma,beta,running_means,running_invstds);
+        cuda::batch_normalize_conv_inference(dest,src,gamma,beta,running_means,running_variances);
 #else
-        cpu::batch_normalize_conv_inference(dest,src,gamma,beta,running_means,running_invstds);
+        cpu::batch_normalize_conv_inference(dest,src,gamma,beta,running_means,running_variances);
 #endif
     }
@@ -346,16 +346,16 @@ namespace dlib { namespace tt
         resizable_tensor& vars,
         const double averaging_factor,
         resizable_tensor& running_means,
-        resizable_tensor& running_invstds,
+        resizable_tensor& running_variances,
         const tensor& src,
         const tensor& gamma,
         const tensor& beta
     )
     {
 #ifdef DLIB_USE_CUDA
-        cuda::batch_normalize_conv(dest,means,vars,averaging_factor,running_means,running_invstds,src,gamma,beta);
+        cuda::batch_normalize_conv(dest,means,vars,averaging_factor,running_means,running_variances,src,gamma,beta);
 #else
-        cpu::batch_normalize_conv(dest,means,vars,averaging_factor,running_means,running_invstds,src,gamma,beta);
+        cpu::batch_normalize_conv(dest,means,vars,averaging_factor,running_means,running_variances,src,gamma,beta);
 #endif
     }
...
@@ -294,7 +294,7 @@ namespace dlib { namespace tt
         const tensor& gamma,
         const tensor& beta,
         const tensor& running_means,
-        const tensor& running_invstds
+        const tensor& running_variances
     );
     /*!
         requires
@@ -304,12 +304,12 @@ namespace dlib { namespace tt
             - gamma.k() == src.k()
            - have_same_dimensions(gamma, beta)
            - have_same_dimensions(gamma, running_means)
-            - have_same_dimensions(gamma, running_invstds)
+            - have_same_dimensions(gamma, running_variances)
         ensures
-            - Just linearly transforms src as a call to batch_normalize() would if the resulting
-              means and invstds were running_means and running_invstds. That is, this function
-              performs:
-                dest = gamma*(src-running_means)*running_invstds + beta
+            - Linearly transforms src as a call to batch_normalize() would if src had means
+              and variances as given by running_means and running_variances. That is, this
+              function performs:
+                dest = gamma*(src-running_means)/sqrt(running_variances+BATCH_NORM_EPS) + beta
              Note that it does it in a pointwise fashion over the samples in src.
     !*/
@@ -319,7 +319,7 @@ namespace dlib { namespace tt
         resizable_tensor& invstds,
         const double averaging_factor,
         resizable_tensor& running_means,
-        resizable_tensor& running_invstds,
+        resizable_tensor& running_variances,
         const tensor& src,
         const tensor& gamma,
         const tensor& beta
@@ -335,7 +335,7 @@ namespace dlib { namespace tt
             - 0 <= averaging_factor <= 1
            - if (averaging_factor != 1)
                - have_same_dimensions(running_means, means) == true
-                - have_same_dimensions(running_invstds, invstds) == true
+                - have_same_dimensions(running_variances, invstds) == true
         ensures
             - have_same_dimensions(#dest, src) == true
             - #means.num_samples() == 1
@@ -347,7 +347,7 @@ namespace dlib { namespace tt
             - #means == the mean values of the contents of src.
             - #invstds == 1/(the standard deviation values of the contents of src).
             - #running_means = (1-averaging_factor)*mat(#running_means) + averaging_factor*mat(#means);
-            - #running_invstds = (1-averaging_factor)*mat(#running_invstds) + averaging_factor*mat(#invstds);
+            - #running_variances = (1-averaging_factor)*mat(#running_variances) + averaging_factor*(variance of contents of src);
     !*/

     void batch_normalize_gradient (
@@ -391,7 +391,7 @@ namespace dlib { namespace tt
         const tensor& gamma,
         const tensor& beta,
         const tensor& running_means,
-        const tensor& running_invstds
+        const tensor& running_variances
     );
     /*!
         requires
@@ -401,13 +401,13 @@ namespace dlib { namespace tt
             - gamma.k() == src.k()
            - have_same_dimensions(gamma, beta)
            - have_same_dimensions(gamma, running_means)
-            - have_same_dimensions(gamma, running_invstds)
+            - have_same_dimensions(gamma, running_variances)
         ensures
-            - Just linearly transforms src as a call to batch_normalize_conv() would if the resulting
-              means and invstds were running_means and running_invstds. That is, this function
-              performs:
-                dest = gamma*(src-running_means)*running_invstds + beta
-              Note that it does it in a pointwise fashion over the samples, rows, and
+            - Linearly transforms src as a call to batch_normalize_conv() would if src had
+              means and variances as given by running_means and running_variances. That
+              is, this function performs:
+                dest = gamma*(src-running_means)/sqrt(running_variances+BATCH_NORM_EPS) + beta
+              Note that it does this in a pointwise fashion over the samples, rows, and
               columns in src.
     !*/
@@ -417,7 +417,7 @@ namespace dlib { namespace tt
         resizable_tensor& invstds,
         const double averaging_factor,
         resizable_tensor& running_means,
-        resizable_tensor& running_invstds,
+        resizable_tensor& running_variances,
         const tensor& src,
         const tensor& gamma,
         const tensor& beta
@@ -431,7 +431,7 @@ namespace dlib { namespace tt
             - 0 <= averaging_factor <= 1
            - if (averaging_factor != 1)
                - have_same_dimensions(running_means, means) == true
-                - have_same_dimensions(running_invstds, invstds) == true
+                - have_same_dimensions(running_variances, invstds) == true
         ensures
             - have_same_dimensions(#dest, src) == true
             - #means.num_samples()==means.nr()==means.nc() == 1
@@ -441,7 +441,7 @@ namespace dlib { namespace tt
             - #means == the mean values of the contents of src.
             - #invstds == 1/(the standard deviation values of the contents of src).
             - #running_means = (1-averaging_factor)*mat(#running_means) + averaging_factor*mat(#means);
-            - #running_invstds = (1-averaging_factor)*mat(#running_invstds) + averaging_factor*mat(#invstds);
+            - #running_variances = (1-averaging_factor)*mat(#running_variances) + averaging_factor*(variance of contents of src);
     !*/

     void batch_normalize_conv_gradient (
...
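
Per the updated spec, both training overloads refresh the running statistics with an exponential moving average controlled by averaging_factor, and the variance term now accumulates the unbiased batch variance. Written out per channel (an illustrative helper mirroring the ensures clauses above, not dlib code):

    // running <- (1 - a)*running + a*current, applied to both statistics.
    void update_running_stats(double& running_mean, double& running_var,
                              double batch_mean, double unbiased_batch_var,
                              double averaging_factor)
    {
        running_mean = (1 - averaging_factor)*running_mean
                     + averaging_factor*batch_mean;
        running_var  = (1 - averaging_factor)*running_var
                     + averaging_factor*unbiased_batch_var;
    }

With averaging_factor == 1 the running statistics are simply overwritten by the current batch's, which is what the tests below rely on.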
@@ -164,12 +164,12 @@ namespace
         beta = 0;

         resizable_tensor running_means;
-        resizable_tensor running_invstds;
-        batch_normalize(dest, means, vars, 1, running_means, running_invstds, src, gamma, beta);
+        resizable_tensor running_variances;
+        batch_normalize(dest, means, vars, 1, running_means, running_variances, src, gamma, beta);
         const double scale = (src.num_samples())/(src.num_samples()-1.0);
         // Turn back into biased variance estimate because that's how batch_normalize() works, so if we want to match it this is necessary.
-        running_invstds = mat(running_invstds)/scale;
-        batch_normalize_inference(dest2, src, gamma, beta, running_means, running_invstds);
+        running_variances = mat(running_variances)/scale;
+        batch_normalize_inference(dest2, src, gamma, beta, running_means, running_variances);
         DLIB_TEST_MSG(max(abs(mat(dest2)-mat(dest))) < 1e-5, max(abs(mat(dest2)-mat(dest))));
@@ -177,7 +177,7 @@ namespace
         auto f = [&](float eps) {
             const float old = src.host()[idx];
             src.host()[idx] += eps;
-            batch_normalize(dest, means, vars, 1, running_means, running_invstds, src, gamma, beta);
+            batch_normalize(dest, means, vars, 1, running_means, running_variances, src, gamma, beta);
             float result = dot(gradient_input, dest);
             src.host()[idx] = old;
             return result;
@@ -189,7 +189,7 @@ namespace
         auto f = [&](float eps) {
             const float old = gamma.host()[idx];
             gamma.host()[idx] += eps;
-            batch_normalize(dest, means, vars, 1, running_means, running_invstds, src, gamma, beta);
+            batch_normalize(dest, means, vars, 1, running_means, running_variances, src, gamma, beta);
             float result = dot(gradient_input, dest);
             gamma.host()[idx] = old;
             return result;
@@ -201,7 +201,7 @@ namespace
         auto f = [&](float eps) {
             const float old = beta.host()[idx];
             beta.host()[idx] += eps;
-            batch_normalize(dest, means, vars, 1, running_means, running_invstds, src, gamma, beta);
+            batch_normalize(dest, means, vars, 1, running_means, running_variances, src, gamma, beta);
             float result = dot(gradient_input, dest);
             beta.host()[idx] = old;
             return result;
@@ -247,13 +247,13 @@ namespace
         beta = 0;

         resizable_tensor running_means;
-        resizable_tensor running_invstds;
-        batch_normalize_conv(dest, means, vars, 1, running_means, running_invstds, src, gamma, beta);
+        resizable_tensor running_variances;
+        batch_normalize_conv(dest, means, vars, 1, running_means, running_variances, src, gamma, beta);
         const double scale = (src.num_samples()*src.nr()*src.nc())/(src.num_samples()*src.nr()*src.nc()-1.0);
         // Turn back into biased variance estimate because that's how
         // batch_normalize_conv() works, so if we want to match it this is necessary.
-        running_invstds = mat(running_invstds)/scale;
-        batch_normalize_conv_inference(dest2, src, gamma, beta, running_means, running_invstds);
+        running_variances = mat(running_variances)/scale;
+        batch_normalize_conv_inference(dest2, src, gamma, beta, running_means, running_variances);
         DLIB_TEST(max(abs(mat(dest2)-mat(dest))) < 1e-5);
@@ -261,7 +261,7 @@ namespace
         auto f = [&](float eps) {
             const float old = src.host()[idx];
             src.host()[idx] += eps;
-            batch_normalize_conv(dest, means, vars, 1, running_means, running_invstds, src, gamma, beta);
+            batch_normalize_conv(dest, means, vars, 1, running_means, running_variances, src, gamma, beta);
             float result = dot(gradient_input, dest);
             src.host()[idx] = old;
             return result;
@@ -273,7 +273,7 @@ namespace
         auto f = [&](float eps) {
             const float old = gamma.host()[idx];
             gamma.host()[idx] += eps;
-            batch_normalize_conv(dest, means, vars, 1, running_means, running_invstds, src, gamma, beta);
+            batch_normalize_conv(dest, means, vars, 1, running_means, running_variances, src, gamma, beta);
             float result = dot(gradient_input, dest);
             gamma.host()[idx] = old;
             return result;
@@ -285,7 +285,7 @@ namespace
         auto f = [&](float eps) {
             const float old = beta.host()[idx];
             beta.host()[idx] += eps;
-            batch_normalize_conv(dest, means, vars, 1, running_means, running_invstds, src, gamma, beta);
+            batch_normalize_conv(dest, means, vars, 1, running_means, running_variances, src, gamma, beta);
             float result = dot(gradient_input, dest);
             beta.host()[idx] = old;
             return result;
@@ -775,7 +775,7 @@ namespace
         resizable_tensor means, means2;
         resizable_tensor invstds, invstds2;
         resizable_tensor running_means, running_means2;
-        resizable_tensor running_invstds, running_invstds2;
+        resizable_tensor running_variances, running_variances2;
         resizable_tensor src(64,20,100,100);
         resizable_tensor gamma(1,20,100,100);
         resizable_tensor beta(1,20,100,100);
@@ -785,20 +785,20 @@ namespace
         rnd.fill_uniform(src);

-        cpu::batch_normalize(dest, means, invstds, 1, running_means, running_invstds, src, gamma, beta);
-        cuda::batch_normalize(dest2,means2,invstds2, 1, running_means2, running_invstds2, src, gamma, beta);
+        cpu::batch_normalize(dest, means, invstds, 1, running_means, running_variances, src, gamma, beta);
+        cuda::batch_normalize(dest2,means2,invstds2, 1, running_means2, running_variances2, src, gamma, beta);

         dlog << LINFO << "dest error: "<< max(abs(mat(dest) -mat(dest2)));
         dlog << LINFO << "means error: "<< max(abs(mat(means) -mat(means2)));
         dlog << LINFO << "invstds error: "<< max(abs(mat(invstds) -mat(invstds2)));
         dlog << LINFO << "running_means error: "<< max(abs(mat(running_means) -mat(running_means2)));
-        dlog << LINFO << "running_invstds error: "<< max(abs(mat(running_invstds) -mat(running_invstds2)));
+        dlog << LINFO << "running_variances error: "<< max(abs(mat(running_variances) -mat(running_variances2)));

         DLIB_TEST(max(abs(mat(dest) -mat(dest2))) < 1e-4);
         DLIB_TEST(max(abs(mat(means) -mat(means2))) < 1e-4);
         DLIB_TEST(max(abs(mat(invstds) -mat(invstds2))) < 1e-4);
         DLIB_TEST(max(abs(mat(running_means) -mat(running_means2))) < 1e-4);
-        DLIB_TEST(max(abs(mat(running_invstds) -mat(running_invstds2))) < 1e-4);
+        DLIB_TEST(max(abs(mat(running_variances) -mat(running_variances2))) < 1e-4);

         // now check that the gradients match as well
@@ -830,7 +830,7 @@ namespace
         resizable_tensor means, means2;
         resizable_tensor invstds, invstds2;
         resizable_tensor running_means, running_means2;
-        resizable_tensor running_invstds, running_invstds2;
+        resizable_tensor running_variances, running_variances2;
         resizable_tensor src(2,8,10,9);
         resizable_tensor gamma(1,8);
         resizable_tensor beta(1,8);
@@ -839,20 +839,20 @@ namespace
         tt::tensor_rand rnd;
         rnd.fill_uniform(src);

-        cpu::batch_normalize_conv(dest,means,invstds,1,running_means,running_invstds, src, gamma, beta);
-        cuda::batch_normalize_conv(dest2,means2,invstds2,1,running_means2,running_invstds2, src, gamma, beta);
+        cpu::batch_normalize_conv(dest,means,invstds,1,running_means,running_variances, src, gamma, beta);
+        cuda::batch_normalize_conv(dest2,means2,invstds2,1,running_means2,running_variances2, src, gamma, beta);

         dlog << LINFO << "dest error: "<< max(abs(mat(dest) -mat(dest2)));
         dlog << LINFO << "means error: "<< max(abs(mat(means) -mat(means2)));
         dlog << LINFO << "invstds error: "<< max(abs(mat(invstds) -mat(invstds2)));
         dlog << LINFO << "running_means error: "<< max(abs(mat(running_means) -mat(running_means2)));
-        dlog << LINFO << "running_invstds error: "<< max(abs(mat(running_invstds) -mat(running_invstds2)));
+        dlog << LINFO << "running_variances error: "<< max(abs(mat(running_variances) -mat(running_variances2)));

         DLIB_TEST(max(abs(mat(dest) -mat(dest2))) < 1e-4);
         DLIB_TEST(max(abs(mat(means) -mat(means2))) < 1e-4);
         DLIB_TEST(max(abs(mat(invstds) -mat(invstds2))) < 1e-4);
         DLIB_TEST(max(abs(mat(running_means) -mat(running_means2))) < 1e-4);
-        DLIB_TEST(max(abs(mat(running_invstds) -mat(running_invstds2))) < 1e-4);
+        DLIB_TEST(max(abs(mat(running_variances) -mat(running_variances2))) < 1e-4);

         resizable_tensor gradient_input;
         resizable_tensor src_grad, gamma_grad, beta_grad;
...
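
A note on the rescaling in the tests: with averaging_factor == 1 the running variance equals the unbiased batch variance, while batch_normalize() itself normalized with the biased one, so the tests divide by the N/(N-1) scale before calling the inference routine to make the two paths agree. In isolation (hypothetical helper):

    // Undo Bessel's correction so the inference path sees the same biased
    // variance that batch_normalize() used internally.
    double biased_from_unbiased(double unbiased_var, double n)
    {
        return unbiased_var*(n - 1.0)/n;
    }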