OpenDAS / dlib / Commits / ad40ddd3

Commit ad40ddd3 authored Dec 06, 2015 by Davis King

Made test_layer() a little more robust.

parent cbce85ec
Changes (1): showing 1 changed file with 151 additions and 145 deletions
dlib/dnn/core.h (+151, -145), view file @ ad40ddd3
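For context, test_layer() is the gradient-checking utility that layer authors run against a layer_details object; on failure it returns a layer_test_results carrying the diagnostic string built in sout below. A minimal usage sketch, not part of this commit: the relu_ layer choice is arbitrary, and the bool conversion and .log member of layer_test_results are assumptions based on how the failure string is constructed and returned in the diff.

    #include <iostream>
    #include <dlib/dnn.h>

    int main()
    {
        dlib::relu_ l;                   // any layer_details object (hypothetical choice)
        auto res = dlib::test_layer(l);  // runs the checks shown in the diff below
        if (!res)                        // assumes layer_test_results converts to bool
            std::cout << "test_layer failed:\n" << res.log << std::endl;  // .log assumed
        else
            std::cout << "test_layer passed" << std::endl;
        return 0;
    }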
...
...
@@ -1896,168 +1896,174 @@ namespace dlib
        using namespace timpl;
        // Do some setup
        dlib::rand rnd;

        for (int iter = 0; iter < 5; ++iter)
        {
            test_layer_subnet subnetwork(rnd);
            resizable_tensor output, out2, out3;
            // Run setup() and forward() as well to make sure any calls to subnet() have
            // happened before we start assuming we know how many data elements there are
            // (since we do a lazy layer creation thing based on calls to subnet() inside
            // test_layer_subnet).
            l.setup(subnetwork);
            impl::call_layer_forward(l, subnetwork, output);

            resizable_tensor input_grad;
            input_grad.copy_size(output);
            fill_with_gassuan_random_numbers(input_grad, rnd);
            std::ostringstream sout;
            // The f() we are computing gradients of is this thing.  It's value at the current
            // parameter and data values is:
            //sout << "f(data,params): " << dot(output, input_grad) << std::endl;

            // We are going to save a copy of the subnetwork.get_gradient_input() data before we do
            // backpropagation since the backward() function is supposed to *add* to the
            // gradients rather than overwrite them.  We will use this saved data to check if
            // that is the case.
            const unsigned long num_data_inputs = subnetwork.count_outputs();
            std::vector<float> initial_gradient_input(num_data_inputs);
            for (unsigned long i = 0; i < num_data_inputs; ++i)
                initial_gradient_input[i] = subnetwork.get_gradient_input_element(i);

            // Now tell the layer to compute all the gradients.  In the rest of this function
            // we will just be checking that these gradients were computed correctly by
            // comparing them to a central differences approximation.
            resizable_tensor params_grad;
            params_grad.copy_size(l.get_layer_params());
            // But first, set the params grad to something crazy so that it's very obvious if
            // it doesn't get fully assigned.
            params_grad = std::numeric_limits<float>::infinity();
            impl::call_layer_backward(l, output, input_grad, subnetwork, params_grad);

            static_assert(impl::is_inplace_layer(l, subnetwork) == impl::has_inplace_backward(l, subnetwork),
                "Layer not defined correctly.  forward and backward methods must either both be in-place or both out-of-place. ");

            // Make sure the outputs of forward() and backward() are the same when they are run
            // in in-place mode.
            if (impl::is_inplace_layer(l, subnetwork))
            {
                test_layer_subnet subnetwork2(rnd);
                layer_details_type ll(l);
                ll.setup(subnetwork2);
                resizable_tensor ip_out;
                impl::call_layer_forward(ll, subnetwork2, ip_out);
                impl::call_layer_forward(ll, subnetwork2, subnetwork2.get_mutable_output());
                const auto forward_error = max(abs(mat(ip_out) - mat(subnetwork2.get_output())));
                if (forward_error > 0.00001)
                {
                    using namespace std;
                    sout << "This layer is supposed to support in-place computations but the output of forward_inplace()\n";
                    sout << "changes when invoked in-place vs. out-of-place. The error was: " << forward_error << endl;
                    return layer_test_results(sout.str());
                }

                resizable_tensor params_grad;
                params_grad.copy_size(ll.get_layer_params());
                params_grad = std::numeric_limits<float>::infinity();

                resizable_tensor input_grad;
                input_grad.copy_size(ip_out);
                fill_with_gassuan_random_numbers(input_grad, rnd);
                resizable_tensor params_grad1, params_grad2, data_grad1, data_grad2;
                params_grad1 = params_grad;
                params_grad2 = params_grad;
                // Now call backward() and make sure it works as well.
                subnetwork2.get_gradient_input() = 9999;
                impl::call_layer_backward(ll, ip_out, input_grad, subnetwork2, params_grad1);
                data_grad1 = subnetwork2.get_gradient_input();

                subnetwork2.get_gradient_input() = mat(input_grad);
                impl::call_layer_backward(ll, ip_out, subnetwork2.get_gradient_input(), subnetwork2, params_grad2);
                data_grad2 = subnetwork2.get_gradient_input();
                if (params_grad.size() != 0)
                {
                    const auto backward_param_error = max(abs(mat(params_grad1) - mat(params_grad2)));
                    if (backward_param_error > 0.00001)
                    {
                        using namespace std;
                        sout << "This layer is supposed to support in-place computations but the output of backward_inplace()\n";
                        sout << "changes when invoked in-place vs. out-of-place. The error was: " << backward_param_error << endl;
                        return layer_test_results(sout.str());
                    }
                }
                const auto backward_data_error = max(abs(mat(data_grad1) - mat(data_grad2)));
                if (backward_data_error > 0.00001)
                {
                    using namespace std;
                    sout << "This layer is supposed to support in-place computations but the output of backward_inplace()\n";
                    sout << "changes when invoked in-place vs. out-of-place. The error was: " << backward_data_error << endl;
                    return layer_test_results(sout.str());
                }
            }

            // ==================================================================
            //       first validate the way the parameter gradients are computed
            for (unsigned long i = 0; i < params_grad.size(); ++i)
            {
                layer_details_type l1(l);

                float eps = l1.get_layer_params().host()[i]*base_eps;
                if (eps == 0)
                    eps = base_eps;
                const float oldval = l1.get_layer_params().host()[i];
                l1.get_layer_params().host()[i] = oldval+eps;
                impl::call_layer_forward(l1, subnetwork, out2);
                l1.get_layer_params().host()[i] = oldval-eps;
                impl::call_layer_forward(l1, subnetwork, out3);
                l1.get_layer_params().host()[i] = oldval;

                // Compute a reference derivative via a central differences approximation and
                // compare it to the one output by the layer and make sure they match.
                double reference_derivative = (dot(out2,input_grad)-dot(out3,input_grad))/(2*eps);
                double output_derivative = params_grad.host()[i];
                double relative_error = (reference_derivative - output_derivative)/(reference_derivative + 1e-100);
                double absolute_error = (reference_derivative - output_derivative);
                if (std::abs(relative_error) > 0.02 && std::abs(absolute_error) > 0.001)
                {
                    using namespace std;
                    sout << "Gradient error in parameter #" << i << ". Relative error: " << relative_error << endl;
                    sout << "expected derivative: " << reference_derivative << endl;
                    sout << "output derivative:   " << output_derivative << endl;
                    return layer_test_results(sout.str());
                }
            }

            // ==================================================================
            //       now validate the data gradients
            for (unsigned long i = 0; i < num_data_inputs; ++i)
            {
                const float oldval = subnetwork.get_output_element(i);
                float eps = oldval*base_eps;
                if (eps == 0)
                    eps = base_eps;
                subnetwork.get_output_element(i) = oldval+eps;
                impl::call_layer_forward(l, subnetwork, out2);
                subnetwork.get_output_element(i) = oldval-eps;
                impl::call_layer_forward(l, subnetwork, out3);
                subnetwork.get_output_element(i) = oldval;

                // Compute a reference derivative via a central differences approximation and
                // compare it to the one output by the layer and make sure they match.
                double reference_derivative = (dot(out2,input_grad)-dot(out3,input_grad))/(2*eps);
                double output_derivative = subnetwork.get_gradient_input_element(i);
                if (!impl::is_inplace_layer(l,subnetwork))
                    output_derivative -= initial_gradient_input[i];
                double relative_error = (reference_derivative - output_derivative)/(reference_derivative + 1e-100);
                double absolute_error = (reference_derivative - output_derivative);
                if (std::abs(relative_error) > 0.02 && std::abs(absolute_error) > 0.001)
                {
                    using namespace std;
                    sout << "Gradient error in data variable #" << i << ". Relative error: " << relative_error << endl;
                    sout << "expected derivative: " << reference_derivative << endl;
                    sout << "output derivative:   " << output_derivative << endl;
                    return layer_test_results(sout.str());
                }
            }

        } // end for (int iter = 0; iter < 5; ++iter)
        return layer_test_results();
    }
...
...
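The robustness change in this commit is twofold: the whole check now runs over five freshly randomized subnetworks (the iter loop above), and a gradient mismatch is only reported when the relative error and the absolute error are both large, so near-zero derivatives no longer trip the previous pure relative threshold of 0.01. A standalone sketch of that acceptance rule, using an arbitrary function with the same step-size scheme and thresholds as the code above (an illustration, not dlib code):

    #include <cmath>
    #include <iostream>

    int main()
    {
        // f and its analytic derivative; tanh is an arbitrary example.
        auto f  = [](double x) { return std::tanh(x); };
        auto df = [](double x) { double t = std::tanh(x); return 1 - t*t; };

        const double x = 0.7;
        double eps = x*0.01;            // scale the step like base_eps above
        if (eps == 0) eps = 0.01;

        // Central differences reference vs. the "layer supplied" derivative.
        const double reference = (f(x+eps) - f(x-eps)) / (2*eps);
        const double computed  = df(x);

        const double relative_error = (reference - computed) / (reference + 1e-100);
        const double absolute_error = (reference - computed);

        // The commit replaces "fail if |relative error| > 0.01" with this combined test.
        const bool failed = std::abs(relative_error) > 0.02 && std::abs(absolute_error) > 0.001;
        std::cout << (failed ? "gradient check failed" : "gradient check passed") << std::endl;
        return 0;
    }

Requiring both errors to exceed their thresholds follows the usual practice for numeric gradient checking: relative error is meaningless when the true derivative is close to zero, while absolute error alone is too lax when derivatives are large.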