OpenDAS / dlib / Commits / 1f0318e2

Commit 1f0318e2, authored May 26, 2016 by Fm
depth_group replaced with concat layer
Parent: 93e786db
Showing 11 changed files with 548 additions and 675 deletions (+548 -675):

    dlib/dnn/core.h                  +2    -493
    dlib/dnn/cpu_dlib.cpp            +23   -44
    dlib/dnn/cpu_dlib.h              +8    -13
    dlib/dnn/cuda_dlib.cu            +16   -35
    dlib/dnn/cuda_dlib.h             +5    -9
    dlib/dnn/layers.h                +157  -0
    dlib/dnn/layers_abstract.h       +82   -0
    dlib/dnn/tensor_tools.cpp        +13   -16
    dlib/dnn/tensor_tools.h          +19   -35
    dlib/test/dnn.cpp                +189  -1
    examples/dnn_inception_ex.cpp    +34   -29
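Before the per-file diffs, here is a minimal sketch (not part of the commit; the names branch1, branch2, my_inception and net_type are illustrative) of how a user-facing network definition changes: the old depth_group/grp/group_input form removed from dlib/dnn/core.h versus the tag-based concat/inceptionN templates added in dlib/dnn/layers.h.

    #include <dlib/dnn.h>
    using namespace dlib;

    // Before this commit, a group was declared with grp over a std::tuple of branches
    // rooted at group_input (that machinery is removed in dlib/dnn/core.h below):
    //
    //   template <typename SUBNET> using inception =
    //       grp<std::tuple<con<8,1,1,1,1,group_input>,
    //                      con<8,3,3,1,1,con<8,1,1,1,1,group_input>>>, SUBNET>;
    //
    // After this commit, each branch is an ordinary layer template and the branches
    // are combined with the new inceptionN templates built on the concat layer:
    template <typename SUBNET> using branch1 = con<8,1,1,1,1,SUBNET>;
    template <typename SUBNET> using branch2 = con<8,3,3,1,1,con<8,1,1,1,1,SUBNET>>;
    template <typename SUBNET> using my_inception = inception2<branch1, branch2, SUBNET>;

    using net_type = loss_multiclass_log<fc<10, my_inception<input<matrix<unsigned char>>>>>;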
dlib/dnn/core.h  (+2 -493)

@@ -648,6 +648,7 @@ namespace dlib
         friend class add_skip_layer;
         template <size_t N, template<typename> class L, typename S>
         friend class repeat;
+        friend class dnn_tester;

         // Allow copying networks from one to another as long as their corresponding
         // layers can be constructed from each other.

@@ -1520,6 +1521,7 @@ namespace dlib
         friend class add_skip_layer;
         template <size_t N, template<typename> class L, typename S>
         friend class repeat;
+        friend class dnn_tester;

         // You wouldn't put a tag on a layer if you didn't want to access its forward
         // outputs. So this is always true.
@@ -3191,499 +3193,6 @@ namespace dlib

Removed (the group_input/depth_group machinery that the new concat layer replaces):

    // ----------------------------------------------------------------------------------------

    namespace impl
    {
        template <typename T> struct group_helper;
        template <typename... R> struct group_count_helper;
    }

    // --------------------------------------------------------------------------------------

    // this class is used to reference group layer input
    class group_input
    {
    public:
        typedef tensor input_type;
        const static unsigned int sample_expansion_factor = 1;

        friend void serialize(const group_input& item, std::ostream& out)
        {
            serialize("group_input", out);
        }

        friend void deserialize(group_input& item, std::istream& in)
        {
            std::string version;
            deserialize(version, in);
            if (version != "group_input")
                throw serialization_error("Unexpected version found while deserializing dlib::group_input.");
        }

        friend std::ostream& operator<<(std::ostream& out, const group_input& item)
        {
            out << "group_input";
            return out;
        }
    };

    // --------------------------------------------------------------------------------------

    template <typename GRP, typename SUBNET> class depth_group;

    template <typename T, typename U>
    struct is_nonloss_layer_type<depth_group<T,U>> : std::true_type {};

    template <typename GRP, typename SUBNET>
    class depth_group
    {
    public:
        typedef GRP grp_type;
        typedef SUBNET subnet_type;
        typedef typename subnet_type::input_type input_type;
        const static size_t group_size = std::tuple_size<grp_type>::value;
        const static size_t num_layers_in_group = impl::group_count_helper<GRP>::num_layers;
        const static size_t num_layers = subnet_type::num_layers + num_layers_in_group;
        const static size_t num_computational_layers_in_group = impl::group_count_helper<GRP>::num_computational_layers;
        const static size_t num_computational_layers = subnet_type::num_computational_layers + num_computational_layers_in_group;
        const static unsigned int sample_expansion_factor = subnet_type::sample_expansion_factor;

        using group_helper = impl::group_helper<grp_type>;

        depth_group(
        ) :
            subnetwork(new subnet_type()),
            grp(new grp_type()),
            gradient_input_is_stale(true),
            get_output_and_gradient_input_disabled(false)
        {
        }

        depth_group(const depth_group& item)
        {
            grp.reset(new grp_type(*item.grp));
            subnetwork.reset(new subnet_type(*item.subnetwork));
            gradient_input_is_stale = item.gradient_input_is_stale;
            get_output_and_gradient_input_disabled = item.get_output_and_gradient_input_disabled;
            x_grad = item.x_grad;
            cached_output = item.cached_output;
            temp_tensor = item.temp_tensor;
        }
        depth_group& operator=(const depth_group& item) { depth_group(item).swap(*this); return *this; }
        depth_group(depth_group&& item) : depth_group() { swap(item); }
        depth_group& operator=(depth_group&& item) { swap(item); return *this; }

        template <typename T, typename U, typename E>
        friend class add_layer;
        template <typename T, bool is_first, typename E>
        friend class dimpl::subnet_wrapper;
        template <unsigned long T, typename U, typename E>
        friend class add_tag_layer;
        template <template<typename> class T, typename U>
        friend class add_skip_layer;
        template <size_t N, template<typename> class L, typename S>
        friend class repeat;

        // Allow copying networks from one to another as long as their corresponding
        // layers can be constructed from each other.
        template <typename T, typename U>
        depth_group(
            const depth_group<T,U>& item
        ) :
            grp(new grp_type(item.detail())),
            subnetwork(new subnet_type(item.subnet())),
            gradient_input_is_stale(item.gradient_input_is_stale),
            get_output_and_gradient_input_disabled(item.get_output_and_gradient_input_disabled),
            x_grad(item.x_grad),
            cached_output(item.cached_output)
        {
        }

        template <typename input_iterator>
        void to_tensor(input_iterator ibegin, input_iterator iend, resizable_tensor& data) const
        {
            subnetwork->to_tensor(ibegin, iend, data);
        }

        template <typename input_iterator>
        const tensor& operator()(input_iterator ibegin, input_iterator iend)
        {
            to_tensor(ibegin, iend, temp_tensor);
            return forward(temp_tensor);
        }

        const tensor& operator()(const input_type& x)
        {
            return (*this)(&x, &x+1);
        }

        // forward for group: subnet->for_each_in_group->concat->cached_output
        const tensor& forward(const tensor& x)
        {
            subnetwork->forward(x);
            long group_depth = 0;
            group_helper::forward(subnetwork->get_output(), detail(), group_depth);

            auto& out_0 = std::get<0>(detail()).get_output();
            cached_output.set_size(out_0.num_samples(), group_depth, out_0.nr(), out_0.nc());

            group_helper::concat(cached_output, detail());

            gradient_input_is_stale = true;
            return private_get_output();
        }

    private:
        bool this_layer_requires_forward_output() { return true; }

        tensor& private_get_output() const
        {
            return const_cast<resizable_tensor&>(cached_output);
        }
        tensor& private_get_gradient_input()
        {
            if (gradient_input_is_stale)
            {
                gradient_input_is_stale = false;
                x_grad.copy_size(private_get_output());
                x_grad = 0;
            }
            return x_grad;
        }
        void disable_output_and_gradient_getters() { get_output_and_gradient_input_disabled = true; }

    public:
        const tensor& get_output() const
        {
            if (get_output_and_gradient_input_disabled)
                throw dlib::error("Accessing this layer's get_output() is disabled because an in-place layer has been stacked on top of it.");
            return private_get_output();
        }
        tensor& get_gradient_input()
        {
            if (get_output_and_gradient_input_disabled)
                throw dlib::error("Accessing this layer's get_gradient_input() is disabled because an in-place layer has been stacked on top of it.");
            return private_get_gradient_input();
        }

        const tensor& get_final_data_gradient() const
        {
            return subnetwork->get_final_data_gradient();
        }

        void back_propagate_error(const tensor& x)
        {
            back_propagate_error(x, private_get_gradient_input());
        }
        void back_propagate_error(const tensor& x, const tensor& gradient_input)
        {
            group_helper::backward(detail(), get_gradient_input(), subnetwork->get_output(), subnetwork->get_gradient_input());

            subnetwork->back_propagate_error(x);

            // zero out get_gradient_input()
            gradient_input_is_stale = true;
        }

        template <typename solver_type>
        void update_parameters(sstack<solver_type> solvers, double step_size)
        {
            DLIB_CASSERT(solvers.size() >= num_computational_layers, "");
            group_helper::update_parameters(solvers, step_size, detail());
            solvers = solvers.pop(num_computational_layers_in_group);
            subnetwork->update_parameters(solvers, step_size);
        }

        const subnet_type& subnet() const { return *subnetwork; }
        subnet_type& subnet() { return *subnetwork; }
        const grp_type& detail() const { return *grp; }
        grp_type& detail() { return *grp; }

        void clean()
        {
            x_grad.clear();
            cached_output.clear();
            temp_tensor.clear();
            gradient_input_is_stale = true;
            subnetwork->clean();
        }

        friend void serialize(const depth_group& item, std::ostream& out)
        {
            int version = 2;
            serialize(version, out);
            serialize(*item.subnetwork, out);
            group_helper::serialize(*item.grp, out);
            serialize(item.gradient_input_is_stale, out);
            serialize(item.get_output_and_gradient_input_disabled, out);
            serialize(item.x_grad, out);
            serialize(item.cached_output, out);
        }

        friend void deserialize(depth_group& item, std::istream& in)
        {
            int version = 0;
            deserialize(version, in);
            if (!(1 <= version && version <= 2))
                throw serialization_error("Unexpected version found while deserializing dlib::depth_group.");
            deserialize(*item.subnetwork, in);
            group_helper::deserialize(*item.grp, in);
            deserialize(item.gradient_input_is_stale, in);
            deserialize(item.get_output_and_gradient_input_disabled, in);
            deserialize(item.x_grad, in);
            deserialize(item.cached_output, in);
        }

        friend std::ostream& operator<<(std::ostream& out, const depth_group& item)
        {
            item.print(out, 0);
            return out;
        }

        void print(std::ostream& out, unsigned long idx = 0) const
        {
            out << "layer<" << idx << ">\t";
            detail().print(out, idx);
            subnet().print(out, idx+1);
        }

    private:
        void swap(depth_group& item)
        {
            std::swap(subnetwork, item.subnetwork);
            std::swap(grp, item.grp);
            std::swap(gradient_input_is_stale, item.gradient_input_is_stale);
            std::swap(get_output_and_gradient_input_disabled, item.get_output_and_gradient_input_disabled);
            std::swap(x_grad, item.x_grad);
            std::swap(cached_output, item.cached_output);
        }

        std::unique_ptr<subnet_type> subnetwork;
        std::unique_ptr<grp_type> grp;

        bool gradient_input_is_stale;
        bool get_output_and_gradient_input_disabled;

        resizable_tensor x_grad;
        resizable_tensor cached_output;

        // temp_tensor doesn't logically contribute to the state of this object.
        // It is here only to prevent it from being reallocated over and over.
        resizable_tensor temp_tensor;
    };

    // define "grp" layer shorter name for usage when creating networks
    template <typename GRP, typename SUBNET>
    using grp = depth_group<GRP, SUBNET>;

    namespace impl
    {
        template <unsigned int i, typename T, typename U>
        struct layer_helper<i, depth_group<T,U>,
            typename std::enable_if<(i != 0 && i >= depth_group<T,U>::num_layers_in_group)>::type>
        {
            const static size_t num_layers_in_group = depth_group<T,U>::num_layers_in_group;
            using next_type = typename depth_group<T,U>::subnet_type;
            using type = typename layer_helper<i - num_layers_in_group, next_type>::type;
            static type& layer(depth_group<T,U>& n)
            {
                return layer_helper<i - num_layers_in_group, next_type>::layer(n.subnet());
            }
        };
        template <unsigned int i, typename T, typename U>
        struct layer_helper<i, depth_group<T,U>,
            typename std::enable_if<(i != 0 && i < depth_group<T,U>::num_layers_in_group)>::type>
        {
            const static size_t num_layers_in_group = depth_group<T,U>::num_layers_in_group;
            typedef typename depth_group<T,U>::grp_type grp_type;
            using type = typename layer_helper<i, grp_type>::type;
            static type& layer(depth_group<T,U>& n)
            {
                return layer_helper<i, grp_type>::layer(n.detail());
            }
        };

        template <unsigned int pos, unsigned int i, typename... T>
        struct group_pos_search
        {
            const static unsigned int count = sizeof...(T);
            const static unsigned int pos_from_begin = count - pos - 1;
            using tuple_elem_type = typename std::tuple_element<pos_from_begin, std::tuple<T...>>::type;
            static const unsigned int num_layers = tuple_elem_type::num_layers;

            static const unsigned int layer_index = i >= num_layers ?
                group_pos_search<pos - 1, i - num_layers, T...>::layer_index : i;
            static const unsigned int tuple_index = i >= num_layers ?
                group_pos_search<pos - 1, i - num_layers, T...>::tuple_index + 1 : pos;
        };
        template <unsigned int i, typename... T>
        struct group_pos_search<0, i, T...>
        {
            static const unsigned int layer_index = i;
            static const unsigned int tuple_index = 0;
        };

        template <unsigned int i, typename... R>
        struct layer_helper<i, std::tuple<R...>, typename std::enable_if<true>::type>
        {
            const static unsigned tuple_size = sizeof...(R);
            static const unsigned int layer_index = group_pos_search<tuple_size - 1, i, R...>::layer_index;
            static const unsigned int tuple_index = group_pos_search<tuple_size - 1, i, R...>::tuple_index;

            using next_type = typename std::tuple_element<tuple_index, std::tuple<R...>>::type;
            //typename std::remove_reference<decltype(makeT().subnet())>::type;
            using type = typename layer_helper<layer_index, next_type>::type;
            static type& layer(std::tuple<R...>& n)
            {
                return layer_helper<layer_index, next_type>::layer(std::get<tuple_index>(n));
            }
        };

        // helper classes for layer group processing
        template <size_t idx, typename... T>
        struct group_helper_impl
        {
            static void serialize_impl(const std::tuple<T...>& data, std::ostream& out){
                group_helper_impl<idx - 1, T...>::serialize_impl(data, out);
                serialize(std::get<idx>(data), out);
            }
            static void deserialize_impl(std::tuple<T...>& data, std::istream& in){
                group_helper_impl<idx - 1, T...>::deserialize_impl(data, in);
                deserialize(std::get<idx>(data), in);
            }
            static void forward(const tensor& x, std::tuple<T...>& grp, long& group_depth){
                group_helper_impl<idx - 1, T...>::forward(x, grp, group_depth);
                auto& r = std::get<idx>(grp).forward(x);
                group_depth += r.k();
            }
            static size_t concat(resizable_tensor& cached_output, std::tuple<T...>& grp, size_t offset){
                offset += group_helper_impl<idx - 1, T...>::concat(cached_output, grp, offset);
                auto& output = std::get<idx>(grp).get_output();
                tt::concat_depth(cached_output, offset, output);
                return offset + output.nc() * output.nr() * output.k();
            }
            template <typename solver_type>
            static sstack<solver_type> update_parameters(sstack<solver_type> solvers, double step_size, std::tuple<T...>& grp){
                sstack<solver_type> sub_solvers = group_helper_impl<idx - 1, T...>::update_parameters(solvers, step_size, grp);
                std::get<idx>(grp).update_parameters(sub_solvers, step_size);
                using tuple_elem_type = typename std::tuple_element<idx, std::tuple<T...>>::type;
                return sub_solvers.pop(tuple_elem_type::num_computational_layers);
            }
            static size_t backward(std::tuple<T...>& grp, const tensor& group_gradient_in, const tensor& subnet_out, tensor& group_gradient_out, size_t offset)
            {
                offset += group_helper_impl<idx - 1, T...>::backward(grp, group_gradient_in, subnet_out, group_gradient_out, offset);
                auto& subnet = std::get<idx>(grp);
                auto& gr_input = subnet.get_gradient_input();
                tt::split_depth(gr_input, offset, group_gradient_in);
                subnet.back_propagate_error(subnet_out);
                tt::add(group_gradient_out, group_gradient_out, subnet.get_final_data_gradient());
                return offset + gr_input.nc() * gr_input.nr() * gr_input.k();
            }
        };
        template <typename... T>
        struct group_helper_impl<0, T...>
        {
            static void serialize_impl(const std::tuple<T...>& data, std::ostream& out){
                serialize(std::get<0>(data), out);
            }
            static void deserialize_impl(std::tuple<T...>& data, std::istream& in){
                deserialize(std::get<0>(data), in);
            }
            static void forward(const tensor& x, std::tuple<T...>& grp, long& group_depth){
                auto& r = std::get<0>(grp).forward(x);
                group_depth += r.k();
            }
            static size_t concat(resizable_tensor& cached_output, std::tuple<T...>& grp, size_t offset){
                auto& output = std::get<0>(grp).get_output();
                tt::concat_depth(cached_output, offset, output);
                return offset + output.nc() * output.nr() * output.k();
            }
            template <typename solver_type>
            static sstack<solver_type> update_parameters(sstack<solver_type> solvers, double step_size, std::tuple<T...>& grp){
                std::get<0>(grp).update_parameters(solvers, step_size);
                using tuple_elem_type = typename std::tuple_element<0, std::tuple<T...>>::type;
                return solvers.pop(tuple_elem_type::num_computational_layers);
            }
            static size_t backward(std::tuple<T...>& grp, const tensor& group_gradient_in, const tensor& subnet_out, tensor& group_gradient_out, size_t offset)
            {
                auto& item = std::get<0>(grp);
                auto& gr_input = item.get_gradient_input();
                tt::split_depth(gr_input, offset, group_gradient_in);
                item.back_propagate_error(subnet_out);
                tt::add(group_gradient_out, group_gradient_out, item.get_final_data_gradient());
                return offset + gr_input.nc() * gr_input.nr() * gr_input.k();
            }
        };

        template <typename... T>
        struct group_helper<std::tuple<T...>>
        {
            static void serialize(const std::tuple<T...>& data, std::ostream& out){
                group_helper_impl<std::tuple_size<std::tuple<T...>>::value - 1, T...>::serialize_impl(data, out);
            }
            static void deserialize(std::tuple<T...>& data, std::istream& in){
                group_helper_impl<std::tuple_size<std::tuple<T...>>::value - 1, T...>::deserialize_impl(data, in);
            }
            static void forward(const tensor& x, std::tuple<T...>& grp, long& group_depth){
                group_helper_impl<std::tuple_size<std::tuple<T...>>::value - 1, T...>::forward(x, grp, group_depth);
            }
            static void concat(resizable_tensor& out, std::tuple<T...>& grp){
                group_helper_impl<std::tuple_size<std::tuple<T...>>::value - 1, T...>::concat(out, grp, 0);
            }
            template <typename solver_type>
            static void update_parameters(sstack<solver_type> solvers, double step_size, std::tuple<T...>& grp){
                group_helper_impl<std::tuple_size<std::tuple<T...>>::value - 1, T...>::update_parameters(solvers, step_size, grp);
            }
            static void backward(std::tuple<T...>& grp, const tensor& group_gradient_in, const tensor& subnet_out, tensor& group_gradient_out)
            {
                group_helper_impl<std::tuple_size<std::tuple<T...>>::value - 1, T...>::backward(grp, group_gradient_in, subnet_out, group_gradient_out, 0);
            }
        };

        // helper classes to understand the count of group items layers
        template <typename T>
        struct group_count_helper<T>
        {
            const static size_t num_layers = T::num_layers;
            const static size_t num_computational_layers = T::num_computational_layers;
        };

        template <typename T, typename... R>
        struct group_count_helper<T, R...>
        {
            const static size_t num_layers = group_count_helper<T>::num_layers + group_count_helper<R...>::num_layers;
            const static size_t num_computational_layers = group_count_helper<T>::num_computational_layers + group_count_helper<R...>::num_computational_layers;
        };
        template <typename... R>
        struct group_count_helper<std::tuple<R...>>
        {
            const static size_t num_layers = group_count_helper<R...>::num_layers;
            const static size_t num_computational_layers = group_count_helper<R...>::num_computational_layers;
        };
    }

Kept context at the end of the file:

}

#endif // DLIB_DNn_CORE_H_
dlib/dnn/cpu_dlib.cpp  (+23 -44)

@@ -1783,58 +1783,37 @@ namespace dlib
            filters_gradient += gi * temp;
        }
    }

    // ------------------------------------------------------------------------------------

Removed:
        void concat_depth(
            tensor& dest,
            size_t sample_offset,
            const tensor& src
        )
        {
            const size_t dest_sample_size = static_cast<size_t>(dest.nc() * dest.nr() * dest.k());
            const size_t src_sample_size = static_cast<size_t>(src.nc() * src.nr() * src.k());

            DLIB_CASSERT(dest.num_samples() == src.num_samples() &&
                         dest.nc() == src.nc() && dest.nr() == src.nr(), "All sources should fit into dest tensor size");
            DLIB_CASSERT(dest_sample_size >= src_sample_size + sample_offset, "Not enough space in dest tensor");

            float* dest_p = dest.host_write_only() + sample_offset;
            const float* src_p = src.host();

            for (unsigned long i = 0; i < src.num_samples(); ++i)
            {
                ::memcpy(dest_p, src_p, src_sample_size * sizeof(float));

                dest_p += dest_sample_size;
                src_p += src_sample_size;
            }
        }

        void split_depth(
            tensor& dest,
            size_t sample_offset,
            const tensor& src
        )
        {
            const size_t dest_sample_size = static_cast<size_t>(dest.nc() * dest.nr() * dest.k());
            const size_t src_sample_size = static_cast<size_t>(src.nc() * src.nr() * src.k());

            DLIB_CASSERT(dest.num_samples() == src.num_samples() &&
                         dest.nc() == src.nc() && dest.nr() == src.nr(), "All sources should fit into dest tensor size");
            DLIB_CASSERT(dest_sample_size <= src_sample_size - sample_offset, "Not enough space in dest tensor");

            float* dest_p = dest.host_write_only();
            const float* src_p = src.host() + sample_offset;

            for (unsigned long i = 0; i < src.num_samples(); ++i)
            {
                ::memcpy(dest_p, src_p, dest_sample_size * sizeof(float));

                dest_p += dest_sample_size;
                src_p += src_sample_size;
            }
        }

Added:
        void copy_tensor(
            tensor& dest,
            size_t dest_k_offset,
            const tensor& src,
            size_t src_k_offset,
            size_t count_k
        )
        {
            const size_t dest_sample_size = static_cast<size_t>(dest.nc() * dest.nr() * dest.k());
            const size_t src_sample_size = static_cast<size_t>(src.nc() * src.nr() * src.k());
            const size_t block_size = count_k * dest.nc() * dest.nr();

            DLIB_CASSERT(dest.num_samples() == src.num_samples() &&
                         dest.nc() == src.nc() && dest.nr() == src.nr(), "All sources should fit into dest tensor size");
            DLIB_CASSERT(dest.k() - dest_k_offset >= count_k, "Not enough space in dest tensor");
            DLIB_CASSERT(src.k() - src_k_offset >= count_k, "Not enough space in src tensor");

            float* dest_p = dest.host() + dest_k_offset * dest.nc() * dest.nr();
            const float* src_p = src.host() + src_k_offset * src.nc() * src.nr();

            for (unsigned long i = 0; i < src.num_samples(); ++i)
            {
                ::memcpy(dest_p, src_p, block_size * sizeof(float));

                dest_p += dest_sample_size;
                src_p += src_sample_size;
            }
        }

    // ------------------------------------------------------------------------------------
    // ------------------------------------------------------------------------------------
    // ------------------------------------------------------------------------------------
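The pointer arithmetic above relies on dlib's row-major (sample, k, row, column) layout: one sample occupies k()*nr()*nc() consecutive floats and one channel occupies nr()*nc() of them, so count_k channels can be copied per sample with a single memcpy. The following toy model is plain standalone C++, not dlib code; the function copy_channels and all of its parameters are made up purely to illustrate the same offsets on a raw float buffer.

    #include <cstdio>
    #include <cstring>
    #include <vector>

    // Toy model of the layout cpu::copy_tensor relies on: channel k of sample i
    // starts at offset i*(k_total*nr*nc) + k*(nr*nc) in the flat buffer.
    void copy_channels(std::vector<float>& dest, long dest_k_total, long dest_k_offset,
                       const std::vector<float>& src, long src_k_total, long src_k_offset,
                       long count_k, long num_samples, long nr, long nc)
    {
        const long plane = nr * nc;                    // one channel of one sample
        const long dest_sample = dest_k_total * plane; // stride between samples in dest
        const long src_sample  = src_k_total * plane;  // stride between samples in src
        for (long i = 0; i < num_samples; ++i)
        {
            std::memcpy(&dest[i*dest_sample + dest_k_offset*plane],
                        &src [i*src_sample  + src_k_offset*plane],
                        count_k * plane * sizeof(float)); // count_k contiguous channels
        }
    }

    int main()
    {
        // two samples; copy 2 of src's 3 channels into a 5-channel dest at channel 1
        std::vector<float> src(2*3*4*4, 1.0f), dest(2*5*4*4, 0.0f);
        copy_channels(dest, 5, 1, src, 3, 0, 2, 2, 4, 4);
        std::printf("%f %f\n", dest[0], dest[4*4]); // 0 (untouched), 1 (copied)
        return 0;
    }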
dlib/dnn/cpu_dlib.h  (+8 -13)

@@ -384,19 +384,14 @@ namespace dlib
            long last_padding_x;
        };

Removed:
    // ----------------------------------------------------------------------------------------

        void concat_depth(
            tensor& dest,
            size_t sample_offset,
            const tensor& src
        );

        void split_depth(
            tensor& dest,
            size_t sample_offset,
            const tensor& src
        );

Added:
    // -----------------------------------------------------------------------------------

        void copy_tensor(
            tensor& dest,
            size_t dest_k_offset,
            const tensor& src,
            size_t src_k_offset,
            size_t count_k
        );

    // -----------------------------------------------------------------------------------

    }
dlib/dnn/cuda_dlib.cu  (+16 -35)

@@ -796,57 +796,38 @@ namespace dlib
                grad.device(), src.device(), gradient_input.device(), grad.size(),
                param.device(), params_grad.device());
        }

    // ----------------------------------------------------------------------------------------

Removed:
        void concat_depth(
            tensor& dest,
            size_t sample_offset,
            const tensor& src
        )
        {
            const size_t dest_sample_size = static_cast<size_t>(dest.nc() * dest.nr() * dest.k());
            const size_t src_sample_size = static_cast<size_t>(src.nc() * src.nr() * src.k());

            DLIB_CASSERT(dest.num_samples() == src.num_samples() &&
                         dest.nc() == src.nc() && dest.nr() == src.nr(), "All sources should fit into dest tensor size");
            DLIB_CASSERT(dest_sample_size >= src_sample_size + sample_offset, "Not enough space in dest tensor");

            float* dest_p = dest.device_write_only() + sample_offset;
            const float* src_p = src.device();

            for (unsigned long i = 0; i < src.num_samples(); ++i)
            {
                CHECK_CUDA(cudaMemcpy(dest_p, src_p, src_sample_size * sizeof(float), cudaMemcpyDeviceToDevice));

                dest_p += dest_sample_size;
                src_p += src_sample_size;
            }
        }

        void split_depth(
            tensor& dest,
            size_t sample_offset,
            const tensor& src
        )
        {
            const size_t dest_sample_size = static_cast<size_t>(dest.nc() * dest.nr() * dest.k());
            const size_t src_sample_size = static_cast<size_t>(src.nc() * src.nr() * src.k());

            DLIB_CASSERT(dest.num_samples() == src.num_samples() &&
                         dest.nc() == src.nc() && dest.nr() == src.nr(), "All sources should fit into dest tensor size");
            DLIB_CASSERT(dest_sample_size <= src_sample_size - sample_offset, "Not enough space in dest tensor");

            float* dest_p = dest.device_write_only();
            const float* src_p = src.device() + sample_offset;

            for (unsigned long i = 0; i < src.num_samples(); ++i)
            {
                CHECK_CUDA(cudaMemcpy(dest_p, src_p, dest_sample_size * sizeof(float), cudaMemcpyDeviceToDevice));

                dest_p += dest_sample_size;
                src_p += src_sample_size;
            }
        }

Added:
        void copy_tensor(
            tensor& dest,
            size_t dest_k_offset,
            const tensor& src,
            size_t src_k_offset,
            size_t count_k
        )
        {
            const size_t dest_sample_size = static_cast<size_t>(dest.nc() * dest.nr() * dest.k());
            const size_t src_sample_size = static_cast<size_t>(src.nc() * src.nr() * src.k());
            const size_t block_size = count_k * dest.nc() * dest.nr();

            DLIB_CASSERT(dest.num_samples() == src.num_samples() &&
                         dest.nc() == src.nc() && dest.nr() == src.nr(), "All sources should fit into dest tensor size");
            DLIB_CASSERT(dest.k() - dest_k_offset >= count_k, "Not enough space in dest tensor");
            DLIB_CASSERT(src.k() - src_k_offset >= count_k, "Not enough space in src tensor");

            float* dest_p = dest.device() + dest_k_offset * dest.nc() * dest.nr();
            const float* src_p = src.device() + src_k_offset * src.nc() * src.nr();

            for (unsigned long i = 0; i < src.num_samples(); ++i)
            {
                CHECK_CUDA(cudaMemcpy(dest_p, src_p, block_size * sizeof(float), cudaMemcpyDeviceToDevice));

                dest_p += dest_sample_size;
                src_p += src_sample_size;
            }
        }

    // ----------------------------------------------------------------------------------------
    }
dlib/dnn/cuda_dlib.h  (+5 -9)

@@ -258,16 +258,12 @@ namespace dlib
            tensor& params_grad
        );

Removed:
        void concat_depth(
            tensor& dest,
            size_t sample_offset,
            const tensor& src
        );

        void split_depth(
            tensor& dest,
            size_t sample_offset,
            const tensor& src
        );

Added:
        void copy_tensor(
            tensor& dest,
            size_t dest_k_offset,
            const tensor& src,
            size_t src_k_offset,
            size_t count_k
        );

    // ------------------------------------------------------------------------------------
    // ------------------------------------------------------------------------------------
dlib/dnn/layers.h  (+157 -0)

@@ -1836,6 +1836,163 @@ namespace dlib
    template <typename SUBNET>
    using softmax = add_layer<softmax_, SUBNET>;

    // ----------------------------------------------------------------------------------------

Added:
    namespace impl
    {
        // helper classes for layer concat processing
        template <template<typename> class... TAG_TYPES>
        struct concat_helper_impl
        {
        };

        template <template<typename> class TAG_TYPE>
        struct concat_helper_impl<TAG_TYPE>
        {
            template <typename SUBNET>
            static void resize_out(resizable_tensor& out, const SUBNET& sub, long sum_k)
            {
                auto& t = layer<TAG_TYPE>(sub).get_output();
                out.set_size(t.num_samples(), t.k() + sum_k, t.nr(), t.nc());
            }
            template <typename SUBNET>
            static void concat(tensor& out, const SUBNET& sub, size_t k_offset)
            {
                auto& t = layer<TAG_TYPE>(sub).get_output();
                tt::copy_tensor(out, k_offset, t, 0, t.k());
            }
            template <typename SUBNET>
            static void split(const tensor& input, SUBNET& sub, size_t k_offset)
            {
                auto& t = layer<TAG_TYPE>(sub).get_gradient_input();
                tt::copy_tensor(t, 0, input, k_offset, t.k());
            }
        };
        template <template<typename> class TAG_TYPE, template<typename> class... TAG_TYPES>
        struct concat_helper_impl<TAG_TYPE, TAG_TYPES...>
        {
            template <typename SUBNET>
            static void resize_out(resizable_tensor& out, const SUBNET& sub, long sum_k)
            {
                auto& t = layer<TAG_TYPE>(sub).get_output();
                concat_helper_impl<TAG_TYPES...>::resize_out(out, sub, sum_k + t.k());
            }
            template <typename SUBNET>
            static void concat(tensor& out, const SUBNET& sub, size_t k_offset)
            {
                auto& t = layer<TAG_TYPE>(sub).get_output();
                tt::copy_tensor(out, k_offset, t, 0, t.k());
                k_offset += t.k();
                concat_helper_impl<TAG_TYPES...>::concat(out, sub, k_offset);
            }
            template <typename SUBNET>
            static void split(const tensor& input, SUBNET& sub, size_t k_offset)
            {
                auto& t = layer<TAG_TYPE>(sub).get_gradient_input();
                tt::copy_tensor(t, 0, input, k_offset, t.k());
                k_offset += t.k();
                concat_helper_impl<TAG_TYPES...>::split(input, sub, k_offset);
            }
        };
    }

    // concat layer
    template <
        template<typename> class... TAG_TYPES
        >
    class concat_
    {
    public:
        template <typename SUBNET>
        void setup (const SUBNET&)
        {
            // do nothing
        }

        template <typename SUBNET>
        void forward(const SUBNET& sub, resizable_tensor& output)
        {
            // the total depth of the result is the sum of depths from all tags
            impl::concat_helper_impl<TAG_TYPES...>::resize_out(output, sub, 0);

            // copy the output of each tag into a different part of the result
            impl::concat_helper_impl<TAG_TYPES...>::concat(output, sub, 0);
        }

        template <typename SUBNET>
        void backward(const tensor& gradient_input, SUBNET& sub, tensor&)
        {
            // The gradient is split into parts, one for each tag layer
            impl::concat_helper_impl<TAG_TYPES...>::split(gradient_input, sub, 0);
        }

        const tensor& get_layer_params() const { return params; }
        tensor& get_layer_params() { return params; }

        friend void serialize(const concat_& item, std::ostream& out)
        {
            serialize("concat_", out);
            serialize(sizeof...(TAG_TYPES), out);
        }

        friend void deserialize(concat_& item, std::istream& in)
        {
            std::string version;
            deserialize(version, in);
            if (version != "concat_")
                throw serialization_error("Unexpected version '"+version+"' found while deserializing dlib::concat_.");
            size_t count_tags;
            deserialize(count_tags, in);
            if (count_tags != sizeof...(TAG_TYPES))
                throw serialization_error("Invalid count of tags "+std::to_string(count_tags)+", expecting "+
                                          std::to_string(sizeof...(TAG_TYPES))+
                                          " found while deserializing dlib::concat_.");
        }

        friend std::ostream& operator<<(std::ostream& out, const concat_& item)
        {
            out << "concat\t(" << sizeof...(TAG_TYPES) << ")";
            return out;
        }

    private:
        resizable_tensor params; // unused
    };

    template <typename SUBNET, template<typename> class... TAG_TYPES>
    using concat = add_layer<concat_<TAG_TYPES...>, SUBNET>;

    // The inception layer uses tags internally.  If the user also uses tags, conflicts are
    // possible, so here are new tags reserved specially for the inception templates.
    template <typename SUBNET> using itag0  = add_tag_layer< 1000 + 0, SUBNET>;
    template <typename SUBNET> using itag1  = add_tag_layer< 1000 + 1, SUBNET>;
    template <typename SUBNET> using itag2  = add_tag_layer< 1000 + 2, SUBNET>;
    template <typename SUBNET> using itag3  = add_tag_layer< 1000 + 3, SUBNET>;
    template <typename SUBNET> using itag4  = add_tag_layer< 1000 + 4, SUBNET>;
    template <typename SUBNET> using itag5  = add_tag_layer< 1000 + 5, SUBNET>;
    // skip to inception input
    template <typename SUBNET> using iskip  = add_skip_layer< itag0, SUBNET>;

    // here are some templates to be used for creating inception layer groups
    template <template<typename>class B1, template<typename>class B2, typename SUBNET>
    using inception2 = concat<itag1<B1<iskip<itag2<B2<itag0<SUBNET>>>>>>, itag1, itag2>;

    template <template<typename>class B1, template<typename>class B2, template<typename>class B3, typename SUBNET>
    using inception3 = concat<itag1<B1<iskip<itag2<B2<iskip<itag3<B3<itag0<SUBNET>>>>>>>>>, itag1, itag2, itag3>;

    template <template<typename>class B1, template<typename>class B2, template<typename>class B3,
              template<typename>class B4, typename SUBNET>
    using inception4 = concat<itag1<B1<iskip<itag2<B2<iskip<itag3<B3<iskip<itag4<B4<itag0<SUBNET>>>>>>>>>>>>,
                              itag1, itag2, itag3, itag4>;

    template <template<typename>class B1, template<typename>class B2, template<typename>class B3,
              template<typename>class B4, template<typename>class B5, typename SUBNET>
    using inception5 = concat<itag1<B1<iskip<itag2<B2<iskip<itag3<B3<iskip<itag4<B4<iskip<itag5<B5<itag0<SUBNET>>>>>>>>>>>>>>>,
                              itag1, itag2, itag3, itag4, itag5>;

    // ----------------------------------------------------------------------------------------
}
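For orientation, here is a hedged usage sketch (not from the commit) showing how the new concat_ layer is wired up with the itagN/iskip tags directly; it expands to the same structure as the inception2 convenience template above. The branch widths and the names branch_a, branch_b, two_branch and net_type are illustrative only.

    #include <dlib/dnn.h>
    using namespace dlib;

    // Two branches, each ending in its own tag so concat can find their outputs.
    template <typename SUBNET> using branch_a = itag1<con<4,1,1,1,1,SUBNET>>;
    template <typename SUBNET> using branch_b = itag2<con<8,3,3,1,1,SUBNET>>;

    // Tag the input with itag0, run branch_b on it, skip back to itag0, run branch_a,
    // then concatenate the itag1 and itag2 outputs along the k dimension.
    template <typename SUBNET>
    using two_branch = concat<branch_a<iskip<branch_b<itag0<SUBNET>>>>, itag1, itag2>;

    using net_type = loss_multiclass_log<fc<10, two_branch<input<matrix<unsigned char>>>>>;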
dlib/dnn/layers_abstract.h  (+82 -0)

@@ -1652,6 +1652,88 @@ namespace dlib
    using add_prev9_  = add_prev_<tag9>;
    using add_prev10_ = add_prev_<tag10>;

    // ----------------------------------------------------------------------------------------

Added:
    template <
        template<typename> class... TAG_TYPES
        >
    class concat_
    {
        /*!
            WHAT THIS OBJECT REPRESENTS
                This is an implementation of the EXAMPLE_COMPUTATIONAL_LAYER_ interface
                defined above.  This layer simply concatenates the outputs of the required
                tag layers.  In particular, it copies each layer's output from TAG_TYPES
                into the corresponding part of the result tensor, thus producing a combined
                output.  The output of each tag layer is stored in a separate part of the
                final output.

                FORWARD:
                    for each (tag in TAG_TYPES)
                        output[i, k + tag.k(), r, c] = layer<tag>(subnet).get_output[i, k, r, c]

                BACKWARD:
                    for each (tag in TAG_TYPES)
                        layer<tag>(subnet).get_gradient_input[i, k, r, c] = input[i, k + tag.k(), r, c]

                This layer can only be used with tag layers as its inputs.  Each tagged
                layer must have identical num_samples, R, and C dimensions.  The output
                will have K equal to the sum of the tags' K values, and the output's
                num_samples, R, and C will be the same as the tagged layers'.
        !*/

    public:
        template <typename SUBNET> void setup (const SUBNET& sub);
        template <typename SUBNET> void forward(const SUBNET& sub, resizable_tensor& output);
        template <typename SUBNET> void backward(const tensor& gradient_input, SUBNET& sub, tensor& params_grad);
        const tensor& get_layer_params() const;
        tensor& get_layer_params();
        /*!
            These functions are implemented as described in the EXAMPLE_COMPUTATIONAL_LAYER_ interface.
        !*/
    };

    template <typename SUBNET, template<typename> class... TAG_TYPES>
    using concat = add_layer<concat_<TAG_TYPES...>, SUBNET>;

    // The inception layer uses tags internally.  If the user also uses tags, conflicts are
    // possible, so here are new tags reserved specially for the inception templates.
    template <typename SUBNET> using itag0  = add_tag_layer< 1000 + 0, SUBNET>;
    template <typename SUBNET> using itag1  = add_tag_layer< 1000 + 1, SUBNET>;
    template <typename SUBNET> using itag2  = add_tag_layer< 1000 + 2, SUBNET>;
    template <typename SUBNET> using itag3  = add_tag_layer< 1000 + 3, SUBNET>;
    template <typename SUBNET> using itag4  = add_tag_layer< 1000 + 4, SUBNET>;
    template <typename SUBNET> using itag5  = add_tag_layer< 1000 + 5, SUBNET>;
    // skip to inception input
    template <typename SUBNET> using iskip  = add_skip_layer< itag0, SUBNET>;

    // here are some templates to be used for creating inception layer groups
    template <template<typename>class B1, template<typename>class B2, typename SUBNET>
    using inception2 = concat<itag1<B1<iskip<itag2<B2<itag0<SUBNET>>>>>>, itag1, itag2>;

    template <template<typename>class B1, template<typename>class B2, template<typename>class B3, typename SUBNET>
    using inception3 = concat<itag1<B1<iskip<itag2<B2<iskip<itag3<B3<itag0<SUBNET>>>>>>>>>, itag1, itag2, itag3>;

    template <template<typename>class B1, template<typename>class B2, template<typename>class B3,
              template<typename>class B4, typename SUBNET>
    using inception4 = concat<itag1<B1<iskip<itag2<B2<iskip<itag3<B3<iskip<itag4<B4<itag0<SUBNET>>>>>>>>>>>>,
                              itag1, itag2, itag3, itag4>;

    template <template<typename>class B1, template<typename>class B2, template<typename>class B3,
              template<typename>class B4, template<typename>class B5, typename SUBNET>
    using inception5 = concat<itag1<B1<iskip<itag2<B2<iskip<itag3<B3<iskip<itag4<B4<iskip<itag5<B5<itag0<SUBNET>>>>>>>>>>>>>>>,
                              itag1, itag2, itag3, itag4, itag5>;

    // ----------------------------------------------------------------------------------------
}
dlib/dnn/tensor_tools.cpp  (+13 -16)

@@ -678,26 +678,23 @@ namespace dlib { namespace tt
#endif
    }

    // ----------------------------------------------------------------------------------------

Removed:
    void concat_depth(tensor& dest, size_t sample_offset, const tensor& src)
    {
#ifdef DLIB_USE_CUDA
        cuda::concat_depth(dest, sample_offset, src);
#else
        cpu::concat_depth(dest, sample_offset, src);
#endif
    }

    void split_depth(tensor& dest, size_t sample_offset, const tensor& src)
    {
#ifdef DLIB_USE_CUDA
        cuda::split_depth(dest, sample_offset, src);
#else
        cpu::split_depth(dest, sample_offset, src);
#endif
    }

Added:
    void copy_tensor(
        tensor& dest,
        size_t dest_k_offset,
        const tensor& src,
        size_t src_k_offset,
        size_t count_k
    )
    {
#ifdef DLIB_USE_CUDA
        cuda::copy_tensor(dest, dest_k_offset, src, src_k_offset, count_k);
#else
        cpu::copy_tensor(dest, dest_k_offset, src, src_k_offset, count_k);
#endif
    }

    // ----------------------------------------------------------------------------------------
}}
dlib/dnn/tensor_tools.h  (+19 -35)

@@ -1234,41 +1234,25 @@ namespace dlib { namespace tt
    };

    // ----------------------------------------------------------------------------------------

Removed:
    void concat_depth(
        tensor& dest,
        size_t sample_offset,
        const tensor& src
    );
    /*!
        requires
            - dest.nc() == src.nc()
            - dest.nr() == src.nr()
            - dest.num_samples() == src.num_samples()
            - dest.k() >= src.k() + sample_offset
            - is_same_object(dest,src) == false
            - sample_offset is a count of elements, not bytes
        ensures
            - performs: dest[i, k + sample_offset, r, c] = src[i, k, r, c], where k in [0..src.k()]
              Copies the content of each sample of src into the corresponding place of the sample in dest.
    !*/

    void split_depth(
        tensor& dest,
        size_t sample_offset,
        const tensor& src
    );
    /*!
        requires
            - dest.nc() == src.nc()
            - dest.nr() == src.nr()
            - dest.num_samples() == src.num_samples()
            - dest.k() <= src.k() - sample_offset
            - is_same_object(dest,src) == false
            - sample_offset is a count of elements, not bytes
        ensures
            - performs: dest[i, k, r, c] = src[i, k + sample_offset, r, c], where k in [0..dest.k()]
              Fills each sample of dest from the corresponding part of each sample in src.
    !*/

Added:
    void copy_tensor(
        tensor& dest,
        size_t dest_k_offset,
        const tensor& src,
        size_t src_k_offset,
        size_t count_k
    );
    /*!
        requires
            - dest.nc() == src.nc()
            - dest.nr() == src.nr()
            - dest.num_samples() == src.num_samples()
            - dest.k() - dest_k_offset >= count_k
            - src.k() - src_k_offset >= count_k
            - is_same_object(dest,src) == false
        ensures
            - performs: dest[i, k + dest_k_offset, r, c] = src[i, k + src_k_offset, r, c], where k in [0..count_k]
              Copies the content of each sample of src into the corresponding place of the sample in dest.
    !*/

    // ----------------------------------------------------------------------------------------
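A minimal usage sketch of the tt::copy_tensor contract described above, assuming only the signature and semantics given in this header; the tensor sizes are arbitrary. This is essentially what the new concat_ layer does internally when it stacks two tagged outputs depth-wise.

    #include <dlib/dnn.h>
    using namespace dlib;

    int main()
    {
        // Stack two 3-channel tensors along k into a 6-channel destination.
        resizable_tensor dest(2, 6, 5, 5);
        resizable_tensor src1(2, 3, 5, 5);
        resizable_tensor src2(2, 3, 5, 5);
        src1 = 1; src2 = 2; dest = 0;

        tt::copy_tensor(dest, 0,        src1, 0, src1.k()); // dest channels 0..2 <- src1
        tt::copy_tensor(dest, src1.k(), src2, 0, src2.k()); // dest channels 3..5 <- src2
        return 0;
    }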
dlib/test/dnn.cpp  (+189 -1)

@@ -12,7 +12,77 @@
 #include "tester.h"

Added before the anonymous test namespace:

namespace dlib
{
    template <typename SUBNET> using concat_block1 = con<5,1,1,1,1,SUBNET>;
    template <typename SUBNET> using concat_block2 = con<8,3,3,1,1,SUBNET>;
    template <typename SUBNET> using concat_block3 = max_pool<3,3,1,1,SUBNET>;
    template <typename SUBNET> using concat_incept = inception3<concat_block1,concat_block2,concat_block3,SUBNET>;

    // this class is a friend of add_layer and can access private members
    class dnn_tester
    {
    public:
        // tester function is a member to have access to a private x_grad member of add_layer
        static void test_concat()
        {
            using namespace test;
            using namespace std;
            using namespace dlib::tt;
            print_spinner();

            using net_type = concat_incept<input<matrix<float>>>;

            resizable_tensor data(10, 1, 111, 222);
            data = matrix_cast<float>(gaussian_randm(data.num_samples(), data.k()*data.nr()*data.nc(), 1));

            net_type net;

            auto& out = net.forward(data);

            auto& b1o = layer<itag1>(net).get_output();
            auto& b2o = layer<itag2>(net).get_output();
            auto& b3o = layer<itag3>(net).get_output();

            resizable_tensor dest(10, 14, 111, 222);
            copy_tensor(dest, 0, b1o, 0, b1o.k());
            copy_tensor(dest, b1o.k(), b2o, 0, b2o.k());
            copy_tensor(dest, b1o.k() + b2o.k(), b3o, 0, b3o.k());

            DLIB_TEST(dest.size() == out.size());
            int error = memcmp(dest.host(), out.host(), dest.size());
            DLIB_TEST(error == 0);

            resizable_tensor gr(10, 14, 111, 222);
            gr = matrix_cast<float>(gaussian_randm(gr.num_samples(), gr.k()*gr.nr()*gr.nc(), 1));

            memcpy(net.get_gradient_input(), gr);

            net.back_propagate_error(data);

            auto& b1g = layer<itag1>(net).subnet().x_grad;
            auto& b2g = layer<itag2>(net).subnet().x_grad;
            auto& b3g = layer<itag3>(net).subnet().x_grad;

            resizable_tensor g1(10, 5, 111, 222);
            resizable_tensor g2(10, 8, 111, 222);
            resizable_tensor g3(10, 1, 111, 222);

            copy_tensor(g1, 0, gr, 0, g1.k());
            copy_tensor(g2, 0, gr, g1.k(), g2.k());
            copy_tensor(g3, 0, gr, g1.k() + g2.k(), g3.k());
            DLIB_TEST(g1.size() == b1g.size());
            error = memcmp(g1.host(), b1g.host(), b1g.size());
            DLIB_TEST(error == 0);
            DLIB_TEST(g2.size() == b2g.size());
            error = memcmp(g2.host(), b2g.host(), b2g.size());
            DLIB_TEST(error == 0);
            DLIB_TEST(g3.size() == b3g.size());
            error = memcmp(g3.host(), b3g.host(), b3g.size());
            DLIB_TEST(error == 0);
        }
    };
}

 namespace
 {
     using namespace test;

@@ -1405,6 +1475,121 @@ namespace
        DLIB_TEST(count == pnet.num_computational_layers);
    }

Added:
    float tensor_read_cpu(const tensor& t, long i, long k, long r, long c)
    {
        const float* p = t.host() + t.k()*t.nr()*t.nc()*i + t.nr()*t.nc()*k + t.nc()*r + c;
        return *p;
    }
    void test_copy_tensor_cpu()
    {
        using namespace dlib::tt;
        print_spinner();
        resizable_tensor dest(10, 9, 7, 15);
        resizable_tensor src1(10, 3, 7, 15);
        resizable_tensor src2(10, 3, 7, 15);
        resizable_tensor src3(10, 9, 7, 15);
        dest = matrix_cast<float>(gaussian_randm(dest.num_samples(), dest.k()*dest.nr()*dest.nc(), 1));
        src1 = matrix_cast<float>(gaussian_randm(src1.num_samples(), src1.k()*src1.nr()*src1.nc(), 0));
        src2 = matrix_cast<float>(gaussian_randm(src1.num_samples(), src2.k()*src2.nr()*src2.nc(), 0));
        src3 = matrix_cast<float>(gaussian_randm(src1.num_samples(), src3.k()*src3.nr()*src3.nc(), 0));

        cpu::copy_tensor(dest, 0, src1, 0, src1.k()); //full copy src1->dest
        cpu::copy_tensor(dest, src1.k(), src2, 0, src2.k()); //full copy src2->dest with offset of src1
        cpu::copy_tensor(dest, src1.k() + src2.k(), src3, 3, 3); //partial copy src3 into the rest of dest

        for (long i = 0; i < dest.num_samples(); ++i)
        {
            for (long k = 0; k < dest.k(); ++k)
            {
                for (long r = 0; r < dest.nr(); ++r)
                {
                    for (long c = 0; c < dest.nc(); ++c)
                    {
                        float dest_value = tensor_read_cpu(dest, i, k, r, c);
                        // first part is from src1
                        if (k < src1.k())
                        {
                            float src_value = tensor_read_cpu(src1, i, k, r, c);
                            DLIB_TEST(src_value == dest_value);
                        }
                        // second part is from src2
                        else if (k < src1.k() + src2.k())
                        {
                            float src_value = tensor_read_cpu(src2, i, k - src1.k(), r, c);
                            DLIB_TEST(src_value == dest_value);
                        }
                        // third part is from src3
                        else
                        {
                            float src_value = tensor_read_cpu(src3, i, k - src1.k() - src2.k() + 3, r, c);
                            DLIB_TEST(src_value == dest_value);
                        }
                    }
                }
            }
        }
    }
#ifdef DLIB_USE_CUDA
    float tensor_read_gpu(const tensor& t, long i, long k, long r, long c)
    {
        const float* p = t.device() + t.k()*t.nr()*t.nc()*i + t.nr()*t.nc()*k + t.nc()*r + c;
        return *p;
    }
    void test_copy_tensor_gpu()
    {
        using namespace dlib::tt;
        print_spinner();
        resizable_tensor dest(10, 9, 7, 15);
        resizable_tensor src1(10, 3, 7, 15);
        resizable_tensor src2(10, 3, 7, 15);
        resizable_tensor src3(10, 9, 7, 15);
        dest = matrix_cast<float>(gaussian_randm(dest.num_samples(), dest.k()*dest.nr()*dest.nc(), 1));
        src1 = matrix_cast<float>(gaussian_randm(src1.num_samples(), src1.k()*src1.nr()*src1.nc(), 0));
        src2 = matrix_cast<float>(gaussian_randm(src1.num_samples(), src2.k()*src2.nr()*src2.nc(), 0));
        src3 = matrix_cast<float>(gaussian_randm(src1.num_samples(), src3.k()*src3.nr()*src3.nc(), 0));

        gpu::copy_tensor(dest, 0, src1, 0, src1.k()); //full copy src1->dest
        gpu::copy_tensor(dest, src1.k(), src2, 0, src2.k()); //full copy src2->dest with offset of src1
        gpu::copy_tensor(dest, src1.k() + src2.k(), src3, 3, 3); //partial copy src3 into the rest of dest

        for (long i = 0; i < dest.num_samples(); ++i)
        {
            for (long k = 0; k < dest.k(); ++k)
            {
                for (long r = 0; r < dest.nr(); ++r)
                {
                    for (long c = 0; c < dest.nc(); ++c)
                    {
                        float dest_value = tensor_read_gpu(dest, i, k, r, c);
                        // first part is from src1
                        if (k < src1.k())
                        {
                            float src_value = tensor_read_gpu(src1, i, k, r, c);
                            DLIB_TEST(src_value == dest_value);
                        }
                        // second part is from src2
                        else if (k < src1.k() + src2.k())
                        {
                            float src_value = tensor_read_gpu(src2, i, k - src1.k(), r, c);
                            DLIB_TEST(src_value == dest_value);
                        }
                        // third part is from src3
                        else
                        {
                            float src_value = tensor_read_gpu(src3, i, k - src1.k() - src2.k() + 3, r, c);
                            DLIB_TEST(src_value == dest_value);
                        }
                    }
                }
            }
        }
    }
#endif//DLIB_USE_CUDA

    // ----------------------------------------------------------------------------------------

    class dnn_tester : public tester

@@ -1433,6 +1618,7 @@ namespace
            compare_bn_conv_gpu_and_cpu();
            test_add();
            compare_adam();
+           test_copy_tensor_gpu();
 #endif
            test_max_pool(1,1,2,3,0,0);
            test_max_pool(3,3,1,1,0,0);

@@ -1466,6 +1652,8 @@ namespace
            test_basic_tensor_ops();
            test_layers();
            test_visit_funcions();
+           test_copy_tensor_cpu();
+           dlib::dnn_tester::test_concat();
        }
    } a;
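As a side note on the numbers used in test_concat above: the reference tensor is sized 10 x 14 x 111 x 222 because the three tagged branches emit 5, 8, and 1 channels and concat_ stacks them along k while keeping num_samples, nr, and nc. The standalone sketch below is assumed, not part of the test file; it instantiates the same concat_incept network on a small zero input and prints the concatenated depth, which should come out as 14.

    #include <dlib/dnn.h>
    #include <iostream>
    using namespace dlib;

    // Same block and inception definitions as in the test above.
    template <typename SUBNET> using concat_block1 = con<5,1,1,1,1,SUBNET>;
    template <typename SUBNET> using concat_block2 = con<8,3,3,1,1,SUBNET>;
    template <typename SUBNET> using concat_block3 = max_pool<3,3,1,1,SUBNET>;
    template <typename SUBNET> using concat_incept = inception3<concat_block1,concat_block2,concat_block3,SUBNET>;

    int main()
    {
        concat_incept<input<matrix<float>>> net;
        resizable_tensor data(2, 1, 32, 32);
        data = 0;
        auto& out = net.forward(data);
        std::cout << "output k() = " << out.k() << std::endl; // expected: 5 + 8 + 1 = 14
        return 0;
    }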
examples/dnn_inception_ex.cpp  (+34 -29)

@@ -15,22 +15,42 @@
 #include <dlib/dnn.h>
 #include <iostream>
 #include <dlib/data_io.h>
-#include <tuple>

 using namespace std;
 using namespace dlib;

Removed:
// Here we define the inception module as described in the GoogLeNet specification.
// The depth of each sublayer can be changed.
template <typename SUBNET> using inception =
    grp<std::tuple<con<8,1,1,1,1,group_input>,
                   con<8,3,3,1,1,con<8,1,1,1,1,group_input>>,
                   con<8,5,5,1,1,con<8,1,1,1,1,group_input>>,
                   con<8,1,1,1,1,max_pool<3,3,1,1,group_input>>>, SUBNET>;

Added:
// An inception layer combines several different convolutions.  Here we define the
// blocks (convolutions with different kernel sizes) that we will use inside the
// inception layer.
template <typename SUBNET> using block_a1 = relu<con<4,1,1,1,1,SUBNET>>;
template <typename SUBNET> using block_a2 = relu<con<4,3,3,1,1,relu<con<4,1,1,1,1,SUBNET>>>>;
template <typename SUBNET> using block_a3 = relu<con<4,5,5,1,1,relu<con<4,1,1,1,1,SUBNET>>>>;
template <typename SUBNET> using block_a4 = relu<con<4,1,1,1,1,max_pool<3,3,1,1,SUBNET>>>;

// Here is the inception layer definition.  It runs the different blocks on the input
// and returns their combined output.
template <typename SUBNET> using incept_a = inception4<block_a1, block_a2, block_a3, block_a4, SUBNET>;

// A network can have inception layers of different structure.  Here are blocks with
// different convolutions for a second inception layer.
template <typename SUBNET> using block_b1 = relu<con<8,1,1,1,1,SUBNET>>;
template <typename SUBNET> using block_b2 = relu<con<8,3,3,1,1,SUBNET>>;
template <typename SUBNET> using block_b3 = relu<con<8,1,1,1,1,max_pool<3,3,1,1,SUBNET>>>;

// Here is the second inception layer definition.
template <typename SUBNET> using incept_b = inception3<block_b1,block_b2,block_b3,SUBNET>;

// and then the network type is
using net_type = loss_multiclass_log<
        fc<10,
        relu<fc<32,
        max_pool<2,2,2,2,incept_b<
        max_pool<2,2,2,2,incept_a<
        input<matrix<unsigned char>>
        >>>>>>>>;

Context:
int main(int argc, char** argv) try
{
    // This example is going to run on the MNIST dataset.
    if (argc != 2)
    {
        cout << "This example needs the MNIST dataset to run!" << endl;

@@ -48,25 +68,10 @@ int main(int argc, char** argv) try
    load_mnist_dataset(argv[1], training_images, training_labels, testing_images, testing_labels);

Removed:
    // Create the same network as in dnn_mnist_ex, but use an inception layer instead of
    // a convolution in the middle.
    using net_type = loss_multiclass_log<
                fc<10,
                relu<fc<84,
                relu<fc<120,
                max_pool<2,2,2,2,relu<inception<
                max_pool<2,2,2,2,relu<con<6,5,5,1,1,
                input<matrix<unsigned char>>
                >>>>>>>>>>>>;

    // Create a network as defined above.  This network will produce 10 outputs
    // because that's how we defined net_type.  However, fc layers can have the
    // number of outputs they produce changed at runtime.
    // The rest of the sample is identical to dnn_mnist_ex.

Added:
    // Create a network of the predefined type.
    net_type net;

    // The following training process is the same as in the dnn_mnist_ex sample.
    // And then train it using the MNIST data.  The code below uses mini-batch stochastic
    // gradient descent with an initial learning rate of 0.01 to accomplish this.
    dnn_trainer<net_type> trainer(net);

@@ -80,12 +85,12 @@ int main(int argc, char** argv) try
     // from scratch. This is because, when the program restarts, this call to
     // set_synchronization_file() will automatically reload the settings from mnist_sync if
     // the file exists.
-    trainer.set_synchronization_file("mnist_sync", std::chrono::seconds(20));
+    trainer.set_synchronization_file("inception_sync", std::chrono::seconds(20));
     // Finally, this line begins training. By default, it runs SGD with our specified
     // learning rate until the loss stops decreasing. Then it reduces the learning rate by
     // a factor of 10 and continues running until the loss stops decreasing again. It will
     // keep doing this until the learning rate has dropped below the min learning rate
     // defined above or the maximum number of epochs has been executed (defaulted to 10000).
     trainer.train(training_images, training_labels);

     // At this point our net object should have learned how to classify MNIST images. But

@@ -96,7 +101,7 @@ int main(int argc, char** argv) try
     // about that kind of transient data so that our file will be smaller. We do this by
     // "cleaning" the network before saving it.
     net.clean();
-    serialize("mnist_network.dat") << net;
+    serialize("mnist_network_inception.dat") << net;

     // Now if we later wanted to recall the network from disk we can simply say:
     // deserialize("mnist_network.dat") >> net;