Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
dlib
Commits
67170338
Commit
67170338
authored
Apr 02, 2017
by
Davis King
Browse files
Added a version of tt::affine_transform() that operates on a sub-rectangle
rather than the entire tensor.
parent
e0a14376
Changes
7
Show whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
204 additions
and
0 deletions
+204
-0
dlib/dnn/cpu_dlib.cpp
dlib/dnn/cpu_dlib.cpp
+40
-0
dlib/dnn/cpu_dlib.h
dlib/dnn/cpu_dlib.h
+14
-0
dlib/dnn/cuda_dlib.cu
dlib/dnn/cuda_dlib.cu
+51
-0
dlib/dnn/cuda_dlib.h
dlib/dnn/cuda_dlib.h
+12
-0
dlib/dnn/tensor_tools.cpp
dlib/dnn/tensor_tools.cpp
+18
-0
dlib/dnn/tensor_tools.h
dlib/dnn/tensor_tools.h
+29
-0
dlib/test/dnn.cpp
dlib/test/dnn.cpp
+40
-0
No files found.
dlib/dnn/cpu_dlib.cpp
View file @
67170338
...
...
@@ -486,6 +486,46 @@ namespace dlib
}
}
// ----------------------------------------------------------------------------------------
// CPU implementation of the rectangle-restricted affine transform.
//
// Each tensor is addressed as a 2D grid with num_samples() rows and
// size()/num_samples() columns.  For every (row, column) position inside rect
// this writes
//     dest = src1*A + src2*B + src3*C
// and leaves every element of dest outside rect untouched.
//
// All four tensors must have the same size() and num_samples(), and rect must
// lie entirely inside the grid described above (checked by DLIB_CASSERT).
void affine_transform(
    const rectangle& rect,
    tensor& dest,
    const tensor& src1,
    const tensor& src2,
    const tensor& src3,
    float A,
    float B,
    float C
)
{
    DLIB_CASSERT(dest.size() == src1.size());
    DLIB_CASSERT(dest.size() == src2.size());
    DLIB_CASSERT(dest.size() == src3.size());
    DLIB_CASSERT(dest.num_samples() == src1.num_samples());
    DLIB_CASSERT(dest.num_samples() == src2.num_samples());
    DLIB_CASSERT(dest.num_samples() == src3.num_samples());
    DLIB_CASSERT(rectangle(0, 0, dest.size() / dest.num_samples() - 1, dest.num_samples() - 1).contains(rect));

    auto out = dest.host();
    auto in1 = src1.host();
    auto in2 = src2.host();
    auto in3 = src3.host();

    // Number of elements per sample, i.e. the length of one row of the grid.
    const auto row_length = dest.size() / dest.num_samples();

    for (long row = rect.top(); row <= rect.bottom(); ++row)
    {
        // Flat offset of the first element of this row.
        const auto row_start = row * row_length;
        for (long col = rect.left(); col <= rect.right(); ++col)
        {
            const auto pos = row_start + col;
            out[pos] = in1[pos] * A + in2[pos] * B + in3[pos] * C;
        }
    }
}
// -----------------------------------------------------------------------------------
void
compute_adam_update
(
...
...
dlib/dnn/cpu_dlib.h
View file @
67170338
...
...
@@ -7,6 +7,7 @@
// and cudnn_dlibapi.h
#include "tensor.h"
#include "../geometry/rectangle.h"
namespace
dlib
{
...
...
@@ -111,6 +112,19 @@ namespace dlib
const
tensor
&
B
);
// -----------------------------------------------------------------------------------
// CPU version of the affine transform that only touches the elements of dest
// that fall inside rect: dest = A*src1 + B*src2 + C*src3 over that sub-rectangle.
void affine_transform(
    const rectangle& rect,
    tensor& dest,
    const tensor& src1,
    const tensor& src2,
    const tensor& src3,
    float A,
    float B,
    float C
);
// -----------------------------------------------------------------------------------
void
compute_adam_update
(
...
...
dlib/dnn/cuda_dlib.cu
View file @
67170338
...
...
@@ -575,6 +575,57 @@ namespace dlib
launch_kernel
(
_cuda_affine_transform1_0
,
max_jobs
(
dest
.
size
()),
dest
.
device
(),
src
.
device
(),
src
.
size
(),
A
);
}
// ----------------------------------------------------------------------------------------
// Kernel for the rectangle-restricted affine transform.
//
// The n elements of the rectangle are enumerated by a linear index i taken
// from grid_stride_range(0, n).  i is split into a (row, column) position
// inside the rectangle using rect_nc (the rectangle's width), and then mapped
// to a flat index into the full buffers using total_nc (the row length of the
// full buffers) plus start_idx (the flat index of the rectangle's first
// element).  Only those positions of d are written.
__global__ void _cuda_affine_transform_rect(
    float* d,
    const float* s1,
    const float* s2,
    const float* s3,
    float A,
    float B,
    float C,
    size_t start_idx,
    size_t n,
    size_t rect_nc,
    size_t total_nc
)
{
    for (auto i : grid_stride_range(0, n))
    {
        const size_t rect_row = i / rect_nc;
        const size_t rect_col = i % rect_nc;
        const size_t pos = start_idx + rect_row * total_nc + rect_col;
        d[pos] = A * s1[pos] + B * s2[pos] + C * s3[pos];
    }
}
// CUDA implementation of the rectangle-restricted affine transform.
//
// Validates that all tensors have matching size() and num_samples() and that
// rect lies inside the num_samples() x (size()/num_samples()) grid, then
// launches _cuda_affine_transform_rect over rect.area() elements so that
// dest = A*src1 + B*src2 + C*src3 is written only inside rect.
void affine_transform(
    const rectangle& rect,
    tensor& dest,
    const tensor& src1,
    const tensor& src2,
    const tensor& src3,
    float A,
    float B,
    float C
)
{
    DLIB_CASSERT(dest.size() == src1.size());
    DLIB_CASSERT(dest.size() == src2.size());
    DLIB_CASSERT(dest.size() == src3.size());
    DLIB_CASSERT(dest.num_samples() == src1.num_samples());
    DLIB_CASSERT(dest.num_samples() == src2.num_samples());
    DLIB_CASSERT(dest.num_samples() == src3.num_samples());
    DLIB_CASSERT(rectangle(0, 0, dest.size() / dest.num_samples() - 1, dest.num_samples() - 1).contains(rect));

    // Row length of the full tensors when viewed as a 2D grid.
    const auto row_length = dest.size() / dest.num_samples();
    // Flat index of the rectangle's top-left element.
    const auto first_index = rect.left() + rect.top() * row_length;
    // Total number of elements the kernel has to process.
    const auto num_elements = rect.area();

    launch_kernel(_cuda_affine_transform_rect, max_jobs(num_elements),
                  dest.device(), src1.device(), src2.device(), src3.device(),
                  A, B, C,
                  first_index,
                  num_elements,
                  rect.width(),
                  row_length);
}
// ----------------------------------------------------------------------------------------
__global__
void
_cuda_affine_transform4
(
float
*
d
,
const
float
*
s1
,
const
float
*
s2
,
size_t
n
,
float
A
,
float
B
,
float
C
)
...
...
dlib/dnn/cuda_dlib.h
View file @
67170338
...
...
@@ -5,6 +5,7 @@
#include "tensor.h"
#include "../geometry/rectangle.h"
namespace
dlib
{
...
...
@@ -230,6 +231,17 @@ namespace dlib
const
float
C
);
// CUDA version of the affine transform that only touches the elements of dest
// that fall inside rect: dest = A*src1 + B*src2 + C*src3 over that sub-rectangle.
void affine_transform(
    const rectangle& rect,
    tensor& dest,
    const tensor& src1,
    const tensor& src2,
    const tensor& src3,
    float A,
    float B,
    float C
);
// Note that this function isn't in the tt:: namespace because add_scaled() is
// called by cuda::add() so we don't need a tt:: version of add_scaled().
void
add_scaled
(
...
...
dlib/dnn/tensor_tools.cpp
View file @
67170338
...
...
@@ -353,6 +353,24 @@ namespace dlib { namespace tt
#endif
}
// Rectangle-restricted affine transform.  Forwards all arguments unchanged to
// cuda::affine_transform() when DLIB_USE_CUDA is defined and to
// cpu::affine_transform() otherwise.
void affine_transform(
    const rectangle& rect,
    tensor& dest,
    const tensor& src1,
    const tensor& src2,
    const tensor& src3,
    float A,
    float B,
    float C
)
{
#ifdef DLIB_USE_CUDA
    cuda::affine_transform(rect, dest, src1, src2, src3, A, B, C);
#else
    cpu::affine_transform(rect, dest, src1, src2, src3, A, B, C);
#endif
}
void
affine_transform
(
tensor
&
dest
,
const
tensor
&
src1
,
...
...
dlib/dnn/tensor_tools.h
View file @
67170338
...
...
@@ -11,6 +11,7 @@
#include "cuda_dlib.h"
#include "../rand.h"
#include <memory>
#include "../geometry/rectangle.h"
namespace
dlib
{
...
...
@@ -356,6 +357,34 @@ namespace dlib { namespace tt
- #dest.host()[i] == A*src1.host()[i] + B*src2.host()[i] + C*src3.host()[i]
!*/
void affine_transform(
    const rectangle& rect,
    tensor& dest,
    const tensor& src1,
    const tensor& src2,
    const tensor& src3,
    float A,
    float B,
    float C
);
/*!
    requires
        - dest.size()==src1.size()
        - dest.size()==src2.size()
        - dest.size()==src3.size()
        - dest.num_samples()==src1.num_samples()
        - dest.num_samples()==src2.num_samples()
        - dest.num_samples()==src3.num_samples()
        - get_rect(mat(dest)).contains(rect) == true
          (i.e. rect must lie entirely inside dest)
    ensures
        - Behaves like affine_transform(dest,src1,src2,src3,A,B,C,0), but only the
          elements inside the sub-rectangle rect are processed.  That is, this
          function is equivalent to:
            set_subm(dest,rect) = A*subm(mat(src1),rect) + B*subm(mat(src2),rect) + C*subm(mat(src3),rect)
!*/
// ----------------------------------------------------------------------------------------
void
affine_transform
(
...
...
dlib/test/dnn.cpp
View file @
67170338
...
...
@@ -698,6 +698,45 @@ namespace
#ifdef DLIB_USE_CUDA
void
test_affine_rect
()
{
dlib
::
rand
rnd
;
for
(
int
iter
=
0
;
iter
<
20
;
++
iter
)
{
long
nr
=
1
+
rnd
.
get_random_32bit_number
()
%
10
;
long
nc
=
1
+
rnd
.
get_random_32bit_number
()
%
10
;
resizable_tensor
dest1
(
nr
,
nc
),
dest2
(
nr
,
nc
),
src1
(
nr
,
nc
),
src2
(
nr
,
nc
),
src3
(
nr
,
nc
);
matrix
<
float
>
dest3
;
dest1
=
1
;
dest2
=
1
;
dest3
=
mat
(
dest1
);
src1
=
2
;
src2
=
3
;
src3
=
4
;
point
p1
(
rnd
.
get_random_32bit_number
()
%
nc
,
rnd
.
get_random_32bit_number
()
%
nr
);
point
p2
(
rnd
.
get_random_32bit_number
()
%
nc
,
rnd
.
get_random_32bit_number
()
%
nr
);
rectangle
rect
(
p1
,
p2
);
cuda
::
affine_transform
(
rect
,
dest1
,
src1
,
src2
,
src3
,
2
,
3
,
4
);
cpu
::
affine_transform
(
rect
,
dest2
,
src1
,
src2
,
src3
,
2
,
3
,
4
);
DLIB_TEST
(
mat
(
dest1
)
==
mat
(
dest2
));
set_subm
(
dest3
,
rect
)
=
2
*
subm
(
mat
(
src1
),
rect
)
+
3
*
subm
(
mat
(
src2
),
rect
)
+
4
*
subm
(
mat
(
src3
),
rect
);
DLIB_TEST
(
dest3
==
mat
(
dest1
));
dest1
=
1
;
tt
::
affine_transform
(
rect
,
dest1
,
src1
,
src2
,
src3
,
2
,
3
,
4
);
DLIB_TEST
(
dest3
==
mat
(
dest1
));
}
}
void
test_conv
()
{
cuda
::
tensor_conv
conv1
;
...
...
@@ -1865,6 +1904,7 @@ namespace
test_tagging
();
#ifdef DLIB_USE_CUDA
test_affine_rect
();
test_conv
();
test_more_ops2
();
test_more_ops
(
1
,
1
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment