Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
dlib
Commits
c3a74c7c
Commit
c3a74c7c
authored
May 21, 2016
by
Davis King
Browse files
Added affine_transform_range() and another overload of affine_transform()
parent
15b2d7b5
Changes
6
Show whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
164 additions
and
1 deletion
+164
-1
dlib/dnn/cpu_dlib.cpp
dlib/dnn/cpu_dlib.cpp
+24
-0
dlib/dnn/cpu_dlib.h
dlib/dnn/cpu_dlib.h
+12
-0
dlib/dnn/cuda_dlib.cu
dlib/dnn/cuda_dlib.cu
+34
-0
dlib/dnn/cuda_dlib.h
dlib/dnn/cuda_dlib.h
+12
-0
dlib/dnn/tensor_tools.cpp
dlib/dnn/tensor_tools.cpp
+36
-0
dlib/dnn/tensor_tools.h
dlib/dnn/tensor_tools.h
+46
-1
No files found.
dlib/dnn/cpu_dlib.cpp
View file @
c3a74c7c
...
...
@@ -385,6 +385,30 @@ namespace dlib
d
[
i
]
=
A
*
s1
[
i
]
+
B
*
s2
[
i
]
+
C
*
s3
[
i
]
+
D
;
}
// Writes dest[i] = A*src1[i] + B*src2[i] + C*src3[i] for every index i in the
// half-open range [begin, end), working on the buffers returned by host().
// The loop below assigns no other element of dest.
//
// Preconditions (each checked with DLIB_CASSERT):
//   - dest, src1, src2 and src3 all report the same size()
//   - begin <= end <= dest.size()
void affine_transform_range(
    size_t begin,
    size_t end,
    tensor& dest,
    const tensor& src1,
    const tensor& src2,
    const tensor& src3,
    const float A,
    const float B,
    const float C
)
{
    DLIB_CASSERT(dest.size()==src1.size(),"");
    DLIB_CASSERT(dest.size()==src2.size(),"");
    DLIB_CASSERT(dest.size()==src3.size(),"");
    DLIB_CASSERT(begin <= end && end <= dest.size(),"");

    // Fetch the host buffers once, destination first, before entering the loop.
    const auto out = dest.host();
    const auto in1 = src1.host();
    const auto in2 = src2.host();
    const auto in3 = src3.host();

    size_t idx = begin;
    while (idx < end)
    {
        out[idx] = A*in1[idx] + B*in2[idx] + C*in3[idx];
        ++idx;
    }
}
// -----------------------------------------------------------------------------------
void
affine_transform
(
...
...
dlib/dnn/cpu_dlib.h
View file @
c3a74c7c
...
...
@@ -81,6 +81,18 @@ namespace dlib
const
float
D
);
// Range-limited affine combination of three tensors using their host() buffers.
// For each index i in the half-open range [begin, end) the definition in
// cpu_dlib.cpp assigns
//     dest.host()[i] = A*src1.host()[i] + B*src2.host()[i] + C*src3.host()[i]
// The definition asserts that dest, src1, src2 and src3 have equal size() and
// that begin <= end <= dest.size().
void
affine_transform_range
(
size_t
begin
,
size_t
end
,
tensor
&
dest
,
const
tensor
&
src1
,
const
tensor
&
src2
,
const
tensor
&
src3
,
const
float
A
,
const
float
B
,
const
float
C
);
// -----------------------------------------------------------------------------------
void
affine_transform
(
...
...
dlib/dnn/cuda_dlib.cu
View file @
c3a74c7c
...
...
@@ -504,6 +504,40 @@ namespace dlib
src2
.
device
(),
src3
.
device
(),
dest
.
size
(),
A
,
B
,
C
,
D
);
}
// ----------------------------------------------------------------------------------------
// CUDA kernel.  For every index produced by grid_stride_range(begin, end) it stores
//     d[idx] = A*s1[idx] + B*s2[idx] + C*s3[idx]
// The pointers are indexed with those values directly, so they must address the
// start of each buffer rather than the start of the sub-range.
__global__ void _cuda_affine_transform_range(
    float* d,
    const float* s1,
    const float* s2,
    const float* s3,
    size_t begin,
    size_t end,
    float A,
    float B,
    float C
)
{
    for (auto idx : grid_stride_range(begin, end))
        d[idx] = A*s1[idx] + B*s2[idx] + C*s3[idx];
}
// Host-side wrapper that launches _cuda_affine_transform_range so that, for the
// indices the kernel visits between begin and end,
//     dest[i] = A*src1[i] + B*src2[i] + C*src3[i]
// is written into the buffer returned by dest.device().
//
// Preconditions (each checked with DLIB_CASSERT):
//   - dest, src1, src2 and src3 all report the same size()
//   - begin <= end <= dest.size()
void affine_transform_range(
    size_t begin,
    size_t end,
    tensor& dest,
    const tensor& src1,
    const tensor& src2,
    const tensor& src3,
    const float A,
    const float B,
    const float C
)
{
    DLIB_CASSERT(dest.size()==src1.size(),"");
    DLIB_CASSERT(dest.size()==src2.size(),"");
    DLIB_CASSERT(dest.size()==src3.size(),"");
    DLIB_CASSERT(begin <= end && end <= dest.size(),"");

    // Whole-tensor device pointers: the kernel indexes them with the absolute
    // positions it derives from begin and end, so no offset is applied here.
    const auto out = dest.device();
    const auto in1 = src1.device();
    const auto in2 = src2.device();
    const auto in3 = src3.device();

    // The job count handed to launch_kernel is the number of elements in the range.
    launch_kernel(_cuda_affine_transform_range, max_jobs(end-begin),
                  out, in1, in2, in3, begin, end, A, B, C);
}
// -----------------------------------------------------------------------------------
__global__
void
_cuda_affine_transform2
(
float
*
d
,
const
float
*
s
,
size_t
n
,
const
float
*
A
,
const
float
*
B
)
...
...
dlib/dnn/cuda_dlib.h
View file @
c3a74c7c
...
...
@@ -164,6 +164,18 @@ namespace dlib
const
float
D
);
// Range-limited affine combination of three tensors using their device() buffers.
// The definition in cuda_dlib.cu launches the _cuda_affine_transform_range kernel,
// which stores
//     dest[i] = A*src1[i] + B*src2[i] + C*src3[i]
// for the indices it derives from begin and end.
// The definition asserts that dest, src1, src2 and src3 have equal size() and
// that begin <= end <= dest.size().
void
affine_transform_range
(
size_t
begin
,
size_t
end
,
tensor
&
dest
,
const
tensor
&
src1
,
const
tensor
&
src2
,
const
tensor
&
src3
,
const
float
A
,
const
float
B
,
const
float
C
);
// Note that this function isn't in the tt:: namespace because add_scaled() is
// called by cuda::add() so we don't need a tt:: version of add_scaled().
void
add_scaled
(
...
...
dlib/dnn/tensor_tools.cpp
View file @
c3a74c7c
...
...
@@ -240,6 +240,42 @@ namespace dlib { namespace tt
#endif
}
// Backend dispatcher: forwards every argument unchanged to
// cuda::affine_transform_range when DLIB_USE_CUDA is defined, and to
// cpu::affine_transform_range otherwise.
void affine_transform_range(
    size_t begin,
    size_t end,
    tensor& dest,
    const tensor& src1,
    const tensor& src2,
    const tensor& src3,
    const float A,
    const float B,
    const float C
)
{
#ifndef DLIB_USE_CUDA
    cpu::affine_transform_range(begin, end, dest, src1, src2, src3, A, B, C);
#else
    cuda::affine_transform_range(begin, end, dest, src1, src2, src3, A, B, C);
#endif
}
// Three-source overload without a constant offset term.  It is implemented as a
// range transform over the whole tensor, i.e. the range [0, dest.size()), and is
// dispatched to the cuda:: backend when DLIB_USE_CUDA is defined and to the cpu::
// backend otherwise.
void affine_transform(
    tensor& dest,
    const tensor& src1,
    const tensor& src2,
    const tensor& src3,
    const float A,
    const float B,
    const float C
)
{
#ifndef DLIB_USE_CUDA
    cpu::affine_transform_range(0, dest.size(), dest, src1, src2, src3, A, B, C);
#else
    cuda::affine_transform_range(0, dest.size(), dest, src1, src2, src3, A, B, C);
#endif
}
// ----------------------------------------------------------------------------------------
void
affine_transform
(
...
...
dlib/dnn/tensor_tools.h
View file @
c3a74c7c
...
...
@@ -229,13 +229,58 @@ namespace dlib { namespace tt
const
float
D
);
/*!
requires
- dest.size()==src1.size()
- dest.size()==src2.size()
- dest.size()==src3.size()
ensures
- #dest == A*src1 + B*src2 + C*src3 + D
!*/
void
affine_transform
(
tensor
&
dest
,
const
tensor
&
src1
,
const
tensor
&
src2
,
const
tensor
&
src3
,
const
float
A
,
const
float
B
,
const
float
C
);
/*!
    requires
        - dest.size()==src1.size()
        - dest.size()==src2.size()
        - dest.size()==src3.size()
    ensures
        - #dest == A*src1 + B*src2 + C*src3
        - This overload has no constant offset term.  It is equivalent to calling
          affine_transform_range(0, dest.size(), dest, src1, src2, src3, A, B, C).
!*/
void
affine_transform_range
(
size_t
begin
,
size_t
end
,
tensor
&
dest
,
const
tensor
&
src1
,
const
tensor
&
src2
,
const
tensor
&
src3
,
const
float
A
,
const
float
B
,
const
float
C
);
/*!
    requires
        - dest.size()==src1.size()
        - dest.size()==src2.size()
        - dest.size()==src3.size()
        - begin <= end <= dest.size()
    ensures
        - This function operates much like
          affine_transform(dest,src1,src2,src3,A,B,C,0), except that it runs over
          only the half-open range [begin,end) rather than processing the entire
          tensor.  Specifically, it does this:
            - for i in the range [begin, end):
                - #dest.host()[i] == A*src1.host()[i] + B*src2.host()[i] + C*src3.host()[i]
        - Note the argument order: the range bounds begin and end come first,
          before the destination tensor.
!*/
// ----------------------------------------------------------------------------------------
void
affine_transform
(
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment