Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
dlib
Commits
cc5a62cd
Commit
cc5a62cd
authored
Jan 24, 2016
by
Davis King
Browse files
Made affine_transform() routines a little faster.
parent
919cbd11
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
24 additions
and
2 deletions
+24
-2
dlib/dnn/cuda_dlib.cu
dlib/dnn/cuda_dlib.cu
+24
-2
No files found.
dlib/dnn/cuda_dlib.cu
View file @
cc5a62cd
...
...
@@ -237,6 +237,14 @@ namespace dlib
}
}
// Device kernel for the bias-free form of the one-source affine transform:
// stores A*s[i] into d[i].  The host-side affine_transform() launches this
// kernel when its B argument equals 0.
//
//   d - output array; written at every index the loop visits
//   s - input array; read at the same indices
//   n - upper bound passed to grid_stride_range(0, n)
//   A - multiplier applied to each input element
//
// NOTE(review): grid_stride_range() is defined outside this view; presumably
// it spreads the indices [0, n) over the threads of the launch -- confirm
// against its definition.
__global__ void _cuda_affine_transform1_0(float* d, const float* s, size_t n, float A)
{
    for (const auto idx : grid_stride_range(0, n))
    {
        const float input = s[idx];
        d[idx] = A * input;
    }
}
void
affine_transform
(
tensor
&
dest
,
const
tensor
&
src
,
...
...
@@ -245,7 +253,10 @@ namespace dlib
)
{
DLIB_CASSERT
(
dest
.
size
()
==
src
.
size
(),
""
);
if
(
B
!=
0
)
launch_kernel
(
_cuda_affine_transform1
,
max_jobs
(
dest
.
size
()),
dest
.
device
(),
src
.
device
(),
src
.
size
(),
A
,
B
);
else
launch_kernel
(
_cuda_affine_transform1_0
,
max_jobs
(
dest
.
size
()),
dest
.
device
(),
src
.
device
(),
src
.
size
(),
A
);
}
// ----------------------------------------------------------------------------------------
...
...
@@ -258,6 +269,14 @@ namespace dlib
}
}
// Device kernel for the bias-free form of the two-source affine transform:
// stores A*s1[i] + B*s2[i] into d[i].  The host-side affine_transform()
// launches this kernel when its C argument equals 0.
//
//   d  - output array; written at every index the loop visits
//   s1 - first input array, scaled by A
//   s2 - second input array, scaled by B
//   n  - upper bound passed to grid_stride_range(0, n)
//   A  - multiplier for elements of s1
//   B  - multiplier for elements of s2
//
// NOTE(review): grid_stride_range() is defined outside this view; presumably
// it spreads the indices [0, n) over the threads of the launch -- confirm
// against its definition.
__global__ void _cuda_affine_transform4_0(
    float* d,
    const float* s1,
    const float* s2,
    size_t n,
    float A,
    float B
)
{
    for (const auto idx : grid_stride_range(0, n))
    {
        // Both inputs are read before the store; the sum keeps the original
        // operand order.
        const float first = s1[idx];
        const float second = s2[idx];
        d[idx] = A*first + B*second;
    }
}
void
affine_transform
(
tensor
&
dest
,
const
tensor
&
src1
,
...
...
@@ -269,7 +288,10 @@ namespace dlib
{
DLIB_CASSERT
(
dest
.
size
()
==
src1
.
size
(),
""
);
DLIB_CASSERT
(
dest
.
size
()
==
src2
.
size
(),
""
);
if
(
C
!=
0
)
launch_kernel
(
_cuda_affine_transform4
,
max_jobs
(
dest
.
size
()),
dest
.
device
(),
src1
.
device
(),
src2
.
device
(),
dest
.
size
(),
A
,
B
,
C
);
else
launch_kernel
(
_cuda_affine_transform4_0
,
max_jobs
(
dest
.
size
()),
dest
.
device
(),
src1
.
device
(),
src2
.
device
(),
dest
.
size
(),
A
,
B
);
}
// ----------------------------------------------------------------------------------------
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment