Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
dlib
Commits
fe803b56
Commit
fe803b56
authored
Jun 20, 2020
by
Davis King
Browse files
add support for cudnn 8.0
parent
f8887d8c
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
85 additions
and
0 deletions
+85
-0
dlib/cuda/cudnn_dlibapi.cpp
dlib/cuda/cudnn_dlibapi.cpp
+85
-0
No files found.
dlib/cuda/cudnn_dlibapi.cpp
View file @
fe803b56
...
@@ -751,6 +751,31 @@ namespace dlib
...
@@ -751,6 +751,31 @@ namespace dlib
backward_filters_workspace
.
reset
();
backward_filters_workspace
.
reset
();
}
}
// Given an array of cudnn algorithm performance results, like
// cudnnConvolutionFwdAlgoPerf_t, pick the best one to use.
//
// T must expose .status, .algo and .memory members.
//
// Requirements (enforced below):
//   - perf_results is not empty (DLIB_CASSERT).
//   - perf_results[0].status is passed through CHECK_CUDNN, so the first
//     entry is expected to describe a usable algorithm.
//
// Returns:
//   - perf_results[0].algo when dnn_prefer_fastest_algorithms() is true.
//     NOTE(review): this relies on the caller supplying results ordered
//     fastest-first -- confirm against the cuDNN Find* documentation.
//   - otherwise the .algo of the entry with CUDNN_STATUS_SUCCESS and the
//     smallest .memory value.  Ties keep the earliest such entry.
template <typename T>
decltype(std::declval<T>().algo) pick_best_algorithm(const std::vector<T>& perf_results)
{
    DLIB_CASSERT(!perf_results.empty());
    CHECK_CUDNN(perf_results[0].status);
    if (dnn_prefer_fastest_algorithms())
        return perf_results[0].algo;

    // Otherwise we find the algorithm that has a good status and uses the least amount
    // of memory.
    size_t best_memory = std::numeric_limits<size_t>::max();
    // Seed with the first entry (its status was checked above) so that the
    // return value is never read uninitialized, even if no entry satisfies
    // the strict "memory < best_memory" comparison in the loop.
    decltype(std::declval<T>().algo) best_alg = perf_results[0].algo;
    for (auto&& perf : perf_results)
    {
        if (perf.status == CUDNN_STATUS_SUCCESS && perf.memory < best_memory)
        {
            best_memory = perf.memory;
            best_alg = perf.algo;
        }
    }
    return best_alg;
}
void
tensor_conv
::
void
tensor_conv
::
setup
(
setup
(
const
tensor
&
data
,
const
tensor
&
data
,
...
@@ -841,6 +866,25 @@ namespace dlib
...
@@ -841,6 +866,25 @@ namespace dlib
// Pick which forward algorithm we will use and allocate the necessary
// Pick which forward algorithm we will use and allocate the necessary
// workspace buffer.
// workspace buffer.
cudnnConvolutionFwdAlgo_t
forward_best_algo
;
cudnnConvolutionFwdAlgo_t
forward_best_algo
;
#if CUDNN_MAJOR >= 8
{
    // Query the upper bound on the number of forward algorithms so the
    // results buffer can be sized before running the search.
    int max_algo_count = 0;
    CHECK_CUDNN(cudnnGetConvolutionForwardAlgorithmMaxCount(context(), &max_algo_count));

    std::vector<cudnnConvolutionFwdAlgoPerf_t> candidates(max_algo_count);
    int returned_count = 0;
    CHECK_CUDNN(cudnnFindConvolutionForwardAlgorithm(
            context(),
            descriptor(data),
            (const cudnnFilterDescriptor_t)filter_handle,
            (const cudnnConvolutionDescriptor_t)conv_handle,
            descriptor(dest_desc),
            max_algo_count,
            &returned_count,
            candidates.data()));

    // Trim the buffer to the number of results actually reported, then let
    // pick_best_algorithm() choose among them.
    candidates.resize(returned_count);
    forward_best_algo = pick_best_algorithm(candidates);
}
#else
CHECK_CUDNN
(
cudnnGetConvolutionForwardAlgorithm
(
CHECK_CUDNN
(
cudnnGetConvolutionForwardAlgorithm
(
context
(),
context
(),
descriptor
(
data
),
descriptor
(
data
),
...
@@ -850,6 +894,7 @@ namespace dlib
...
@@ -850,6 +894,7 @@ namespace dlib
dnn_prefer_fastest_algorithms
()
?
CUDNN_CONVOLUTION_FWD_PREFER_FASTEST
:
CUDNN_CONVOLUTION_FWD_NO_WORKSPACE
,
dnn_prefer_fastest_algorithms
()
?
CUDNN_CONVOLUTION_FWD_PREFER_FASTEST
:
CUDNN_CONVOLUTION_FWD_NO_WORKSPACE
,
std
::
numeric_limits
<
size_t
>::
max
(),
std
::
numeric_limits
<
size_t
>::
max
(),
&
forward_best_algo
));
&
forward_best_algo
));
#endif
forward_algo
=
forward_best_algo
;
forward_algo
=
forward_best_algo
;
CHECK_CUDNN
(
cudnnGetConvolutionForwardWorkspaceSize
(
CHECK_CUDNN
(
cudnnGetConvolutionForwardWorkspaceSize
(
context
(),
context
(),
...
@@ -863,6 +908,25 @@ namespace dlib
...
@@ -863,6 +908,25 @@ namespace dlib
// Pick which backward data algorithm we will use and allocate the
// Pick which backward data algorithm we will use and allocate the
// necessary workspace buffer.
// necessary workspace buffer.
cudnnConvolutionBwdDataAlgo_t
backward_data_best_algo
;
cudnnConvolutionBwdDataAlgo_t
backward_data_best_algo
;
#if CUDNN_MAJOR >= 8
{
    // Query the upper bound on the number of backward *data* algorithms so
    // the results buffer can be sized.  This must be the BackwardData
    // variant: the buffer below is filled by
    // cudnnFindConvolutionBackwardDataAlgorithm(), so sizing it with the
    // BackwardFilter count would use the limit for a different algorithm
    // family.
    int num_possible_algorithms = 0;
    CHECK_CUDNN(cudnnGetConvolutionBackwardDataAlgorithmMaxCount(context(), &num_possible_algorithms));

    std::vector<cudnnConvolutionBwdDataAlgoPerf_t> perf_results(num_possible_algorithms);
    int num_algorithms = 0;
    CHECK_CUDNN(cudnnFindConvolutionBackwardDataAlgorithm(
            context(),
            (const cudnnFilterDescriptor_t)filter_handle,
            descriptor(dest_desc),
            (const cudnnConvolutionDescriptor_t)conv_handle,
            descriptor(data),
            num_possible_algorithms,
            &num_algorithms,
            perf_results.data()));

    // Keep only the entries that were actually reported before selecting.
    perf_results.resize(num_algorithms);
    backward_data_best_algo = pick_best_algorithm(perf_results);
}
#else
CHECK_CUDNN
(
cudnnGetConvolutionBackwardDataAlgorithm
(
CHECK_CUDNN
(
cudnnGetConvolutionBackwardDataAlgorithm
(
context
(),
context
(),
(
const
cudnnFilterDescriptor_t
)
filter_handle
,
(
const
cudnnFilterDescriptor_t
)
filter_handle
,
...
@@ -872,6 +936,7 @@ namespace dlib
...
@@ -872,6 +936,7 @@ namespace dlib
dnn_prefer_fastest_algorithms
()
?
CUDNN_CONVOLUTION_BWD_DATA_PREFER_FASTEST
:
CUDNN_CONVOLUTION_BWD_DATA_NO_WORKSPACE
,
dnn_prefer_fastest_algorithms
()
?
CUDNN_CONVOLUTION_BWD_DATA_PREFER_FASTEST
:
CUDNN_CONVOLUTION_BWD_DATA_NO_WORKSPACE
,
std
::
numeric_limits
<
size_t
>::
max
(),
std
::
numeric_limits
<
size_t
>::
max
(),
&
backward_data_best_algo
));
&
backward_data_best_algo
));
#endif
backward_data_algo
=
backward_data_best_algo
;
backward_data_algo
=
backward_data_best_algo
;
CHECK_CUDNN
(
cudnnGetConvolutionBackwardDataWorkspaceSize
(
CHECK_CUDNN
(
cudnnGetConvolutionBackwardDataWorkspaceSize
(
...
@@ -886,6 +951,25 @@ namespace dlib
...
@@ -886,6 +951,25 @@ namespace dlib
// Pick which backward filters algorithm we will use and allocate the
// Pick which backward filters algorithm we will use and allocate the
// necessary workspace buffer.
// necessary workspace buffer.
cudnnConvolutionBwdFilterAlgo_t
backward_filters_best_algo
;
cudnnConvolutionBwdFilterAlgo_t
backward_filters_best_algo
;
#if CUDNN_MAJOR >= 8
{
    // Query the upper bound on the number of backward filter algorithms so
    // the results buffer can be sized before running the search.
    int max_algo_count = 0;
    CHECK_CUDNN(cudnnGetConvolutionBackwardFilterAlgorithmMaxCount(context(), &max_algo_count));

    std::vector<cudnnConvolutionBwdFilterAlgoPerf_t> candidates(max_algo_count);
    int returned_count = 0;
    CHECK_CUDNN(cudnnFindConvolutionBackwardFilterAlgorithm(
            context(),
            descriptor(data),
            descriptor(dest_desc),
            (const cudnnConvolutionDescriptor_t)conv_handle,
            (const cudnnFilterDescriptor_t)filter_handle,
            max_algo_count,
            &returned_count,
            candidates.data()));

    // Trim the buffer to the number of results actually reported, then let
    // pick_best_algorithm() choose among them.
    candidates.resize(returned_count);
    backward_filters_best_algo = pick_best_algorithm(candidates);
}
#else
CHECK_CUDNN
(
cudnnGetConvolutionBackwardFilterAlgorithm
(
CHECK_CUDNN
(
cudnnGetConvolutionBackwardFilterAlgorithm
(
context
(),
context
(),
descriptor
(
data
),
descriptor
(
data
),
...
@@ -895,6 +979,7 @@ namespace dlib
...
@@ -895,6 +979,7 @@ namespace dlib
dnn_prefer_fastest_algorithms
()
?
CUDNN_CONVOLUTION_BWD_FILTER_PREFER_FASTEST
:
CUDNN_CONVOLUTION_BWD_FILTER_NO_WORKSPACE
,
dnn_prefer_fastest_algorithms
()
?
CUDNN_CONVOLUTION_BWD_FILTER_PREFER_FASTEST
:
CUDNN_CONVOLUTION_BWD_FILTER_NO_WORKSPACE
,
std
::
numeric_limits
<
size_t
>::
max
(),
std
::
numeric_limits
<
size_t
>::
max
(),
&
backward_filters_best_algo
));
&
backward_filters_best_algo
));
#endif
// cuDNN 5.1 has a bug that causes
// cuDNN 5.1 has a bug that causes
// cudnnGetConvolutionBackwardFilterAlgorithm() to pick the winograd
// cudnnGetConvolutionBackwardFilterAlgorithm() to pick the winograd
// algorithm even for cases where cuDNN doesn't support it, leading to
// algorithm even for cases where cuDNN doesn't support it, leading to
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment