"docs/git@developer.sourcefind.cn:renzhc/diffusers_dcu.git" did not exist on "82be58c51272dcc7ebd5cbf8f48d444e3df96a1a"
Unverified Commit bf427f56 authored by Juha Reunanen's avatar Juha Reunanen Committed by GitHub
Browse files

Problem: `Error while calling cudnnGetConvolutionForwardWorkspaceSize(...)` — `CUDNN_STATUS_NOT_SUPPORTED` (#2532)

Problem: `Error while calling cudnnGetConvolutionForwardWorkspaceSize( context(), descriptor(data), (const cudnnFilterDescriptor_t)filter_handle, (const cudnnConvolutionDescriptor_t)conv_handle, descriptor(dest_desc), (cudnnConvolutionFwdAlgo_t)forward_algo, &forward_workspace_size_in_bytes) in file C:\a\2\s\3rdparty\dlib\dlib\cuda\cudnn_dlibapi.cpp:1029. code: 9, reason: CUDNN_STATUS_NOT_SUPPORTED` (#2532)

Solution: when this error occurs, select the best algorithms again — but this time bypassing the cache, so a fresh (supported) choice replaces the stale cached one.
parent 5f7e19b7
...@@ -787,10 +787,11 @@ namespace dlib ...@@ -787,10 +787,11 @@ namespace dlib
void tensor_conv:: void tensor_conv::
select_best_algorithms ( select_best_algorithms (
const tensor& data, const tensor& data,
const tensor_descriptor& dest_desc const tensor_descriptor& dest_desc,
allow_cache_use allow_cache_use
) )
{ {
// Calling the cuDNN "find the best algorithm" functions are really slow. So we keep a // Calling the cuDNN "find the best algorithm" functions is really slow. So we keep a
// cache that tells us what method was best for a particular configuration. // cache that tells us what method was best for a particular configuration.
thread_local std::map<std::tuple<int,int,int,int,long,long>, thread_local std::map<std::tuple<int,int,int,int,long,long>,
std::tuple<int,int,int>> config_to_algo_cache; std::tuple<int,int,int>> config_to_algo_cache;
...@@ -799,7 +800,7 @@ namespace dlib ...@@ -799,7 +800,7 @@ namespace dlib
// the cache. // the cache.
const auto cache_key = std::make_tuple(stride_y, stride_x, padding_y, padding_x, filters_nr, filters_nc); const auto cache_key = std::make_tuple(stride_y, stride_x, padding_y, padding_x, filters_nr, filters_nc);
const auto iter = config_to_algo_cache.find(cache_key); const auto iter = config_to_algo_cache.find(cache_key);
if (iter != config_to_algo_cache.end()) if (iter != config_to_algo_cache.end() && allow_cache_use == allow_cache_use::yes)
{ {
std::tie(forward_algo, backward_data_algo, backward_filters_algo) = iter->second; std::tie(forward_algo, backward_data_algo, backward_filters_algo) = iter->second;
return; return;
...@@ -933,6 +934,40 @@ namespace dlib ...@@ -933,6 +934,40 @@ namespace dlib
config_to_algo_cache[cache_key] = std::make_tuple(forward_algo, backward_data_algo, backward_filters_algo); config_to_algo_cache[cache_key] = std::make_tuple(forward_algo, backward_data_algo, backward_filters_algo);
} }
void tensor_conv::
update_convolution_data_workspace_sizes(
    const tensor& data,
    const tensor_descriptor& dest_desc
)
{
    // Asks cuDNN how much scratch workspace each of the three convolution passes
    // (forward, backward-data, backward-filters) needs for the algorithms currently
    // stored in forward_algo / backward_data_algo / backward_filters_algo, and
    // writes the answers into the corresponding *_workspace_size_in_bytes members.
    //
    // data      - input tensor whose descriptor defines the convolution's source shape.
    // dest_desc - descriptor of the convolution output tensor.
    //
    // NOTE(review): CHECK_CUDNN presumably throws dlib::cudnn_error on a non-success
    // status (callers catch that type) — confirm against the macro's definition.
    // cuDNN can return CUDNN_STATUS_NOT_SUPPORTED here when a cached algorithm
    // choice doesn't apply to the current configuration, which is why callers may
    // retry after re-selecting algorithms with the cache bypassed.

    // Workspace needed to run the forward convolution with forward_algo.
    CHECK_CUDNN(cudnnGetConvolutionForwardWorkspaceSize(
        context(),
        descriptor(data),
        (const cudnnFilterDescriptor_t)filter_handle,
        (const cudnnConvolutionDescriptor_t)conv_handle,
        descriptor(dest_desc),
        (cudnnConvolutionFwdAlgo_t)forward_algo,
        &forward_workspace_size_in_bytes));

    // Workspace needed to back-propagate gradients to the input data
    // with backward_data_algo.
    CHECK_CUDNN(cudnnGetConvolutionBackwardDataWorkspaceSize(
        context(),
        (const cudnnFilterDescriptor_t)filter_handle,
        descriptor(dest_desc),
        (const cudnnConvolutionDescriptor_t)conv_handle,
        descriptor(data),
        (cudnnConvolutionBwdDataAlgo_t)backward_data_algo,
        &backward_data_workspace_size_in_bytes));

    // Workspace needed to compute the filter gradients with backward_filters_algo.
    CHECK_CUDNN(cudnnGetConvolutionBackwardFilterWorkspaceSize(
        context(),
        descriptor(data),
        descriptor(dest_desc),
        (const cudnnConvolutionDescriptor_t)conv_handle,
        (const cudnnFilterDescriptor_t)filter_handle,
        (cudnnConvolutionBwdFilterAlgo_t)backward_filters_algo,
        &backward_filters_workspace_size_in_bytes));
}
void tensor_conv:: void tensor_conv::
setup( setup(
const tensor& data, const tensor& data,
...@@ -1021,36 +1056,18 @@ namespace dlib ...@@ -1021,36 +1056,18 @@ namespace dlib
tensor_descriptor dest_desc; tensor_descriptor dest_desc;
dest_desc.set_size(out_num_samples,out_k,out_nr,out_nc); dest_desc.set_size(out_num_samples,out_k,out_nr,out_nc);
select_best_algorithms(data, dest_desc); try
{
CHECK_CUDNN(cudnnGetConvolutionForwardWorkspaceSize( select_best_algorithms(data, dest_desc, allow_cache_use::yes);
context(), update_convolution_data_workspace_sizes(data, dest_desc);
descriptor(data), }
(const cudnnFilterDescriptor_t)filter_handle, catch (dlib::cudnn_error&)
(const cudnnConvolutionDescriptor_t)conv_handle, {
descriptor(dest_desc), // Sometimes the values stored in `config_to_algo_cache` do not quite work -
(cudnnConvolutionFwdAlgo_t)forward_algo, // so let's get a fresh estimate, instead of using a cached value.
&forward_workspace_size_in_bytes)); select_best_algorithms(data, dest_desc, allow_cache_use::no);
update_convolution_data_workspace_sizes(data, dest_desc);
}
CHECK_CUDNN(cudnnGetConvolutionBackwardDataWorkspaceSize(
context(),
(const cudnnFilterDescriptor_t)filter_handle,
descriptor(dest_desc),
(const cudnnConvolutionDescriptor_t)conv_handle,
descriptor(data),
(cudnnConvolutionBwdDataAlgo_t)backward_data_algo,
&backward_data_workspace_size_in_bytes));
CHECK_CUDNN(cudnnGetConvolutionBackwardFilterWorkspaceSize(
context(),
descriptor(data),
descriptor(dest_desc),
(const cudnnConvolutionDescriptor_t)conv_handle,
(const cudnnFilterDescriptor_t)filter_handle,
(cudnnConvolutionBwdFilterAlgo_t)backward_filters_algo,
&backward_filters_workspace_size_in_bytes));
} }
catch(...) catch(...)
{ {
......
...@@ -254,15 +254,20 @@ namespace dlib ...@@ -254,15 +254,20 @@ namespace dlib
int out_nr; int out_nr;
int out_nc; int out_nc;
enum class allow_cache_use { no, yes };
// sets the three _algo fields. // sets the three _algo fields.
void select_best_algorithms(const tensor& data, const tensor_descriptor& dest_desc); void select_best_algorithms(const tensor& data, const tensor_descriptor& dest_desc, allow_cache_use allow_cache_use);
int forward_algo; int forward_algo;
int backward_data_algo; int backward_data_algo;
int backward_filters_algo; int backward_filters_algo;
// sets the three _workspace_size_in_bytes fields.
void update_convolution_data_workspace_sizes(const tensor& data, const tensor_descriptor& dest_desc);
size_t forward_workspace_size_in_bytes; size_t forward_workspace_size_in_bytes;
size_t backward_data_workspace_size_in_bytes; size_t backward_data_workspace_size_in_bytes;
size_t backward_filters_workspace_size_in_bytes; size_t backward_filters_workspace_size_in_bytes;
cuda_data_void_ptr forward_workspace; cuda_data_void_ptr forward_workspace;
cuda_data_void_ptr backward_data_workspace; cuda_data_void_ptr backward_data_workspace;
cuda_data_void_ptr backward_filters_workspace; cuda_data_void_ptr backward_filters_workspace;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment