"examples/pytorch/vscode:/vscode.git/clone" did not exist on "e01893972901e45f0cae3da6c90bb1fce6947568"
Unverified Commit bf427f56 authored by Juha Reunanen's avatar Juha Reunanen Committed by GitHub
Browse files

Problem: `Error while calling cudnnGetConvolutionForwardWorkspaceSize(...

Problem: `Error while calling cudnnGetConvolutionForwardWorkspaceSize( context(), descriptor(data), (const cudnnFilterDescriptor_t)filter_handle, (const cudnnConvolutionDescriptor_t)conv_handle, descriptor(dest_desc), (cudnnConvolutionFwdAlgo_t)forward_algo, &forward_workspace_size_in_bytes) in file C:\a\2\s\3rdparty\dlib\dlib\cuda\cudnn_dlibapi.cpp:1029. code: 9, reason: CUDNN_STATUS_NOT_SUPPORTED` (#2532)

Solution: when this happens, select the best algorithms again — but this time bypass the cache, so a fresh (non-cached) choice of algorithms is used.
parent 5f7e19b7
......@@ -787,10 +787,11 @@ namespace dlib
void tensor_conv::
select_best_algorithms (
const tensor& data,
const tensor_descriptor& dest_desc
const tensor_descriptor& dest_desc,
allow_cache_use allow_cache_use
)
{
// Calling the cuDNN "find the best algorithm" functions are really slow. So we keep a
// Calling the cuDNN "find the best algorithm" functions is really slow. So we keep a
// cache that tells us what method was best for a particular configuration.
thread_local std::map<std::tuple<int,int,int,int,long,long>,
std::tuple<int,int,int>> config_to_algo_cache;
......@@ -799,7 +800,7 @@ namespace dlib
// the cache.
const auto cache_key = std::make_tuple(stride_y, stride_x, padding_y, padding_x, filters_nr, filters_nc);
const auto iter = config_to_algo_cache.find(cache_key);
if (iter != config_to_algo_cache.end())
if (iter != config_to_algo_cache.end() && allow_cache_use == allow_cache_use::yes)
{
std::tie(forward_algo, backward_data_algo, backward_filters_algo) = iter->second;
return;
......@@ -933,6 +934,40 @@ namespace dlib
config_to_algo_cache[cache_key] = std::make_tuple(forward_algo, backward_data_algo, backward_filters_algo);
}
void tensor_conv::
update_convolution_data_workspace_sizes(
const tensor& data,
const tensor_descriptor& dest_desc
)
// Queries cuDNN for the scratch-workspace sizes required by the currently
// selected forward, backward-data, and backward-filter convolution algorithms,
// and stores the results in the three *_workspace_size_in_bytes member fields.
//
//   data      - tensor describing the convolution input layout.
//   dest_desc - descriptor for the convolution output layout.
//
// Each query is wrapped in CHECK_CUDNN, so a cuDNN failure (e.g.
// CUDNN_STATUS_NOT_SUPPORTED for a cached algorithm that does not apply to
// this configuration) surfaces as an exception; the caller (setup) catches
// dlib::cudnn_error and retries after re-selecting algorithms without the
// cache.  Assumes select_best_algorithms() has already populated the
// forward_algo / backward_data_algo / backward_filters_algo fields.
{
// Workspace needed to run the forward convolution with forward_algo.
CHECK_CUDNN(cudnnGetConvolutionForwardWorkspaceSize(
context(),
descriptor(data),
(const cudnnFilterDescriptor_t)filter_handle,
(const cudnnConvolutionDescriptor_t)conv_handle,
descriptor(dest_desc),
(cudnnConvolutionFwdAlgo_t)forward_algo,
&forward_workspace_size_in_bytes));
// Workspace needed to back-propagate gradients to the input data.
CHECK_CUDNN(cudnnGetConvolutionBackwardDataWorkspaceSize(
context(),
(const cudnnFilterDescriptor_t)filter_handle,
descriptor(dest_desc),
(const cudnnConvolutionDescriptor_t)conv_handle,
descriptor(data),
(cudnnConvolutionBwdDataAlgo_t)backward_data_algo,
&backward_data_workspace_size_in_bytes));
// Workspace needed to back-propagate gradients to the filter weights.
CHECK_CUDNN(cudnnGetConvolutionBackwardFilterWorkspaceSize(
context(),
descriptor(data),
descriptor(dest_desc),
(const cudnnConvolutionDescriptor_t)conv_handle,
(const cudnnFilterDescriptor_t)filter_handle,
(cudnnConvolutionBwdFilterAlgo_t)backward_filters_algo,
&backward_filters_workspace_size_in_bytes));
}
void tensor_conv::
setup(
const tensor& data,
......@@ -1021,36 +1056,18 @@ namespace dlib
tensor_descriptor dest_desc;
dest_desc.set_size(out_num_samples,out_k,out_nr,out_nc);
select_best_algorithms(data, dest_desc);
CHECK_CUDNN(cudnnGetConvolutionForwardWorkspaceSize(
context(),
descriptor(data),
(const cudnnFilterDescriptor_t)filter_handle,
(const cudnnConvolutionDescriptor_t)conv_handle,
descriptor(dest_desc),
(cudnnConvolutionFwdAlgo_t)forward_algo,
&forward_workspace_size_in_bytes));
CHECK_CUDNN(cudnnGetConvolutionBackwardDataWorkspaceSize(
context(),
(const cudnnFilterDescriptor_t)filter_handle,
descriptor(dest_desc),
(const cudnnConvolutionDescriptor_t)conv_handle,
descriptor(data),
(cudnnConvolutionBwdDataAlgo_t)backward_data_algo,
&backward_data_workspace_size_in_bytes));
CHECK_CUDNN(cudnnGetConvolutionBackwardFilterWorkspaceSize(
context(),
descriptor(data),
descriptor(dest_desc),
(const cudnnConvolutionDescriptor_t)conv_handle,
(const cudnnFilterDescriptor_t)filter_handle,
(cudnnConvolutionBwdFilterAlgo_t)backward_filters_algo,
&backward_filters_workspace_size_in_bytes));
try
{
select_best_algorithms(data, dest_desc, allow_cache_use::yes);
update_convolution_data_workspace_sizes(data, dest_desc);
}
catch (dlib::cudnn_error&)
{
// Sometimes the values stored in `config_to_algo_cache` do not quite work -
// so let's get a fresh estimate, instead of using a cached value.
select_best_algorithms(data, dest_desc, allow_cache_use::no);
update_convolution_data_workspace_sizes(data, dest_desc);
}
}
catch(...)
{
......
......@@ -254,15 +254,20 @@ namespace dlib
int out_nr;
int out_nc;
enum class allow_cache_use { no, yes };
// sets the three _algo fields.
void select_best_algorithms(const tensor& data, const tensor_descriptor& dest_desc);
void select_best_algorithms(const tensor& data, const tensor_descriptor& dest_desc, allow_cache_use allow_cache_use);
int forward_algo;
int backward_data_algo;
int backward_filters_algo;
// sets the three _workspace_size_in_bytes fields.
void update_convolution_data_workspace_sizes(const tensor& data, const tensor_descriptor& dest_desc);
size_t forward_workspace_size_in_bytes;
size_t backward_data_workspace_size_in_bytes;
size_t backward_filters_workspace_size_in_bytes;
cuda_data_void_ptr forward_workspace;
cuda_data_void_ptr backward_data_workspace;
cuda_data_void_ptr backward_filters_workspace;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment