"docs/git@developer.sourcefind.cn:OpenDAS/deepspeed.git" did not exist on "7cab55c79178fce9a154abe4e7b5b7abd82648a1"
Commit 0a1464b6 authored by Davis King's avatar Davis King
Browse files

Added global functions for telling cuDNN to prefer fast or memory-efficient algorithms.
parent 2cfbd02c
......@@ -67,6 +67,31 @@ namespace dlib
(except computes it using a numerically accurate method)
!*/
// ----------------------------------------------------------------------------------------
bool dnn_prefer_fastest_algorithms(
);
/*!
    ensures
        - Returns true if dlib should prefer fast algorithms over ones that use
          less RAM, and returns false otherwise.
        - On program startup this function returns true.  It keeps returning true
          until set_dnn_prefer_smallest_algorithms() is called.
        - The value is consulted when dlib asks cuDNN to choose its convolution
          algorithms (forward, backward data, and backward filter).
!*/
void set_dnn_prefer_fastest_algorithms(
);
/*!
    ensures
        - #dnn_prefer_fastest_algorithms() == true
          (i.e. after this call dlib prefers speed over low memory use)
!*/
void set_dnn_prefer_smallest_algorithms(
);
/*!
    ensures
        - #dnn_prefer_fastest_algorithms() == false
          (i.e. after this call dlib prefers low memory use over speed)
!*/
// ----------------------------------------------------------------------------------------
template <
......
......@@ -13,6 +13,7 @@
#include "cuda_utils.h"
#include "cpu_dlib.h"
#include "cuda_dlib.h"
#include "tensor_tools.h"
static const char* cudnn_get_error_string(cudnnStatus_t s)
{
......@@ -773,7 +774,7 @@ namespace dlib
(const cudnnFilterDescriptor_t)filter_handle,
(const cudnnConvolutionDescriptor_t)conv_handle,
descriptor(dest_desc),
CUDNN_CONVOLUTION_FWD_PREFER_FASTEST, // or CUDNN_CONVOLUTION_FWD_NO_WORKSPACE,
dnn_prefer_fastest_algorithms()?CUDNN_CONVOLUTION_FWD_PREFER_FASTEST:CUDNN_CONVOLUTION_FWD_NO_WORKSPACE,
std::numeric_limits<size_t>::max(),
&forward_best_algo));
forward_algo = forward_best_algo;
......@@ -797,7 +798,7 @@ namespace dlib
descriptor(dest_desc),
(const cudnnConvolutionDescriptor_t)conv_handle,
descriptor(data),
CUDNN_CONVOLUTION_BWD_DATA_PREFER_FASTEST,
dnn_prefer_fastest_algorithms()?CUDNN_CONVOLUTION_BWD_DATA_PREFER_FASTEST:CUDNN_CONVOLUTION_BWD_DATA_NO_WORKSPACE,
std::numeric_limits<size_t>::max(),
&backward_data_best_algo));
backward_data_algo = backward_data_best_algo;
......@@ -821,7 +822,7 @@ namespace dlib
descriptor(dest_desc),
(const cudnnConvolutionDescriptor_t)conv_handle,
(const cudnnFilterDescriptor_t)filter_handle,
CUDNN_CONVOLUTION_BWD_FILTER_PREFER_FASTEST,
dnn_prefer_fastest_algorithms()?CUDNN_CONVOLUTION_BWD_FILTER_PREFER_FASTEST:CUDNN_CONVOLUTION_BWD_FILTER_NO_WORKSPACE,
std::numeric_limits<size_t>::max(),
&backward_filters_best_algo));
backward_filters_algo = backward_filters_best_algo;
......
......@@ -5,6 +5,38 @@
#include "tensor_tools.h"
#include "../string.h"
#include <atomic>
namespace dlib
{
    namespace
    {
        // Accessor for the single process-wide preference flag.  The flag is a
        // function-local static, so it is created the first time any of the
        // functions below is called, and it starts out as true (prefer fastest).
        std::atomic<bool>& dnn_prefer_fastest_algo()
        {
            static std::atomic<bool> prefer_fastest{true};
            return prefer_fastest;
        }
    }

    // Reports the current preference: true means "prefer the fastest algorithms",
    // false means "prefer the algorithms that use the least memory".
    bool dnn_prefer_fastest_algorithms()
    {
        const bool current = dnn_prefer_fastest_algo().load();
        return current;
    }

    // Switches the preference to the fastest algorithms.
    void set_dnn_prefer_fastest_algorithms()
    {
        dnn_prefer_fastest_algo().store(true);
    }

    // Switches the preference to the smallest (least memory hungry) algorithms.
    void set_dnn_prefer_smallest_algorithms()
    {
        dnn_prefer_fastest_algo().store(false);
    }
}
namespace dlib { namespace tt
{
......
......@@ -11,6 +11,13 @@
#include "cuda_dlib.h"
#include "../rand.h"
namespace dlib
{
// Global switches controlling whether dlib prefers fast algorithms or ones that
// use less memory.

// Returns true if fast algorithms are preferred, false if smaller ones are.
bool dnn_prefer_fastest_algorithms();
// After this call dnn_prefer_fastest_algorithms() returns true.
void set_dnn_prefer_fastest_algorithms();
// After this call dnn_prefer_fastest_algorithms() returns false.
void set_dnn_prefer_smallest_algorithms();
}
namespace dlib { namespace tt
{
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment