Unverified Commit 97f7b1df authored by Yuting Jiang's avatar Yuting Jiang Committed by GitHub
Browse files

Benchmarks: microbenchmark - add auto selecting algorithm support for cudnn functions (#540)

**Description**
Add automatic algorithm selection support for cuDNN functions.

**Major Revision**
- Add automatic algorithm selection support for cuDNN functions in the
source code
- Add the '--enable_auto_algo' option to the benchmark
- Add related tests
parent c7d0beaf
......@@ -357,6 +357,13 @@ def add_parser_arguments(self):
required=False,
help='The custom json string defining the params in a cudnn function.',
)
self._parser.add_argument(
'--enable_auto_algo',
action='store_true',
default=False,
required=False,
help='Whether to use auto algorithm selection.'
)
def _preprocess(self):
"""Preprocess/preparation operations before the benchmarking.
......@@ -373,6 +380,8 @@ def _preprocess(self):
command += (' --warm_up ' + str(self._args.num_warmup))
command += (' --num_in_step ' + str(self._args.num_in_step))
command += (' --random_seed ' + str(self._args.random_seed))
if self._args.enable_auto_algo:
command += (' --enable_auto_algo')
try:
if not self._args.config_json_str:
......
......@@ -32,6 +32,18 @@ template <typename T1, typename T2> class ConvolutionBackwardDataFunction : publ
this->h_desc_.desc(), this->bwd_data_algo_, &this->fwd_workspace_size_));
}
/**
* @brief Find the best algorithm for cudnn convolution functions
*/
virtual void find_best_algo() {
int algo_count;
cudnnConvolutionBwdDataAlgoPerf_t perf_results;
CHECK_CUDNN_ERROR(cudnnFindConvolutionBackwardDataAlgorithm(
this->cudnn_handle, this->w_desc_.desc(), this->x_desc_.desc(), this->conv_desc_.desc(),
this->h_desc_.desc(), 1, &algo_count, &perf_results));
this->algo_ = perf_results.algo;
}
public:
/**
* @brief Construct a new Convolution Backward Data Function object
......
......@@ -31,6 +31,17 @@ template <typename T1, typename T2> class ConvolutionBackwardFilterFunction : pu
this->cudnn_handle, this->x_desc_.desc(), this->h_desc_.desc(), this->conv_desc_.desc(),
this->w_desc_.desc(), this->bwd_filter_algo_, &this->fwd_workspace_size_));
}
/**
* @brief Find the best algorithm for cudnn convolution functions
*/
virtual void find_best_algo() {
int algo_count;
cudnnConvolutionBwdFilterAlgoPerf_t perf_results;
CHECK_CUDNN_ERROR(cudnnFindConvolutionBackwardFilterAlgorithm(
this->cudnn_handle, this->x_desc_.desc(), this->h_desc_.desc(), this->conv_desc_.desc(),
this->w_desc_.desc(), 1, &algo_count, &perf_results));
this->algo_ = perf_results.algo;
}
public:
/**
......
......@@ -31,6 +31,17 @@ template <typename T1, typename T2> class ConvolutionForwardFunction : public Cu
this->cudnn_handle, this->x_desc_.desc(), this->w_desc_.desc(), this->conv_desc_.desc(),
this->h_desc_.desc(), this->fwd_algo_, &this->fwd_workspace_size_));
}
/**
* @brief Find the best algorithm for cudnn convolution functions
*/
virtual void find_best_algo() {
int algo_count;
cudnnConvolutionFwdAlgoPerf_t perf_results;
CHECK_CUDNN_ERROR(cudnnFindConvolutionForwardAlgorithm(this->cudnn_handle, this->x_desc_.desc(),
this->w_desc_.desc(), this->conv_desc_.desc(),
this->h_desc_.desc(), 1, &algo_count, &perf_results));
this->algo_ = perf_results.algo;
}
public:
/**
......
......@@ -58,6 +58,7 @@ class CudnnConfig {
cudnnDataType_t input_type_; ///< selects the data type in which the computation will be done
cudnnDataType_t conv_type_; ///< selects the data type in which the convolution will be done
std::string function_str_; ///< the str representing the cudnn function with params
bool auto_algo_; ///< whether to use auto algo selection
public:
void set_num_test(int num_test) { this->num_test = num_test; }
......@@ -80,6 +81,7 @@ class CudnnConfig {
void set_input_type(const cudnnDataType_t &input_type) { input_type_ = input_type; }
void set_conv_type(const cudnnDataType_t &conv_type) { input_type_ = conv_type; }
void set_function(const std::string &str) { function_str_ = str; }
void set_auto_algo(bool auto_algo) { auto_algo_ = auto_algo; }
std::vector<int> &get_input_dims() { return input_dims_; }
std::vector<int> &get_input_stride() { return input_stride_; }
......@@ -98,6 +100,7 @@ class CudnnConfig {
std::string &get_name() { return name; }
cudnn_function_name_enum get_e_name() { return e_name; }
std::string &get_function_str() { return function_str_; }
bool get_auto_algo() { return auto_algo_; }
/**
* @brief Convert name string to enum name
* @return cudnn_function_name_enum
......
......@@ -45,6 +45,10 @@ template <typename T1, typename T2> class CudnnFunction : public CudnnConfig {
* @brief launch the kernel/function
*/
virtual void kernel_entry() {}
/**
* @brief Find the best algorithm for cudnn convolution functions
*/
virtual void find_best_algo() {}
public:
/**
......@@ -87,6 +91,9 @@ template <typename T1, typename T2> void CudnnFunction<T1, T2>::prepare_for_func
// Set Convolution MathType
cudnnMathType_t algo = get_use_tensor_op() ? CUDNN_TENSOR_OP_MATH : CUDNN_DEFAULT_MATH;
CHECK_CUDNN_ERROR(cudnnSetConvolutionMathType(conv_desc_.desc(), algo));
if (this->auto_algo_) {
find_best_algo();
}
// Set convolution algorithm and workspace size
this->get_workspace_size();
zeros<float>(&fwd_workspace_, std::vector<int>{static_cast<int>(this->fwd_workspace_size_ / sizeof(float)), 1});
......
......@@ -67,12 +67,24 @@ class Options {
return "";
}
/** @brief Get the bool type value of cmd line argument
* @param option the cmd line argument
* @return bool the bool type value of cmd line argument 'option'
*/
bool get_cmd_line_argument_bool(const std::string &option) {
if (std::find(begin, end, option) != end) {
return true;
}
return false;
}
public:
int num_test;
int warm_up;
int num_in_step;
int random_seed;
std::string para_info_json;
bool auto_algo;
/**
* @brief Construct a new Command Line object
......@@ -91,6 +103,7 @@ class Options {
random_seed = get_cmd_line_argument_int("--random_seed");
random_seed = (random_seed == 0 ? time(NULL) : random_seed);
para_info_json = get_cmd_line_argument_string("--config_json");
auto_algo = get_cmd_line_argument_bool("--enable_auto_algo");
para_info_json =
para_info_json == ""
? R"({"algo":0,"arrayLength":2,"convType":0,"dilationA":[1,1],"filterStrideA":[1,1],"filterDims":[32,128,3,3],"inputDims":[32,128,14,14],"inputStride":[25088,196,14,1],"inputType":0,"mode":1, "name":"cudnnConvolutionBackwardFilter","outputDims":[32,32,14,14],"outputStride":[6272,196,14,1],"padA":[1,1],"tensorOp":false})"
......@@ -126,8 +139,10 @@ void from_json(const json &j, cudnn_test::CudnnConfig &fn) {
fn.set_input_stride(input_stride);
auto output_stride = j.at("outputStride").get<std::vector<int>>();
fn.set_output_stride(output_stride);
auto algo = j.at("algo").get<int>();
fn.set_algo(algo);
if (j.contains("algo")) {
auto algo = j.at("algo").get<int>();
fn.set_algo(algo);
}
auto padA = j.at("padA").get<std::vector<int>>();
fn.set_padA(padA);
auto filter_strideA = j.at("filterStrideA").get<std::vector<int>>();
......@@ -178,6 +193,7 @@ void run_benchmark(Options &options) {
function.set_warm_up(options.warm_up);
function.set_num_in_step(options.num_in_step);
function.set_random_seed(options.random_seed);
function.set_auto_algo(options.auto_algo);
if (function.get_input_type() == CUDNN_DATA_FLOAT && function.get_conv_type() == CUDNN_DATA_FLOAT) {
auto p_function = get_cudnn_function_pointer<float, float>(function);
p_function->benchmark();
......
......@@ -85,8 +85,7 @@ def test_cudnn_functions():
if metric != 'return_code':
assert (len(benchmark.raw_data[metric][0]) == benchmark._args.num_steps)
# Test for custom list configuration
# Test for custom list configuration
custom_config_str2 = '{"algo":1,"arrayLength":2,"convType":0,"dilationA":[1,1],"filterStrideA":[1,1],' \
+ '"filterDims":[32,128,3,3],"inputDims":[32,32,14,14],"inputStride":[6272, 196, 14, 1],"inputType":2,'\
+ '"mode":1,"name":"cudnnConvolutionBackwardData","outputDims":[32, 128, 14, 14],'\
......@@ -126,3 +125,26 @@ def test_cudnn_functions():
assert (isinstance(benchmark.result[metric][0], numbers.Number))
if metric != 'return_code':
assert (len(benchmark.raw_data[metric][0]) == benchmark._args.num_steps)
# Test for auto_algo parameter
context = BenchmarkRegistry.create_benchmark_context(
'cudnn-function',
platform=Platform.CUDA,
parameters='--num_warmup 10 --num_steps 10 --num_in_step 100 --enable_auto_algo'
)
assert (BenchmarkRegistry.is_benchmark_context_valid(context))
benchmark = BenchmarkRegistry.launch_benchmark(context)
# Check basic information.
assert (benchmark)
assert (benchmark._args.enable_auto_algo is True)
assert (benchmark.return_code == ReturnCode.SUCCESS)
assert (18 + benchmark.default_metric_count == len(benchmark.result))
for metric in list(benchmark.result.keys()):
assert (len(benchmark.result[metric]) == 1)
assert (isinstance(benchmark.result[metric][0], numbers.Number))
if metric != 'return_code':
assert (len(benchmark.raw_data[metric][0]) == benchmark._args.num_steps)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment