Unverified Commit 97f7b1df authored by Yuting Jiang's avatar Yuting Jiang Committed by GitHub
Browse files

Benchmarks: microbenchmark - add auto selecting algorithm support for cudnn functions (#540)

**Description**
Add automatic algorithm selection support for cuDNN functions.

**Major Revision**
- Add automatic algorithm selection support for cuDNN functions in the
source code
- Add the '--enable_auto_algo' option to the benchmark
- Add related tests
parent c7d0beaf
......@@ -357,6 +357,13 @@ def add_parser_arguments(self):
required=False,
help='The custom json string defining the params in a cudnn function.',
)
self._parser.add_argument(
'--enable_auto_algo',
action='store_true',
default=False,
required=False,
help='Whether to use auto algorithm selection.'
)
def _preprocess(self):
"""Preprocess/preparation operations before the benchmarking.
......@@ -373,6 +380,8 @@ def _preprocess(self):
command += (' --warm_up ' + str(self._args.num_warmup))
command += (' --num_in_step ' + str(self._args.num_in_step))
command += (' --random_seed ' + str(self._args.random_seed))
if self._args.enable_auto_algo:
command += (' --enable_auto_algo')
try:
if not self._args.config_json_str:
......
......@@ -32,6 +32,18 @@ template <typename T1, typename T2> class ConvolutionBackwardDataFunction : publ
this->h_desc_.desc(), this->bwd_data_algo_, &this->fwd_workspace_size_));
}
/**
* @brief Find the best algorithm for cudnn convolution functions
*/
virtual void find_best_algo() {
int algo_count;
cudnnConvolutionBwdDataAlgoPerf_t perf_results;
CHECK_CUDNN_ERROR(cudnnFindConvolutionBackwardDataAlgorithm(
this->cudnn_handle, this->w_desc_.desc(), this->x_desc_.desc(), this->conv_desc_.desc(),
this->h_desc_.desc(), 1, &algo_count, &perf_results));
this->algo_ = perf_results.algo;
}
public:
/**
* @brief Construct a new Convolution Backward Data Function object
......
......@@ -31,6 +31,17 @@ template <typename T1, typename T2> class ConvolutionBackwardFilterFunction : pu
this->cudnn_handle, this->x_desc_.desc(), this->h_desc_.desc(), this->conv_desc_.desc(),
this->w_desc_.desc(), this->bwd_filter_algo_, &this->fwd_workspace_size_));
}
/**
* @brief Find the best algorithm for cudnn convolution functions
*/
virtual void find_best_algo() {
int algo_count;
cudnnConvolutionBwdFilterAlgoPerf_t perf_results;
CHECK_CUDNN_ERROR(cudnnFindConvolutionBackwardFilterAlgorithm(
this->cudnn_handle, this->x_desc_.desc(), this->h_desc_.desc(), this->conv_desc_.desc(),
this->w_desc_.desc(), 1, &algo_count, &perf_results));
this->algo_ = perf_results.algo;
}
public:
/**
......
......@@ -31,6 +31,17 @@ template <typename T1, typename T2> class ConvolutionForwardFunction : public Cu
this->cudnn_handle, this->x_desc_.desc(), this->w_desc_.desc(), this->conv_desc_.desc(),
this->h_desc_.desc(), this->fwd_algo_, &this->fwd_workspace_size_));
}
/**
* @brief Find the best algorithm for cudnn convolution functions
*/
virtual void find_best_algo() {
int algo_count;
cudnnConvolutionFwdAlgoPerf_t perf_results;
CHECK_CUDNN_ERROR(cudnnFindConvolutionForwardAlgorithm(this->cudnn_handle, this->x_desc_.desc(),
this->w_desc_.desc(), this->conv_desc_.desc(),
this->h_desc_.desc(), 1, &algo_count, &perf_results));
this->algo_ = perf_results.algo;
}
public:
/**
......
......@@ -58,6 +58,7 @@ class CudnnConfig {
cudnnDataType_t input_type_; ///< selects the data type in which the computation will be done
cudnnDataType_t conv_type_; ///< selects the data type in which the convolution will be done
std::string function_str_; ///< the str representing the cudnn function with params
bool auto_algo_; ///< whether to use auto algo selection
public:
void set_num_test(int num_test) { this->num_test = num_test; }
......@@ -80,6 +81,7 @@ class CudnnConfig {
void set_input_type(const cudnnDataType_t &input_type) { input_type_ = input_type; }
void set_conv_type(const cudnnDataType_t &conv_type) { input_type_ = conv_type; }
void set_function(const std::string &str) { function_str_ = str; }
void set_auto_algo(bool auto_algo) { auto_algo_ = auto_algo; }
std::vector<int> &get_input_dims() { return input_dims_; }
std::vector<int> &get_input_stride() { return input_stride_; }
......@@ -98,6 +100,7 @@ class CudnnConfig {
std::string &get_name() { return name; }
cudnn_function_name_enum get_e_name() { return e_name; }
std::string &get_function_str() { return function_str_; }
bool get_auto_algo() { return auto_algo_; }
/**
* @brief Convert name string to enum name
* @return cudnn_function_name_enum
......
......@@ -45,6 +45,10 @@ template <typename T1, typename T2> class CudnnFunction : public CudnnConfig {
* @brief launch the kernel/function
*/
virtual void kernel_entry() {}
/**
* @brief Find the best algorithm for cudnn convolution functions
*/
virtual void find_best_algo() {}
public:
/**
......@@ -87,6 +91,9 @@ template <typename T1, typename T2> void CudnnFunction<T1, T2>::prepare_for_func
// Set Convolution MathType
cudnnMathType_t algo = get_use_tensor_op() ? CUDNN_TENSOR_OP_MATH : CUDNN_DEFAULT_MATH;
CHECK_CUDNN_ERROR(cudnnSetConvolutionMathType(conv_desc_.desc(), algo));
if (this->auto_algo_) {
find_best_algo();
}
// Set convolution algorithm and workspace size
this->get_workspace_size();
zeros<float>(&fwd_workspace_, std::vector<int>{static_cast<int>(this->fwd_workspace_size_ / sizeof(float)), 1});
......
......@@ -67,12 +67,24 @@ class Options {
return "";
}
/** @brief Get the bool type value of cmd line argument
* @param option the cmd line argument
* @return bool the bool type value of cmd line argument 'option'
*/
bool get_cmd_line_argument_bool(const std::string &option) {
if (std::find(begin, end, option) != end) {
return true;
}
return false;
}
public:
int num_test;
int warm_up;
int num_in_step;
int random_seed;
std::string para_info_json;
bool auto_algo;
/**
* @brief Construct a new Command Line object
......@@ -91,6 +103,7 @@ class Options {
random_seed = get_cmd_line_argument_int("--random_seed");
random_seed = (random_seed == 0 ? time(NULL) : random_seed);
para_info_json = get_cmd_line_argument_string("--config_json");
auto_algo = get_cmd_line_argument_bool("--enable_auto_algo");
para_info_json =
para_info_json == ""
? R"({"algo":0,"arrayLength":2,"convType":0,"dilationA":[1,1],"filterStrideA":[1,1],"filterDims":[32,128,3,3],"inputDims":[32,128,14,14],"inputStride":[25088,196,14,1],"inputType":0,"mode":1, "name":"cudnnConvolutionBackwardFilter","outputDims":[32,32,14,14],"outputStride":[6272,196,14,1],"padA":[1,1],"tensorOp":false})"
......@@ -126,8 +139,10 @@ void from_json(const json &j, cudnn_test::CudnnConfig &fn) {
fn.set_input_stride(input_stride);
auto output_stride = j.at("outputStride").get<std::vector<int>>();
fn.set_output_stride(output_stride);
auto algo = j.at("algo").get<int>();
fn.set_algo(algo);
if (j.contains("algo")) {
auto algo = j.at("algo").get<int>();
fn.set_algo(algo);
}
auto padA = j.at("padA").get<std::vector<int>>();
fn.set_padA(padA);
auto filter_strideA = j.at("filterStrideA").get<std::vector<int>>();
......@@ -178,6 +193,7 @@ void run_benchmark(Options &options) {
function.set_warm_up(options.warm_up);
function.set_num_in_step(options.num_in_step);
function.set_random_seed(options.random_seed);
function.set_auto_algo(options.auto_algo);
if (function.get_input_type() == CUDNN_DATA_FLOAT && function.get_conv_type() == CUDNN_DATA_FLOAT) {
auto p_function = get_cudnn_function_pointer<float, float>(function);
p_function->benchmark();
......
......@@ -85,8 +85,7 @@ def test_cudnn_functions():
if metric != 'return_code':
assert (len(benchmark.raw_data[metric][0]) == benchmark._args.num_steps)
# Test for custom list configuration
# Test for custom list configuration
custom_config_str2 = '{"algo":1,"arrayLength":2,"convType":0,"dilationA":[1,1],"filterStrideA":[1,1],' \
+ '"filterDims":[32,128,3,3],"inputDims":[32,32,14,14],"inputStride":[6272, 196, 14, 1],"inputType":2,'\
+ '"mode":1,"name":"cudnnConvolutionBackwardData","outputDims":[32, 128, 14, 14],'\
......@@ -126,3 +125,26 @@ def test_cudnn_functions():
assert (isinstance(benchmark.result[metric][0], numbers.Number))
if metric != 'return_code':
assert (len(benchmark.raw_data[metric][0]) == benchmark._args.num_steps)
# Test for auto_algo parameter
context = BenchmarkRegistry.create_benchmark_context(
'cudnn-function',
platform=Platform.CUDA,
parameters='--num_warmup 10 --num_steps 10 --num_in_step 100 --enable_auto_algo'
)
assert (BenchmarkRegistry.is_benchmark_context_valid(context))
benchmark = BenchmarkRegistry.launch_benchmark(context)
# Check basic information.
assert (benchmark)
assert (benchmark._args.enable_auto_algo is True)
assert (benchmark.return_code == ReturnCode.SUCCESS)
assert (18 + benchmark.default_metric_count == len(benchmark.result))
for metric in list(benchmark.result.keys()):
assert (len(benchmark.result[metric]) == 1)
assert (isinstance(benchmark.result[metric][0], numbers.Number))
if metric != 'return_code':
assert (len(benchmark.raw_data[metric][0]) == benchmark._args.num_steps)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment