Unverified Commit 61c258fe authored by Yuting Jiang's avatar Yuting Jiang Committed by GitHub
Browse files

Benchmarks: Add benchmark - add source code of cudnn function micro benchmark (#78)

* Benchmarks: Add benchmark - add source code of cudnn function micro benchmark
parent 5e9f948d
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
cmake_minimum_required(VERSION 3.18)
project(CudnnBenchmark LANGUAGES CUDA CXX)
include(../cuda_common.cmake)
SET(SRC "cudnn_helper.cpp" CACHE STRING "source file")
SET(TARGET_NAME "cudnn_function" CACHE STRING "target name")
find_package(CUDAToolkit REQUIRED)
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} ${NVCC_ARCHS_SUPPORTED}")
# Directory-level include/link setup must precede the targets that need it;
# previously these came after add_library, so the library never inherited them.
include_directories(${CUDAToolkit_INCLUDE_DIRS})
link_directories(${CUDAToolkit_LIBRARY_DIR} ${CUDAToolkit_TARGET_DIR})
find_library(CUDNN_LIBRARY cudnn
    HINTS ${CUDAToolkit_ROOT_DIR}
    PATH_SUFFIXES lib lib64 cuda/lib cuda/lib64 lib/x64)
# Fetch nlohmann/json used for parsing the benchmark config.
include(FetchContent)
FetchContent_Declare(json
    GIT_REPOSITORY https://github.com/ArthurSonzogni/nlohmann_json_cmake_fetchcontent
    GIT_TAG v3.7.3)
FetchContent_GetProperties(json)
if(NOT json_POPULATED)
    FetchContent_Populate(json)
    add_subdirectory(${json_SOURCE_DIR} ${json_BINARY_DIR} EXCLUDE_FROM_ALL)
endif()
add_library(${TARGET_NAME} SHARED ${SRC})
# Link the shared library against its direct dependencies so it does not carry
# undefined cudart/cudnn symbols.
target_link_libraries(${TARGET_NAME} CUDA::cudart ${CUDNN_LIBRARY})
add_executable(CudnnBenchmark cudnn_test.cpp)
target_link_libraries(CudnnBenchmark ${TARGET_NAME} nlohmann_json::nlohmann_json CUDA::cudart ${CUDNN_LIBRARY})
install(TARGETS CudnnBenchmark ${TARGET_NAME} RUNTIME DESTINATION bin LIBRARY DESTINATION lib)
// Copyright(c) Microsoft Corporation.
// Licensed under the MIT License.
#pragma once
#include "cudnn_function.h"
namespace cudnn_test {
/**
 * @brief Cudnn benchmark class for cudnnConvolutionBackwardData
 * @tparam T1 input data type
 * @tparam T2 convolution compute type
 */
template <typename T1, typename T2> class ConvolutionBackwardDataFunction : public CudnnFunction<T1, T2> {
    cudnnConvolutionBwdDataAlgo_t bwd_data_algo_; ///< algorithm used to compute the data gradient
    /**
     * @brief Execute the kernel/function
     *
     * NOTE(review): x/x_desc_ are passed in the dy slot and h/h_desc_ in the dx slot of
     * cudnnConvolutionBackwardData; this pairing matches get_workspace_size() below, but
     * confirm the configured input/output dims match cudnn's expected gradient shapes.
     */
    virtual void kernel_entry() override {
        CHECK_CUDNN_ERROR(cudnnConvolutionBackwardData(
            this->cudnn_handle, &this->alpha_, this->w_desc_.desc(), this->filter, this->x_desc_.desc(), this->x,
            this->conv_desc_.desc(), this->bwd_data_algo_, this->fwd_workspace_, this->fwd_workspace_size_,
            &this->beta_, this->h_desc_.desc(), this->h));
    }
    /**
     * @brief Get and set convolution algorithm and workspace size used in cudnn convolution functions
     */
    virtual void get_workspace_size() override {
        // The configured integer algo_ is interpreted as a backward-data algorithm enum.
        bwd_data_algo_ = cudnnConvolutionBwdDataAlgo_t(this->algo_);
        CHECK_CUDNN_ERROR(cudnnGetConvolutionBackwardDataWorkspaceSize(
            this->cudnn_handle, this->w_desc_.desc(), this->x_desc_.desc(), this->conv_desc_.desc(),
            this->h_desc_.desc(), this->bwd_data_algo_, &this->fwd_workspace_size_));
    }

  public:
    /**
     * @brief Construct a new Convolution Backward Data Function object
     */
    ConvolutionBackwardDataFunction() {}
    /**
     * @brief Construct a new Convolution Backward Data Function object
     * @param config base class CudnnConfig object
     */
    ConvolutionBackwardDataFunction(CudnnConfig &config) : CudnnFunction<T1, T2>(config) {}
};
} // namespace cudnn_test
// Copyright(c) Microsoft Corporation.
// Licensed under the MIT License.
#pragma once
#include "cudnn_function.h"
namespace cudnn_test {
/**
 * @brief Cudnn benchmark class for cudnnConvolutionBackwardFilter
 * @tparam T1 input data type
 * @tparam T2 convolution compute type
 */
template <typename T1, typename T2> class ConvolutionBackwardFilterFunction : public CudnnFunction<T1, T2> {
    cudnnConvolutionBwdFilterAlgo_t bwd_filter_algo_; ///< algorithm used to compute the filter gradient
    /**
     * @brief Execute the kernel/function
     *
     * NOTE(review): h/h_desc_ are passed in the dy slot and filter/w_desc_ receive the filter
     * gradient; this pairing matches get_workspace_size() below — confirm dims against cudnn docs.
     */
    virtual void kernel_entry() override {
        CHECK_CUDNN_ERROR(cudnnConvolutionBackwardFilter(
            this->cudnn_handle, &this->alpha_, this->x_desc_.desc(), this->x, this->h_desc_.desc(), this->h,
            this->conv_desc_.desc(), this->bwd_filter_algo_, this->fwd_workspace_, this->fwd_workspace_size_,
            &this->beta_, this->w_desc_.desc(), this->filter));
    }
    /**
     * @brief Get and set convolution algorithm and workspace size used in cudnn convolution functions
     */
    virtual void get_workspace_size() override {
        // The configured integer algo_ is interpreted as a backward-filter algorithm enum.
        bwd_filter_algo_ = cudnnConvolutionBwdFilterAlgo_t(this->algo_);
        CHECK_CUDNN_ERROR(cudnnGetConvolutionBackwardFilterWorkspaceSize(
            this->cudnn_handle, this->x_desc_.desc(), this->h_desc_.desc(), this->conv_desc_.desc(),
            this->w_desc_.desc(), this->bwd_filter_algo_, &this->fwd_workspace_size_));
    }

  public:
    /**
     * @brief Construct a new Convolution Backward Filter Function object
     */
    ConvolutionBackwardFilterFunction() {}
    /**
     * @brief Construct a new Convolution Backward Filter Function object
     * @param config base class CudnnConfig object
     */
    ConvolutionBackwardFilterFunction(CudnnConfig &config) : CudnnFunction<T1, T2>(config) {}
};
} // namespace cudnn_test
// Copyright(c) Microsoft Corporation.
// Licensed under the MIT License.
#pragma once
#include "cudnn_function.h"
namespace cudnn_test {
/**
 * @brief Cudnn benchmark class for cudnnConvolutionForward
 * @tparam T1 input data type
 * @tparam T2 convolution compute type
 */
template <typename T1, typename T2> class ConvolutionForwardFunction : public CudnnFunction<T1, T2> {
    cudnnConvolutionFwdAlgo_t fwd_algo_; ///< algorithm used to compute the forward convolution
    /**
     * @brief Execute the kernel/function
     */
    virtual void kernel_entry() override {
        CHECK_CUDNN_ERROR(cudnnConvolutionForward(this->cudnn_handle, &this->alpha_, this->x_desc_.desc(), this->x,
                                                  this->w_desc_.desc(), this->filter, this->conv_desc_.desc(),
                                                  this->fwd_algo_, this->fwd_workspace_, this->fwd_workspace_size_,
                                                  &this->beta_, this->h_desc_.desc(), this->h));
    }
    /**
     * @brief Get and set convolution algorithm and workspace size used in cudnn convolution functions
     */
    virtual void get_workspace_size() override {
        // The configured integer algo_ is interpreted as a forward algorithm enum.
        fwd_algo_ = cudnnConvolutionFwdAlgo_t(this->algo_);
        CHECK_CUDNN_ERROR(cudnnGetConvolutionForwardWorkspaceSize(
            this->cudnn_handle, this->x_desc_.desc(), this->w_desc_.desc(), this->conv_desc_.desc(),
            this->h_desc_.desc(), this->fwd_algo_, &this->fwd_workspace_size_));
    }

  public:
    /**
     * @brief Construct a new Convolution Forward Function object
     */
    ConvolutionForwardFunction() {}
    /**
     * @brief Construct a new Convolution Forward Function object
     * @param config base class CudnnConfig object
     */
    ConvolutionForwardFunction(CudnnConfig &config) : CudnnFunction<T1, T2>(config) {}
};
} // namespace cudnn_test
// Copyright(c) Microsoft Corporation.
// Licensed under the MIT License.
#pragma once
#include <unordered_map>
#include "cudnn_helper.h"
namespace cudnn_test {
/**
 * @brief Enum of the supported cudnn function names
 */
enum cudnn_function_name_enum {
    e_cudnnConvolutionForward,
    e_cudnnConvolutionBackwardData,
    e_cudnnConvolutionBackwardFilter,
};
/**
 * @brief Map from cudnn function name string to cudnn function name enum;
 * used by CudnnConfig::name2enum() to validate and translate the "name" config field
 */
static std::unordered_map<std::string, cudnn_function_name_enum> const cudnn_function_name_string = {
    {"cudnnConvolutionForward", cudnn_function_name_enum::e_cudnnConvolutionForward},
    {"cudnnConvolutionBackwardData", cudnn_function_name_enum::e_cudnnConvolutionBackwardData},
    {"cudnnConvolutionBackwardFilter", cudnn_function_name_enum::e_cudnnConvolutionBackwardFilter},
};
/**
 * @brief Class to store the configuration of cudnn function params
 */
class CudnnConfig {
  protected:
    int num_test;                    ///< the number of steps used to test and measure
    int warm_up;                     ///< the number of steps used to warm up
    int num_in_step;                 ///< the number of functions invoking in a step
    int random_seed;                 ///< the random seed used to generate random data
    std::string name;                ///< the name of the cudnn function
    cudnn_function_name_enum e_name; ///< enum cudnn function name, set by name2enum()
    std::vector<int> input_dims_;    ///< size of the input tensor for every dimension
    std::vector<int> input_stride_;  ///< stride of the input tensor for every dimension
    std::vector<int> filter_dims_;   ///< size of the filter tensor for every dimension
    std::vector<int> output_dims_;   ///< size of the output tensor for every dimension
    std::vector<int> output_stride_; ///< stride of the output tensor for every dimension
    int algo_;          ///< enumerant that specifies which convolution algorithm should be used
    int array_length_;  ///< dimension of the convolution
    std::vector<int> padA_;           ///< zero-padding size for each convolution dimension
    std::vector<int> filter_strideA_; ///< filter stride for each convolution dimension
    std::vector<int> dilationA_;      ///< dilation factor for each convolution dimension
    cudnnConvolutionMode_t mode_;     ///< selects between CUDNN_CONVOLUTION and CUDNN_CROSS_CORRELATION
    bool use_tensor_op_;              ///< whether the use of tensor op is permitted in the library routines
                                      ///< associated with a given convolution descriptor
    cudnnDataType_t input_type_;      ///< data type in which the computation will be done
    cudnnDataType_t conv_type_;       ///< data type in which the convolution will be done
    std::string function_str_;        ///< the str representing the cudnn function with params

  public:
    void set_num_test(int num_test) { this->num_test = num_test; }
    void set_warm_up(int warm_up) { this->warm_up = warm_up; }
    void set_num_in_step(int num_in_step) { this->num_in_step = num_in_step; }
    void set_random_seed(int random_seed) { this->random_seed = random_seed; }
    void set_name(const std::string &n) { name = n; }
    void set_input_dims(const std::vector<int> &input_dims) { input_dims_ = input_dims; }
    void set_input_stride(const std::vector<int> &input_stride) { input_stride_ = input_stride; }
    void set_filter_dims(const std::vector<int> &filter_dims) { filter_dims_ = filter_dims; }
    void set_output_dims(const std::vector<int> &output_dims) { output_dims_ = output_dims; }
    void set_output_stride(const std::vector<int> &output_stride) { output_stride_ = output_stride; }
    void set_algo(int algo) { algo_ = algo; }
    void set_array_length(int array_length) { array_length_ = array_length; }
    void set_padA(const std::vector<int> &padA) { padA_ = padA; }
    void set_filter_strideA(const std::vector<int> &filter_strideA) { filter_strideA_ = filter_strideA; }
    void set_dilationA(const std::vector<int> &dilationA) { dilationA_ = dilationA; }
    void set_mode(const cudnnConvolutionMode_t &mode) { mode_ = mode; }
    void set_use_tensor_op(bool use_tensor_op) { use_tensor_op_ = use_tensor_op; }
    void set_input_type(const cudnnDataType_t &input_type) { input_type_ = input_type; }
    // BUGFIX: previously assigned to input_type_, silently discarding the conv type.
    void set_conv_type(const cudnnDataType_t &conv_type) { conv_type_ = conv_type; }
    void set_function(const std::string &str) { function_str_ = str; }
    std::vector<int> &get_input_dims() { return input_dims_; }
    std::vector<int> &get_input_stride() { return input_stride_; }
    std::vector<int> &get_filter_dims() { return filter_dims_; }
    std::vector<int> &get_output_dims() { return output_dims_; }
    std::vector<int> &get_output_stride() { return output_stride_; }
    int get_algo() { return algo_; }
    int get_array_length() { return array_length_; }
    std::vector<int> &get_padA() { return padA_; }
    std::vector<int> &get_filter_strideA() { return filter_strideA_; }
    std::vector<int> &get_dilationA() { return dilationA_; }
    cudnnConvolutionMode_t &get_mode() { return mode_; }
    bool get_use_tensor_op() { return use_tensor_op_; }
    cudnnDataType_t &get_input_type() { return input_type_; }
    // BUGFIX: previously returned input_type_, so half/half configs were dispatched wrongly.
    cudnnDataType_t &get_conv_type() { return conv_type_; }
    std::string &get_name() { return name; }
    cudnn_function_name_enum get_e_name() { return e_name; }
    std::string &get_function_str() { return function_str_; }
    /**
     * @brief Convert name string to enum name
     * @return cudnn_function_name_enum the enum value matching `name`
     */
    cudnn_function_name_enum name2enum() {
        auto it = cudnn_function_name_string.find(this->name);
        if (it != cudnn_function_name_string.end()) {
            this->e_name = it->second;
            return e_name;
        } else {
            // NOTE(review): throws a const char*, which catch(std::exception&) callers
            // will not catch — consider std::runtime_error.
            throw "ERROR: invalid input function name";
        }
    }
};
} // namespace cudnn_test
// Copyright(c) Microsoft Corporation.
// Licensed under the MIT License.
#pragma once
#include <chrono>
#include <iomanip>
#include <tuple>
#include "cudnn_config.h"
namespace cudnn_test {
/**
* @brief Generation of cudnn functions' params and run the benchmark of this function
*
* @tparam T1 the type of TensorDescriptor
* @tparam T2 the type of ConvolutionDescriptor
*/
template <typename T1, typename T2> class CudnnFunction : public CudnnConfig {
protected:
cudnnHandle_t cudnn_handle;
TensorDescriptorNd<T1> x_desc_;
FilterDescriptorNd<T1> w_desc_;
ConvolutionDescriptor<T2> conv_desc_;
TensorDescriptorNd<T1> h_desc_;
size_t fwd_workspace_size_;
float *fwd_workspace_;
T1 *x, *filter, *h;
const float alpha_ = 1.f;
const float beta_ = 0.f;
/**
* @brief Malloc cuda memory and fill in value for data params used in the cudnn function
*/
void prepare_input();
/**
* @brief Generate some params used in the cudnn function
*/
void prepare_for_function();
/**
* @brief Get and set convolution algorithm and workspace size used in cudnn convolution functions
*/
virtual void get_workspace_size() {}
/**
* @brief launch the kernel/function
*/
virtual void kernel_entry() {}
public:
/**
* @brief Construct a new Cudnn Function object according to a CudnnConfig object, including initialization for
* cudnn handle and curand
*
* @param config a CudnnConfig object including configuration of the params to the cudnn function
*/
CudnnFunction(CudnnConfig &config) : CudnnConfig(config) {
// Init cudnn handle and device
cudnn_handle_init(&this->cudnn_handle);
}
/**
* @brief Destroy the Cudnn Function object, including free cuda memory and handle of cudnn and curand
*/
virtual ~CudnnFunction() {
// free context and memory
CUDA_SAFE_CALL(cudaFree(x));
CUDA_SAFE_CALL(cudaFree(filter));
CUDA_SAFE_CALL(cudaFree(h));
cudnn_handle_free(&this->cudnn_handle);
}
/**
* @brief The main procedure for cudnn function test, including warmup, function test and time measurement
*/
void benchmark();
};
/**
 * @brief Build the descriptors, math mode, algorithm and workspace needed by the cudnn call
 */
template <typename T1, typename T2> void CudnnFunction<T1, T2>::prepare_for_function() {
    // Create convolution, input, filter and output descriptors from the config.
    conv_desc_ =
        ConvolutionDescriptor<T2>(get_array_length(), get_padA(), get_filter_strideA(), get_dilationA(), get_mode());
    x_desc_ = TensorDescriptorNd<T1>(get_input_dims(), get_input_stride());
    w_desc_ = FilterDescriptorNd<T1>(get_filter_dims());
    h_desc_ = TensorDescriptorNd<T1>(get_output_dims(), get_output_stride());
    // Pick tensor-op math when the config permits it, default math otherwise.
    const cudnnMathType_t math_type = get_use_tensor_op() ? CUDNN_TENSOR_OP_MATH : CUDNN_DEFAULT_MATH;
    CHECK_CUDNN_ERROR(cudnnSetConvolutionMathType(conv_desc_.desc(), math_type));
    // Let the derived class select its algorithm and report the workspace requirement.
    this->get_workspace_size();
    // Allocate and zero the workspace buffer (size expressed in whole floats).
    zeros<float>(&fwd_workspace_, std::vector<int>{static_cast<int>(this->fwd_workspace_size_ / sizeof(float)), 1});
}
/**
 * @brief Malloc cuda memory and fill in value for data params used in the cudnn function
 *
 * All three device buffers are filled with pseudo-random data via rand<T1>();
 * the same seed is passed for each buffer.
 */
template <typename T1, typename T2> void CudnnFunction<T1, T2>::prepare_input() {
    // Allocate memory for filter data
    rand<T1>(&filter, get_filter_dims(), random_seed);
    // Allocate memory for input data
    rand<T1>(&x, get_input_dims(), random_seed);
    // Allocate memory for output data
    rand<T1>(&h, get_output_dims(), random_seed);
}
/**
 * @brief The main procedure for cudnn function test, including warmup, function test and time measurement
 */
template <typename T1, typename T2> void CudnnFunction<T1, T2>::benchmark() {
    // Prepare descriptors, algorithm and workspace for function running
    prepare_for_function();
    // Allocate memory and fill with data of input and output tensor
    prepare_input();
    // Warm up (not timed)
    for (int i = 0; i < warm_up; ++i) {
        for (int j = 0; j < num_in_step; j++) {
            kernel_entry();
        }
    }
    CUDA_SAFE_CALL(cudaDeviceSynchronize());
    // Per-step average durations in milliseconds per function invocation
    std::vector<float> iteration_time;
    iteration_time.reserve(num_test);
    // Benchmark in range of steps
    for (int i = 0; i < num_test; i++) {
        // Collect time within each step, including #num_in_step times function invoking
        auto start = std::chrono::high_resolution_clock::now();
        for (int j = 0; j < num_in_step; j++) {
            kernel_entry();
        }
        // Synchronize so the measured interval covers device work, not just kernel launches
        CUDA_SAFE_CALL(cudaDeviceSynchronize());
        auto end = std::chrono::high_resolution_clock::now();
        // Convert step time to single-invocation duration
        float step_avg =
            static_cast<float>(std::chrono::duration<double, std::milli>(end - start).count() / num_in_step);
        iteration_time.emplace_back(step_avg);
    }
    // Output results
    std::cout << "[function config]: " << this->get_function_str() << std::endl;
    std::cout << "[raw_data]: ";
    // Range-for avoids the previous signed/unsigned comparison against size()
    for (const auto &t : iteration_time) {
        std::cout << t << ",";
    }
    std::cout << std::endl;
}
} // namespace cudnn_test
// Copyright(c) Microsoft Corporation.
// Licensed under the MIT License.
/**
* @brief Helper for parsing command line arguments and pass params to CudnnConfig
*/
#pragma once
#include <algorithm>
#include <ctime>
#include <fstream>
#include <iostream>
#include <limits>
#include <memory>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>
#include <nlohmann/json.hpp>
#include "convolution_backward_data.h"
#include "convolution_backward_filter.h"
#include "convolution_forward.h"
using json = nlohmann::json;
namespace cudnn_test {
/**
 * @brief Utility for storing and parsing command line arguments
 */
class Options {
    char **begin; ///< pointer to the first argv entry
    char **end;   ///< pointer one past the last argv entry
    /**
     * @brief Get the char* value of the cmd line argument
     * @param option the argument name in cmd
     * @return char* the token following the option, or nullptr when absent
     */
    char *get_cmd_option(const std::string &option) {
        char **itr = std::find(begin, end, option);
        if (itr != end && ++itr != end) {
            return *itr;
        }
        // nullptr instead of the literal 0 for pointer clarity
        return nullptr;
    }
    /**
     * @brief Get the int type value of cmd line argument
     * @param option the cmd line argument
     * @return int the parsed value, or 0 when the option is absent
     *         (note: std::stoi throws on non-numeric input)
     */
    int get_cmd_line_argument_int(const std::string &option) {
        if (char *value = get_cmd_option(option)) {
            return std::stoi(value);
        }
        return 0;
    }
    /**
     * @brief Get the string type value of cmd line argument
     * @param option the cmd line argument
     * @return std::string the value of the option, or "" when absent
     */
    std::string get_cmd_line_argument_string(const std::string &option) {
        if (char *value = get_cmd_option(option)) {
            return std::string(value);
        }
        return "";
    }

  public:
    int num_test;               ///< number of measured steps (default 1)
    int warm_up;                ///< number of warm-up steps (default 1)
    int num_in_step;            ///< function invocations per step (default 100)
    int random_seed;            ///< data-generation seed (default: current time)
    std::string para_info_json; ///< json string describing the function under test
    /**
     * @brief Construct a new Command Line object, applying defaults for missing options
     * @param argc argument count from main()
     * @param argv argument vector from main()
     */
    Options(int argc, char *argv[]) {
        begin = argv;
        end = argv + argc;
        num_test = get_cmd_line_argument_int("--num_test");
        num_test = (num_test == 0 ? 1 : num_test);
        warm_up = get_cmd_line_argument_int("--warm_up");
        warm_up = (warm_up == 0 ? 1 : warm_up);
        num_in_step = get_cmd_line_argument_int("--num_in_step");
        num_in_step = (num_in_step == 0 ? 100 : num_in_step);
        random_seed = get_cmd_line_argument_int("--random_seed");
        random_seed = (random_seed == 0 ? time(NULL) : random_seed);
        para_info_json = get_cmd_line_argument_string("--config_json");
        para_info_json =
            para_info_json == ""
                ? R"({"algo":0,"arrayLength":2,"convType":0,"dilationA":[1,1],"filterStrideA":[1,1],"filterDims":[32,128,3,3],"inputDims":[32,128,14,14],"inputStride":[25088,196,14,1],"inputType":0,"mode":1, "name":"cudnnConvolutionBackwardFilter","outputDims":[32,32,14,14],"outputStride":[6272,196,14,1],"padA":[1,1],"tensorOp":false})"
                : para_info_json;
    }
};
/**
 * @brief Helper function to convert from json to CudnnConfig
 *
 * @param j json including the params of a cudnn function read from 'config_path'
 * @param fn a CudnnConfig object populated from the json fields
 */
void from_json(const json &j, cudnn_test::CudnnConfig &fn) {
    // Keep a human-readable copy of the raw config (quotes replaced by spaces) for logging.
    auto str = j.dump();
    std::replace(str.begin(), str.end(), '\"', ' ');
    fn.set_function(str);
    // Required fields — j.at() throws if any of them is missing.
    fn.set_name(j.at("name").get<std::string>());
    fn.set_input_dims(j.at("inputDims").get<std::vector<int>>());
    fn.set_output_dims(j.at("outputDims").get<std::vector<int>>());
    fn.set_filter_dims(j.at("filterDims").get<std::vector<int>>());
    fn.set_input_type(j.at("inputType").get<cudnnDataType_t>());
    fn.set_conv_type(j.at("convType").get<cudnnDataType_t>());
    fn.set_array_length(j.at("arrayLength").get<int>());
    fn.set_input_stride(j.at("inputStride").get<std::vector<int>>());
    fn.set_output_stride(j.at("outputStride").get<std::vector<int>>());
    fn.set_algo(j.at("algo").get<int>());
    fn.set_padA(j.at("padA").get<std::vector<int>>());
    fn.set_filter_strideA(j.at("filterStrideA").get<std::vector<int>>());
    fn.set_dilationA(j.at("dilationA").get<std::vector<int>>());
    fn.set_mode(j.at("mode").get<cudnnConvolutionMode_t>());
    fn.set_use_tensor_op(j.at("tensorOp").get<bool>());
    // Validate the name and cache its enum form.
    fn.name2enum();
}
/**
 * @brief Get the cudnn function pointer of a specific child class object
 * @param function base class object of a cudnnFunction, used to initialize the base part of the child class
 * object
 * @return CudnnFunction<T1, T2>* a base-class pointer to a newly allocated child object;
 *         caller owns the pointer and must delete it (or wrap it in a smart pointer)
 */
template <typename T1, typename T2> CudnnFunction<T1, T2> *get_cudnn_function_pointer(CudnnConfig &function) {
    switch (function.get_e_name()) {
    case e_cudnnConvolutionForward:
        return new ConvolutionForwardFunction<T1, T2>(function);
    case e_cudnnConvolutionBackwardData:
        return new ConvolutionBackwardDataFunction<T1, T2>(function);
    case e_cudnnConvolutionBackwardFilter:
        return new ConvolutionBackwardFilterFunction<T1, T2>(function);
    default:
        // std::exception-derived so callers' catch(std::exception&) blocks actually
        // catch it; the previous char* throw bypassed them and terminated.
        throw std::runtime_error("invalid function name");
    }
}
/**
 * @brief run the entire process of benchmark according to cmd arguments
 *
 * first, parse the para_info_json string describing the cudnn function,
 * then dispatch on (input type, conv type) to instantiate the right function template,
 * finally run the benchmark of the function
 *
 * @param options the cmd arguments of the application
 */
void run_benchmark(Options &options) {
    try {
        json function_config = json::parse(options.para_info_json);
        // convert function params from json to CudnnConfig class
        cudnn_test::CudnnConfig function = function_config.get<cudnn_test::CudnnConfig>();
        function.set_num_test(options.num_test);
        function.set_warm_up(options.warm_up);
        function.set_num_in_step(options.num_in_step);
        function.set_random_seed(options.random_seed);
        const cudnnDataType_t input_type = function.get_input_type();
        const cudnnDataType_t conv_type = function.get_conv_type();
        // unique_ptr guarantees the function object is released even if benchmark() throws;
        // the previous raw new/delete leaked on exception.
        if (input_type == CUDNN_DATA_FLOAT && conv_type == CUDNN_DATA_FLOAT) {
            std::unique_ptr<CudnnFunction<float, float>> p_function(get_cudnn_function_pointer<float, float>(function));
            p_function->benchmark();
        } else if (input_type == CUDNN_DATA_HALF && conv_type == CUDNN_DATA_FLOAT) {
            std::unique_ptr<CudnnFunction<half, float>> p_function(get_cudnn_function_pointer<half, float>(function));
            p_function->benchmark();
        } else if (input_type == CUDNN_DATA_HALF && conv_type == CUDNN_DATA_HALF) {
            std::unique_ptr<CudnnFunction<half, half>> p_function(get_cudnn_function_pointer<half, half>(function));
            p_function->benchmark();
        } else {
            // std::exception-derived so the catch below reports it instead of terminating
            throw std::runtime_error("invalid input and conv type");
        }
    } catch (const std::exception &e) {
        std::cout << "Error: " << e.what() << std::endl;
    }
}
} // namespace cudnn_test
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
/**
* @brief Cpp file for some functions related to cudnn
*/
#include <cstdlib>
#include <numeric>
#include "cudnn_function.h"
namespace cudnn_test {
/**
 * @brief Check a cudnn call's status and throw a descriptive std::runtime_error on failure
 * @param result status code returned by the cudnn call
 * @param func textual form of the call site (supplied by CHECK_CUDNN_ERROR)
 * @param file source file of the call site
 * @param line source line of the call site
 */
void throw_cudnn_err(cudnnStatus_t result, const char *func, const char *file, int const line) {
    if (result == CUDNN_STATUS_SUCCESS) {
        return;
    }
    std::stringstream message;
    message << func << " failed with error"
            << "\nfile: " << file << "\nline: " << line << "\nmsg: " << cudnnGetErrorString(result);
    throw std::runtime_error(message.str());
}
/**
 * @brief Check a cuda runtime call's status and throw a descriptive std::runtime_error on failure
 * @param result status code returned by the cuda runtime call
 * @param func textual form of the call site (supplied by CUDA_SAFE_CALL)
 * @param file source file of the call site
 * @param line source line of the call site
 */
void check_cuda(cudaError_t result, const char *func, const char *file, int const line) {
    if (result == cudaSuccess) {
        return;
    }
    std::stringstream message;
    message << func << " failed with error"
            << "\nfile: " << file << "\nline: " << line << "\nmsg: " << cudaGetErrorString(result);
    throw std::runtime_error(message.str());
}
/**
 * @brief Cuda context init: reset the device, select device 0 and create a cudnn handle
 * @param cudnn_handle receives the created handle
 */
void cudnn_handle_init(cudnnHandle_t *cudnn_handle) {
    CUDA_SAFE_CALL(cudaDeviceReset());
    CUDA_SAFE_CALL(cudaSetDevice(0));
    // create streams/handles
    CHECK_CUDNN_ERROR(cudnnCreate(cudnn_handle));
}
/**
 * @brief Cuda context free: destroy the cudnn handle created by cudnn_handle_init
 * @param cudnn_handle the handle to destroy
 */
void cudnn_handle_free(cudnnHandle_t *cudnn_handle) { CHECK_CUDNN_ERROR(cudnnDestroy(*cudnn_handle)); }
/**
 * @brief Malloc cuda memory and fill in rand value
 * @tparam T element type; only the float and half specializations below are supported
 * @param input the pointer of input (receives the device pointer)
 * @param dims_ the shape of input
 * @param random_seed the random seed to generate random data
 */
template <typename T> void rand(T **input, std::vector<int> dims_, int random_seed) {
    // NOTE(review): throws a const char*, which catch(std::exception&) callers will not
    // catch — confirm this hard-failure behavior is intended for unsupported types.
    throw "unsupported rand data type";
}
/**
 * @brief float specialization: allocate a device buffer and fill it with uniform [0,1) values
 */
template <> void rand(float **input, std::vector<int> dims_, int random_seed) {
    const int element_count = std::accumulate(dims_.begin(), dims_.end(), 1, std::multiplies<int>());
    const size_t num_bytes = sizeof(float) * element_count;
    CUDA_SAFE_CALL(cudaMalloc((void **)input, num_bytes));
    // Stage the random values in pinned host memory for a fast host-to-device copy.
    float *host_buffer;
    CUDA_SAFE_CALL(cudaMallocHost(&host_buffer, num_bytes));
    srand(random_seed);
    for (int i = 0; i < element_count; i++) {
        host_buffer[i] = (float)std::rand() / (float)(RAND_MAX);
    }
    // copy input data from host to device, then release the staging buffer
    CUDA_SAFE_CALL(cudaMemcpy(*input, host_buffer, num_bytes, cudaMemcpyHostToDevice));
    CUDA_SAFE_CALL(cudaFreeHost(host_buffer));
}
/**
 * @brief half specialization: allocate a device buffer and fill it with uniform [0,1) values
 */
template <> void rand(half **input, std::vector<int> dims_, int random_seed) {
    int size = std::accumulate(dims_.begin(), dims_.end(), 1, std::multiplies<int>());
    CUDA_SAFE_CALL(cudaMalloc((void **)input, sizeof(half) * size));
    half *host_input;
    CUDA_SAFE_CALL(cudaMallocHost(&host_input, sizeof(half) * size));
    // Seed the generator so data is reproducible (matches the float specialization,
    // which previously seeded while this overload did not).
    srand(random_seed);
    for (int i = 0; i < size; i++) {
        host_input[i] = __float2half((float)std::rand() / (float)(RAND_MAX));
    }
    // BUGFIX: dst/src were swapped — cudaMemcpy(dst, src, ...) with cudaMemcpyHostToDevice
    // must copy from host_input into the device buffer *input, not the other way around.
    CUDA_SAFE_CALL(cudaMemcpy(*input, host_input, sizeof(half) * size, cudaMemcpyHostToDevice));
    CUDA_SAFE_CALL(cudaFreeHost(host_input));
}
} // namespace cudnn_test
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
/**
* @brief Header file for some functions related to cudnn
*/
#pragma once
#include <iostream>
#include <memory>
#include <sstream>
#include <string>
#include <vector>
#include <cuda.h>
#include <cuda_fp16.h>
#include <cuda_runtime.h>
#include <cudnn.h>
#include <curand.h>
#include <curand_kernel.h>
namespace cudnn_test {
/**
 * @brief Check cudnn call status; throws std::runtime_error with call text and location on failure
 */
void throw_cudnn_err(cudnnStatus_t result, const char *func, const char *file, int const line);
// Wraps a cudnn call, capturing its source text and location for the error message
#define CHECK_CUDNN_ERROR(x) throw_cudnn_err((x), #x, __FILE__, __LINE__)
/**
 * @brief Check cuda runtime call status; throws std::runtime_error with call text and location on failure
 */
void check_cuda(cudaError_t result, const char *func, const char *file, int const line);
// Wraps a cuda runtime call, capturing its source text and location for the error message
#define CUDA_SAFE_CALL(x) check_cuda((x), #x, __FILE__, __LINE__)
/**
 * @brief Cuda context init: resets the device, selects device 0 and creates a cudnn handle
 */
void cudnn_handle_init(cudnnHandle_t *cudnn_handle);
/**
 * @brief Cuda context free: destroys the handle created by cudnn_handle_init
 */
void cudnn_handle_free(cudnnHandle_t *cudnn_handle);
/**
 * @brief Malloc cuda memory and fill in rand value
 * @tparam T element type (float and half specializations are defined in cudnn_helper.cpp)
 * @param input the pointer of input (receives the device pointer)
 * @param dims_ the shape of input
 * @param random_seed the random seed to generate random data
 */
template <typename T> void rand(T **input, std::vector<int> dims_, int random_seed);
/**
 * @brief Malloc cuda memory and fill in zero
 * @tparam T element type of the buffer
 * @param input the pointer of input (receives the device pointer)
 * @param dims_ the shape of input
 */
// NOTE(review): std::accumulate/std::multiplies need <numeric>/<functional>, which this
// header does not include directly — relies on transitive includes; confirm.
template <typename T> void zeros(T **input, std::vector<int> dims_) {
    int size = std::accumulate(dims_.begin(), dims_.end(), 1, std::multiplies<int>());
    CUDA_SAFE_CALL(cudaMalloc((void **)input, sizeof(T) * size));
    CUDA_SAFE_CALL(cudaMemset((void *)*input, 0, sizeof(T) * size));
}
/**
 * @brief Get cudnn tensor format for element type T
 * @tparam T element type
 * @param tensor_format receives the selected cudnnTensorFormat_t
 */
template <typename T> void get_tensor_format(cudnnTensorFormat_t &tensor_format) {
    // int8 inference only supports NHWC; every other type uses NCHW.
    tensor_format = std::is_same<T, uint8_t>::value ? CUDNN_TENSOR_NHWC : CUDNN_TENSOR_NCHW;
}
/**
 * @brief Get cudnn tensor data type for element type T
 * @tparam T one of float, half or (cudnn >= 6) uint8_t; anything else throws
 * @param type receives the matching cudnnDataType_t
 */
template <typename T> void get_tensor_type(cudnnDataType_t &type) {
    if (std::is_same<T, float>::value) {
        type = CUDNN_DATA_FLOAT;
    } else if (std::is_same<T, half>::value) {
        type = CUDNN_DATA_HALF;
    }
// int8 support is only available from cudnn 6 onward, per this version guard
#if CUDNN_MAJOR >= 6
    else if (std::is_same<T, uint8_t>::value)
        type = CUDNN_DATA_INT8;
#endif
    else
        throw("unknown type in tensorDescriptor");
}
/**
 * @brief RAII wrapper for TensorDescriptorNd
 * @tparam T element type, mapped to a cudnnDataType_t via get_tensor_type
 *
 * The descriptor lives in a shared_ptr with a custom deleter, so copies share
 * ownership and the underlying cudnn descriptor is destroyed exactly once.
 */
template <typename T> class TensorDescriptorNd {
    std::shared_ptr<cudnnTensorDescriptor_t> desc_;
    // Deleter invoked when the last owner goes away.
    struct TensorDescriptorNdDeleter {
        void operator()(cudnnTensorDescriptor_t *desc) {
            CHECK_CUDNN_ERROR(cudnnDestroyTensorDescriptor(*desc));
            delete desc;
        }
    };

  public:
    // Default-constructed wrapper holds no descriptor; desc() must not be called on it.
    TensorDescriptorNd() {}
    TensorDescriptorNd(const std::vector<int> &dim, const std::vector<int> &stride)
        : desc_(new cudnnTensorDescriptor_t, TensorDescriptorNdDeleter()) {
        cudnnDataType_t type;
        get_tensor_type<T>(type);
        CHECK_CUDNN_ERROR(cudnnCreateTensorDescriptor(desc_.get()));
        CHECK_CUDNN_ERROR(cudnnSetTensorNdDescriptor(*desc_, type, dim.size(), dim.data(), stride.data()));
    }
    cudnnTensorDescriptor_t desc() const { return *desc_; }
};
/**
 * @brief RAII wrapper for FilterDescriptorNd
 * @tparam T element type, mapped to cudnn tensor format and data type
 *
 * The descriptor lives in a shared_ptr with a custom deleter, so copies share
 * ownership and the underlying cudnn descriptor is destroyed exactly once.
 */
template <typename T> class FilterDescriptorNd {
    std::shared_ptr<cudnnFilterDescriptor_t> desc_;
    // Deleter invoked when the last owner goes away.
    struct FilterDescriptorNdDeleter {
        void operator()(cudnnFilterDescriptor_t *desc) {
            CHECK_CUDNN_ERROR(cudnnDestroyFilterDescriptor(*desc));
            delete desc;
        }
    };

  public:
    // Default-constructed wrapper holds no descriptor; desc() must not be called on it.
    FilterDescriptorNd() {}
    FilterDescriptorNd(const std::vector<int> &dim) : desc_(new cudnnFilterDescriptor_t, FilterDescriptorNdDeleter()) {
        cudnnTensorFormat_t tensor_format;
        get_tensor_format<T>(tensor_format);
        cudnnDataType_t type;
        get_tensor_type<T>(type);
        CHECK_CUDNN_ERROR(cudnnCreateFilterDescriptor(desc_.get()));
        // dim.data() instead of &dim[0], consistent with the other descriptor wrappers
        CHECK_CUDNN_ERROR(cudnnSetFilterNdDescriptor(*desc_, type, tensor_format, dim.size(), dim.data()));
    }
    // const accessor, consistent with TensorDescriptorNd/ConvolutionDescriptor
    cudnnFilterDescriptor_t desc() const { return *desc_; }
};
/**
 * @brief RAII wrapper for ConvolutionDescriptor
 * @tparam T convolution compute type, mapped to a cudnnDataType_t via get_tensor_type
 *
 * The descriptor lives in a shared_ptr with a custom deleter, so copies share
 * ownership and the underlying cudnn descriptor is destroyed exactly once.
 */
template <typename T> class ConvolutionDescriptor {
    std::shared_ptr<cudnnConvolutionDescriptor_t> desc_;
    // Deleter invoked when the last owner goes away.
    struct ConvolutionDescriptorDeleter {
        void operator()(cudnnConvolutionDescriptor_t *desc) {
            CHECK_CUDNN_ERROR(cudnnDestroyConvolutionDescriptor(*desc));
            delete desc;
        }
    };

  public:
    // Default-constructed wrapper holds no descriptor; desc() must not be called on it.
    ConvolutionDescriptor() {}
    ConvolutionDescriptor(int array_length, const std::vector<int> &padA, const std::vector<int> &filter_strideA,
                          const std::vector<int> &dilationA, cudnnConvolutionMode_t mode)
        : desc_(new cudnnConvolutionDescriptor_t, ConvolutionDescriptorDeleter()) {
        cudnnDataType_t type;
        get_tensor_type<T>(type);
        CHECK_CUDNN_ERROR(cudnnCreateConvolutionDescriptor(desc_.get()));
        CHECK_CUDNN_ERROR(cudnnSetConvolutionNdDescriptor(*desc_, array_length, padA.data(), filter_strideA.data(),
                                                          dilationA.data(), mode, type));
    }
    cudnnConvolutionDescriptor_t desc() const { return *desc_; };
};
} // namespace cudnn_test
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
/**
* @brief Cudnn function benchmark will read the params from cmd, and use these params
* to benchmark the wall time of the cudnn functions.
*/
#include <limits>
#include <stdexcept>
#include "cudnn_function_helper.h"
/**
* @brief Main function and entry of cudnn benchmark
* @details
* params list:
* num_test: test step nums
* warm_up: warm up step nums
* num_in_step: times each step will invoke the function
* random_seed: the random seed to generate data
* config_json: the json string including the params of the function
* functions supported:
* cudnnConvolutionForward
* cudnnConvolutionBackwardData
* cudnnConvolutionBackwardFilter
* @param argc
* @param argv
* @return int
*/
int main(int argc, char *argv[]) {
try {
// parse arguments from cmd
cudnn_test::Options options(argc, argv);
// benchmark the function
cudnn_test::run_benchmark(options);
} catch (std::exception &e) {
std::cout << "Error: " << e.what() << std::endl;
exit(-1);
}
return 0;
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment