"git@developer.sourcefind.cn:yangql/googletest.git" did not exist on "1da9ceefa5994624c9264431e7f65f85d2bd49ec"
Unverified Commit eba298f5 authored by Yuting Jiang's avatar Yuting Jiang Committed by GitHub
Browse files

Benchmarks: Revision - Support flexible warmup and non-random data...

Benchmarks: Revision - Support flexible warmup and non-random data initialization in cublas-benchmark  (#479)

**Description**
Revise cublas-benchmark to support flexible warmup and to fill input data with
a fixed value during performance tests, improving running efficiency.

**Major Revision**
- Remove the `num_in_step` multiplier from warmup to support more flexible warmup settings
for users
- Add support for generating input filled with a fixed value for performance tests
parent 02923660
...@@ -190,23 +190,26 @@ def add_parser_arguments(self): ...@@ -190,23 +190,26 @@ def add_parser_arguments(self):
self._parser.add_argument( self._parser.add_argument(
'--num_warmup', '--num_warmup',
type=int, type=int,
default=8, default=8 * 1000,
required=False, required=False,
help='The number of warmup step.', help='The number of functions for warmup. By default, the total number of functions to run in warmup ' +
'is 8 warmup steps * 1000 num_in_step.',
) )
self._parser.add_argument( self._parser.add_argument(
'--num_steps', '--num_steps',
type=int, type=int,
default=100, default=100,
required=False, required=False,
help='The number of test step.', help='The number of test steps. By default, the total number of functions to run in the measured test ' +
'is 100 test steps * 1000 num_in_step.',
) )
self._parser.add_argument( self._parser.add_argument(
'--num_in_step', '--num_in_step',
type=int, type=int,
default=1000, default=1000,
required=False, required=False,
help='The number of functions in one step.', help='The number of functions in one step. By default, the total number of functions to run ' +
'in each step is 1000.',
) )
self._parser.add_argument( self._parser.add_argument(
'--random_seed', '--random_seed',
...@@ -236,6 +239,13 @@ def add_parser_arguments(self): ...@@ -236,6 +239,13 @@ def add_parser_arguments(self):
required=False, required=False,
help='The acceptable error bound for correctness check.', help='The acceptable error bound for correctness check.',
) )
self._parser.add_argument(
'--random_data',
action='store_true',
default=False,
help='Enable random data generation for performance test. ' +
'By default, the data is filled with fixed value for performance test.',
)
def _preprocess(self): def _preprocess(self):
"""Preprocess/preparation operations before the benchmarking. """Preprocess/preparation operations before the benchmarking.
...@@ -253,6 +263,7 @@ def _preprocess(self): ...@@ -253,6 +263,7 @@ def _preprocess(self):
command += (' --random_seed ' + str(self._args.random_seed)) command += (' --random_seed ' + str(self._args.random_seed))
command += ' --correctness' if self._args.correctness else '' command += ' --correctness' if self._args.correctness else ''
command += (' --eps ' + str(self._args.eps)) if self._args.eps is not None else '' command += (' --eps ' + str(self._args.eps)) if self._args.eps is not None else ''
command += ' --random_data' if self._args.random_data else ''
try: try:
if not self._args.config_json_str: if not self._args.config_json_str:
......
...@@ -54,6 +54,7 @@ class CublasFunction { ...@@ -54,6 +54,7 @@ class CublasFunction {
int random_seed; ///< the random seed used to generate random data int random_seed; ///< the random seed used to generate random data
double eps; ///< the acceptable error bound for numeric stability double eps; ///< the acceptable error bound for numeric stability
bool correctness; ///< whether enable correctness check or not bool correctness; ///< whether enable correctness check or not
bool random_data; ///< whether enable random data generation or not
std::string name_; ///< the name of the cublas function std::string name_; ///< the name of the cublas function
int m_; ///< the m dim of matrix int m_; ///< the m dim of matrix
int k_; ///< the k dim of matrix int k_; ///< the k dim of matrix
...@@ -70,17 +71,17 @@ class CublasFunction { ...@@ -70,17 +71,17 @@ class CublasFunction {
/** /**
* @brief Fill the random data into the input * @brief Fill the random data into the input
*/ */
template <typename T> void fill_data(T *Parameter_0_0_host, T *Parameter_1_0_host); template <typename T> void fill_data(T *Parameter_0_0_host, T *Parameter_1_0_host, bool random = true);
/** /**
* @brief Prepare memory and data of the input and output * @brief Prepare memory and data of the input and output
*/ */
template <typename T> template <typename T>
void prepare_tensor_template(T **Parameter_0_0, T **Parameter_1_0, T **Result_3_0, T **Parameter_0_0_host, void prepare_tensor_template(T **Parameter_0_0, T **Parameter_1_0, T **Result_3_0, T **Parameter_0_0_host,
T **Parameter_1_0_host); T **Parameter_1_0_host, bool random = true);
/** /**
* @brief Prepare memory and data of the input and output for kernel running * @brief Prepare memory and data of the input and output for kernel running
*/ */
virtual void prepare_tensor() {} virtual void prepare_tensor(bool random = true) {}
/** /**
* @brief Execute the kernel/function * @brief Execute the kernel/function
*/ */
...@@ -140,11 +141,15 @@ class CublasFunction { ...@@ -140,11 +141,15 @@ class CublasFunction {
* @param eps the acceptable error bound for numeric stability * @param eps the acceptable error bound for numeric stability
*/ */
void set_eps(double eps) { this->eps = eps; } void set_eps(double eps) { this->eps = eps; }
/**
* @brief Set the random data
* @param random_data if generate random data
*/
void set_random_data(bool random_data) { this->random_data = random_data; }
/** /**
* @brief Set the params string * @brief Set the params string
* @param str the str representing the params of the function * @param str the str representing the params of the function
*/ */
void set_function(std::string &str) { this->function_str_ = str; } void set_function(std::string &str) { this->function_str_ = str; }
/** /**
* @brief Set the name member * @brief Set the name member
...@@ -228,39 +233,59 @@ class CublasFunction { ...@@ -228,39 +233,59 @@ class CublasFunction {
/** /**
* @brief Fill the random data into the input in float type * @brief Fill the random data into the input in float type
*/ */
template <> void CublasFunction::fill_data(float *Parameter_0_0_host, float *Parameter_1_0_host) { template <> void CublasFunction::fill_data(float *Parameter_0_0_host, float *Parameter_1_0_host, bool random) {
srand(random_seed); if (random) {
for (int i = 0; i < m_ * k_ * batch_count_; i++) { srand(random_seed);
Parameter_0_0_host[i] = ((float)rand() / (float)(RAND_MAX)); for (int i = 0; i < m_ * k_ * batch_count_; i++) {
} Parameter_0_0_host[i] = ((float)rand() / (float)(RAND_MAX));
for (int i = 0; i < k_ * n_ * batch_count_; ++i) { }
Parameter_1_0_host[i] = ((float)rand() / (float)(RAND_MAX)); for (int i = 0; i < k_ * n_ * batch_count_; ++i) {
Parameter_1_0_host[i] = ((float)rand() / (float)(RAND_MAX));
}
} else {
// memset the input data to fixed float value
memset(Parameter_0_0_host, 2,
(unsigned long)m_ * (unsigned long)k_ * (unsigned long)batch_count_ * sizeof(float));
memset(Parameter_1_0_host, 3,
(unsigned long)k_ * (unsigned long)n_ * (unsigned long)batch_count_ * sizeof(float));
} }
} }
/** /**
* @brief Fill the random data into the input in half type * @brief Fill the random data into the input in half type
*/ */
template <> void CublasFunction::fill_data(half *Parameter_0_0_host, half *Parameter_1_0_host) { template <> void CublasFunction::fill_data(half *Parameter_0_0_host, half *Parameter_1_0_host, bool random) {
srand(random_seed); if (random) {
for (int i = 0; i < m_ * k_ * batch_count_; i++) { srand(random_seed);
Parameter_0_0_host[i] = half((float)rand() / (float)(RAND_MAX)); for (int i = 0; i < m_ * k_ * batch_count_; i++) {
} Parameter_0_0_host[i] = half((float)rand() / (float)(RAND_MAX));
for (int i = 0; i < k_ * n_ * batch_count_; ++i) { }
Parameter_1_0_host[i] = half((float)rand() / (float)(RAND_MAX)); for (int i = 0; i < k_ * n_ * batch_count_; ++i) {
Parameter_1_0_host[i] = half((float)rand() / (float)(RAND_MAX));
}
} else {
// memset the input data to fixed float value
std::fill(Parameter_0_0_host, Parameter_0_0_host + m_ * k_ * batch_count_, half(2.0));
std::fill(Parameter_1_0_host, Parameter_1_0_host + k_ * n_ * batch_count_, half(3.0));
} }
} }
/** /**
* @brief Fill the random data into the input in cuComplex type * @brief Fill the random data into the input in cuComplex type
*/ */
template <> void CublasFunction::fill_data(cuComplex *Parameter_0_0_host, cuComplex *Parameter_1_0_host) { template <> void CublasFunction::fill_data(cuComplex *Parameter_0_0_host, cuComplex *Parameter_1_0_host, bool random) {
srand(random_seed); if (random) {
for (int i = 0; i < m_ * k_ * batch_count_; i++) { srand(random_seed);
Parameter_0_0_host[i] = for (int i = 0; i < m_ * k_ * batch_count_; i++) {
make_cuComplex(((float)rand() / (float)(RAND_MAX)), ((float)rand() / (float)(RAND_MAX))); Parameter_0_0_host[i] =
} make_cuComplex(((float)rand() / (float)(RAND_MAX)), ((float)rand() / (float)(RAND_MAX)));
for (int i = 0; i < k_ * n_ * batch_count_; ++i) { }
Parameter_1_0_host[i] = for (int i = 0; i < k_ * n_ * batch_count_; ++i) {
make_cuComplex(((float)rand() / (float)(RAND_MAX)), ((float)rand() / (float)(RAND_MAX))); Parameter_1_0_host[i] =
make_cuComplex(((float)rand() / (float)(RAND_MAX)), ((float)rand() / (float)(RAND_MAX)));
}
} else {
// memset the input data to fixed float value
std::fill(Parameter_0_0_host, Parameter_0_0_host + m_ * k_ * batch_count_, make_cuComplex(2.0f, 2.0f));
std::fill(Parameter_1_0_host, Parameter_1_0_host + k_ * n_ * batch_count_, make_cuComplex(3.0f, 3.0f));
} }
} }
/** /**
...@@ -268,7 +293,7 @@ template <> void CublasFunction::fill_data(cuComplex *Parameter_0_0_host, cuComp ...@@ -268,7 +293,7 @@ template <> void CublasFunction::fill_data(cuComplex *Parameter_0_0_host, cuComp
*/ */
template <typename T> template <typename T>
void CublasFunction::prepare_tensor_template(T **Parameter_0_0, T **Parameter_1_0, T **Result_3_0, void CublasFunction::prepare_tensor_template(T **Parameter_0_0, T **Parameter_1_0, T **Result_3_0,
T **Parameter_0_0_host, T **Parameter_1_0_host) { T **Parameter_0_0_host, T **Parameter_1_0_host, bool random) {
int m = this->m_, n = this->n_, k = this->k_, batch_count = this->batch_count_; int m = this->m_, n = this->n_, k = this->k_, batch_count = this->batch_count_;
// input argument // input argument
CUDA_SAFE_CALL(cudaMallocHost((void **)Parameter_0_0_host, sizeof(T) * m * k * batch_count_)); CUDA_SAFE_CALL(cudaMallocHost((void **)Parameter_0_0_host, sizeof(T) * m * k * batch_count_));
...@@ -278,7 +303,7 @@ void CublasFunction::prepare_tensor_template(T **Parameter_0_0, T **Parameter_1_ ...@@ -278,7 +303,7 @@ void CublasFunction::prepare_tensor_template(T **Parameter_0_0, T **Parameter_1_
CUDA_SAFE_CALL(cudaMalloc((void **)Parameter_1_0, sizeof(T) * n * k * batch_count_)); CUDA_SAFE_CALL(cudaMalloc((void **)Parameter_1_0, sizeof(T) * n * k * batch_count_));
// fill input values // fill input values
fill_data(reinterpret_cast<T *>(*Parameter_0_0_host), reinterpret_cast<T *>(*Parameter_1_0_host)); fill_data(reinterpret_cast<T *>(*Parameter_0_0_host), reinterpret_cast<T *>(*Parameter_1_0_host), random);
// copy input data from host to device // copy input data from host to device
CUDA_SAFE_CALL( CUDA_SAFE_CALL(
...@@ -469,13 +494,12 @@ int CublasFunction::check_result(int batch_count, cuComplex *Result_3_0, std::co ...@@ -469,13 +494,12 @@ int CublasFunction::check_result(int batch_count, cuComplex *Result_3_0, std::co
*/ */
void CublasFunction::benchmark() { void CublasFunction::benchmark() {
// Malloc memory for input and output data // Malloc memory for input and output data
this->prepare_tensor(); bool random = this->correctness ? true : this->random_data;
this->prepare_tensor(random);
// Warm up // Warm up
for (int i_ = 0; i_ < warm_up; i_++) { for (int i_ = 0; i_ < warm_up; i_++) {
for (int j = 0; j < num_in_step; j++) { this->kernel_entry();
this->kernel_entry();
}
} }
CUDA_SAFE_CALL(cudaDeviceSynchronize()); CUDA_SAFE_CALL(cudaDeviceSynchronize());
......
...@@ -39,8 +39,9 @@ class SgemmFunction : public CublasFunction { ...@@ -39,8 +39,9 @@ class SgemmFunction : public CublasFunction {
/** /**
* @brief Prepare memory and data of the input and output for kernel running * @brief Prepare memory and data of the input and output for kernel running
*/ */
virtual void prepare_tensor() { virtual void prepare_tensor(bool random) {
prepare_tensor_template(&Parameter_0_0, &Parameter_1_0, &Result_3_0, &Parameter_0_0_host, &Parameter_1_0_host); prepare_tensor_template(&Parameter_0_0, &Parameter_1_0, &Result_3_0, &Parameter_0_0_host, &Parameter_1_0_host,
random);
} }
/** /**
* @brief Check the correctness of function calculation result * @brief Check the correctness of function calculation result
...@@ -107,8 +108,9 @@ class CgemmFunction : public CublasFunction { ...@@ -107,8 +108,9 @@ class CgemmFunction : public CublasFunction {
/** /**
* @brief Prepare memory and data of the input and output for kernel running * @brief Prepare memory and data of the input and output for kernel running
*/ */
virtual void prepare_tensor() { virtual void prepare_tensor(bool random) {
prepare_tensor_template(&Parameter_0_0, &Parameter_1_0, &Result_3_0, &Parameter_0_0_host, &Parameter_1_0_host); prepare_tensor_template(&Parameter_0_0, &Parameter_1_0, &Result_3_0, &Parameter_0_0_host, &Parameter_1_0_host,
random);
} }
/** /**
* @brief Check the correctness of function calculation result * @brief Check the correctness of function calculation result
...@@ -169,17 +171,17 @@ class GemmExFunction : public CublasFunction { ...@@ -169,17 +171,17 @@ class GemmExFunction : public CublasFunction {
/** /**
* @brief Prepare memory and data of the input and output for kernel running * @brief Prepare memory and data of the input and output for kernel running
*/ */
virtual void prepare_tensor() { virtual void prepare_tensor(bool random) {
if (this->datatype_.compare("half") == 0) { if (this->datatype_.compare("half") == 0) {
CublasFunction::prepare_tensor_template<half>( CublasFunction::prepare_tensor_template<half>(
reinterpret_cast<half **>(&Parameter_0_0), reinterpret_cast<half **>(&Parameter_1_0), reinterpret_cast<half **>(&Parameter_0_0), reinterpret_cast<half **>(&Parameter_1_0),
reinterpret_cast<half **>(&Result_3_0), reinterpret_cast<half **>(&Parameter_0_0_host), reinterpret_cast<half **>(&Result_3_0), reinterpret_cast<half **>(&Parameter_0_0_host),
reinterpret_cast<half **>(&Parameter_1_0_host)); reinterpret_cast<half **>(&Parameter_1_0_host), random);
} else if (this->datatype_.compare("float") == 0) { } else if (this->datatype_.compare("float") == 0) {
CublasFunction::prepare_tensor_template<float>( CublasFunction::prepare_tensor_template<float>(
reinterpret_cast<float **>(&Parameter_0_0), reinterpret_cast<float **>(&Parameter_1_0), reinterpret_cast<float **>(&Parameter_0_0), reinterpret_cast<float **>(&Parameter_1_0),
reinterpret_cast<float **>(&Result_3_0), reinterpret_cast<float **>(&Parameter_0_0_host), reinterpret_cast<float **>(&Result_3_0), reinterpret_cast<float **>(&Parameter_0_0_host),
reinterpret_cast<float **>(&Parameter_1_0_host)); reinterpret_cast<float **>(&Parameter_1_0_host), random);
} }
} }
/** /**
...@@ -265,17 +267,17 @@ class GemmStridedBatchedExFunction : public CublasFunction { ...@@ -265,17 +267,17 @@ class GemmStridedBatchedExFunction : public CublasFunction {
/** /**
* @brief Prepare memory and data of the input and output for kernel running * @brief Prepare memory and data of the input and output for kernel running
*/ */
virtual void prepare_tensor() { virtual void prepare_tensor(bool random) {
if (this->datatype_.compare("half") == 0) { if (this->datatype_.compare("half") == 0) {
prepare_tensor_template<half>( prepare_tensor_template<half>(
reinterpret_cast<half **>(&Parameter_0_0), reinterpret_cast<half **>(&Parameter_1_0), reinterpret_cast<half **>(&Parameter_0_0), reinterpret_cast<half **>(&Parameter_1_0),
reinterpret_cast<half **>(&Result_3_0), reinterpret_cast<half **>(&Parameter_0_0_host), reinterpret_cast<half **>(&Result_3_0), reinterpret_cast<half **>(&Parameter_0_0_host),
reinterpret_cast<half **>(&Parameter_1_0_host)); reinterpret_cast<half **>(&Parameter_1_0_host), random);
} else if (this->datatype_.compare("float") == 0) { } else if (this->datatype_.compare("float") == 0) {
prepare_tensor_template<float>( prepare_tensor_template<float>(
reinterpret_cast<float **>(&Parameter_0_0), reinterpret_cast<float **>(&Parameter_1_0), reinterpret_cast<float **>(&Parameter_0_0), reinterpret_cast<float **>(&Parameter_1_0),
reinterpret_cast<float **>(&Result_3_0), reinterpret_cast<float **>(&Parameter_0_0_host), reinterpret_cast<float **>(&Result_3_0), reinterpret_cast<float **>(&Parameter_0_0_host),
reinterpret_cast<float **>(&Parameter_1_0_host)); reinterpret_cast<float **>(&Parameter_1_0_host), random);
} }
} }
/** /**
...@@ -355,8 +357,9 @@ class SgemmStridedBatchedFunction : public CublasFunction { ...@@ -355,8 +357,9 @@ class SgemmStridedBatchedFunction : public CublasFunction {
/** /**
* @brief Prepare memory and data of the input and output for kernel running * @brief Prepare memory and data of the input and output for kernel running
*/ */
virtual void prepare_tensor() { virtual void prepare_tensor(bool random) {
prepare_tensor_template(&Parameter_0_0, &Parameter_1_0, &Result_3_0, &Parameter_0_0_host, &Parameter_1_0_host); prepare_tensor_template(&Parameter_0_0, &Parameter_1_0, &Result_3_0, &Parameter_0_0_host, &Parameter_1_0_host,
random);
} }
/** /**
* @brief Function calculation on CPU side * @brief Function calculation on CPU side
...@@ -419,8 +422,9 @@ class Cgemm3mStridedBatchedFunction : public CublasFunction { ...@@ -419,8 +422,9 @@ class Cgemm3mStridedBatchedFunction : public CublasFunction {
/** /**
* @brief Prepare memory and data of the input and output for kernel running * @brief Prepare memory and data of the input and output for kernel running
*/ */
virtual void prepare_tensor() { virtual void prepare_tensor(bool random) {
prepare_tensor_template(&Parameter_0_0, &Parameter_1_0, &Result_3_0, &Parameter_0_0_host, &Parameter_1_0_host); prepare_tensor_template(&Parameter_0_0, &Parameter_1_0, &Result_3_0, &Parameter_0_0_host, &Parameter_1_0_host,
random);
} }
/** /**
* @brief Function calculation on CPU side * @brief Function calculation on CPU side
......
...@@ -98,6 +98,7 @@ class Options { ...@@ -98,6 +98,7 @@ class Options {
std::string para_info_json; std::string para_info_json;
bool correctness_check; bool correctness_check;
double eps; double eps;
bool random_data;
/** /**
* @brief Construct a options object according to cmd or set a default value used to test * @brief Construct a options object according to cmd or set a default value used to test
...@@ -120,6 +121,7 @@ class Options { ...@@ -120,6 +121,7 @@ class Options {
: para_info_json; : para_info_json;
correctness_check = get_cmd_line_argument_bool("--correctness"); correctness_check = get_cmd_line_argument_bool("--correctness");
eps = get_cmd_line_argument_double("--eps"); eps = get_cmd_line_argument_double("--eps");
random_data = get_cmd_line_argument_bool("--random_data");
} }
}; };
...@@ -241,6 +243,7 @@ void run_benchmark(Options &options) { ...@@ -241,6 +243,7 @@ void run_benchmark(Options &options) {
function.set_random_seed(options.random_seed); function.set_random_seed(options.random_seed);
function.set_correctness(options.correctness_check); function.set_correctness(options.correctness_check);
function.set_eps(options.eps); function.set_eps(options.eps);
function.set_random_data(options.random_data);
CublasFunction *p_function = get_cublas_function_pointer(function); CublasFunction *p_function = get_cublas_function_pointer(function);
p_function->benchmark(); p_function->benchmark();
delete p_function; delete p_function;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment