Commit 36d1b311 authored by Aleksander Dudek
Browse files

[CK_TILE] Move hipmalloc/memcpy calls out of gpu reference gemm - review changes

parent 862f1bbe
...@@ -188,52 +188,23 @@ int run_batched_gemm_example_with_layouts(int argc, ...@@ -188,52 +188,23 @@ int run_batched_gemm_example_with_layouts(int argc,
c_m_n_gpu_ref.SetZero(); c_m_n_gpu_ref.SetZero();
c_m_n_gpu_buf_ref.SetZero(); c_m_n_gpu_buf_ref.SetZero();
//// TEST
ADataType* d_A; ADataType* d_A;
BDataType* d_B; BDataType* d_B;
CDataType* d_C; CDataType* d_C;
hipError_t errA = hipMalloc(&d_A, batch_count * M * K * sizeof(ADataType)); ck_tile::hip_check_error(hipMalloc(&d_A, batch_count * M * K * sizeof(ADataType)));
hipError_t errB = hipMalloc(&d_B, batch_count * N * K * sizeof(BDataType)); ck_tile::hip_check_error(hipMalloc(&d_B, batch_count * N * K * sizeof(BDataType)));
hipError_t errC = hipMalloc(&d_C, batch_count * M * N * sizeof(CDataType)); ck_tile::hip_check_error(hipMalloc(&d_C, batch_count * M * N * sizeof(CDataType)));
if(errA != hipSuccess)
{
std::cerr << "Error allocating device memory for A: " << hipGetErrorString(errA)
<< std::endl;
return EXIT_FAILURE; // Early exit on error
}
if(errB != hipSuccess)
{
std::cerr << "Error allocating device memory for B: " << hipGetErrorString(errB)
<< std::endl;
return EXIT_FAILURE; // Early exit on error
}
if(errC != hipSuccess)
{
std::cerr << "Error allocating device memory for C: " << hipGetErrorString(errC)
<< std::endl;
return EXIT_FAILURE; // Early exit on error
}
errA = hipMemcpy(d_A, ck_tile::hip_check_error(hipMemcpy(d_A,
a_m_k_dev_buf.GetDeviceBuffer(), a_m_k_dev_buf.GetDeviceBuffer(),
batch_count * M * K * sizeof(ADataType), batch_count * M * K * sizeof(ADataType),
hipMemcpyHostToDevice); hipMemcpyHostToDevice));
if(errA != hipSuccess)
{
std::cerr << "Error copying A to device: " << hipGetErrorString(errA) << std::endl;
}
errB = hipMemcpy(d_B, ck_tile::hip_check_error(hipMemcpy(d_B,
b_k_n_dev_buf.GetDeviceBuffer(), b_k_n_dev_buf.GetDeviceBuffer(),
batch_count * N * K * sizeof(BDataType), batch_count * N * K * sizeof(BDataType),
hipMemcpyHostToDevice); hipMemcpyHostToDevice));
if(errB != hipSuccess)
{
std::cerr << "Error copying B to device: " << hipGetErrorString(errB) << std::endl;
}
ck_tile::reference_batched_gemm_gpu<ADataType, ck_tile::reference_batched_gemm_gpu<ADataType,
BDataType, BDataType,
...@@ -255,34 +226,14 @@ int run_batched_gemm_example_with_layouts(int argc, ...@@ -255,34 +226,14 @@ int run_batched_gemm_example_with_layouts(int argc,
batch_stride_C, batch_stride_C,
batch_count); batch_count);
errC = hipMemcpy(c_m_n_gpu_buf_ref.GetDeviceBuffer(), ck_tile::hip_check_error(hipMemcpy(c_m_n_gpu_buf_ref.GetDeviceBuffer(),
d_C, d_C,
batch_count * M * N * sizeof(CDataType), batch_count * M * N * sizeof(CDataType),
hipMemcpyDeviceToHost); hipMemcpyDeviceToHost));
if(errC != hipSuccess)
{
std::cerr << "Error copying C to device: " << hipGetErrorString(errC) << std::endl;
}
errA = hipFree(d_A);
if(errA != hipSuccess)
{
std::cerr << "Error free the A memory: " << hipGetErrorString(errA) << std::endl;
}
errB = hipFree(d_B);
if(errB != hipSuccess)
{
std::cerr << "Error free the B memory: " << hipGetErrorString(errB) << std::endl;
}
errC = hipFree(d_C);
if(errC != hipSuccess)
{
std::cerr << "Error free the C memory: " << hipGetErrorString(errC) << std::endl;
}
//// TEST ck_tile::hip_check_error(hipFree(d_A));
ck_tile::hip_check_error(hipFree(d_B));
ck_tile::hip_check_error(hipFree(d_C));
c_m_n_gpu_buf_ref.FromDevice(c_m_n_gpu_ref.data()); c_m_n_gpu_buf_ref.FromDevice(c_m_n_gpu_ref.data());
pass = ck_tile::check_err(c_m_n_dev_result, c_m_n_gpu_ref); pass = ck_tile::check_err(c_m_n_dev_result, c_m_n_gpu_ref);
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment