Commit d42cb0d0 authored by Aleksander Dudek
Browse files

[CK_TILE] Move hipmalloc/memcpy calls out of gpu reference gemm - review fix

parent bf210540
...@@ -165,43 +165,18 @@ int run_gemm_example_with_layouts(int argc, ...@@ -165,43 +165,18 @@ int run_gemm_example_with_layouts(int argc,
BDataType* d_B; BDataType* d_B;
CDataType* d_C; CDataType* d_C;
hipError_t errA = hipMalloc(&d_A, M * K * sizeof(ADataType)); ck_tile::hip_check_error(hipMalloc(&d_A, M * K * sizeof(ADataType)));
hipError_t errB = hipMalloc(&d_B, N * K * sizeof(BDataType)); ck_tile::hip_check_error(hipMalloc(&d_B, N * K * sizeof(BDataType)));
hipError_t errC = hipMalloc(&d_C, M * N * sizeof(CDataType)); ck_tile::hip_check_error(hipMalloc(&d_C, M * N * sizeof(CDataType)));
if(errA != hipSuccess)
{ ck_tile::hip_check_error(hipMemcpy(d_A,
std::cerr << "Error allocating device memory for A: " << hipGetErrorString(errA) a_m_k_dev_buf.GetDeviceBuffer(),
<< std::endl; M * K * sizeof(ADataType),
return EXIT_FAILURE; // Early exit on error hipMemcpyHostToDevice));
} ck_tile::hip_check_error(hipMemcpy(d_B,
b_k_n_dev_buf.GetDeviceBuffer(),
if(errB != hipSuccess) N * K * sizeof(BDataType),
{ hipMemcpyHostToDevice));
std::cerr << "Error allocating device memory for B: " << hipGetErrorString(errB)
<< std::endl;
return EXIT_FAILURE; // Early exit on error
}
if(errC != hipSuccess)
{
std::cerr << "Error allocating device memory for C: " << hipGetErrorString(errC)
<< std::endl;
return EXIT_FAILURE; // Early exit on error
}
errA = hipMemcpy(
d_A, a_m_k_dev_buf.GetDeviceBuffer(), M * K * sizeof(ADataType), hipMemcpyHostToDevice);
if(errA != hipSuccess)
{
std::cerr << "Error copying A to device: " << hipGetErrorString(errA) << std::endl;
}
errB = hipMemcpy(
d_B, b_k_n_dev_buf.GetDeviceBuffer(), N * K * sizeof(BDataType), hipMemcpyHostToDevice);
if(errB != hipSuccess)
{
std::cerr << "Error copying B to device: " << hipGetErrorString(errB) << std::endl;
}
ck_tile::reference_gemm_gpu<ADataType, ck_tile::reference_gemm_gpu<ADataType,
BDataType, BDataType,
...@@ -211,32 +186,14 @@ int run_gemm_example_with_layouts(int argc, ...@@ -211,32 +186,14 @@ int run_gemm_example_with_layouts(int argc,
BLayout, BLayout,
CLayout>(d_A, d_B, d_C, M, N, K, stride_A, stride_B, stride_C); CLayout>(d_A, d_B, d_C, M, N, K, stride_A, stride_B, stride_C);
errC = hipMemcpy(c_m_n_gpu_buf_ref.GetDeviceBuffer(), ck_tile::hip_check_error(hipMemcpy(c_m_n_gpu_buf_ref.GetDeviceBuffer(),
d_C, d_C,
M * N * sizeof(CDataType), M * N * sizeof(CDataType),
hipMemcpyDeviceToHost); hipMemcpyDeviceToHost));
if(errC != hipSuccess)
{
std::cerr << "Error copying C to device: " << hipGetErrorString(errC) << std::endl;
}
errA = hipFree(d_A);
if(errA != hipSuccess)
{
std::cerr << "Error free the A memory: " << hipGetErrorString(errA) << std::endl;
}
errB = hipFree(d_B);
if(errB != hipSuccess)
{
std::cerr << "Error free the B memory: " << hipGetErrorString(errB) << std::endl;
}
errC = hipFree(d_C); ck_tile::hip_check_error(hipFree(d_A));
if(errC != hipSuccess) ck_tile::hip_check_error(hipFree(d_B));
{ ck_tile::hip_check_error(hipFree(d_C));
std::cerr << "Error free the C memory: " << hipGetErrorString(errC) << std::endl;
}
c_m_n_gpu_buf_ref.FromDevice(c_m_n_gpu_ref.data()); c_m_n_gpu_buf_ref.FromDevice(c_m_n_gpu_ref.data());
pass = ck_tile::check_err(c_m_n_dev_result, c_m_n_gpu_ref); pass = ck_tile::check_err(c_m_n_dev_result, c_m_n_gpu_ref);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment