"vscode:/vscode.git/clone" did not exist on "c3df6d87d42ed0f4b047405a558b9850ff7b888e"
Commit 7277329e authored by Jing Zhang's avatar Jing Zhang Committed by root
Browse files

minor fix

parent c38d8fdc
......@@ -147,6 +147,7 @@ bool run_grouped_gemm(const ProblemSize& problem_size, const ExecutionConfig& co
#else
a_tensors_device[i]->ToDevice(a_tensors[i].mData.data());
b_tensors_device[i]->ToDevice(b_tensors[i].mData.data());
c_tensors_device[i]->SetZero();
#endif
p_a.push_back(a_tensors_device[i]->GetDeviceBuffer());
......
......@@ -533,9 +533,9 @@ struct GridwiseGemm_bk0mk1_bk0nk1_mn_xdlops_v2r4r2
return;
}
const index_t block_m_id = __builtin_amdgcn_readfirstlane(blockIdx.y);
const index_t block_n_id = __builtin_amdgcn_readfirstlane(blockIdx.x);
const index_t k_batch_id = __builtin_amdgcn_readfirstlane(blockIdx.z);
const index_t block_m_id = __builtin_amdgcn_readfirstlane(block_work_idx[I1]);
const index_t block_n_id = __builtin_amdgcn_readfirstlane(block_work_idx[I2]);
const index_t k_batch_id = __builtin_amdgcn_readfirstlane(block_work_idx[I0]);
// HACK: this force m/n_block_data_idx_on_grid into SGPR
const index_t m_block_data_idx_on_grid =
......
......@@ -98,8 +98,6 @@ bool profile_grouped_gemm_impl(int do_verification,
a_m_k[i].GenerateTensorValue(GeneratorTensor_3<ADataType>{0.0, 1.0}, num_thread);
b_k_n[i].GenerateTensorValue(GeneratorTensor_3<BDataType>{-0.5, 0.5}, num_thread);
}
c_m_n_device_results[i].GenerateTensorValue(GeneratorTensor_0<CDataType>{}, num_thread);
}
using AElementOp = ck::tensor_operation::element_wise::PassThrough;
......@@ -134,13 +132,12 @@ bool profile_grouped_gemm_impl(int do_verification,
std::make_unique<DeviceMem>(sizeof(ADataType) * a_m_k[i].mDesc.GetElementSpaceSize()));
b_device_buf.emplace_back(
std::make_unique<DeviceMem>(sizeof(BDataType) * b_k_n[i].mDesc.GetElementSpaceSize()));
c_device_buf.emplace_back(std::make_unique<DeviceMem>(
sizeof(CDataType) * c_m_n_device_results[i].mDesc.GetElementSpaceSize()));
a_device_buf[i]->ToDevice(a_m_k[i].mData.data());
b_device_buf[i]->ToDevice(b_k_n[i].mData.data());
c_device_buf[i]->ToDevice(c_m_n_device_results[i].mData.data());
c_device_buf[i]->SetZero();
gemm_descs.push_back({Ms[i], Ns[i], Ks[i], StrideAs[i], StrideBs[i], StrideCs[i], {}});
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment