Commit 1f97aebb authored by wenjh
Browse files

Use hipMemcpy rather than hipMemcpyAsync


Signed-off-by: wenjh <wenjh@sugon.com>
parent 8665c111
......@@ -1610,10 +1610,10 @@ void hipblaslt_goupedgemm(std::vector<const Tensor*>& inputA, std::vector<const
// Copy them to device memory
// hipblaslt_ext::UserArguments* d_userArgs;
// NVTE_CHECK_CUDA(hipMallocAsync(&d_userArgs, m.size() * sizeof(hipblaslt_ext::UserArguments), stream));
-NVTE_CHECK_CUDA(hipMemcpyAsync(d_userArgs,
+NVTE_CHECK_CUDA(hipMemcpy(d_userArgs,
userArgs,
m.size() * sizeof(hipblaslt_ext::UserArguments),
-hipMemcpyHostToDevice, stream));
+hipMemcpyHostToDevice));
NVTE_CHECK_HIPBLASLT(groupedgemm.run(d_userArgs, stream));
// NVTE_CHECK_HIPBLASLT(groupedgemm.initialize(heuristicResult[0].algo, workspace, false, stream));
// NVTE_CHECK_HIPBLASLT(groupedgemm.run(stream));
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment