"vscode:/vscode.git/clone" did not exist on "82bc797f17eee5f830edcd058e79390a0c5acff6"
Unverified commit d1967d55, authored by Daniel Stokes, committed by GitHub
Browse files

fix: Add stream synchronization before destroying MPI communicator (#1979)


Signed-off-by: djns99 <40156487+djns99@users.noreply.github.com>
parent e0204fbb
...@@ -133,14 +133,21 @@ CommOverlapCore::~CommOverlapCore() { ...@@ -133,14 +133,21 @@ CommOverlapCore::~CommOverlapCore() {
if (_atomic_gemm) cudaFree(_counter.dptr()); if (_atomic_gemm) cudaFree(_counter.dptr());
for (size_t i = 0; i < _stream_compute.size(); i++) cudaStreamDestroy(_stream_compute[i]); for (size_t i = 0; i < _stream_compute.size(); i++) {
cudaStreamSynchronize(_stream_compute[i]);
cudaStreamDestroy(_stream_compute[i]);
}
if (_comm_created) { if (_comm_created) {
try {
#ifdef NVTE_UB_WITH_MPI #ifdef NVTE_UB_WITH_MPI
destroy_communicator_mpi(_ub_comm); destroy_communicator_mpi(_ub_comm);
#else #else
destroy_communicator(_ub_comm); destroy_communicator(_ub_comm);
#endif #endif
} catch (const std::exception &e) {
NVTE_WARN("Error destroying communicator, cleanup may be incomplete:\n", e.what());
}
_comm_created = false; _comm_created = false;
} }
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment