Commit db108e89 authored by Ville Pietilä
Browse files

Fix handling of mempool size when allocating/deallocating memory.

parent d0a846c2
...@@ -47,6 +47,7 @@ namespace memory { ...@@ -47,6 +47,7 @@ namespace memory {
{ {
void* p = memory_pool_[sizeInBytes].front(); void* p = memory_pool_[sizeInBytes].front();
memory_pool_[sizeInBytes].pop(); memory_pool_[sizeInBytes].pop();
memPoolSizeInBytes_ -= sizeInBytes;
return p; return p;
} }
void* p; void* p;
...@@ -65,6 +66,11 @@ namespace memory { ...@@ -65,6 +66,11 @@ namespace memory {
// If the memory pool size exceeds the maximum size, free the memory. // If the memory pool size exceeds the maximum size, free the memory.
if (memPoolSizeInBytes_ > maxMemoryPoolSizeInBytes_) if (memPoolSizeInBytes_ > maxMemoryPoolSizeInBytes_)
{ {
if (enableLogging_)
{
std::cout << "[ MemPool ] Clearing pool queue for size " << sizeInBytes << std::endl;
}
memPoolSizeInBytes_ -= sizeInBytes * q.size();
clearMemoryPoolQueue(q); clearMemoryPoolQueue(q);
} }
} }
...@@ -84,6 +90,8 @@ namespace memory { ...@@ -84,6 +90,8 @@ namespace memory {
{ {
void* p = q.front(); void* p = q.front();
q.pop(); q.pop();
// This performs an implicit hipDeviceSynchronize().
// Does this create a deadlock situation when grouped GEMM is used in distributed training with NCCL?
hip_check_error(hipHostFree(p)); hip_check_error(hipHostFree(p));
} }
} }
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment