Unverified commit c25dbc2d, authored by Siew's Capital Jarvis, committed by GitHub
Browse files

[Bugfix] Fix unclean shutdown crash with AllReduce Fusion workspace (#36955)


Signed-off-by: Jarvis <brayden.stanley.0127@gmail.com>
parent 77d2a5f1
...@@ -2,6 +2,9 @@ ...@@ -2,6 +2,9 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import atexit
import threading
import torch import torch
import torch.distributed as dist import torch.distributed as dist
from torch.distributed import ProcessGroup from torch.distributed import ProcessGroup
...@@ -132,18 +135,25 @@ def initialize_fi_ar_quant_workspace( ...@@ -132,18 +135,25 @@ def initialize_fi_ar_quant_workspace(
) )
# Serializes workspace teardown so that concurrent callers (for example an
# explicit call racing with the atexit hook registered below) cannot destroy
# the same workspace object twice.
_fi_ar_workspace_lock = threading.Lock()


def destroy_fi_ar_workspace():
    """Destroy the module-level FlashInfer all-reduce workspaces.

    Calls ``destroy()`` on ``_fi_ar_quant_workspace`` and
    ``_fi_ar_workspace`` (when set) and resets both globals to ``None``.
    When the two globals refer to the same object it is destroyed only
    once. Calling this function again after a successful teardown is a
    no-op, which makes it safe to invoke both explicitly and from the
    ``atexit`` hook.

    The whole teardown runs under ``_fi_ar_workspace_lock``. The lock is
    not reentrant, so a ``destroy()`` implementation must not call back
    into this function.

    Raises:
        Any exception raised by a workspace's ``destroy()`` propagates to
        the caller, but only after both globals have been cleared and the
        remaining workspace has been given its chance to be destroyed.
    """
    global _fi_ar_workspace
    global _fi_ar_quant_workspace

    with _fi_ar_workspace_lock:
        try:
            # The quant workspace may be the very same object as the regular
            # workspace; in that case it is destroyed exactly once, below.
            if (
                _fi_ar_quant_workspace is not None
                and _fi_ar_quant_workspace is not _fi_ar_workspace
            ):
                _fi_ar_quant_workspace.destroy()
        finally:
            # Drop the reference even if destroy() raised, so that a later
            # call (e.g. from atexit) never destroys the same object twice.
            _fi_ar_quant_workspace = None
            try:
                if _fi_ar_workspace is not None:
                    _fi_ar_workspace.destroy()
            finally:
                _fi_ar_workspace = None


# Run the teardown at normal interpreter exit so the workspaces are released
# explicitly rather than during interpreter finalization.
atexit.register(destroy_fi_ar_workspace)
class FlashInferAllReduce: class FlashInferAllReduce:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment