Unverified commit c25dbc2d, authored by Siew's Capital Jarvis and committed by GitHub
Browse files

[Bugfix] Fix unclean shutdown crash with AllReduce Fusion workspace (#36955)


Signed-off-by: Jarvis <brayden.stanley.0127@gmail.com>
parent 77d2a5f1
...@@ -2,6 +2,9 @@ ...@@ -2,6 +2,9 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import atexit
import threading
import torch import torch
import torch.distributed as dist import torch.distributed as dist
from torch.distributed import ProcessGroup from torch.distributed import ProcessGroup
...@@ -132,9 +135,13 @@ def initialize_fi_ar_quant_workspace( ...@@ -132,9 +135,13 @@ def initialize_fi_ar_quant_workspace(
) )
_fi_ar_workspace_lock = threading.Lock()
def destroy_fi_ar_workspace(): def destroy_fi_ar_workspace():
global _fi_ar_workspace global _fi_ar_workspace
global _fi_ar_quant_workspace global _fi_ar_quant_workspace
with _fi_ar_workspace_lock:
if ( if (
_fi_ar_quant_workspace is not None _fi_ar_quant_workspace is not None
and _fi_ar_quant_workspace is not _fi_ar_workspace and _fi_ar_quant_workspace is not _fi_ar_workspace
...@@ -146,6 +153,9 @@ def destroy_fi_ar_workspace(): ...@@ -146,6 +153,9 @@ def destroy_fi_ar_workspace():
_fi_ar_workspace = None _fi_ar_workspace = None
atexit.register(destroy_fi_ar_workspace)
class FlashInferAllReduce: class FlashInferAllReduce:
def __init__( def __init__(
self, self,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment