Fix bug in stopping the remote worker execution loop

351d607d · lizhigong · 9076ef2b · 351d607d · 351d607d · 351d607d
Commit 351d607d authored Apr 27, 2025 by lizhigong
Showing 3 changed files with 7 additions and 9 deletions

vllm/entrypoints/llm.py +2 -3

vllm/zero_overhead/v0/llm_engine.py +5 -2

vllm/zero_overhead/v0/utils.py +0 -4

No files found.
--- a/vllm/entrypoints/llm.py
+++ b/vllm/entrypoints/llm.py
@@ -44,7 +44,7 @@ from vllm.usage.usage_lib import UsageContext
 from vllm.utils import (Counter, Device, deprecate_args, deprecate_kwargs,
                        is_list_of)
 from vllm.zero_overhead.v0.llm_engine import ZeroOverheadEngine
-from vllm.zero_overhead.v0.utils import is_zero_auto_thread, is_zero_overhead
+from vllm.zero_overhead.v0.utils import is_zero_overhead

 logger = init_logger(__name__)

@@ -1450,8 +1450,7 @@ class LLM:

        if use_tqdm:
            pbar.close()
-        if is_zero_auto_thread():
-            self.llm_engine.finish_thread()
+
        # Sort the outputs by request ID.
        # This is necessary because some requests may be finished earlier than
        # its previous requests.

--- a/vllm/zero_overhead/v0/llm_engine.py
+++ b/vllm/zero_overhead/v0/llm_engine.py
@@ -289,6 +289,8 @@ class ZeroOverheadEngine(LLMEngine):
            while True:
                self.sem_m2s.acquire()
                if not self.thread_running:
+                    logger.debug("Stopping remote worker execution loop.")
+                    self.model_executor.stop_remote_worker_execution_loop()
                    break

                virtual_engine = 0
@@ -394,8 +396,9 @@ class ZeroOverheadEngine(LLMEngine):
            # torch.distributed ops which may otherwise timeout, and unblocks
            # the RPC thread in the workers so that they can process any other
            # queued control plane messages, such as add/remove lora adapters.
-            logger.debug("Stopping remote worker execution loop.")
-            self.model_executor.stop_remote_worker_execution_loop()
+            # logger.debug("Stopping remote worker execution loop.")
+            # self.model_executor.stop_remote_worker_execution_loop()
+            self.finish_thread()
        return ctx.request_outputs
    
    

--- a/vllm/zero_overhead/v0/utils.py
+++ b/vllm/zero_overhead/v0/utils.py
@@ -3,14 +3,10 @@
 import os

 zero_overhead = os.environ.get('VLLM_ZERO_OVERHEAD') == '1'
-disable_auto_finish_thread = os.environ.get('VLLM_ZERO_DISABLE_AUTO_THREAD') == '1'
 zero_no_thread = os.environ.get('VLLM_ZERO_NO_THREAD') == '1'

 def is_zero_overhead():
    return zero_overhead

-def is_zero_auto_thread():
-    return (not disable_auto_finish_thread) and zero_overhead and (not zero_no_thread)
-
 def is_zero_no_thread():
    return zero_no_thread and zero_overhead
\ No newline at end of file