Commit 351d607d authored by lizhigong's avatar lizhigong
Browse files

Fix bug in stopping the remote worker execution loop

parent 9076ef2b
......@@ -44,7 +44,7 @@ from vllm.usage.usage_lib import UsageContext
from vllm.utils import (Counter, Device, deprecate_args, deprecate_kwargs,
is_list_of)
from vllm.zero_overhead.v0.llm_engine import ZeroOverheadEngine
from vllm.zero_overhead.v0.utils import is_zero_auto_thread, is_zero_overhead
from vllm.zero_overhead.v0.utils import is_zero_overhead
logger = init_logger(__name__)
......@@ -1450,8 +1450,7 @@ class LLM:
if use_tqdm:
pbar.close()
if is_zero_auto_thread():
self.llm_engine.finish_thread()
# Sort the outputs by request ID.
# This is necessary because some requests may be finished earlier than
# its previous requests.
......
......@@ -289,6 +289,8 @@ class ZeroOverheadEngine(LLMEngine):
while True:
self.sem_m2s.acquire()
if not self.thread_running:
logger.debug("Stopping remote worker execution loop.")
self.model_executor.stop_remote_worker_execution_loop()
break
virtual_engine = 0
......@@ -394,8 +396,9 @@ class ZeroOverheadEngine(LLMEngine):
# torch.distributed ops which may otherwise timeout, and unblocks
# the RPC thread in the workers so that they can process any other
# queued control plane messages, such as add/remove lora adapters.
logger.debug("Stopping remote worker execution loop.")
self.model_executor.stop_remote_worker_execution_loop()
# logger.debug("Stopping remote worker execution loop.")
# self.model_executor.stop_remote_worker_execution_loop()
self.finish_thread()
return ctx.request_outputs
......
......@@ -3,14 +3,10 @@
import os
# Module-level flags, each read from the environment once at import time.
# A flag is True only when its variable is set to exactly the string '1'.
zero_overhead = os.getenv('VLLM_ZERO_OVERHEAD') == '1'
disable_auto_finish_thread = (
    os.getenv('VLLM_ZERO_DISABLE_AUTO_THREAD') == '1'
)
zero_no_thread = os.getenv('VLLM_ZERO_NO_THREAD') == '1'
def is_zero_overhead():
    """Return the module-level ``zero_overhead`` flag."""
    return zero_overhead
def is_zero_auto_thread():
    """Return True only when ``zero_overhead`` is set and neither
    ``disable_auto_finish_thread`` nor ``zero_no_thread`` is set.
    """
    # Either opt-out flag vetoes the result regardless of zero_overhead.
    if disable_auto_finish_thread or zero_no_thread:
        return False
    return zero_overhead
def is_zero_no_thread():
    """Return True only when both ``zero_no_thread`` and ``zero_overhead``
    are set.
    """
    if not zero_no_thread:
        return False
    return zero_overhead
\ No newline at end of file
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment