Commit c78f6594 authored by lizhigong
Browse files

Automatically finish the thread when using the LLM object

parent fdf9bf98
...@@ -1318,7 +1318,7 @@ class LLMEngine: ...@@ -1318,7 +1318,7 @@ class LLMEngine:
seq.append_token_id(sample.output_token, sample.logprobs) seq.append_token_id(sample.output_token, sample.logprobs)
def finish_thread(self): def finish_thread(self):
if self.zero_overhead: if self.zero_overhead and self.thread_running:
self.thread_running = False self.thread_running = False
self.sem_m2s.release() self.sem_m2s.release()
...@@ -1390,8 +1390,8 @@ class LLMEngine: ...@@ -1390,8 +1390,8 @@ class LLMEngine:
def zero_overhead_step(self) -> List[Union[RequestOutput, PoolingRequestOutput]]: def zero_overhead_step(self) -> List[Union[RequestOutput, PoolingRequestOutput]]:
if not self.thread_running: if not self.thread_running:
self.zero_thread.join() self.zero_thread.join()
self.zero_thread = threading.Thread(target=self.thread_zero_overhead)
self.thread_running = True self.thread_running = True
self.zero_thread = threading.Thread(target=self.thread_zero_overhead)
self.zero_thread.start() self.zero_thread.start()
self.sem_m2s.release() self.sem_m2s.release()
recode_output = self.q_recorder.get() recode_output = self.q_recorder.get()
......
...@@ -1412,6 +1412,7 @@ class LLM: ...@@ -1412,6 +1412,7 @@ class LLM:
if use_tqdm: if use_tqdm:
pbar.close() pbar.close()
self.llm_engine.finish_thread()
# Sort the outputs by request ID. # Sort the outputs by request ID.
# This is necessary because some requests may be finished earlier than # This is necessary because some requests may be finished earlier than
# its previous requests. # its previous requests.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment