Commit c78f6594 authored by lizhigong
Browse files

Automatically finish the zero-overhead thread when using the LLM object

parent fdf9bf98
......@@ -1318,7 +1318,7 @@ class LLMEngine:
seq.append_token_id(sample.output_token, sample.logprobs)
def finish_thread(self):
if self.zero_overhead:
if self.zero_overhead and self.thread_running:
self.thread_running = False
self.sem_m2s.release()
......@@ -1390,8 +1390,8 @@ class LLMEngine:
def zero_overhead_step(self) -> List[Union[RequestOutput, PoolingRequestOutput]]:
if not self.thread_running:
self.zero_thread.join()
self.zero_thread = threading.Thread(target=self.thread_zero_overhead)
self.thread_running = True
self.zero_thread = threading.Thread(target=self.thread_zero_overhead)
self.zero_thread.start()
self.sem_m2s.release()
recode_output = self.q_recorder.get()
......
......@@ -1412,6 +1412,7 @@ class LLM:
if use_tqdm:
pbar.close()
self.llm_engine.finish_thread()
# Sort the outputs by request ID.
# This is necessary because some requests may be finished earlier than
# its previous requests.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment