Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
c78f6594
Commit
c78f6594
authored
Apr 10, 2025
by
lizhigong
Browse files
add auto finish thread when use LLM object
parent
fdf9bf98
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
3 additions
and
2 deletions
+3
-2
vllm/engine/llm_engine.py
vllm/engine/llm_engine.py
+2
-2
vllm/entrypoints/llm.py
vllm/entrypoints/llm.py
+1
-0
No files found.
vllm/engine/llm_engine.py
View file @
c78f6594
...
@@ -1318,7 +1318,7 @@ class LLMEngine:
...
@@ -1318,7 +1318,7 @@ class LLMEngine:
seq
.
append_token_id
(
sample
.
output_token
,
sample
.
logprobs
)
seq
.
append_token_id
(
sample
.
output_token
,
sample
.
logprobs
)
def
finish_thread
(
self
):
def
finish_thread
(
self
):
if
self
.
zero_overhead
:
if
self
.
zero_overhead
and
self
.
thread_running
:
self
.
thread_running
=
False
self
.
thread_running
=
False
self
.
sem_m2s
.
release
()
self
.
sem_m2s
.
release
()
...
@@ -1390,8 +1390,8 @@ class LLMEngine:
...
@@ -1390,8 +1390,8 @@ class LLMEngine:
def
zero_overhead_step
(
self
)
->
List
[
Union
[
RequestOutput
,
PoolingRequestOutput
]]:
def
zero_overhead_step
(
self
)
->
List
[
Union
[
RequestOutput
,
PoolingRequestOutput
]]:
if
not
self
.
thread_running
:
if
not
self
.
thread_running
:
self
.
zero_thread
.
join
()
self
.
zero_thread
.
join
()
self
.
zero_thread
=
threading
.
Thread
(
target
=
self
.
thread_zero_overhead
)
self
.
thread_running
=
True
self
.
thread_running
=
True
self
.
zero_thread
=
threading
.
Thread
(
target
=
self
.
thread_zero_overhead
)
self
.
zero_thread
.
start
()
self
.
zero_thread
.
start
()
self
.
sem_m2s
.
release
()
self
.
sem_m2s
.
release
()
recode_output
=
self
.
q_recorder
.
get
()
recode_output
=
self
.
q_recorder
.
get
()
...
...
vllm/entrypoints/llm.py
View file @
c78f6594
...
@@ -1412,6 +1412,7 @@ class LLM:
...
@@ -1412,6 +1412,7 @@ class LLM:
if
use_tqdm
:
if
use_tqdm
:
pbar
.
close
()
pbar
.
close
()
self
.
llm_engine
.
finish_thread
()
# Sort the outputs by request ID.
# Sort the outputs by request ID.
# This is necessary because some requests may be finished earlier than
# This is necessary because some requests may be finished earlier than
# its previous requests.
# its previous requests.
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment