Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
024e595d
Commit
024e595d
authored
Mar 26, 2025
by
lizhigong
Browse files
add server zero overhead engine
parent
08c4bafa
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
3 additions
and
1 deletion
+3
-1
vllm/engine/async_llm_engine.py
vllm/engine/async_llm_engine.py
+2
-0
vllm/model_executor/layers/update_input.py
vllm/model_executor/layers/update_input.py
+0
-0
vllm/worker/model_runner.py
vllm/worker/model_runner.py
+1
-1
No files found.
vllm/engine/async_llm_engine.py
View file @
024e595d
...
@@ -280,6 +280,8 @@ class _AsyncLLMEngine(LLMEngine):
...
@@ -280,6 +280,8 @@ class _AsyncLLMEngine(LLMEngine):
"""
"""
# these are cached outputs from previous iterations. None if on first
# these are cached outputs from previous iterations. None if on first
# iteration
# iteration
if
self
.
zero_overhead
:
return
self
.
zero_overhead_step
()
cached_outputs
=
self
.
cached_scheduler_outputs
[
virtual_engine
]
cached_outputs
=
self
.
cached_scheduler_outputs
[
virtual_engine
]
seq_group_metadata_list
=
cached_outputs
.
seq_group_metadata_list
seq_group_metadata_list
=
cached_outputs
.
seq_group_metadata_list
scheduler_outputs
=
cached_outputs
.
scheduler_outputs
scheduler_outputs
=
cached_outputs
.
scheduler_outputs
...
...
vllm/model_executor/layers/
ops/
update_input.py
→
vllm/model_executor/layers/update_input.py
View file @
024e595d
File moved
vllm/worker/model_runner.py
View file @
024e595d
...
@@ -60,7 +60,7 @@ from vllm.worker.model_runner_base import (
...
@@ -60,7 +60,7 @@ from vllm.worker.model_runner_base import (
_init_attn_metadata_from_tensor_dict
,
_init_attn_metadata_from_tensor_dict
,
_init_sampling_metadata_from_tensor_dict
)
_init_sampling_metadata_from_tensor_dict
)
from
vllm.model_executor.layers.
ops.
update_input
import
UpdateInputTokens
from
vllm.model_executor.layers.update_input
import
UpdateInputTokens
if
TYPE_CHECKING
:
if
TYPE_CHECKING
:
from
vllm.attention.backends.abstract
import
AttentionBackend
from
vllm.attention.backends.abstract
import
AttentionBackend
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment