Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
3173c3b3
Unverified
Commit
3173c3b3
authored
Feb 25, 2025
by
Rui Qiao
Committed by
GitHub
Feb 25, 2025
Browse files
[misc] Clean up ray compiled graph type hints (#13731)
parent
2d87d7d1
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
17 additions
and
6 deletions
+17
-6
vllm/executor/ray_distributed_executor.py
vllm/executor/ray_distributed_executor.py
+12
-4
vllm/executor/ray_utils.py
vllm/executor/ray_utils.py
+5
-2
No files found.
vllm/executor/ray_distributed_executor.py
View file @
3173c3b3
...
...
@@ -528,10 +528,18 @@ class RayDistributedExecutor(DistributedExecutorBase):
envs
.
VLLM_USE_RAY_COMPILED_DAG_OVERLAP_COMM
)
with
InputNode
()
as
input_data
:
# Example DAG: PP=2, TP=4
# (ExecuteModelReq, None) -> 0 -> (ExecuteModelReq, IntermediateOutput) -> 4 -> SamplerOutput # noqa: E501
# -> 1 -> (ExecuteModelReq, IntermediateOutput) -> 5 -> SamplerOutput # noqa: E501
# -> 2 -> (ExecuteModelReq, IntermediateOutput) -> 6 -> SamplerOutput # noqa: E501
# -> 3 -> (ExecuteModelReq, IntermediateOutput) -> 7 -> SamplerOutput # noqa: E501
#
# For V0:
# ExecuteModelRequest -> 0 -> (ExecuteModelReq, IntermediateTensors) -> 4 -> SamplerOutput # noqa: E501
# ExecuteModelRequest -> 1 -> (ExecuteModelReq, IntermediateTensors) -> 5 -> SamplerOutput # noqa: E501
# ExecuteModelRequest -> 2 -> (ExecuteModelReq, IntermediateTensors) -> 6 -> SamplerOutput # noqa: E501
# ExecuteModelRequest -> 3 -> (ExecuteModelReq, IntermediateTensors) -> 7 -> SamplerOutput # noqa: E501
#
# For V1:
# SchedulerOutput -> 0 -> (SchedulerOutput, IntermediateTensors) -> 4 -> ModelRunnerOutput # noqa: E501
# SchedulerOutput -> 1 -> (SchedulerOutput, IntermediateTensors) -> 5 -> ModelRunnerOutput # noqa: E501
# SchedulerOutput -> 2 -> (SchedulerOutput, IntermediateTensors) -> 6 -> ModelRunnerOutput # noqa: E501
# SchedulerOutput -> 3 -> (SchedulerOutput, IntermediateTensors) -> 7 -> ModelRunnerOutput # noqa: E501
# All workers in the first TP group will take in the
# ExecuteModelRequest as input.
...
...
vllm/executor/ray_utils.py
View file @
3173c3b3
...
...
@@ -114,8 +114,11 @@ try:
def
execute_model_ray
(
self
,
scheduler_output
:
"SchedulerOutput"
,
)
->
"ModelRunnerOutput"
:
scheduler_output
:
Union
[
"SchedulerOutput"
,
Tuple
[
"SchedulerOutput"
,
"IntermediateTensors"
]],
)
->
Union
[
"ModelRunnerOutput"
,
Tuple
[
"SchedulerOutput"
,
"IntermediateTensors"
]]:
# this method is used to compile ray CG,
# and it needs a special logic of self.setup_device_if_necessary()
self
.
setup_device_if_necessary
()
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment