Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
9832e557
Unverified
Commit
9832e557
authored
Dec 24, 2024
by
Rui Qiao
Committed by
GitHub
Dec 24, 2024
Browse files
[V1] Unify VLLM_ENABLE_V1_MULTIPROCESSING handling in RayExecutor (#11472)
parent
3f3e92e1
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
4 additions
and
8 deletions
+4
-8
tests/basic_correctness/test_basic_correctness.py
tests/basic_correctness/test_basic_correctness.py
+0
-5
vllm/v1/engine/llm_engine.py
vllm/v1/engine/llm_engine.py
+0
-2
vllm/v1/executor/ray_executor.py
vllm/v1/executor/ray_executor.py
+4
-1
No files found.
tests/basic_correctness/test_basic_correctness.py
View file @
9832e557
...
@@ -127,11 +127,6 @@ def test_models_distributed(
...
@@ -127,11 +127,6 @@ def test_models_distributed(
if
attention_backend
:
if
attention_backend
:
os
.
environ
[
"VLLM_ATTENTION_BACKEND"
]
=
attention_backend
os
.
environ
[
"VLLM_ATTENTION_BACKEND"
]
=
attention_backend
# Import VLLM_USE_V1 dynamically to handle patching
from
vllm.envs
import
VLLM_USE_V1
if
VLLM_USE_V1
and
distributed_executor_backend
!=
"mp"
:
os
.
environ
[
"VLLM_ENABLE_V1_MULTIPROCESSING"
]
=
"0"
dtype
=
"half"
dtype
=
"half"
max_tokens
=
5
max_tokens
=
5
...
...
vllm/v1/engine/llm_engine.py
View file @
9832e557
...
@@ -21,7 +21,6 @@ from vllm.v1.engine.core_client import EngineCoreClient
...
@@ -21,7 +21,6 @@ from vllm.v1.engine.core_client import EngineCoreClient
from
vllm.v1.engine.detokenizer
import
Detokenizer
from
vllm.v1.engine.detokenizer
import
Detokenizer
from
vllm.v1.engine.processor
import
Processor
from
vllm.v1.engine.processor
import
Processor
from
vllm.v1.executor.abstract
import
Executor
from
vllm.v1.executor.abstract
import
Executor
from
vllm.v1.executor.ray_utils
import
initialize_ray_cluster
logger
=
init_logger
(
__name__
)
logger
=
init_logger
(
__name__
)
...
@@ -112,7 +111,6 @@ class LLMEngine:
...
@@ -112,7 +111,6 @@ class LLMEngine:
distributed_executor_backend
=
(
distributed_executor_backend
=
(
vllm_config
.
parallel_config
.
distributed_executor_backend
)
vllm_config
.
parallel_config
.
distributed_executor_backend
)
if
distributed_executor_backend
==
"ray"
:
if
distributed_executor_backend
==
"ray"
:
initialize_ray_cluster
(
vllm_config
.
parallel_config
)
from
vllm.v1.executor.ray_executor
import
RayExecutor
from
vllm.v1.executor.ray_executor
import
RayExecutor
executor_class
=
RayExecutor
executor_class
=
RayExecutor
elif
distributed_executor_backend
==
"mp"
:
elif
distributed_executor_backend
==
"mp"
:
...
...
vllm/v1/executor/ray_executor.py
View file @
9832e557
...
@@ -8,7 +8,8 @@ from vllm.config import VllmConfig
...
@@ -8,7 +8,8 @@ from vllm.config import VllmConfig
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
from
vllm.utils
import
get_distributed_init_method
,
get_ip
,
get_open_port
from
vllm.utils
import
get_distributed_init_method
,
get_ip
,
get_open_port
from
vllm.v1.executor.abstract
import
Executor
from
vllm.v1.executor.abstract
import
Executor
from
vllm.v1.executor.ray_utils
import
RayWorkerWrapper
,
ray
from
vllm.v1.executor.ray_utils
import
(
RayWorkerWrapper
,
initialize_ray_cluster
,
ray
)
from
vllm.v1.outputs
import
ModelRunnerOutput
from
vllm.v1.outputs
import
ModelRunnerOutput
if
ray
is
not
None
:
if
ray
is
not
None
:
...
@@ -33,7 +34,9 @@ class RayExecutor(Executor):
...
@@ -33,7 +34,9 @@ class RayExecutor(Executor):
if
ray_usage
!=
"1"
:
if
ray_usage
!=
"1"
:
os
.
environ
[
"RAY_USAGE_STATS_ENABLED"
]
=
"0"
os
.
environ
[
"RAY_USAGE_STATS_ENABLED"
]
=
"0"
initialize_ray_cluster
(
self
.
parallel_config
)
placement_group
=
self
.
parallel_config
.
placement_group
placement_group
=
self
.
parallel_config
.
placement_group
# Create the parallel GPU workers.
# Create the parallel GPU workers.
self
.
_init_workers_ray
(
placement_group
)
self
.
_init_workers_ray
(
placement_group
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment