Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
7353492a
Unverified
Commit
7353492a
authored
Jun 06, 2025
by
jmswen
Committed by
GitHub
Jun 06, 2025
Browse files
[Core] Raise when non-multi-instance DP clients target a DP rank (#19227)
Signed-off-by:
Jon Swenson
<
jmswen@gmail.com
>
parent
7661e92e
Changes
6
Show whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
77 additions
and
12 deletions
+77
-12
tests/async_engine/test_async_llm_engine.py
tests/async_engine/test_async_llm_engine.py
+22
-0
tests/v1/engine/test_async_llm.py
tests/v1/engine/test_async_llm.py
+29
-0
tests/v1/test_async_llm_dp.py
tests/v1/test_async_llm_dp.py
+16
-9
vllm/engine/async_llm_engine.py
vllm/engine/async_llm_engine.py
+4
-0
vllm/v1/engine/core_client.py
vllm/v1/engine/core_client.py
+0
-3
vllm/v1/engine/processor.py
vllm/v1/engine/processor.py
+6
-0
No files found.
tests/async_engine/test_async_llm_engine.py
View file @
7353492a
...
@@ -384,3 +384,25 @@ async def test_delayed_generator(async_engine, stop):
...
@@ -384,3 +384,25 @@ async def test_delayed_generator(async_engine, stop):
assert
final_output
is
not
None
assert
final_output
is
not
None
assert
len
(
final_output
.
outputs
[
0
].
token_ids
)
==
10
assert
len
(
final_output
.
outputs
[
0
].
token_ids
)
==
10
assert
final_output
.
finished
assert
final_output
.
finished
@pytest.mark.asyncio(scope="module")
async def test_invalid_argument(async_engine):
    """Expect ``generate`` to raise ``ValueError`` when given a DP rank.

    The test is skipped when the scheduler is configured with more than
    one scheduler step.
    """
    config = await async_engine.get_scheduler_config()
    if config.num_scheduler_steps != 1:
        pytest.skip("no need to test this one with multistep")

    params = SamplingParams(
        temperature=0,
        min_tokens=10,
        max_tokens=10,
    )

    # Targeting specific DP rank only supported in v1 multi-instance DP
    with pytest.raises(ValueError):
        # The call stays inside the ``raises`` block so an error raised
        # either at call time or during iteration is caught.
        outputs = async_engine.generate(
            "test",
            params,
            request_id=uid(),
            data_parallel_rank=0,
        )
        async for _ in outputs:
            pass
tests/v1/engine/test_async_llm.py
View file @
7353492a
...
@@ -250,3 +250,32 @@ async def test_customize_loggers(monkeypatch):
...
@@ -250,3 +250,32 @@ async def test_customize_loggers(monkeypatch):
assert
len
(
engine
.
stat_loggers
)
==
1
assert
len
(
engine
.
stat_loggers
)
==
1
assert
len
(
engine
.
stat_loggers
[
0
])
==
1
assert
len
(
engine
.
stat_loggers
[
0
])
==
1
engine
.
stat_loggers
[
0
][
0
].
log
.
assert_called_once
()
engine
.
stat_loggers
[
0
][
0
].
log
.
assert_called_once
()
@pytest.mark.asyncio(scope="module")
async def test_dp_rank_argument(monkeypatch: pytest.MonkeyPatch):
    """Exercise the ``data_parallel_rank`` argument of ``AsyncLLM.generate``.

    A request targeting rank 0 must run to completion, while a request
    targeting rank 1 must raise ``ValueError``.
    """
    with monkeypatch.context() as env_patch, ExitStack() as cleanup:
        env_patch.setenv("VLLM_USE_V1", "1")

        engine = AsyncLLM.from_engine_args(TEXT_ENGINE_ARGS)
        # Register shutdown right away so the engine is torn down when the
        # ``with`` block exits, including on failure.
        cleanup.callback(engine.shutdown)

        params = SamplingParams(
            max_tokens=100,
            output_kind=RequestOutputKind.DELTA,
            temperature=1.0,
            seed=33,
        )

        # Test with valid DP rank.
        valid_stream = engine.generate(
            request_id="request-34",
            prompt=TEXT_PROMPT,
            sampling_params=params,
            data_parallel_rank=0,
        )
        async for _ in valid_stream:
            pass

        # Test with out-of-range DP rank.
        with pytest.raises(ValueError):
            invalid_stream = engine.generate(
                request_id="request-35",
                prompt=TEXT_PROMPT,
                sampling_params=params,
                data_parallel_rank=1,
            )
            async for _ in invalid_stream:
                pass
tests/v1/test_async_llm_dp.py
View file @
7353492a
...
@@ -29,12 +29,14 @@ if not current_platform.supports_v1(engine_args.create_model_config()):
...
@@ -29,12 +29,14 @@ if not current_platform.supports_v1(engine_args.create_model_config()):
allow_module_level
=
True
)
allow_module_level
=
True
)
async
def
generate
(
engine
:
AsyncLLM
,
async
def
generate
(
engine
:
AsyncLLM
,
request_id
:
str
,
request_id
:
str
,
prompt
:
PromptType
,
prompt
:
PromptType
,
output_kind
:
RequestOutputKind
,
output_kind
:
RequestOutputKind
,
max_tokens
:
int
,
max_tokens
:
int
,
prompt_logprobs
:
Optional
[
int
]
=
None
)
->
tuple
[
int
,
str
]:
prompt_logprobs
:
Optional
[
int
]
=
None
,
data_parallel_rank
:
Optional
[
int
]
=
None
)
->
tuple
[
int
,
str
]:
# Ensure generate doesn't complete too fast for cancellation test.
# Ensure generate doesn't complete too fast for cancellation test.
await
asyncio
.
sleep
(
0.2
)
await
asyncio
.
sleep
(
0.2
)
...
@@ -46,7 +48,8 @@ async def generate(engine: AsyncLLM,
...
@@ -46,7 +48,8 @@ async def generate(engine: AsyncLLM,
prompt_logprobs
=
prompt_logprobs
)
prompt_logprobs
=
prompt_logprobs
)
async
for
out
in
engine
.
generate
(
request_id
=
request_id
,
async
for
out
in
engine
.
generate
(
request_id
=
request_id
,
prompt
=
prompt
,
prompt
=
prompt
,
sampling_params
=
sampling_params
):
sampling_params
=
sampling_params
,
data_parallel_rank
=
data_parallel_rank
):
num_tokens
=
len
(
out
.
outputs
[
0
].
token_ids
)
num_tokens
=
len
(
out
.
outputs
[
0
].
token_ids
)
if
output_kind
==
RequestOutputKind
.
DELTA
:
if
output_kind
==
RequestOutputKind
.
DELTA
:
...
@@ -89,8 +92,12 @@ async def test_load(output_kind: RequestOutputKind,
...
@@ -89,8 +92,12 @@ async def test_load(output_kind: RequestOutputKind,
for
request_id
in
request_ids
:
for
request_id
in
request_ids
:
tasks
.
append
(
tasks
.
append
(
asyncio
.
create_task
(
asyncio
.
create_task
(
generate
(
engine
,
request_id
,
prompt
,
output_kind
,
generate
(
engine
,
NUM_EXPECTED_TOKENS
)))
request_id
,
prompt
,
output_kind
,
NUM_EXPECTED_TOKENS
,
data_parallel_rank
=
0
)))
# Confirm that we got all the EXPECTED tokens from the requests.
# Confirm that we got all the EXPECTED tokens from the requests.
done
,
pending
=
await
asyncio
.
wait
(
tasks
,
done
,
pending
=
await
asyncio
.
wait
(
tasks
,
return_when
=
asyncio
.
FIRST_EXCEPTION
)
return_when
=
asyncio
.
FIRST_EXCEPTION
)
...
...
vllm/engine/async_llm_engine.py
View file @
7353492a
...
@@ -494,6 +494,10 @@ class _AsyncLLMEngine(LLMEngine):
...
@@ -494,6 +494,10 @@ class _AsyncLLMEngine(LLMEngine):
if
arrival_time
is
None
:
if
arrival_time
is
None
:
arrival_time
=
time
.
time
()
arrival_time
=
time
.
time
()
if
data_parallel_rank
is
not
None
:
raise
ValueError
(
"Targeting data_parallel_rank only supported "
"in v1 client."
)
if
(
isinstance
(
prompt
,
dict
)
if
(
isinstance
(
prompt
,
dict
)
and
prompt
.
get
(
"prompt_embeds"
,
None
)
is
not
None
and
prompt
.
get
(
"prompt_embeds"
,
None
)
is
not
None
and
not
prompt
.
get
(
"prompt_token_ids"
,
None
)):
and
not
prompt
.
get
(
"prompt_token_ids"
,
None
)):
...
...
vllm/v1/engine/core_client.py
View file @
7353492a
...
@@ -1000,9 +1000,6 @@ class DPAsyncMPClient(AsyncMPClient):
...
@@ -1000,9 +1000,6 @@ class DPAsyncMPClient(AsyncMPClient):
)
->
CoreEngine
:
)
->
CoreEngine
:
if
dp_rank
is
not
None
:
if
dp_rank
is
not
None
:
# engines are already in rank order
# engines are already in rank order
if
dp_rank
<
0
or
dp_rank
>=
len
(
self
.
core_engines
):
raise
ValueError
(
f
"Requested DP rank
{
dp_rank
}
is out of "
f
"range [0,
{
len
(
self
.
core_engines
)
}
)"
)
return
self
.
core_engines
[
dp_rank
]
return
self
.
core_engines
[
dp_rank
]
if
not
self
.
lb_engines
:
if
not
self
.
lb_engines
:
...
...
vllm/v1/engine/processor.py
View file @
7353492a
...
@@ -226,6 +226,12 @@ class Processor:
...
@@ -226,6 +226,12 @@ class Processor:
if
prompt_adapter_request
is
not
None
:
if
prompt_adapter_request
is
not
None
:
raise
ValueError
(
"V1 does not support prompt_adapter_request."
)
raise
ValueError
(
"V1 does not support prompt_adapter_request."
)
data_parallel_size
=
self
.
vllm_config
.
parallel_config
.
data_parallel_size
if
data_parallel_rank
is
not
None
and
not
(
0
<=
data_parallel_rank
<
data_parallel_size
):
raise
ValueError
(
f
"data_parallel_rank
{
data_parallel_rank
}
"
f
"is out of range [0,
{
data_parallel_size
}
)."
)
if
arrival_time
is
None
:
if
arrival_time
is
None
:
arrival_time
=
time
.
time
()
arrival_time
=
time
.
time
()
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment