Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
2382ad29
Unverified
Commit
2382ad29
authored
Feb 22, 2025
by
youkaichao
Committed by
GitHub
Feb 22, 2025
Browse files
[ci] fix linter (#13701)
Signed-off-by:
youkaichao
<
youkaichao@gmail.com
>
parent
3e472d88
Changes
5
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
11 additions
and
7 deletions
+11
-7
examples/offline_inference/data_parallel.py
examples/offline_inference/data_parallel.py
+5
-4
vllm/config.py
vllm/config.py
+1
-1
vllm/utils.py
vllm/utils.py
+1
-0
vllm/v1/engine/core_client.py
vllm/v1/engine/core_client.py
+2
-1
vllm/v1/worker/gpu_model_runner.py
vllm/v1/worker/gpu_model_runner.py
+2
-1
No files found.
examples/offline_inference/data_parallel.py
View file @
2382ad29
...
...
@@ -48,14 +48,15 @@ def main(dp_size, dp_rank, dp_master_ip, dp_master_port, GPUs_per_dp_rank):
max_tokens
=
16
*
(
dp_rank
+
1
))
# Create an LLM.
llm
=
LLM
(
model
=
"facebook/opt-125m"
,
tensor_parallel_size
=
2
,
enforce_eager
=
True
)
llm
=
LLM
(
model
=
"facebook/opt-125m"
,
tensor_parallel_size
=
2
,
enforce_eager
=
True
)
outputs
=
llm
.
generate
(
prompts
,
sampling_params
)
# Print the outputs.
for
output
in
outputs
:
prompt
=
output
.
prompt
generated_text
=
output
.
outputs
[
0
].
text
print
(
f
"DP rank
{
dp_rank
}
, Prompt:
{
prompt
!
r
}
, "
print
(
f
"DP rank
{
dp_rank
}
, Prompt:
{
prompt
!
r
}
, "
f
"Generated text:
{
generated_text
!
r
}
"
)
...
...
vllm/config.py
View file @
2382ad29
vllm/utils.py
View file @
2382ad29
...
...
@@ -518,6 +518,7 @@ def get_open_port() -> int:
return
port
return
_get_open_port
()
def
_get_open_port
()
->
int
:
port
=
envs
.
VLLM_PORT
if
port
is
not
None
:
...
...
vllm/v1/engine/core_client.py
View file @
2382ad29
...
...
@@ -343,6 +343,7 @@ class SyncMPClient(MPClient):
def
execute_dummy_batch
(
self
)
->
None
:
self
.
_call_utility
(
"execute_dummy_batch"
)
class
AsyncMPClient
(
MPClient
):
"""Asyncio-compatible client for multi-proc EngineCore."""
...
...
vllm/v1/worker/gpu_model_runner.py
View file @
2382ad29
...
...
@@ -1167,7 +1167,8 @@ class GPUModelRunner(LoRAModelRunnerMixin):
for
k
,
v
in
self
.
intermediate_tensors
.
items
()
})
with
set_forward_context
(
None
,
self
.
vllm_config
,
num_tokens
=
num_tokens
):
with
set_forward_context
(
None
,
self
.
vllm_config
,
num_tokens
=
num_tokens
):
hidden_states
=
model
(
input_ids
=
input_ids
,
positions
=
positions
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment