Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
0808eb81
Unverified
Commit
0808eb81
authored
Nov 28, 2025
by
Cyrus Leung
Committed by
GitHub
Nov 28, 2025
Browse files
[Misc] Remove `yapf` directives (#29675)
Signed-off-by:
DarkLight1337
<
tlleungac@connect.ust.hk
>
parent
460d8bbf
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
52 additions
and
43 deletions
+52
-43
vllm/distributed/ec_transfer/ec_connector/factory.py
vllm/distributed/ec_transfer/ec_connector/factory.py
+0
-3
vllm/entrypoints/openai/serving_tokens.py
vllm/entrypoints/openai/serving_tokens.py
+52
-40
No files found.
vllm/distributed/ec_transfer/ec_connector/factory.py
View file @
0808eb81
...
...
@@ -5,15 +5,12 @@ import importlib
from
collections.abc
import
Callable
from
typing
import
TYPE_CHECKING
# yapf: disable
from
vllm.distributed.ec_transfer.ec_connector.base
import
(
ECConnectorBase
,
ECConnectorRole
,
)
from
vllm.logger
import
init_logger
# yapf: enable
if
TYPE_CHECKING
:
from
vllm.config
import
ECTransferConfig
,
VllmConfig
...
...
vllm/entrypoints/openai/serving_tokens.py
View file @
0808eb81
...
...
@@ -7,7 +7,6 @@ from collections.abc import Sequence as GenericSequence
from
fastapi
import
Request
# yapf: disable
from
vllm.engine.protocol
import
EngineClient
from
vllm.entrypoints.logger
import
RequestLogger
from
vllm.entrypoints.openai.protocol
import
(
...
...
@@ -49,22 +48,26 @@ class ServingTokens(OpenAIServing):
enable_prompt_tokens_details
:
bool
=
False
,
enable_log_outputs
:
bool
=
False
,
):
super
().
__init__
(
engine_client
=
engine_client
,
models
=
models
,
request_logger
=
request_logger
,
return_tokens_as_token_ids
=
return_tokens_as_token_ids
,
log_error_stack
=
log_error_stack
)
super
().
__init__
(
engine_client
=
engine_client
,
models
=
models
,
request_logger
=
request_logger
,
return_tokens_as_token_ids
=
return_tokens_as_token_ids
,
log_error_stack
=
log_error_stack
,
)
self
.
enable_prompt_tokens_details
=
enable_prompt_tokens_details
self
.
enable_log_outputs
=
enable_log_outputs
self
.
force_no_detokenize
=
force_no_detokenize
if
force_no_detokenize
:
logger
.
info
(
"Tokens-only mode is enabled, skipping detokenization "
"step for incoming requests."
)
logger
.
info
(
"Tokens-only mode is enabled, skipping detokenization "
"step for incoming requests."
)
async
def
serve_tokens
(
self
,
request
:
GenerateRequest
,
raw_request
:
Request
|
None
=
None
raw_request
:
Request
|
None
=
None
,
)
->
GenerateResponse
|
ErrorResponse
:
error_check_ret
=
await
self
.
_check_model
(
request
)
if
error_check_ret
is
not
None
:
...
...
@@ -78,13 +81,13 @@ class ServingTokens(OpenAIServing):
raise
self
.
engine_client
.
dead_error
lora_request
=
None
lora_request
=
self
.
_maybe_get_adapters
(
request
,
supports_default_mm_loras
=
True
)
lora_request
=
self
.
_maybe_get_adapters
(
request
,
supports_default_mm_loras
=
True
)
model_name
=
self
.
models
.
model_name
(
lora_request
)
request_id
=
"generate-tokens-"
\
f
"
{
self
.
_base_request_id
(
raw_request
,
request
.
request_id
)
}
"
request_id
=
(
f
"generate-tokens-
{
self
.
_base_request_id
(
raw_request
,
request
.
request_id
)
}
"
)
request_metadata
=
RequestResponseMetadata
(
request_id
=
request_id
)
if
raw_request
:
...
...
@@ -106,13 +109,18 @@ class ServingTokens(OpenAIServing):
if
self
.
force_no_detokenize
:
sampling_params
.
detokenize
=
False
self
.
_log_inputs
(
request_id
,
request
.
token_ids
,
params
=
sampling_params
,
lora_request
=
lora_request
)
self
.
_log_inputs
(
request_id
,
request
.
token_ids
,
params
=
sampling_params
,
lora_request
=
lora_request
,
)
trace_headers
=
(
None
if
raw_request
is
None
else
await
self
.
_get_trace_headers
(
raw_request
.
headers
))
trace_headers
=
(
None
if
raw_request
is
None
else
await
self
.
_get_trace_headers
(
raw_request
.
headers
)
)
result_generator
=
self
.
engine_client
.
generate
(
engine_prompt
,
...
...
@@ -131,8 +139,8 @@ class ServingTokens(OpenAIServing):
try
:
assert
result_generator
is
not
None
return
await
self
.
serve_tokens_full_generator
(
request
,
result_generator
,
request_id
,
model_name
,
request_metadata
)
request
,
result_generator
,
request_id
,
model_name
,
request_metadata
)
except
ValueError
as
e
:
return
self
.
create_error_response
(
str
(
e
))
...
...
@@ -144,7 +152,6 @@ class ServingTokens(OpenAIServing):
model_name
:
str
,
request_metadata
:
RequestResponseMetadata
,
)
->
ErrorResponse
|
GenerateResponse
:
created_time
=
int
(
time
.
time
())
final_res
:
RequestOutput
|
None
=
None
sampling_params
:
SamplingParams
=
request
.
sampling_params
...
...
@@ -179,9 +186,9 @@ class ServingTokens(OpenAIServing):
choice_data
=
GenerateResponseChoice
(
index
=
output
.
index
,
logprobs
=
logprobs
,
finish_reason
=
output
.
finish_reason
if
output
.
finish_reason
else
"stop"
,
token_ids
=
as_list
(
output
.
token_ids
)
)
finish_reason
=
output
.
finish_reason
if
output
.
finish_reason
else
"stop"
,
token_ids
=
as_list
(
output
.
token_ids
)
,
)
choices
.
append
(
choice_data
)
num_generated_tokens
+=
len
(
output
.
token_ids
)
...
...
@@ -191,14 +198,16 @@ class ServingTokens(OpenAIServing):
if
final_res
.
encoder_prompt_token_ids
is
not
None
:
num_prompt_tokens
+=
len
(
final_res
.
encoder_prompt_token_ids
)
usage
=
UsageInfo
(
prompt_tokens
=
num_prompt_tokens
,
completion_tokens
=
num_generated_tokens
,
total_tokens
=
num_prompt_tokens
+
num_generated_tokens
)
usage
=
UsageInfo
(
prompt_tokens
=
num_prompt_tokens
,
completion_tokens
=
num_generated_tokens
,
total_tokens
=
num_prompt_tokens
+
num_generated_tokens
,
)
if
self
.
enable_prompt_tokens_details
and
final_res
.
num_cached_tokens
:
# This info is not available at the /coordinator level
usage
.
prompt_tokens_details
=
PromptTokenUsageInfo
(
cached_tokens
=
final_res
.
num_cached_tokens
)
cached_tokens
=
final_res
.
num_cached_tokens
)
request_metadata
.
final_usage_info
=
usage
...
...
@@ -218,14 +227,13 @@ class ServingTokens(OpenAIServing):
# Get the corresponding output token IDs
output_token_ids
=
None
if
choice
.
index
<
len
(
final_res
.
outputs
):
output_token_ids
=
final_res
.
outputs
[
choice
.
index
].
token_ids
output_token_ids
=
final_res
.
outputs
[
choice
.
index
].
token_ids
if
output_token_ids
:
# Log token_ids only.
self
.
request_logger
.
log_outputs
(
request_id
=
request_id
,
outputs
=
""
,
outputs
=
""
,
output_token_ids
=
output_token_ids
,
finish_reason
=
choice
.
finish_reason
,
is_streaming
=
False
,
...
...
@@ -246,10 +254,12 @@ class ServingTokens(OpenAIServing):
for
i
,
token_id
in
enumerate
(
token_ids
):
token
=
f
"token_id:
{
token_id
}
"
step_top_logprobs
=
top_logprobs
[
i
]
if
step_top_logprobs
is
None
or
step_top_logprobs
.
get
(
token_id
)
is
None
:
if
step_top_logprobs
is
None
or
step_top_logprobs
.
get
(
token_id
)
is
None
:
logprobs_content
.
append
(
ChatCompletionLogProbsContent
(
token
=
token
,
))
ChatCompletionLogProbsContent
(
token
=
token
,
)
)
else
:
step_token
=
step_top_logprobs
[
token_id
]
...
...
@@ -261,9 +271,11 @@ class ServingTokens(OpenAIServing):
ChatCompletionLogProb
(
token
=
token
,
logprob
=
max
(
p
[
1
].
logprob
,
-
9999.0
),
)
for
i
,
p
in
enumerate
(
step_top_logprobs
.
items
())
if
num_output_top_logprobs
and
i
<
num_output_top_logprobs
]))
)
for
i
,
p
in
enumerate
(
step_top_logprobs
.
items
())
if
num_output_top_logprobs
and
i
<
num_output_top_logprobs
],
)
)
return
ChatCompletionLogProbs
(
content
=
logprobs_content
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment