Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
22cf679a
Unverified
Commit
22cf679a
authored
Aug 22, 2025
by
Didier Durand
Committed by
GitHub
Aug 22, 2025
Browse files
[Doc]: fix various typos in multiple files (#23179)
Signed-off-by:
Didier Durand
<
durand.didier@gmail.com
>
parent
b6d7d34f
Changes
7
Show whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
12 additions
and
12 deletions
+12
-12
vllm/beam_search.py
vllm/beam_search.py
+1
-1
vllm/compilation/backends.py
vllm/compilation/backends.py
+1
-1
vllm/engine/arg_utils.py
vllm/engine/arg_utils.py
+3
-3
vllm/engine/multiprocessing/client.py
vllm/engine/multiprocessing/client.py
+2
-2
vllm/entrypoints/chat_utils.py
vllm/entrypoints/chat_utils.py
+1
-1
vllm/utils/__init__.py
vllm/utils/__init__.py
+2
-2
vllm/v1/structured_output/__init__.py
vllm/v1/structured_output/__init__.py
+2
-2
No files found.
vllm/beam_search.py
View file @
22cf679a
...
@@ -18,7 +18,7 @@ class BeamSearchSequence:
...
@@ -18,7 +18,7 @@ class BeamSearchSequence:
The text field is optional and will only be filled when the sequence is
The text field is optional and will only be filled when the sequence is
about to be returned to the user.
about to be returned to the user.
"""
"""
# The tokens include
s
the prompt.
# The tokens include the prompt.
tokens
:
list
[
int
]
tokens
:
list
[
int
]
logprobs
:
list
[
dict
[
int
,
Logprob
]]
logprobs
:
list
[
dict
[
int
,
Logprob
]]
lora_request
:
Optional
[
LoRARequest
]
=
None
lora_request
:
Optional
[
LoRARequest
]
=
None
...
...
vllm/compilation/backends.py
View file @
22cf679a
...
@@ -484,7 +484,7 @@ class VllmBackend:
...
@@ -484,7 +484,7 @@ class VllmBackend:
factors
=
[]
factors
=
[]
# 0. factors come from the env, for example, The values of
# 0. factors come from the env, for example, The values of
# VLLM_PP_LAYER_PARTITION will affect
s
the computation graph.
# VLLM_PP_LAYER_PARTITION will affect the computation graph.
env_hash
=
envs
.
compute_hash
()
env_hash
=
envs
.
compute_hash
()
factors
.
append
(
env_hash
)
factors
.
append
(
env_hash
)
...
...
vllm/engine/arg_utils.py
View file @
22cf679a
...
@@ -605,7 +605,7 @@ class EngineArgs:
...
@@ -605,7 +605,7 @@ class EngineArgs:
**
guided_decoding_kwargs
[
"disable_additional_properties"
])
**
guided_decoding_kwargs
[
"disable_additional_properties"
])
guided_decoding_group
.
add_argument
(
guided_decoding_group
.
add_argument
(
"--reasoning-parser"
,
"--reasoning-parser"
,
# This choice
s
is a special case because it's not static
# This choice is a special case because it's not static
choices
=
list
(
ReasoningParserManager
.
reasoning_parsers
),
choices
=
list
(
ReasoningParserManager
.
reasoning_parsers
),
**
guided_decoding_kwargs
[
"reasoning_backend"
])
**
guided_decoding_kwargs
[
"reasoning_backend"
])
...
@@ -1047,7 +1047,7 @@ class EngineArgs:
...
@@ -1047,7 +1047,7 @@ class EngineArgs:
# details from the config directly
# details from the config directly
# no user input required / expected
# no user input required / expected
if
isinstance
(
hf_config
,
SpeculatorsConfig
):
if
isinstance
(
hf_config
,
SpeculatorsConfig
):
# We create one since we dont create one
# We create one since we don
'
t create one
self
.
speculative_config
=
{}
self
.
speculative_config
=
{}
self
.
speculative_config
[
self
.
speculative_config
[
"num_speculative_tokens"
]
=
hf_config
.
num_lookahead_tokens
"num_speculative_tokens"
]
=
hf_config
.
num_lookahead_tokens
...
@@ -1775,7 +1775,7 @@ class AsyncEngineArgs(EngineArgs):
...
@@ -1775,7 +1775,7 @@ class AsyncEngineArgs(EngineArgs):
def
add_cli_args
(
parser
:
FlexibleArgumentParser
,
def
add_cli_args
(
parser
:
FlexibleArgumentParser
,
async_args_only
:
bool
=
False
)
->
FlexibleArgumentParser
:
async_args_only
:
bool
=
False
)
->
FlexibleArgumentParser
:
# Initialize plugin to update the parser, for example, The plugin may
# Initialize plugin to update the parser, for example, The plugin may
# add
ing
a new kind of quantization method to --quantization argument or
# add a new kind of quantization method to --quantization argument or
# a new device to --device argument.
# a new device to --device argument.
load_general_plugins
()
load_general_plugins
()
if
not
async_args_only
:
if
not
async_args_only
:
...
...
vllm/engine/multiprocessing/client.py
View file @
22cf679a
...
@@ -539,7 +539,7 @@ class MQLLMEngineClient(EngineClient):
...
@@ -539,7 +539,7 @@ class MQLLMEngineClient(EngineClient):
if
request_id
in
self
.
output_queues
:
if
request_id
in
self
.
output_queues
:
raise
ValueError
(
f
"Request
{
request_id
}
already exists"
)
raise
ValueError
(
f
"Request
{
request_id
}
already exists"
)
# 1) Create output queue for this request
s
.
# 1) Create output queue for this request.
queue
:
asyncio
.
Queue
[
Union
[
RequestOutput
,
queue
:
asyncio
.
Queue
[
Union
[
RequestOutput
,
BaseException
]]
=
asyncio
.
Queue
()
BaseException
]]
=
asyncio
.
Queue
()
self
.
output_queues
[
request_id
]
=
queue
self
.
output_queues
[
request_id
]
=
queue
...
@@ -651,7 +651,7 @@ class MQLLMEngineClient(EngineClient):
...
@@ -651,7 +651,7 @@ class MQLLMEngineClient(EngineClient):
# Uses the same I/O as generate requests
# Uses the same I/O as generate requests
request
=
RPCLoadAdapterRequest
(
lora_request
)
request
=
RPCLoadAdapterRequest
(
lora_request
)
# Create output queue for this request
s
.
# Create output queue for this request.
queue
:
asyncio
.
Queue
[
Union
[
None
,
BaseException
]]
=
asyncio
.
Queue
()
queue
:
asyncio
.
Queue
[
Union
[
None
,
BaseException
]]
=
asyncio
.
Queue
()
self
.
output_queues
[
request
.
request_id
]
=
queue
self
.
output_queues
[
request
.
request_id
]
=
queue
...
...
vllm/entrypoints/chat_utils.py
View file @
22cf679a
...
@@ -1330,7 +1330,7 @@ def apply_mistral_chat_template(
...
@@ -1330,7 +1330,7 @@ def apply_mistral_chat_template(
# mistral-common uses assert statements to stop processing of input
# mistral-common uses assert statements to stop processing of input
# if input does not comply with the expected format.
# if input does not comply with the expected format.
# We convert those assertion errors to ValueErrors so they can be
# We convert those assertion errors to ValueErrors so they can be
#
are
properly caught in the preprocessing_input step
# properly caught in the preprocessing_input step
except
(
AssertionError
,
MistralCommonException
)
as
e
:
except
(
AssertionError
,
MistralCommonException
)
as
e
:
raise
ValueError
(
str
(
e
))
from
e
raise
ValueError
(
str
(
e
))
from
e
...
...
vllm/utils/__init__.py
View file @
22cf679a
...
@@ -2482,7 +2482,7 @@ class PlaceholderModule(_PlaceholderBase):
...
@@ -2482,7 +2482,7 @@ class PlaceholderModule(_PlaceholderBase):
A placeholder object to use when a module does not exist.
A placeholder object to use when a module does not exist.
This enables more informative errors when trying to access attributes
This enables more informative errors when trying to access attributes
of a module that does not exist
s
.
of a module that does not exist.
"""
"""
def
__init__
(
self
,
name
:
str
)
->
None
:
def
__init__
(
self
,
name
:
str
)
->
None
:
...
@@ -3109,7 +3109,7 @@ class LazyLoader(types.ModuleType):
...
@@ -3109,7 +3109,7 @@ class LazyLoader(types.ModuleType):
"""
"""
LazyLoader module borrowed from Tensorflow
LazyLoader module borrowed from Tensorflow
https://github.com/tensorflow/tensorflow/blob/main/tensorflow/python/util/lazy_loader.py
https://github.com/tensorflow/tensorflow/blob/main/tensorflow/python/util/lazy_loader.py
with a addition of "module caching".
with a
n
addition of "module caching".
Lazily import a module, mainly to avoid pulling in large dependencies.
Lazily import a module, mainly to avoid pulling in large dependencies.
Modules such as `xgrammar` might do additional side effects, so we
Modules such as `xgrammar` might do additional side effects, so we
...
...
vllm/v1/structured_output/__init__.py
View file @
22cf679a
...
@@ -267,7 +267,7 @@ class StructuredOutputManager:
...
@@ -267,7 +267,7 @@ class StructuredOutputManager:
assert
request
.
structured_output_request
is
not
None
assert
request
.
structured_output_request
is
not
None
assert
request
.
structured_output_request
.
grammar
is
not
None
assert
request
.
structured_output_request
.
grammar
is
not
None
# by default, we should always advance
# by default, we should always advance
# for cases that do
es
n't use
s
thinking mode.
# for cases that don't use thinking mode.
if
self
.
reasoner
is
not
None
:
if
self
.
reasoner
is
not
None
:
structured_req
=
request
.
structured_output_request
structured_req
=
request
.
structured_output_request
...
@@ -276,7 +276,7 @@ class StructuredOutputManager:
...
@@ -276,7 +276,7 @@ class StructuredOutputManager:
# Check if reasoning ends in *this* step
# Check if reasoning ends in *this* step
if
self
.
reasoner
.
is_reasoning_end
(
request
.
all_token_ids
):
if
self
.
reasoner
.
is_reasoning_end
(
request
.
all_token_ids
):
# Reasoning just ended, so we shouldn't advance
d
til
# Reasoning just ended, so we shouldn't advance til
# next pass
# next pass
structured_req
.
reasoning_ended
=
True
structured_req
.
reasoning_ended
=
True
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment