Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
fad73be1
Unverified
Commit
fad73be1
authored
Sep 02, 2025
by
Didier Durand
Committed by
GitHub
Sep 02, 2025
Browse files
[Doc]: fix typos in Python comments (#24077)
Signed-off-by:
Didier Durand
<
durand.didier@gmail.com
>
parent
56d04089
Changes
14
Hide whitespace changes
Inline
Side-by-side
Showing
14 changed files
with
19 additions
and
19 deletions
+19
-19
tests/async_engine/test_api_server.py
tests/async_engine/test_api_server.py
+1
-1
tests/core/block/e2e/test_correctness.py
tests/core/block/e2e/test_correctness.py
+2
-2
tests/engine/test_arg_utils.py
tests/engine/test_arg_utils.py
+1
-1
tests/kernels/moe/test_deepep_deepgemm_moe.py
tests/kernels/moe/test_deepep_deepgemm_moe.py
+1
-1
tests/lora/test_add_lora.py
tests/lora/test_add_lora.py
+2
-2
tests/lora/test_lora_allowed_token_ids.py
tests/lora/test_lora_allowed_token_ids.py
+2
-2
tests/models/language/generation/test_common.py
tests/models/language/generation/test_common.py
+1
-1
tests/models/language/generation/test_mistral.py
tests/models/language/generation/test_mistral.py
+2
-2
tests/models/multimodal/generation/test_qwen2_vl.py
tests/models/multimodal/generation/test_qwen2_vl.py
+2
-2
tests/v1/core/test_kv_cache_utils.py
tests/v1/core/test_kv_cache_utils.py
+1
-1
tests/v1/executor/test_executor.py
tests/v1/executor/test_executor.py
+1
-1
tests/v1/spec_decode/test_eagle.py
tests/v1/spec_decode/test_eagle.py
+1
-1
tests/v1/test_kv_sharing.py
tests/v1/test_kv_sharing.py
+1
-1
tests/v1/worker/test_gpu_model_runner.py
tests/v1/worker/test_gpu_model_runner.py
+1
-1
No files found.
tests/async_engine/test_api_server.py
View file @
fad73be1
...
@@ -98,7 +98,7 @@ def test_api_server(api_server, distributed_executor_backend: str):
...
@@ -98,7 +98,7 @@ def test_api_server(api_server, distributed_executor_backend: str):
pool
.
join
()
pool
.
join
()
# check cancellation stats
# check cancellation stats
# give it some time
s
to update the stats
# give it some time to update the stats
time
.
sleep
(
1
)
time
.
sleep
(
1
)
num_aborted_requests
=
requests
.
get
(
num_aborted_requests
=
requests
.
get
(
...
...
tests/core/block/e2e/test_correctness.py
View file @
fad73be1
...
@@ -439,10 +439,10 @@ def test_auto_prefix_caching_with_preemption(baseline_llm_generator,
...
@@ -439,10 +439,10 @@ def test_auto_prefix_caching_with_preemption(baseline_llm_generator,
@
pytest
.
mark
.
parametrize
(
"seed"
,
[
1
])
@
pytest
.
mark
.
parametrize
(
"seed"
,
[
1
])
def
test_auto_prefix_caching_after_eviction_start
(
baseline_llm_generator
,
def
test_auto_prefix_caching_after_eviction_start
(
baseline_llm_generator
,
test_llm_generator
):
test_llm_generator
):
"""Verify block manager v2 with auto prefix caching could work
s
normal
"""Verify block manager v2 with auto prefix caching could work normal
ly
even when eviction started.
even when eviction started.
With APC enabled, all blocks are held by native block at the beginning.
With APC enabled, all blocks are held by native block at the beginning.
Then blocks are managed by evictor instead. If cache hit at the evitor's
Then blocks are managed by evictor instead. If cache hit at the evi
c
tor's
block, then it could be reused, or we need to recompute its kv cache.
block, then it could be reused, or we need to recompute its kv cache.
"""
"""
output_len
=
10
output_len
=
10
...
...
tests/engine/test_arg_utils.py
View file @
fad73be1
...
@@ -167,7 +167,7 @@ def test_get_kwargs():
...
@@ -167,7 +167,7 @@ def test_get_kwargs():
# dict should have json tip in help
# dict should have json tip in help
json_tip
=
"Should either be a valid JSON string or JSON keys"
json_tip
=
"Should either be a valid JSON string or JSON keys"
assert
json_tip
in
kwargs
[
"json_tip"
][
"help"
]
assert
json_tip
in
kwargs
[
"json_tip"
][
"help"
]
# nested config should
should
construct the nested config
# nested config should construct the nested config
assert
kwargs
[
"nested_config"
][
"type"
](
'{"field": 2}'
)
==
NestedConfig
(
2
)
assert
kwargs
[
"nested_config"
][
"type"
](
'{"field": 2}'
)
==
NestedConfig
(
2
)
...
...
tests/kernels/moe/test_deepep_deepgemm_moe.py
View file @
fad73be1
...
@@ -282,7 +282,7 @@ def triton_impl(a: torch.Tensor, topk_ids: torch.Tensor,
...
@@ -282,7 +282,7 @@ def triton_impl(a: torch.Tensor, topk_ids: torch.Tensor,
a1_scale
=
a1_scale
,
a1_scale
=
a1_scale
,
block_shape
=
block_shape
,
block_shape
=
block_shape
,
# Make sure this is set to False so we
# Make sure this is set to False so we
# dont end up comparing the same implementation.
# don
'
t end up comparing the same implementation.
allow_deep_gemm
=
False
)
allow_deep_gemm
=
False
)
...
...
tests/lora/test_add_lora.py
View file @
fad73be1
...
@@ -59,10 +59,10 @@ async def requests_processing_time(llm,
...
@@ -59,10 +59,10 @@ async def requests_processing_time(llm,
@
pytest
.
mark
.
asyncio
@
pytest
.
mark
.
asyncio
async
def
test_add_lora
(
chatglm3_lora_files
):
async
def
test_add_lora
(
chatglm3_lora_files
):
"""
"""
The add_lora function is used to pre
-
load some LoRA adapters into the
The add_lora function is used to preload some LoRA adapters into the
engine in anticipation of future requests using these adapters. To test
engine in anticipation of future requests using these adapters. To test
this functionality, we use the async engine to process some requests - We
this functionality, we use the async engine to process some requests - We
do it twice, once with add_lora() pre
-
loading and once without.
do it twice, once with add_lora() preloading and once without.
We measure the request processing time in both cases and expect the time
We measure the request processing time in both cases and expect the time
to be lesser in the case with add_lora() calls.
to be lesser in the case with add_lora() calls.
...
...
tests/lora/test_lora_allowed_token_ids.py
View file @
fad73be1
...
@@ -18,7 +18,7 @@ def test_allowed_token_ids_with_lora_vocab(llama_2_7b_base_huggingface_id,
...
@@ -18,7 +18,7 @@ def test_allowed_token_ids_with_lora_vocab(llama_2_7b_base_huggingface_id,
adapters that define additional tokens.
adapters that define additional tokens.
"""
"""
# Setup a base model compatible with the sql_lora_files adapter and
# Set
up a base model compatible with the sql_lora_files adapter and
# a known number of tokens in the base model.
# a known number of tokens in the base model.
model_config
=
ModelConfig
(
model_config
=
ModelConfig
(
model
=
llama_2_7b_base_huggingface_id
,
model
=
llama_2_7b_base_huggingface_id
,
...
@@ -84,7 +84,7 @@ def test_allowed_token_ids_with_lora_adapter_no_vocab(
...
@@ -84,7 +84,7 @@ def test_allowed_token_ids_with_lora_adapter_no_vocab(
adapters that do not define additional tokens.
adapters that do not define additional tokens.
"""
"""
# Setup a base model compatible with the qwen25vl_lora_files adapter and
# Set
up a base model compatible with the qwen25vl_lora_files adapter and
# a known number of tokens in the base model.
# a known number of tokens in the base model.
model_config
=
ModelConfig
(
model_config
=
ModelConfig
(
model
=
qwen25vl_base_huggingface_id
,
model
=
qwen25vl_base_huggingface_id
,
...
...
tests/models/language/generation/test_common.py
View file @
fad73be1
...
@@ -13,7 +13,7 @@ from ...registry import HF_EXAMPLE_MODELS
...
@@ -13,7 +13,7 @@ from ...registry import HF_EXAMPLE_MODELS
from
...utils
import
check_logprobs_close
from
...utils
import
check_logprobs_close
# These have unsupported head_dim for FA. We do not
# These have unsupported head_dim for FA. We do not
#
not
have a clean way to fall back, so we fail with
# have a clean way to fall back, so we fail with
# a clear msg when it happens.
# a clear msg when it happens.
# https://github.com/vllm-project/vllm/issues/14524
# https://github.com/vllm-project/vllm/issues/14524
REQUIRES_V0
=
[
"microsoft/phi-2"
,
"stabilityai/stablelm-3b-4e1t"
]
REQUIRES_V0
=
[
"microsoft/phi-2"
,
"stabilityai/stablelm-3b-4e1t"
]
...
...
tests/models/language/generation/test_mistral.py
View file @
fad73be1
...
@@ -20,7 +20,7 @@ MISTRAL_FORMAT_MODELS = [
...
@@ -20,7 +20,7 @@ MISTRAL_FORMAT_MODELS = [
"mistralai/Mistral-7B-Instruct-v0.3"
,
"mistralai/Mistral-7B-Instruct-v0.3"
,
# uses the v3-Tekken tokenizer
# uses the v3-Tekken tokenizer
"mistralai/Ministral-8B-Instruct-2410"
,
"mistralai/Ministral-8B-Instruct-2410"
,
# Mistral-Nemo is to big for CI, but passes locally
# Mistral-Nemo is to
o
big for CI, but passes locally
# "mistralai/Mistral-Nemo-Instruct-2407"
# "mistralai/Mistral-Nemo-Instruct-2407"
]
]
...
@@ -273,7 +273,7 @@ def test_mistral_function_calling(vllm_runner, model: str, dtype: str) -> None:
...
@@ -273,7 +273,7 @@ def test_mistral_function_calling(vllm_runner, model: str, dtype: str) -> None:
def
test_mistral_function_call_nested_json
():
def
test_mistral_function_call_nested_json
():
"""Ensure that the function-name regex captures the entire outer
-
most
"""Ensure that the function-name regex captures the entire outermost
JSON block, including nested braces."""
JSON block, including nested braces."""
# Create a minimal stub tokenizer that provides the few attributes the
# Create a minimal stub tokenizer that provides the few attributes the
...
...
tests/models/multimodal/generation/test_qwen2_vl.py
View file @
fad73be1
...
@@ -154,7 +154,7 @@ def batch_make_image_embeddings(
...
@@ -154,7 +154,7 @@ def batch_make_image_embeddings(
embed_counter
+=
cur_batch_embed_len
embed_counter
+=
cur_batch_embed_len
image_counter
+=
cur_batch_image_count
image_counter
+=
cur_batch_image_count
# ensure we don't los
t
any images or embeddings
# ensure we don't los
e
any images or embeddings
assert
embed_counter
==
image_embeds
.
size
(
0
)
assert
embed_counter
==
image_embeds
.
size
(
0
)
assert
image_counter
==
image_grid_thw
.
size
(
0
)
assert
image_counter
==
image_grid_thw
.
size
(
0
)
assert
len
(
image_batches
)
==
len
(
result
)
assert
len
(
image_batches
)
==
len
(
result
)
...
@@ -238,7 +238,7 @@ def batch_make_video_embeddings(
...
@@ -238,7 +238,7 @@ def batch_make_video_embeddings(
embed_counter
+=
cur_batch_embed_len
embed_counter
+=
cur_batch_embed_len
video_counter
+=
cur_batch_video_count
video_counter
+=
cur_batch_video_count
# ensure we don't los
t
any videos or embeddings
# ensure we don't los
e
any videos or embeddings
assert
embed_counter
==
video_embeds
.
size
(
0
)
assert
embed_counter
==
video_embeds
.
size
(
0
)
assert
video_counter
==
video_grid_thw
.
size
(
0
)
assert
video_counter
==
video_grid_thw
.
size
(
0
)
assert
len
(
video_batches
)
==
len
(
result
)
assert
len
(
video_batches
)
==
len
(
result
)
...
...
tests/v1/core/test_kv_cache_utils.py
View file @
fad73be1
...
@@ -247,7 +247,7 @@ def test_free_kv_cache_block_queue_append_n():
...
@@ -247,7 +247,7 @@ def test_free_kv_cache_block_queue_append_n():
def
test_free_kv_cache_block_queue_popleft_n
():
def
test_free_kv_cache_block_queue_popleft_n
():
blocks
=
[
KVCacheBlock
(
block_id
=
i
)
for
i
in
range
(
6
)]
blocks
=
[
KVCacheBlock
(
block_id
=
i
)
for
i
in
range
(
6
)]
# Create a empty FreeKVCacheBlockQueue with these blocks
# Create a
n
empty FreeKVCacheBlockQueue with these blocks
queue
=
FreeKVCacheBlockQueue
(
queue
=
FreeKVCacheBlockQueue
(
[
blocks
[
1
],
blocks
[
3
],
blocks
[
5
],
blocks
[
4
],
blocks
[
0
],
blocks
[
2
]])
[
blocks
[
1
],
blocks
[
3
],
blocks
[
5
],
blocks
[
4
],
blocks
[
0
],
blocks
[
2
]])
assert
queue
.
num_free_blocks
==
6
assert
queue
.
num_free_blocks
==
6
...
...
tests/v1/executor/test_executor.py
View file @
fad73be1
...
@@ -27,7 +27,7 @@ class CustomMultiprocExecutor(MultiprocExecutor):
...
@@ -27,7 +27,7 @@ class CustomMultiprocExecutor(MultiprocExecutor):
kwargs
:
Optional
[
dict
]
=
None
,
kwargs
:
Optional
[
dict
]
=
None
,
non_block
:
bool
=
False
,
non_block
:
bool
=
False
,
unique_reply_rank
:
Optional
[
int
]
=
None
)
->
list
[
Any
]:
unique_reply_rank
:
Optional
[
int
]
=
None
)
->
list
[
Any
]:
# Drop marker to show that this was r
a
n
# Drop marker to show that this was r
u
n
with
open
(
".marker"
,
"w"
):
with
open
(
".marker"
,
"w"
):
...
...
return
super
().
collective_rpc
(
method
,
timeout
,
args
,
kwargs
)
return
super
().
collective_rpc
(
method
,
timeout
,
args
,
kwargs
)
...
...
tests/v1/spec_decode/test_eagle.py
View file @
fad73be1
...
@@ -183,7 +183,7 @@ def test_load_model(mock_get_model, mock_get_layers, mock_get_pp_group, method,
...
@@ -183,7 +183,7 @@ def test_load_model(mock_get_model, mock_get_layers, mock_get_pp_group, method,
mock_pp_group
.
world_size
=
pp_size
mock_pp_group
.
world_size
=
pp_size
mock_get_pp_group
.
return_value
=
mock_pp_group
mock_get_pp_group
.
return_value
=
mock_pp_group
# Setup the target model mock with a custom class so that
# Set
up the target model mock with a custom class so that
# isinstance() checks match the expected type.
# isinstance() checks match the expected type.
class
_TargetModelStub
(
LlamaForCausalLM
):
class
_TargetModelStub
(
LlamaForCausalLM
):
model
:
mock
.
MagicMock
model
:
mock
.
MagicMock
...
...
tests/v1/test_kv_sharing.py
View file @
fad73be1
...
@@ -30,7 +30,7 @@ def test_initialize_kv_cache_for_kv_sharing_different_attn_groups():
...
@@ -30,7 +30,7 @@ def test_initialize_kv_cache_for_kv_sharing_different_attn_groups():
}
}
# Layers 0 and 1 both belong in KV cache group 0
# Layers 0 and 1 both belong in KV cache group 0
# However, if they have
have
different attention backends, they will be
# However, if they have different attention backends, they will be
# placed in different attention groups for KV cache group 0
# placed in different attention groups for KV cache group 0
kv_cache_groups
=
[
kv_cache_groups
=
[
KVCacheGroupSpec
([
"model.layers.0"
,
"model.layers.1"
],
KVCacheGroupSpec
([
"model.layers.0"
,
"model.layers.1"
],
...
...
tests/v1/worker/test_gpu_model_runner.py
View file @
fad73be1
...
@@ -702,7 +702,7 @@ def test_hybrid_attention_mamba_tensor_shapes(monkeypatch):
...
@@ -702,7 +702,7 @@ def test_hybrid_attention_mamba_tensor_shapes(monkeypatch):
KVCacheTensors for the attention and mamba layers
KVCacheTensors for the attention and mamba layers
(via _reshape_kv_cache_tensors function). This test verifies
(via _reshape_kv_cache_tensors function). This test verifies
that the views are compatible: writing a mamba block
that the views are compatible: writing a mamba block
will not corrupt an attention block and vice
-
versa
will not corrupt an attention block and vice
versa
'''
'''
current_platform
.
seed_everything
(
42
)
current_platform
.
seed_everything
(
42
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment