Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
41ae4a1e
Unverified
Commit
41ae4a1e
authored
Sep 13, 2025
by
Didier Durand
Committed by
GitHub
Sep 13, 2025
Browse files
[Doc]: fix typos in various files (#24798)
Signed-off-by:
Didier Durand
<
durand.didier@gmail.com
>
parent
4dad72f0
Changes
10
Show whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
13 additions
and
13 deletions
+13
-13
examples/offline_inference/tpu.py
examples/offline_inference/tpu.py
+1
-1
vllm/core/block/naive_block.py
vllm/core/block/naive_block.py
+1
-1
vllm/core/evictor.py
vllm/core/evictor.py
+1
-1
vllm/engine/metrics.py
vllm/engine/metrics.py
+2
-2
vllm/model_executor/layers/linear.py
vllm/model_executor/layers/linear.py
+2
-2
vllm/model_executor/layers/vocab_parallel_embedding.py
vllm/model_executor/layers/vocab_parallel_embedding.py
+1
-1
vllm/model_executor/models/ernie45_vl.py
vllm/model_executor/models/ernie45_vl.py
+1
-1
vllm/model_executor/models/gemma3n_mm.py
vllm/model_executor/models/gemma3n_mm.py
+2
-2
vllm/model_executor/models/nemotron_vl.py
vllm/model_executor/models/nemotron_vl.py
+1
-1
vllm/model_executor/models/phi4mm.py
vllm/model_executor/models/phi4mm.py
+1
-1
No files found.
examples/offline_inference/tpu.py
View file @
41ae4a1e
...
@@ -42,7 +42,7 @@ def main():
...
@@ -42,7 +42,7 @@ def main():
llm_args
[
"model"
]
=
"meta-llama/Llama-3.1-8B-Instruct"
llm_args
[
"model"
]
=
"meta-llama/Llama-3.1-8B-Instruct"
# Set `enforce_eager=True` to avoid ahead-of-time compilation.
# Set `enforce_eager=True` to avoid ahead-of-time compilation.
# In real workloads, `enfor
a
ce_eager` should be `False`.
# In real workloads, `enforce_eager` should be `False`.
llm
=
LLM
(
**
llm_args
)
llm
=
LLM
(
**
llm_args
)
outputs
=
llm
.
generate
(
prompts
,
sampling_params
)
outputs
=
llm
.
generate
(
prompts
,
sampling_params
)
print
(
"-"
*
50
)
print
(
"-"
*
50
)
...
...
vllm/core/block/naive_block.py
View file @
41ae4a1e
...
@@ -182,7 +182,7 @@ class NaiveBlockAllocator(BlockAllocator):
...
@@ -182,7 +182,7 @@ class NaiveBlockAllocator(BlockAllocator):
# Increment refcount for each block.
# Increment refcount for each block.
assert
block
.
block_id
is
not
None
assert
block
.
block_id
is
not
None
refcount
=
self
.
_refcounter
.
incr
(
block
.
block_id
)
refcount
=
self
.
_refcounter
.
incr
(
block
.
block_id
)
assert
refcount
!=
1
,
"can't fork free
'
d block"
assert
refcount
!=
1
,
"can't fork freed block"
forked_block
=
self
.
_block_pool
.
init_block
(
forked_block
=
self
.
_block_pool
.
init_block
(
prev_block
=
prev_block
,
prev_block
=
prev_block
,
...
...
vllm/core/evictor.py
View file @
41ae4a1e
...
@@ -58,7 +58,7 @@ class Evictor(ABC):
...
@@ -58,7 +58,7 @@ class Evictor(ABC):
class
BlockMetaData
:
class
BlockMetaData
:
"""Data structure for storing key data describe cached block, so that
"""Data structure for storing key data describe cached block, so that
evitor could use to make its decision which one to choose for eviction
evi
c
tor could use to make its decision which one to choose for eviction
Here we use physical block id as the dict key, as there maybe several
Here we use physical block id as the dict key, as there maybe several
blocks with the same content hash, but their physical id is unique.
blocks with the same content hash, but their physical id is unique.
...
...
vllm/engine/metrics.py
View file @
41ae4a1e
...
@@ -379,7 +379,7 @@ class LoggingStatLogger(StatLoggerBase):
...
@@ -379,7 +379,7 @@ class LoggingStatLogger(StatLoggerBase):
if
local_interval_elapsed
(
stats
.
now
,
self
.
last_local_log
,
if
local_interval_elapsed
(
stats
.
now
,
self
.
last_local_log
,
self
.
local_interval
):
self
.
local_interval
):
# Compute summary metrics for tracked stats (and log them
# Compute summary metrics for tracked stats (and log them
# to promethus if applicable).
# to prometh
e
us if applicable).
prompt_throughput
=
get_throughput
(
self
.
num_prompt_tokens
,
prompt_throughput
=
get_throughput
(
self
.
num_prompt_tokens
,
now
=
stats
.
now
,
now
=
stats
.
now
,
last_log
=
self
.
last_local_log
)
last_log
=
self
.
last_local_log
)
...
@@ -432,7 +432,7 @@ class LoggingStatLogger(StatLoggerBase):
...
@@ -432,7 +432,7 @@ class LoggingStatLogger(StatLoggerBase):
class
PrometheusStatLogger
(
StatLoggerBase
):
class
PrometheusStatLogger
(
StatLoggerBase
):
"""PrometheusStatLogger is used LLMEngine to log to Promethus."""
"""PrometheusStatLogger is used LLMEngine to log to Prometh
e
us."""
_metrics_cls
=
Metrics
_metrics_cls
=
Metrics
_gauge_cls
=
prometheus_client
.
Gauge
_gauge_cls
=
prometheus_client
.
Gauge
...
...
vllm/model_executor/layers/linear.py
View file @
41ae4a1e
...
@@ -740,7 +740,7 @@ class MergedColumnParallelLinear(ColumnParallelLinear):
...
@@ -740,7 +740,7 @@ class MergedColumnParallelLinear(ColumnParallelLinear):
"""
"""
Handle special case for models where MLP layers are already
Handle special case for models where MLP layers are already
fused on disk. In this case, we have no shard id. This function
fused on disk. In this case, we have no shard id. This function
determ
m
ines the shard id by splitting these layers and then calls
determines the shard id by splitting these layers and then calls
the weight loader using the shard id.
the weight loader using the shard id.
An example of a model with these fused layers:
An example of a model with these fused layers:
...
@@ -914,7 +914,7 @@ class QKVParallelLinear(ColumnParallelLinear):
...
@@ -914,7 +914,7 @@ class QKVParallelLinear(ColumnParallelLinear):
"""
"""
Handle special case for models where QKV layers are already
Handle special case for models where QKV layers are already
fused on disk. In this case, we have no shard id. This function
fused on disk. In this case, we have no shard id. This function
determ
m
ines the shard id by splitting these layers and then calls
determines the shard id by splitting these layers and then calls
the weight loader using the shard id.
the weight loader using the shard id.
An example of a model with these fused layers:
An example of a model with these fused layers:
...
...
vllm/model_executor/layers/vocab_parallel_embedding.py
View file @
41ae4a1e
...
@@ -258,7 +258,7 @@ class VocabParallelEmbedding(CustomOp):
...
@@ -258,7 +258,7 @@ class VocabParallelEmbedding(CustomOp):
if
params_dtype
is
None
:
if
params_dtype
is
None
:
params_dtype
=
torch
.
get_default_dtype
()
params_dtype
=
torch
.
get_default_dtype
()
# Divide the weight matrix along the vocabu
ral
y dimension.
# Divide the weight matrix along the vocabu
lar
y dimension.
self
.
num_added_embeddings
=
self
.
num_embeddings
-
self
.
org_vocab_size
self
.
num_added_embeddings
=
self
.
num_embeddings
-
self
.
org_vocab_size
self
.
num_embeddings_per_partition
=
divide
(
self
.
num_embeddings_padded
,
self
.
num_embeddings_per_partition
=
divide
(
self
.
num_embeddings_padded
,
self
.
tp_size
)
self
.
tp_size
)
...
...
vllm/model_executor/models/ernie45_vl.py
View file @
41ae4a1e
...
@@ -1446,7 +1446,7 @@ class Ernie4_5_VLMoeForConditionalGeneration(nn.Module, SupportsMultiModal,
...
@@ -1446,7 +1446,7 @@ class Ernie4_5_VLMoeForConditionalGeneration(nn.Module, SupportsMultiModal,
return
None
return
None
# The result multimodal_embeddings is tuple of tensors, with each
# The result multimodal_embeddings is tuple of tensors, with each
# tensor correspo
e
nding to a multimodal data item (image or video).
# tensor corresponding to a multimodal data item (image or video).
multimodal_embeddings
:
tuple
[
torch
.
Tensor
,
...]
=
()
multimodal_embeddings
:
tuple
[
torch
.
Tensor
,
...]
=
()
# NOTE: It is important to iterate over the keys in this dictionary
# NOTE: It is important to iterate over the keys in this dictionary
...
...
vllm/model_executor/models/gemma3n_mm.py
View file @
41ae4a1e
...
@@ -586,10 +586,10 @@ class Gemma3nForConditionalGeneration(nn.Module, SupportsMultiModal,
...
@@ -586,10 +586,10 @@ class Gemma3nForConditionalGeneration(nn.Module, SupportsMultiModal,
# ruff: noqa
# ruff: noqa
# The Gemma3nProcessor expects all audio will be 30s in length and inserts 188 audio soft tokens into the
# The Gemma3nProcessor expects all audio will be 30s in length and inserts 188 audio soft tokens into the
# text to account for this. However, the audio preprocessing and encoder do not gu
r
arantee they will
# text to account for this. However, the audio preprocessing and encoder do not guarantee they will
# produce 188 soft tokens; they will produce at most that many tokens, but they may produce fewer tokens
# produce 188 soft tokens; they will produce at most that many tokens, but they may produce fewer tokens
# depending on the length of the longest audio input in the batch. When we encounter this situation, we pad
# depending on the length of the longest audio input in the batch. When we encounter this situation, we pad
# the audio feature out to 188 soft tokens with the em
e
bedding of the last token in the embed_audio vocab.
# the audio feature out to 188 soft tokens with the embedding of the last token in the embed_audio vocab.
# TODO precompute and cache padding
# TODO precompute and cache padding
audio_padding_toks
=
torch
.
tensor
([[
self
.
vocab_size
-
1
]],
audio_padding_toks
=
torch
.
tensor
([[
self
.
vocab_size
-
1
]],
dtype
=
torch
.
long
,
dtype
=
torch
.
long
,
...
...
vllm/model_executor/models/nemotron_vl.py
View file @
41ae4a1e
...
@@ -560,7 +560,7 @@ class LlamaNemotronVLChatModel(nn.Module, SupportsMultiModal, SupportsPP,
...
@@ -560,7 +560,7 @@ class LlamaNemotronVLChatModel(nn.Module, SupportsMultiModal, SupportsPP,
return
[]
return
[]
# The result multimodal_embeddings is tuple of tensors, with each
# The result multimodal_embeddings is tuple of tensors, with each
# tensor correspo
e
nding to a multimodal data item (image).
# tensor corresponding to a multimodal data item (image).
multimodal_embeddings
:
tuple
[
torch
.
Tensor
,
...]
=
()
multimodal_embeddings
:
tuple
[
torch
.
Tensor
,
...]
=
()
# NOTE: It is important to iterate over the keys in this dictionary
# NOTE: It is important to iterate over the keys in this dictionary
...
...
vllm/model_executor/models/phi4mm.py
View file @
41ae4a1e
...
@@ -1154,7 +1154,7 @@ class Phi4MMForCausalLM(nn.Module, SupportsLoRA, SupportsMultiModal):
...
@@ -1154,7 +1154,7 @@ class Phi4MMForCausalLM(nn.Module, SupportsLoRA, SupportsMultiModal):
return
None
return
None
# The result multimodal_embeddings is tuple of tensors, with each
# The result multimodal_embeddings is tuple of tensors, with each
# tensor correspo
e
nding to a multimodal data item (image or video).
# tensor corresponding to a multimodal data item (image or video).
multimodal_embeddings
:
tuple
[
torch
.
Tensor
,
...]
=
()
multimodal_embeddings
:
tuple
[
torch
.
Tensor
,
...]
=
()
# NOTE: It is important to iterate over the keys in this dictionary
# NOTE: It is important to iterate over the keys in this dictionary
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment