Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
bcb06d7b
Unverified
Commit
bcb06d7b
authored
Sep 12, 2025
by
Didier Durand
Committed by
GitHub
Sep 12, 2025
Browse files
[Doc]: fix typos in various files (#24726)
Signed-off-by:
Didier Durand
<
durand.didier@gmail.com
>
parent
0377802c
Changes
11
Hide whitespace changes
Inline
Side-by-side
Showing
11 changed files
with
11 additions
and
11 deletions
+11
-11
benchmarks/kernels/benchmark_w8a8_block_fp8.py
benchmarks/kernels/benchmark_w8a8_block_fp8.py
+1
-1
csrc/cpu/cpu_types_vxe.hpp
csrc/cpu/cpu_types_vxe.hpp
+1
-1
csrc/cpu/sgl-kernels/moe.cpp
csrc/cpu/sgl-kernels/moe.cpp
+1
-1
docs/design/multiprocessing.md
docs/design/multiprocessing.md
+1
-1
vllm/attention/backends/flash_attn.py
vllm/attention/backends/flash_attn.py
+1
-1
vllm/benchmarks/datasets.py
vllm/benchmarks/datasets.py
+1
-1
vllm/entrypoints/openai/protocol.py
vllm/entrypoints/openai/protocol.py
+1
-1
vllm/model_executor/layers/mamba/mamba_mixer2.py
vllm/model_executor/layers/mamba/mamba_mixer2.py
+1
-1
vllm/model_executor/models/minicpmv.py
vllm/model_executor/models/minicpmv.py
+1
-1
vllm/v1/worker/gpu_model_runner.py
vllm/v1/worker/gpu_model_runner.py
+1
-1
vllm/v1/worker/tpu_input_batch.py
vllm/v1/worker/tpu_input_batch.py
+1
-1
No files found.
benchmarks/kernels/benchmark_w8a8_block_fp8.py
View file @
bcb06d7b
...
...
@@ -56,7 +56,7 @@ def w8a8_block_matmul(
Bs: The per-block quantization scale for `B`.
block_size: The block size for per-block quantization.
It should be 2-dim, e.g., [128, 128].
- output_dytpe: The dtype of the returned tensor.
+ output_dtype: The dtype of the returned tensor.
Returns:
torch.Tensor: The result of matmul.
...
...
csrc/cpu/cpu_types_vxe.hpp
View file @
bcb06d7b
...
...
@@ -12,7 +12,7 @@ namespace vec_op {
#define vec_sub(a, b) ((a) - (b))
#define vec_mul(a, b) ((a) * (b))
#define vec_div(a, b) ((a) / (b))
- #define vec_sr(a, b) ((a) >> (b)) // Vector Shift Right Algebaic
+ #define vec_sr(a, b) ((a) >> (b)) // Vector Shift Right Algebraic
#define vec_sl(a, b) ((a) << (b)) // Vector Shift Left
// FIXME: FP16 is not fully supported in Torch-CPU
...
...
csrc/cpu/sgl-kernels/moe.cpp
View file @
bcb06d7b
...
...
@@ -215,7 +215,7 @@ int moe_align_block_size(
offsets
[
mb
+
1
]
=
sorted_id_size
(
sorted_ids
+
mb
*
BLOCK_M
);
}
});
- // TODO: do we need to vecterize this ?
+ // TODO: do we need to vectorize this ?
for
(
int
mb
=
0
;
mb
<
num_token_blocks
;
++
mb
)
{
offsets
[
mb
+
1
]
+=
offsets
[
mb
];
}
...
...
docs/design/multiprocessing.md
View file @
bcb06d7b
...
...
@@ -8,7 +8,7 @@ page for information on known issues and how to solve them.
## Introduction
!!! important
- The source code references are to the state of the code at the time of writing in December, 2024.
+ The source code references are to the state of the code at the time of writing in December 2024.
The use of Python multiprocessing in vLLM is complicated by:
...
...
vllm/attention/backends/flash_attn.py
View file @
bcb06d7b
...
...
@@ -901,7 +901,7 @@ def _get_query_key_seq_metadata(
attn_metadata
.
encoder_seq_start_loc
,
attn_metadata
.
max_encoder_seq_len
)
elif
attn_type
==
AttentionType
.
ENCODER
:
- # For encoder attention both the query and the key are same i.e the
+ # For encoder attention both the query and the key are same i.e. the
# encoder sequence.
return
(
attn_metadata
.
encoder_seq_start_loc
,
attn_metadata
.
max_encoder_seq_len
,
...
...
vllm/benchmarks/datasets.py
View file @
bcb06d7b
...
...
@@ -551,7 +551,7 @@ class RandomDataset(BenchmarkDataset):
[6880, 6881] -> ['Ġcalls', 'here'] ->
[1650, 939, 486] -> ['Ġcall', 'sh', 'ere']
To avoid uncontrolled change of the prompt length,
- the encoded sequence is truncated before being decode again.
+ the encoded sequence is truncated before being decoded again.
"""
# Build the inner sequence by sampling sequentially from the vocab
inner_seq
=
((
offset
+
index
+
np
.
arange
(
input_len
))
...
...
vllm/entrypoints/openai/protocol.py
View file @
bcb06d7b
...
...
@@ -242,7 +242,7 @@ def get_logits_processors(processors: Optional[LogitsProcessors],
elif
processors
:
raise
ValueError
(
"The `logits_processors` argument is not supported by this "
"server. See --logits-processor-pattern engine argu
g
ment "
"server. See --logits-processor-pattern engine argument "
"for more information."
)
return
None
...
...
vllm/model_executor/layers/mamba/mamba_mixer2.py
View file @
bcb06d7b
...
...
@@ -324,7 +324,7 @@ class MambaMixer2(MambaBase, CustomOp):
# - the weight already has a "weight_loader" attribute
# which set_weight_attrs will raise if we do not
# delete before trying to override it
- # - ditto for the otther two weights below
+ # - ditto for the other two weights below
delattr
(
self
.
conv1d
.
bias
,
"weight_loader"
)
set_weight_attrs
(
self
.
conv1d
.
bias
,
...
...
vllm/model_executor/models/minicpmv.py
View file @
bcb06d7b
...
...
@@ -1117,7 +1117,7 @@ class MiniCPMVBaseModel(nn.Module, SupportsMultiModal, SupportsPP):
def
_process_multimodal_inputs
(
self
,
modalities
:
dict
):
# The result multimodal_embeddings is tuple of tensors, with each
- # tensor correspoending to a multimodal data item (image or video).
+ # tensor corresponding to a multimodal data item (image or video).
multimodal_embeddings
:
tuple
[
torch
.
Tensor
,
...]
=
()
# NOTE: It is important to iterate over the keys in this dictionary
...
...
vllm/v1/worker/gpu_model_runner.py
View file @
bcb06d7b
...
...
@@ -2659,7 +2659,7 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
num_tokens
+=
num_pad
# If cudagraph_mode.decode_mode() == FULL and
- # cudagraph_mode.seperate_routine(). This means that we are using
+ # cudagraph_mode.separate_routine(). This means that we are using
# different graphs and/or modes for mixed prefill-decode batches vs.
# uniform decode batches. A uniform decode batch means that all
# requests have identical query length, except a potential virtual
...
...
vllm/v1/worker/tpu_input_batch.py
View file @
bcb06d7b
...
...
@@ -392,7 +392,7 @@ class InputBatch:
# NOTE: the following is unsafe
# self.token_ids_cpu[i1, ...], self.token_ids_cpu[i2, ...], =\
# self.token_ids_cpu[i2, ...], self.token_ids_cpu[i1, ...]
- # instead, we need to temporiarily copy the data for one of the indices
+ # instead, we need to temporarily copy the data for one of the indices
# TODO(lucas): optimize this by only copying valid indices
tmp
=
self
.
token_ids_cpu
[
i1
,
...].
copy
()
self
.
token_ids_cpu
[
i1
,
...]
=
self
.
token_ids_cpu
[
i2
,
...]
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment