Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
e26fef83
Unverified
Commit
e26fef83
authored
Sep 12, 2025
by
co63oc
Committed by
GitHub
Sep 11, 2025
Browse files
fix some typos (#24616)
Signed-off-by:
co63oc
<
co63oc@users.noreply.github.com
>
parent
c1eda615
Changes
5
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
6 additions
and
6 deletions
+6
-6
tests/compile/test_basic_correctness.py
tests/compile/test_basic_correctness.py
+1
-1
tests/kernels/mamba/test_mamba_ssm_ssd.py
tests/kernels/mamba/test_mamba_ssm_ssd.py
+1
-1
vllm/model_executor/layers/fla/ops/cumsum.py
vllm/model_executor/layers/fla/ops/cumsum.py
+1
-1
vllm/v1/attention/backends/mla/common.py
vllm/v1/attention/backends/mla/common.py
+1
-1
vllm/v1/worker/block_table.py
vllm/v1/worker/block_table.py
+2
-2
No files found.
tests/compile/test_basic_correctness.py
View file @
e26fef83
...
...
@@ -23,7 +23,7 @@ class TestSetting:
fullgraph
:
bool
# we cannot afford testing the full Catesian product
# we cannot afford testing the full Cartesian product
# of all models and all levels
@
pytest
.
mark
.
parametrize
(
"test_setting"
,
...
...
tests/kernels/mamba/test_mamba_ssm_ssd.py
View file @
e26fef83
...
...
@@ -345,7 +345,7 @@ def test_mamba_chunk_scan_cont_batch_prefill_chunking(chunk_size, seqlens):
# in the mamba2 ssd kernels, by comparing concatenation (in the sequence
# dimension) of chunked results with the full sequence result.
# It is different from test_mamba_chunk_scan_cont_batch by:
# 1. Not using the naive torch implementaion (ssd_minimal_discrete) to get
# 1. Not using the naive torch implementation (ssd_minimal_discrete) to get
# reference outputs. Instead, it compares chunked kernel outputs to full
# sequence kernel outputs. This is the most straightforward way to
# assert chunked prefill correctness.
...
...
vllm/model_executor/layers/fla/ops/cumsum.py
View file @
e26fef83
...
...
@@ -179,7 +179,7 @@ def chunk_local_cumsum_vector(
def
grid
(
meta
):
return
(
triton
.
cdiv
(
meta
[
'S'
],
meta
[
'BS'
]),
NT
,
B
*
H
)
# keep cummulative normalizer in fp32
# keep cumulative normalizer in fp32
# this kernel is equivalent to
# g = g.view(B, H, NT, BT, -1).cumsum(-2).view(B, H, T, -1)
chunk_local_cumsum_vector_kernel
[
grid
](
g_org
,
...
...
vllm/v1/attention/backends/mla/common.py
View file @
e26fef83
...
...
@@ -1322,7 +1322,7 @@ class MLACommonImpl(MLAAttentionImpl[M], Generic[M]):
k_scale
:
torch
.
Tensor
,
dcp_world_size
:
int
,
):
assert
k_scale
is
None
,
"DCP not support sacled kvcache now."
assert
k_scale
is
None
,
"DCP not support scaled kvcache now."
assert
attn_metadata
.
prefill
is
not
None
prefill_metadata
=
attn_metadata
.
prefill
assert
prefill_metadata
.
chunked_context
is
not
None
...
...
vllm/v1/worker/block_table.py
View file @
e26fef83
...
...
@@ -112,9 +112,9 @@ class BlockTable:
# tokens.
virtual_block_offsets
=
positions
%
virtual_block_size
mask
=
virtual_block_offsets
%
self
.
dcp_world_size
==
self
.
dcp_rank
# Calcuate local block_offsets
# Calculate local block_offsets
block_offsets
=
virtual_block_offsets
//
self
.
dcp_world_size
# Calcuate slot_mapping
# Calculate slot_mapping
slot_mapping
=
block_numbers
*
self
.
block_size
+
block_offsets
# Write final slots, use -1 for not-local
self
.
slot_mapping_np
[:
req_indices
.
shape
[
0
]]
=
np
.
where
(
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment