Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
f4cd62b9
Commit
f4cd62b9
authored
Apr 11, 2026
by
王敏
Browse files
[fix]修复缺少参数等错误
parent
3c7c9ca2
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
3 additions
and
1 deletion
+3
-1
vllm/model_executor/layers/fused_moe/batched_deep_gemm_moe.py
.../model_executor/layers/fused_moe/batched_deep_gemm_moe.py
+1
-0
vllm/v1/attention/backend.py
vllm/v1/attention/backend.py
+2
-1
No files found.
vllm/model_executor/layers/fused_moe/batched_deep_gemm_moe.py
View file @
f4cd62b9
...
@@ -590,6 +590,7 @@ class BatchedDeepGemmExperts(mk.FusedMoEPermuteExpertsUnpermute):
...
@@ -590,6 +590,7 @@ class BatchedDeepGemmExperts(mk.FusedMoEPermuteExpertsUnpermute):
expert_tokens_meta
:
mk
.
ExpertTokensMetadata
|
None
,
expert_tokens_meta
:
mk
.
ExpertTokensMetadata
|
None
,
apply_router_weight_on_input
:
bool
,
apply_router_weight_on_input
:
bool
,
use_nn_moe
:
bool
|
None
=
False
,
use_nn_moe
:
bool
|
None
=
False
,
**
_
):
):
assert
expert_tokens_meta
is
not
None
assert
expert_tokens_meta
is
not
None
expert_num_tokens
=
expert_tokens_meta
.
expert_num_tokens
expert_num_tokens
=
expert_tokens_meta
.
expert_num_tokens
...
...
vllm/v1/attention/backend.py
View file @
f4cd62b9
...
@@ -342,7 +342,7 @@ class CommonAttentionMetadata:
...
@@ -342,7 +342,7 @@ class CommonAttentionMetadata:
block_table_tensor
:
torch
.
Tensor
block_table_tensor
:
torch
.
Tensor
slot_mapping
:
torch
.
Tensor
slot_mapping
:
torch
.
Tensor
num_kv_actual_tokens
:
int
num_kv_actual_tokens
:
int
|
None
=
None
seq_indexes_list
:
list
[
int
]
|
None
=
None
seq_indexes_list
:
list
[
int
]
|
None
=
None
scatter_indexes_tensor
:
torch
.
Tensor
|
None
=
None
scatter_indexes_tensor
:
torch
.
Tensor
|
None
=
None
...
@@ -434,6 +434,7 @@ class CommonAttentionMetadata:
...
@@ -434,6 +434,7 @@ class CommonAttentionMetadata:
else
None
,
else
None
,
num_reqs
=
num_actual_reqs
,
num_reqs
=
num_actual_reqs
,
num_actual_tokens
=
num_actual_tokens
,
num_actual_tokens
=
num_actual_tokens
,
num_kv_actual_tokens
=
num_actual_tokens
,
max_query_len
=
self
.
max_query_len
,
max_query_len
=
self
.
max_query_len
,
max_seq_len
=
self
.
max_seq_len
,
max_seq_len
=
self
.
max_seq_len
,
block_table_tensor
=
self
.
block_table_tensor
[:
num_actual_reqs
],
block_table_tensor
=
self
.
block_table_tensor
[:
num_actual_reqs
],
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment