Commit f4cd62b9 authored by 王敏
Browse files

[fix]修复缺少参数等错误

parent 3c7c9ca2
...@@ -590,6 +590,7 @@ class BatchedDeepGemmExperts(mk.FusedMoEPermuteExpertsUnpermute): ...@@ -590,6 +590,7 @@ class BatchedDeepGemmExperts(mk.FusedMoEPermuteExpertsUnpermute):
expert_tokens_meta: mk.ExpertTokensMetadata | None, expert_tokens_meta: mk.ExpertTokensMetadata | None,
apply_router_weight_on_input: bool, apply_router_weight_on_input: bool,
use_nn_moe: bool | None = False, use_nn_moe: bool | None = False,
**_
): ):
assert expert_tokens_meta is not None assert expert_tokens_meta is not None
expert_num_tokens = expert_tokens_meta.expert_num_tokens expert_num_tokens = expert_tokens_meta.expert_num_tokens
......
...@@ -342,7 +342,7 @@ class CommonAttentionMetadata: ...@@ -342,7 +342,7 @@ class CommonAttentionMetadata:
block_table_tensor: torch.Tensor block_table_tensor: torch.Tensor
slot_mapping: torch.Tensor slot_mapping: torch.Tensor
num_kv_actual_tokens: int num_kv_actual_tokens: int | None = None
seq_indexes_list: list[int] | None = None seq_indexes_list: list[int] | None = None
scatter_indexes_tensor: torch.Tensor | None = None scatter_indexes_tensor: torch.Tensor | None = None
...@@ -434,6 +434,7 @@ class CommonAttentionMetadata: ...@@ -434,6 +434,7 @@ class CommonAttentionMetadata:
else None, else None,
num_reqs=num_actual_reqs, num_reqs=num_actual_reqs,
num_actual_tokens=num_actual_tokens, num_actual_tokens=num_actual_tokens,
num_kv_actual_tokens=num_actual_tokens,
max_query_len=self.max_query_len, max_query_len=self.max_query_len,
max_seq_len=self.max_seq_len, max_seq_len=self.max_seq_len,
block_table_tensor=self.block_table_tensor[:num_actual_reqs], block_table_tensor=self.block_table_tensor[:num_actual_reqs],
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment