Commit 3c08fbc1 authored by 王敏
Browse files

修改mori最大输入token数 (Change the mori maximum input token count per rank: `max_num_inp_token_per_rank` 20480 → 5120)

parent 1a56d6cb
@@ -259,7 +259,7 @@ class EPMoE(FusedMoE):
 hidden_dim=self.hidden_size,
 scale_dim=0,
 scale_type_size=vllm_config.model_config.dtype.itemsize,
-max_num_inp_token_per_rank=20480,
+max_num_inp_token_per_rank=5120,
 num_experts_per_rank=self.local_num_experts,
 num_experts_per_token=self.top_k,
 max_token_type_size=2,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment