Commit b6a7135f authored by 王敏
Browse files

[fix]修复tp1 dpsk启动失败

parent ad7c14d5
...@@ -484,7 +484,8 @@ class DeepseekV2MoE(nn.Module): ...@@ -484,7 +484,8 @@ class DeepseekV2MoE(nn.Module):
final_hidden_states = ( final_hidden_states = (
self.experts.maybe_all_reduce_tensor_model_parallel( self.experts.maybe_all_reduce_tensor_model_parallel(
final_hidden_states)) final_hidden_states))
return final_hidden_states.view(num_tokens, hidden_dim)
return final_hidden_states.view(num_tokens, hidden_dim)
def yarn_get_mscale(scale: float = 1, mscale: float = 1) -> float: def yarn_get_mscale(scale: float = 1, mscale: float = 1) -> float:
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment