Unverified commit ca9695b4, authored by Atream, committed by GitHub
Browse files

Merge pull request #1033 from kvcache-ai/Atream-patch-1

Update modeling_deepseek_v3.py
parents 016d11e6 e36ddc36
@@ -436,7 +436,7 @@ class MoEGate(nn.Module):
         ### select top-k experts
         if self.topk_method == "noaux_tc":
-            assert not self.training
+            #assert not self.training
             scores_for_choice = scores.view(bsz * seq_len, -1) + self.e_score_correction_bias.unsqueeze(0)
             group_scores = (
                 scores_for_choice.view(bsz * seq_len, self.n_group, -1).topk(2, dim=-1)[0].sum(dim = -1)
...
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment