Unverified commit e01ff5c0, authored by amirkl94 and committed by GitHub
Browse files

Bugfix: Pass router logits dtype in nemotron shared experts (#32669)


Signed-off-by: Amir Klein <203507526+amirkl94@users.noreply.github.com>
parent fb946a7f
...@@ -145,11 +145,12 @@ class NemotronHMoE(nn.Module): ...@@ -145,11 +145,12 @@ class NemotronHMoE(nn.Module):
self.is_sequence_parallel = parallel_config.use_sequence_parallel_moe self.is_sequence_parallel = parallel_config.use_sequence_parallel_moe
router_logits_dtype = torch.float32
self.gate = ReplicatedLinear( self.gate = ReplicatedLinear(
config.hidden_size, config.hidden_size,
config.n_routed_experts, config.n_routed_experts,
bias=False, bias=False,
params_dtype=torch.float32, params_dtype=router_logits_dtype,
quant_config=None, quant_config=None,
prefix=f"{prefix}.gate", prefix=f"{prefix}.gate",
) )
...@@ -209,6 +210,7 @@ class NemotronHMoE(nn.Module): ...@@ -209,6 +210,7 @@ class NemotronHMoE(nn.Module):
enable_eplb=self.enable_eplb, enable_eplb=self.enable_eplb,
num_redundant_experts=self.n_redundant_experts, num_redundant_experts=self.n_redundant_experts,
is_sequence_parallel=self.is_sequence_parallel, is_sequence_parallel=self.is_sequence_parallel,
router_logits_dtype=router_logits_dtype,
) )
if self.use_latent_moe: if self.use_latent_moe:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment