Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
28c94770
Unverified
Commit
28c94770
Authored Jan 06, 2026 by roikoren755; committed by GitHub on Jan 06, 2026
Browse files
[NemotronH] Use ReplicatedLinear for fc1_latent_proj (#31807)
Signed-off-by: Roi Koren <roik@nvidia.com>
parent
af8fd730
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing 1 changed file with 1 addition and 5 deletions
+1
-5
vllm/model_executor/models/nemotron_h.py
vllm/model_executor/models/nemotron_h.py
+1
-5
No files found.
vllm/model_executor/models/nemotron_h.py
View file @
28c94770
...
@@ -210,16 +210,12 @@ class NemotronHMoE(nn.Module):
...
@@ -210,16 +210,12 @@ class NemotronHMoE(nn.Module):
         )
         if self.use_latent_moe:
-            # TODO: check if using ReplicatedLinear is better than
-            # ColumnParallelLinear + all_gather
-            self.fc1_latent_proj = ColumnParallelLinear(
+            self.fc1_latent_proj = ReplicatedLinear(
                 input_size=config.hidden_size,
                 output_size=self.moe_hidden_size,
                 bias=config.mlp_bias,
                 quant_config=quant_config,
                 disable_tp=self.is_sequence_parallel,
-                # We need to gather the output to prepare input for moe
-                gather_output=True,
                 prefix=f"{prefix}.fc1_latent_proj",
             )
             self.fc2_latent_proj = ReplicatedLinear(
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please register or sign in to comment.