Update MoE models of LM_NN

2db90e2c · zhuwenwen · 31330101 · 2db90e2c · 2db90e2c · 2db90e2c
Commit 2db90e2c authored Apr 16, 2025 by zhuwenwen
3 changed files
--- a/vllm/model_executor/models/deepseek_v2.py
+++ b/vllm/model_executor/models/deepseek_v2.py
@@ -580,15 +580,15 @@ class DeepseekV2DecoderLayer(nn.Module):
            hidden_states=hidden_states,
        )

-        if hidden_states.dtype == torch.float16:
-            # Fix FP16 overflow
-            # We scale both hidden_states and residual before
-            # rmsnorm, and rmsnorm result would not affect by scale.
-            hidden_states *= 1. / self.routed_scaling_factor
-            if self.layer_idx == 0:
-                # The residual is shared by all layers, we only scale it on
-                # first layer.
-                residual *= 1. / self.routed_scaling_factor
+        # if hidden_states.dtype == torch.float16:
+        #     # Fix FP16 overflow
+        #     # We scale both hidden_states and residual before
+        #     # rmsnorm, and rmsnorm result would not affect by scale.
+        #     hidden_states *= 1. / self.routed_scaling_factor
+        #     if self.layer_idx == 0:
+        #         # The residual is shared by all layers, we only scale it on
+        #         # first layer.
+        #         residual *= 1. / self.routed_scaling_factor

        # Fully Connected
        hidden_states, residual = self.post_attention_layernorm(

--- a/vllm/model_executor/models/mixtral.py
+++ b/vllm/model_executor/models/mixtral.py
@@ -427,6 +427,7 @@ class MixtralModel(nn.Module):
            
            for layername in loaded_params:
                weight = params_dict[layername]
+                os.environ['LM_NN'] = '0' 

                matches = re.findall(combined_words, layername)


--- a/vllm/model_executor/models/qwen2_moe.py
+++ b/vllm/model_executor/models/qwen2_moe.py
@@ -511,6 +511,7 @@ class Qwen2MoeModel(nn.Module):
            
            for layername in loaded_params:
                weight = params_dict[layername]
+                os.environ['LM_NN'] = '0' 
                # if self.use_fa_pad and (re.findall(qkv_bias_words, layername)):
                #     weight.data = pad_weight(weight.data, 32)