Commit bee0b4e8 authored by wujl5
Browse files

fix: 修复ep的变量未定义

parent b6f5a16a
...@@ -421,13 +421,9 @@ class DeepseekV2MoE(nn.Module): ...@@ -421,13 +421,9 @@ class DeepseekV2MoE(nn.Module):
# Fix FP16 overflow # Fix FP16 overflow
# See DeepseekV2DecoderLayer for more details. # See DeepseekV2DecoderLayer for more details.
# fp16 mode not fused quant # fp16 mode not fused quant
if i_q is not None:
i_q=iqis[0]
i_s=iqis[1]
final_hidden_states = self.experts(hidden_states=hidden_states, final_hidden_states = self.experts(hidden_states=hidden_states,
router_logits=router_logits, router_logits=router_logits,
i_q=i_q, i_s=i_s) i_q=i_q, i_s=i_s)
if shared_output is not None: if shared_output is not None:
if hidden_states.dtype != torch.float16: if hidden_states.dtype != torch.float16:
final_hidden_states = final_hidden_states + shared_output final_hidden_states = final_hidden_states + shared_output
...@@ -468,13 +464,11 @@ class DeepseekV2MoE(nn.Module): ...@@ -468,13 +464,11 @@ class DeepseekV2MoE(nn.Module):
assert shared_output is not None assert shared_output is not None
final_hidden_states += (shared_output * (1. / self.routed_scaling_factor)) final_hidden_states += (shared_output * (1. / self.routed_scaling_factor))
else: else:
if i_q is not None: if iqis is not None:
i_q=iqis[0] i_q, i_s = iqis
i_s=iqis[1]
final_hidden_states = self.experts(hidden_states=hidden_states, final_hidden_states = self.experts(hidden_states=hidden_states,
router_logits=router_logits, router_logits=router_logits,
i_q=i_q, i_s=i_s) i_q=i_q, i_s=i_s)
if shared_output is not None: if shared_output is not None:
if hidden_states.dtype != torch.float16: if hidden_states.dtype != torch.float16:
final_hidden_states = final_hidden_states + shared_output final_hidden_states = final_hidden_states + shared_output
...@@ -483,7 +477,6 @@ class DeepseekV2MoE(nn.Module): ...@@ -483,7 +477,6 @@ class DeepseekV2MoE(nn.Module):
# See DeepseekV2DecoderLayer for more details. # See DeepseekV2DecoderLayer for more details.
final_hidden_states = final_hidden_states + shared_output \ final_hidden_states = final_hidden_states + shared_output \
* (1. / self.routed_scaling_factor) * (1. / self.routed_scaling_factor)
if self.tp_size > 1: if self.tp_size > 1:
if envs.VLLM_ENABLE_TBO: if envs.VLLM_ENABLE_TBO:
final_hidden_states = self.tbo_all_reduce(final_hidden_states) final_hidden_states = self.tbo_all_reduce(final_hidden_states)
...@@ -491,7 +484,6 @@ class DeepseekV2MoE(nn.Module): ...@@ -491,7 +484,6 @@ class DeepseekV2MoE(nn.Module):
final_hidden_states = ( final_hidden_states = (
self.experts.maybe_all_reduce_tensor_model_parallel( self.experts.maybe_all_reduce_tensor_model_parallel(
final_hidden_states)) final_hidden_states))
return final_hidden_states.view(num_tokens, hidden_dim) return final_hidden_states.view(num_tokens, hidden_dim)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment