Commit a1abfaf3 authored by zhuwenwen
Browse files

Merge branch 'v0.9.2-dev-ep-bug-fix' into 'v0.9.2-dev'

fix: 修复ep的变量未定义 (fix the undefined variable in the ep path)

See merge request dcutoolkit/deeplearing/vllm!423
parents 3c74c91a bee0b4e8
......@@ -421,13 +421,9 @@ class DeepseekV2MoE(nn.Module):
# Fix FP16 overflow
# See DeepseekV2DecoderLayer for more details.
# fp16 mode not fused quant
if i_q is not None:
i_q=iqis[0]
i_s=iqis[1]
final_hidden_states = self.experts(hidden_states=hidden_states,
router_logits=router_logits,
i_q=i_q, i_s=i_s)
if shared_output is not None:
if hidden_states.dtype != torch.float16:
final_hidden_states = final_hidden_states + shared_output
......@@ -468,13 +464,11 @@ class DeepseekV2MoE(nn.Module):
assert shared_output is not None
final_hidden_states += (shared_output * (1. / self.routed_scaling_factor))
else:
if i_q is not None:
i_q=iqis[0]
i_s=iqis[1]
if iqis is not None:
i_q, i_s = iqis
final_hidden_states = self.experts(hidden_states=hidden_states,
router_logits=router_logits,
i_q=i_q, i_s=i_s)
if shared_output is not None:
if hidden_states.dtype != torch.float16:
final_hidden_states = final_hidden_states + shared_output
......@@ -483,7 +477,6 @@ class DeepseekV2MoE(nn.Module):
# See DeepseekV2DecoderLayer for more details.
final_hidden_states = final_hidden_states + shared_output \
* (1. / self.routed_scaling_factor)
if self.tp_size > 1:
if envs.VLLM_ENABLE_TBO:
final_hidden_states = self.tbo_all_reduce(final_hidden_states)
......@@ -491,7 +484,6 @@ class DeepseekV2MoE(nn.Module):
final_hidden_states = (
self.experts.maybe_all_reduce_tensor_model_parallel(
final_hidden_states))
return final_hidden_states.view(num_tokens, hidden_dim)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment