"...git@developer.sourcefind.cn:2222/OpenDAS/vllm_cscc.git" did not exist on "27feead2f80f906de88d64c6e69342451cf1d7f0"
Commit ed4b88d5 authored by zhuwenwen
Browse files

Merge remote-tracking branch 'origin/v0.9.2-dev-yql-mtp' into v0.9.2-dev

parents fb94600a 080ed180
......@@ -65,7 +65,7 @@ from .utils import (PPMissingLayer, is_pp_missing_parameter,
from vllm import _custom_ops as ops
from vllm.utils import W8a8GetCacheJSON
os.environ['DPSK_FP16_QUICK'] = os.environ.get('DPSK_FP16_QUICK', '1')
os.environ['DPSK_FP16_QUICK'] = os.environ.get('DPSK_FP16_QUICK', '0')
class DeepseekV2MLP(nn.Module):
def __init__(
......@@ -608,9 +608,13 @@ class DeepseekV2DecoderLayer(nn.Module):
residual: Optional[torch.Tensor],
) -> torch.Tensor:
# Self Attention
# Fix residual FP16 overflow
residual_fix_overflow = False
if residual is None:
residual = hidden_states
hidden_states = self.input_layernorm(hidden_states)
residual_fix_overflow = True
else:
hidden_states, residual = self.input_layernorm(
hidden_states, residual)
......@@ -624,7 +628,7 @@ class DeepseekV2DecoderLayer(nn.Module):
# We scale both hidden_states and residual before
# rmsnorm; the rmsnorm result is not affected by the scale.
hidden_states *= 1. / self.routed_scaling_factor
if self.layer_idx == 0:
if self.layer_idx == 0 or residual_fix_overflow:
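# The residual is shared by all layers, so we only scale it on the
# first layer, or on a layer that received no incoming residual
# (residual_fix_overflow is set in that case).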
residual *= 1. / self.routed_scaling_factor
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment