Commit d2fe5111 authored by zhuwenwen's avatar zhuwenwen
Browse files

fix optional and profiling

parent 8d0e36b5
......@@ -32,6 +32,7 @@ from vllm.utils.torch_utils import supports_dynamo
from .monitor import start_monitoring_torch_compile
from vllm.config import VllmConfig
from vllm.forward_context import get_profilling
logger = init_logger(__name__)
......
......@@ -232,9 +232,9 @@ class DeepseekV2MLP(nn.Module):
self.act_fn = SiluAndMul()
def forward(self, x,
rms_weight: Optional[torch.Tensor] = None,
residual: Optional[torch.Tensor] = None,
update_hd: Optional[bool] = False
rms_weight: torch.Tensor | None = None,
residual: torch.Tensor | None = None,
update_hd: bool | None = False
):
if envs.USE_FUSED_RMS_QUANT:
gate_up, new_resi, _ = self.gate_up_proj(x, rms_weight, residual, update_hd=update_hd)
......@@ -356,8 +356,8 @@ class DeepseekV2MoE(nn.Module):
)
def forward(self, hidden_states: torch.Tensor,
rms_weight: Optional[torch.Tensor] = None,
residual: Optional[torch.Tensor] = None
rms_weight: torch.Tensor | None = None,
residual: torch.Tensor | None = None
) -> torch.Tensor:
num_tokens, hidden_dim = hidden_states.shape
hidden_states = hidden_states.view(-1, hidden_dim)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment