Unverified commit 349bb2c9, authored by fzyzcjy, committed by GitHub
Browse files

Fix error when disabling new DeepGEMM (#7198)

parent 0b8939bc
......@@ -584,8 +584,10 @@ class _DeepEPDispatcherImplLowLatency(_DeepEPDispatcherImplBase):
use_fp8=use_fp8,
async_finish=not self.return_recv_hook,
return_recv_hook=self.return_recv_hook,
-round_scale=deep_gemm_wrapper.DEEPGEMM_V202506,
-use_ue8m0=deep_gemm_wrapper.DEEPGEMM_V202506,
+round_scale=deep_gemm_wrapper.ENABLE_JIT_DEEPGEMM
+and deep_gemm_wrapper.DEEPGEMM_V202506,
+use_ue8m0=deep_gemm_wrapper.ENABLE_JIT_DEEPGEMM
+and deep_gemm_wrapper.DEEPGEMM_V202506,
)
)
return packed_recv_hidden, packed_recv_count, event, hook
......
......@@ -1914,7 +1914,10 @@ class DeepseekV2ForCausalLM(nn.Module):
self_attn.w_vc = bind_or_assign(self_attn.w_vc, w_vc.contiguous())
self_attn.use_deep_gemm_bmm = True
-if deep_gemm_wrapper.DEEPGEMM_SCALE_UE8M0:
+if (
+deep_gemm_wrapper.ENABLE_JIT_DEEPGEMM
+and deep_gemm_wrapper.DEEPGEMM_SCALE_UE8M0
+):
self._weight_requant_ue8m0()
def _weight_requant_ue8m0(self):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment