"encoding/vscode:/vscode.git/clone" did not exist on "b872eb8cc01f6e0dde3d9acfb5846abe5c3a450a"
Commit ff6b265e authored by Azure
Browse files

Mock triton mla due to precision issue

parent c5f036e8
...@@ -43,11 +43,13 @@ class KDeepseekV2Attention(BaseInjectedModule, DeepseekV2Attention): ...@@ -43,11 +43,13 @@ class KDeepseekV2Attention(BaseInjectedModule, DeepseekV2Attention):
orig_module: nn.Module, orig_module: nn.Module,
device: str = "cuda", device: str = "cuda",
chunck_size: int = 1000, chunck_size: int = 1000,
use_triton: bool = False,
**kwargs): **kwargs):
BaseInjectedModule.__init__(self, key, gguf_loader, config, orig_module, device, **kwargs) BaseInjectedModule.__init__(self, key, gguf_loader, config, orig_module, device, **kwargs)
self.orig_module.__init__(orig_module.config, self.orig_module.__init__(orig_module.config,
orig_module.layer_idx) orig_module.layer_idx)
self.chunck_size = chunck_size # TODO, generate chunck_size automatically. self.chunck_size = chunck_size # TODO, generate chunck_size automatically.
self.use_triton = use_triton
def get_absorbed(self) -> Tuple[torch.Tensor, torch.Tensor]: def get_absorbed(self) -> Tuple[torch.Tensor, torch.Tensor]:
if not (hasattr(self, 'q_absorb') and hasattr(self, 'out_absorb')): if not (hasattr(self, 'q_absorb') and hasattr(self, 'out_absorb')):
...@@ -401,7 +403,7 @@ class KDeepseekV2Attention(BaseInjectedModule, DeepseekV2Attention): ...@@ -401,7 +403,7 @@ class KDeepseekV2Attention(BaseInjectedModule, DeepseekV2Attention):
cache_position: Optional[torch.LongTensor] = None, cache_position: Optional[torch.LongTensor] = None,
**kwargs, **kwargs,
) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]: ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
if os.name == 'nt': if not self.use_triton: # os.name == 'nt'
return self.forward_windows( return self.forward_windows(
hidden_states, hidden_states,
attention_mask, attention_mask,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment