Unverified Commit 7f8fcd39 authored by Jerry Zhang, committed by GitHub

Turn off autotune for scaled mm for fp8 dynamic quant in torchao (#2116)

parent 5c6a41fa
@@ -401,6 +401,10 @@ class TorchNativeLlamaForCausalLM(nn.Module):
         self.lm_head = ParallelLMHead(config.vocab_size, config.hidden_size)
         self.logits_processor = LogitsProcessor(config)
+
+        # turning off autotune for fp8dq since it doesn't give speedup and
+        # increases compile time significantly
+        torch._inductor.config.max_autotune_gemm_backends = "ATEN"
 
     @torch.no_grad()
     def forward(
         self,
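
Below is a minimal standalone sketch (not part of the commit) of what the flag above does: restricting inductor's max-autotune GEMM candidates to the ATen backend so `torch.compile` does not benchmark Triton GEMM templates, which is where the extra compile time came from for the fp8 dynamic-quant path. The `TinyMLP` model, tensor shapes, and compile mode are illustrative only; in the actual PR the setting is applied inside `TorchNativeLlamaForCausalLM.__init__` before the model is quantized with torchao and compiled.

```python
import torch
import torch.nn as nn

# Restrict max-autotune GEMM candidates to ATen kernels only, skipping
# Triton template autotuning (the source of the long compile times).
torch._inductor.config.max_autotune_gemm_backends = "ATEN"


class TinyMLP(nn.Module):
    """Illustrative stand-in for a GEMM-heavy model."""

    def __init__(self, dim: int = 4096):
        super().__init__()
        self.fc1 = nn.Linear(dim, dim, bias=False)
        self.fc2 = nn.Linear(dim, dim, bias=False)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.fc2(torch.relu(self.fc1(x)))


if torch.cuda.is_available():
    model = TinyMLP().half().cuda()
    x = torch.randn(8, 4096, dtype=torch.float16, device="cuda")

    # With the flag set, "max-autotune" still runs but only considers
    # ATen GEMMs, keeping compile time down at the cost of skipping
    # potentially faster generated kernels.
    compiled = torch.compile(model, mode="max-autotune")
    with torch.no_grad():
        out = compiled(x)
```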