fix: fix quant linear autotune

083c2de9 · OlivierDehaene · 773aabdd · 083c2de9
Commit 083c2de9 authored Dec 14, 2023 by OlivierDehaene
Hide whitespace changes
Inline Side-by-side

Showing with 1 addition and 1 deletion

server/text_generation_server/utils/gptq/custom_autotune.py server/text_generation_server/utils/gptq/custom_autotune.py +1 -1

No files found.
--- a/server/text_generation_server/utils/gptq/custom_autotune.py
+++ b/server/text_generation_server/utils/gptq/custom_autotune.py
@@ -88,7 +88,7 @@ class Autotuner(triton.KernelInterface):
            # In testings using only 40 reps seems to be close enough and it appears to be what PyTorch uses
            # PyTorch also sets fast_flush to True, but I didn't see any speedup so I'll leave the default
            return triton.testing.do_bench(
-                kernel_call, percentiles=(0.5, 0.2, 0.8), rep=40
+                kernel_call, quantiles=(0.5, 0.2, 0.8), rep=40
            )
        except triton.OutOfResources:
            return (float("inf"), float("inf"), float("inf"))