Workaround: illegal memory access (#421)

f8353793 · Casper · GitHub · b5db7fcd · f8353793
Unverified commit f8353793, authored Apr 06, 2024 by Casper; committed by GitHub on Apr 06, 2024.
Show whitespace changes
Inline Side-by-side

Showing 1 changed file with 2 additions and 1 deletion

awq/modules/linear/gemv_fast.py (+2 -1)

No files found.
--- a/awq/modules/linear/gemv_fast.py
+++ b/awq/modules/linear/gemv_fast.py
@@ -189,7 +189,8 @@ class WQLinear_GEMVFast(torch.nn.Module):
    @torch.no_grad()
    def forward(self, x):
        inputs = x
-        if inputs.numel() / inputs.shape[-1] < 8:
+        batch_size, n_tokens, _ = inputs.shape
+        if batch_size < 8 and n_tokens == 1:
            out = awq_v2_ext.gemv_forward_cuda_decode(
                inputs,
                self.qweight,