Commit 133dd7a7 authored by Casper

Fix usage of engine

parent fbeea40b
@@ -219,7 +219,7 @@ class QuantAttentionFused(nn.Module):
         xv = xv.view((bsz,) + self.attention_shapes["single_xv_view"])
         past_key_value = (xk, xv) if use_cache else None
-        attention_weight = awq_inference_engine.single_query_attention(
+        attention_weight = ft_inference_engine.single_query_attention(
             xq, # query
             xk, # key
             xv, # value
......
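For context, the change swaps the fused single-query attention call from awq_inference_engine to ft_inference_engine (presumably FasterTransformer-derived kernels, going by the "ft" prefix). Below is a minimal sketch of how such an optional kernel module is commonly import-guarded; only the module name ft_inference_engine and its single_query_attention entry point are attested by the diff, so the try/except pattern and the FT_AVAILABLE flag are illustrative assumptions, not the repository's actual code.

    # Illustrative sketch (assumption): guard the optional fused-kernel import.
    # Only ft_inference_engine.single_query_attention appears in the diff above.
    try:
        import ft_inference_engine  # fused single-query attention kernels
        FT_AVAILABLE = True
    except ImportError:
        ft_inference_engine = None
        FT_AVAILABLE = False

    # A call site could then dispatch on availability, mirroring the fixed line:
    #   if FT_AVAILABLE:
    #       attention_weight = ft_inference_engine.single_query_attention(xq, xk, xv, ...)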