Commit 133dd7a7 authored by Casper

Fix usage of engine

parent fbeea40b
@@ -219,7 +219,7 @@ class QuantAttentionFused(nn.Module):
             xv = xv.view((bsz,) + self.attention_shapes["single_xv_view"])
             past_key_value = (xk, xv) if use_cache else None
-            attention_weight = awq_inference_engine.single_query_attention(
+            attention_weight = ft_inference_engine.single_query_attention(
                 xq, # query
                 xk, # key
                 xv, # value
...
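The change points the fused single-query attention call at the `ft_inference_engine` kernel module (presumably the FasterTransformer-based CUDA extension) instead of `awq_inference_engine`. As a minimal sketch of the corresponding import this call site would rely on, assuming the extension is built under the name shown in the diff; the guarded import and error message below are illustrative, not the repository's code:

```python
# Sketch only: the module name is taken from the diff above; the
# try/except guard and its message are assumptions, not actual source.
try:
    import ft_inference_engine  # fused attention CUDA kernels
except ImportError as exc:
    raise ImportError(
        "ft_inference_engine is not available; build the kernel "
        "extension before using QuantAttentionFused."
    ) from exc
```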