Commit b10ca211 authored by mshoeybi
Browse files

fixed a bug

parent 8f7c0d11
......@@ -214,8 +214,9 @@ class ParallelAttention(MegatronModule):
# is not provided, make sure there is no potential memory left
# from previous inference.
else:
self.inference_key_memory = None
self.inference_value_memory = None
self.inference_current_sequence_len = None
# =====================
# Query, Key, and Value
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment