Commit 7a3d06d6 authored by Casper Hansen
Browse files

Update comment

parent 204a3a12
...@@ -26,8 +26,10 @@ class WindowedCache: ...@@ -26,8 +26,10 @@ class WindowedCache:
def roll_kv(self, roll_len, start_pos): def roll_kv(self, roll_len, start_pos):
""" """
For example, with roll_len=3 and [A,B,C,D,E] we get [D,E,F,G,H] With sink=0, roll_len=3, and [A,B,C,D,E] we get [D,E,F,G,H]
With sink=1, roll_len=3, and [A,B,C,D,E] we get [A,E,F,G,H] With sink=1, roll_len=3, and [A,B,C,D,E] we get [A,E,F,G,H]
With sink=2, roll_len=3, and [A,B,C,D,E] we get [A,B,F,G,H]
With sink=3, roll_len=3, and [A,B,C,D,E] we get [A,B,C,G,H]
""" """
# Roll only the necessary part of the cache to the left # Roll only the necessary part of the cache to the left
self.v[:, :, self.attention_sinks:-roll_len+self.attention_sinks, :] = self.v[:, :, roll_len:, :] self.v[:, :, self.attention_sinks:-roll_len+self.attention_sinks, :] = self.v[:, :, roll_len:, :]
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment