Unverified commit 03fb8e79 authored by Daquan Lin, committed by GitHub

Update modeling_tf_longformer.py (#7359)

Correct a very small mistake in a shape comment.
parent 1ff5bd38
@@ -348,7 +348,7 @@ class TFLongformerSelfAttention(tf.keras.layers.Layer):
         # matrix multipication
         # bcxd: batch_size * num_heads x chunks x 2window_overlap x head_dim
         # bcyd: batch_size * num_heads x chunks x 2window_overlap x head_dim
-        # bcxy: batch_size * num_heads x chunks x 2window_overlap x window_overlap
+        # bcxy: batch_size * num_heads x chunks x 2window_overlap x 2window_overlap
         chunked_attention_scores = tf.einsum("bcxd,bcyd->bcxy", chunked_query, chunked_key)  # multiply
         # convert diagonals into columns
...
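The corrected comment follows directly from the einsum equation: "bcxd,bcyd->bcxy" contracts the head_dim axis (d) of two tensors whose third axis is 2*window_overlap, so both of the last two output axes (x and y) have size 2*window_overlap, not window_overlap. A minimal sketch illustrating this, with made-up sizes (not part of the patch):

# Assumed illustrative sizes; only the einsum equation comes from the patched code.
import tensorflow as tf

batch_times_heads, chunks, window_overlap, head_dim = 8, 4, 16, 64

chunked_query = tf.random.normal((batch_times_heads, chunks, 2 * window_overlap, head_dim))
chunked_key = tf.random.normal((batch_times_heads, chunks, 2 * window_overlap, head_dim))

# Contract over head_dim: output axes are (batch*heads, chunks, 2*window_overlap, 2*window_overlap).
chunked_attention_scores = tf.einsum("bcxd,bcyd->bcxy", chunked_query, chunked_key)

print(chunked_attention_scores.shape)  # (8, 4, 32, 32): both trailing axes are 2 * window_overlap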