Unverified Commit f00bceab authored by stalkermustang, committed by GitHub

Fix typo in comment (#14102)

parent 234cfefb
@@ -808,7 +808,7 @@ class GPT2Model(GPT2PreTrainedModel):
             attention_mask = attention_mask.to(dtype=self.dtype)  # fp16 compatibility
             attention_mask = (1.0 - attention_mask) * -10000.0
 
-        # If a 2D ou 3D attention mask is provided for the cross-attention
+        # If a 2D or 3D attention mask is provided for the cross-attention
         # we need to make broadcastable to [batch_size, num_heads, seq_length, seq_length]
         if self.config.add_cross_attention and encoder_hidden_states is not None:
             encoder_batch_size, encoder_sequence_length, _ = encoder_hidden_states.size()
...
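For context, the lines around the fixed comment implement the standard additive attention-mask trick: a 0/1 padding mask is converted into a bias of 0 (keep) or -10000 (mask out) that is added to the raw attention scores before softmax. Using -10000.0 rather than -inf keeps the value representable in fp16, which is what the "fp16 compatibility" comment refers to. Below is a minimal, illustrative sketch of the idea, not the library's exact code; the tensor names and example shapes are assumptions made for the demo.

import torch

# Hypothetical shapes for illustration only.
batch_size, num_heads, seq_length = 2, 12, 5

# [batch_size, seq_length] mask: 1 = real token, 0 = padding.
attention_mask = torch.tensor([[1, 1, 1, 0, 0],
                               [1, 1, 1, 1, 1]])

# Reshape to [batch_size, 1, 1, seq_length] so it broadcasts against
# attention scores of shape [batch_size, num_heads, seq_length, seq_length].
extended_mask = attention_mask[:, None, None, :].to(torch.float32)

# 0.0 for positions to keep, -10000.0 for padded positions
# (-10000.0 instead of -inf stays finite in fp16).
extended_mask = (1.0 - extended_mask) * -10000.0

scores = torch.randn(batch_size, num_heads, seq_length, seq_length)
probs = torch.softmax(scores + extended_mask, dim=-1)

# Padded key positions receive near-zero attention weight.
print(probs[0, 0, 0])  # last two entries are ~0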