Fix dropout in `StarCoder` (#27182)

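Fix dropout in modeling_gpt_bigcode.py: `GPTBigCodeFlashAttention2` now takes its attention dropout probability from `self.config.attn_pdrop` (instead of `self.dropout`) when training, and still uses 0.0 otherwise.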

Fix dropout in `StarCoder` (#27182)
fix dropout in modeling_gpt_bigcode.py
e22b7ced · Susnato Dhar · GitHub · 4bb50aa2 · e22b7ced
Unverified commit e22b7ced, authored Oct 31, 2023 by Susnato Dhar; committed by GitHub on Oct 31, 2023.
Hide whitespace changes
Inline Side-by-side

Showing with 1 addition and 1 deletion

src/transformers/models/gpt_bigcode/modeling_gpt_bigcode.py src/transformers/models/gpt_bigcode/modeling_gpt_bigcode.py +1 -1

No files found.
--- a/src/transformers/models/gpt_bigcode/modeling_gpt_bigcode.py
+++ b/src/transformers/models/gpt_bigcode/modeling_gpt_bigcode.py
@@ -364,7 +364,7 @@ class GPTBigCodeFlashAttention2(GPTBigCodeAttention):
            key = key.transpose(1, 2).reshape(batch_size, tgt, self.num_heads, self.head_dim)
            value = value.transpose(1, 2).reshape(batch_size, tgt, self.num_heads, self.head_dim)

-        attn_dropout = self.dropout if self.training else 0.0
+        attn_dropout = self.config.attn_pdrop if self.training else 0.0

        softmax_dtype = torch.float32 if self.attention_softmax_in_fp32 else query.dtype
        upcast = query.dtype != softmax_dtype