Merge pull request #3145 from sshleifer/bartfp16

[Bart] FP16 Support

Merge pull request #3145 from sshleifer/bartfp16
[Bart] FP16 Support
1741d740 · Thomas Wolf · GitHub · bbabbc16 · 14d40584 · 1741d740
Unverified commit 1741d740, authored Mar 05, 2020 by Thomas Wolf and committed by GitHub on Mar 05, 2020.
Show whitespace changes
Inline Side-by-side

Showing with 11 additions and 4 deletions

src/transformers/modeling_bart.py: +4 -4

tests/test_modeling_bart.py: +7 -0

No files found.
--- a/src/transformers/modeling_bart.py
+++ b/src/transformers/modeling_bart.py
@@ -640,9 +640,9 @@ class SelfAttention(nn.Module):
            reshaped = key_padding_mask.unsqueeze(1).unsqueeze(2).to(torch.bool)
            attn_weights = attn_weights.masked_fill(reshaped, float("-inf"))
            attn_weights = attn_weights.view(bsz * self.num_heads, tgt_len, src_len)
-        attn_weights_float = F.softmax(attn_weights, dim=-1, dtype=torch.float32)
+        attn_weights = F.softmax(attn_weights, dim=-1)
-        attn_weights = attn_weights_float.type_as(attn_weights)
+        attn_probs = F.dropout(attn_weights, p=self.dropout, training=self.training,)
-        attn_probs = F.dropout(attn_weights_float, p=self.dropout, training=self.training,)
        assert v is not None
        attn_output = torch.bmm(attn_probs, v)
        assert attn_output.size() == (bsz * self.num_heads, tgt_len, self.head_dim)
@@ -696,7 +696,7 @@ class SelfAttention(nn.Module):
        elif prev_key_padding_mask is not None:
            filler = torch.zeros(batch_size, src_len - prev_key_padding_mask.size(1))
            if prev_key_padding_mask.is_cuda:
-                filler = filler.cuda()
+                filler = filler.to(prev_key_padding_mask.device)
            new_key_padding_mask = torch.cat([prev_key_padding_mask.float(), filler.float()], dim=1)
        elif key_padding_mask is not None:
            filler = torch.zeros(batch_size, src_len - key_padding_mask.size(1))

--- a/tests/test_modeling_bart.py
+++ b/tests/test_modeling_bart.py
@@ -294,6 +294,13 @@ class BartHeadTests(unittest.TestCase):
            bart_toks = tokenizer.encode(ex, return_tensors="pt")
            _assert_tensors_equal(desired_result.long(), bart_toks, prefix=ex)
+    @unittest.skipIf(torch_device == "cpu", "Cant do half precision")
+    def test_generate_fp16(self):
+        config, input_ids, batch_size = self._get_config_and_data(output_past=True)
+        attention_mask = input_ids.ne(1)
+        lm_model = BartForMaskedLM(config).eval().to(torch_device).half()
+        lm_model.generate(input_ids, attention_mask)
 def _assert_tensors_equal(a, b, atol=1e-12, prefix=""):
    """If tensors not close, or a and b arent both tensors, raise a nice Assertion error."""