Fix other PyTorch models

2f3a4210 · Julien Chaumond · d5319793 · 2f3a4210 · 2f3a4210
Commit 2f3a4210 authored Nov 06, 2019 by Julien Chaumond
Show whitespace changes
Inline Side-by-side

Showing 2 changed files with 7 additions and 3 deletions

templates/adding_a_new_model/modeling_xxx.py +4 -2

transformers/modeling_distilbert.py +3 -1

No files found.
--- a/templates/adding_a_new_model/modeling_xxx.py
+++ b/templates/adding_a_new_model/modeling_xxx.py
@@ -309,10 +309,12 @@ class XxxModel(XxxPreTrainedModel):
        else:
            raise ValueError("You have to specify either input_ids or inputs_embeds")

+        device = input_ids.device if input_ids is not None else inputs_embeds.device
+
        if attention_mask is None:
-            attention_mask = torch.ones(input_shape)
+            attention_mask = torch.ones(input_shape, device=device)
        if token_type_ids is None:
-            token_type_ids = torch.zeros(input_shape, dtype=torch.long)
+            token_type_ids = torch.zeros(input_shape, dtype=torch.long, device=device)

        # We create a 3D attention mask from a 2D tensor mask.
        # Sizes are [batch_size, 1, 1, to_seq_length]

--- a/transformers/modeling_distilbert.py
+++ b/transformers/modeling_distilbert.py
@@ -450,8 +450,10 @@ class DistilBertModel(DistilBertPreTrainedModel):
        else:
            raise ValueError("You have to specify either input_ids or inputs_embeds")

+        device = input_ids.device if input_ids is not None else inputs_embeds.device
+
        if attention_mask is None:
-            attention_mask = torch.ones(input_shape) # (bs, seq_length)
+            attention_mask = torch.ones(input_shape, device=device) # (bs, seq_length)

        # Prepare head mask if needed
        # 1.0 in head_mask indicate we keep the head