[Generate] Fix generate with inputs_embeds on GPU (#14564)

cea17acd · Patrick von Platen · GitHub · 25156eb2 · cea17acd
Unverified commit cea17acd, authored Nov 29, 2021 by Patrick von Platen; committed by GitHub on Nov 29, 2021.
Show whitespace changes
Inline Side-by-side

Showing with 1 addition and 1 deletion

src/transformers/generation_utils.py (+1 -1)

No files found.
--- a/src/transformers/generation_utils.py
+++ b/src/transformers/generation_utils.py
@@ -401,7 +401,7 @@ class GenerationMixin:
        # First if `inputs_embeds` are given, but no `attention_mask` assume that full attention_mask is used
        if inputs_embeds is not None:
-            return torch.ones((inputs_embeds.shape[0], inputs_embeds.shape[1]), dtype=torch.long)
+            return torch.ones((inputs_embeds.shape[0], inputs_embeds.shape[1]), dtype=torch.long, device=self.device)
        # Otherwise, use `input_ids`
        is_pad_token_in_inputs_ids = (pad_token_id is not None) and (pad_token_id in input_ids)