"git@developer.sourcefind.cn:chenpangpang/transformers.git" did not exist on "826496580beae08289452da0eda914bdc40a95bb"
Unverified Commit 9fdf158a authored by Lei Li, committed by GitHub

Add inputs_embeds functionality when generating with GPT-Neox (#22916)



* Support GPT-NeoX generation with inputs_embeds

* Update src/transformers/models/gpt_neox/modeling_gpt_neox.py

Great, thanks for the suggestion!

Co-authored-by: Joao Gante <joaofranciscocardosogante@gmail.com>

---------
Co-authored-by: Lei Li <tobiaslee@qq.com>
Co-authored-by: Joao Gante <joaofranciscocardosogante@gmail.com>
parent ec93b895
@@ -697,7 +697,9 @@ class GPTNeoXForCausalLM(GPTNeoXPreTrainedModel):
             attentions=outputs.attentions,
         )
 
-    def prepare_inputs_for_generation(self, input_ids, past_key_values=None, attention_mask=None, **kwargs):
+    def prepare_inputs_for_generation(
+        self, input_ids, past_key_values=None, attention_mask=None, inputs_embeds=None, **kwargs
+    ):
         input_shape = input_ids.shape
 
         # cut decoder_input_ids if past is used
@@ -716,12 +718,21 @@ class GPTNeoXForCausalLM(GPTNeoXPreTrainedModel):
         if attention_mask is None:
             attention_mask = input_ids.new_ones(input_shape)
 
-        return {
-            "input_ids": input_ids,
-            "attention_mask": attention_mask,
-            "position_ids": position_ids,
-            "past_key_values": past_key_values,
-        }
+        # if `inputs_embeds` are passed, we only want to use them in the 1st generation step
+        if inputs_embeds is not None and past_key_values is None:
+            model_inputs = {"inputs_embeds": inputs_embeds}
+        else:
+            model_inputs = {"input_ids": input_ids}
+
+        model_inputs.update(
+            {
+                "attention_mask": attention_mask,
+                "past_key_values": past_key_values,
+                "position_ids": position_ids,
+            }
+        )
+
+        return model_inputs
 
     def _reorder_cache(self, past_key_values, beam_idx):
         reordered_past = ()
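For context, a minimal usage sketch of the path this diff enables: passing inputs_embeds directly to generate. The checkpoint name and prompt below are illustrative assumptions (any GPT-NeoX checkpoint should behave the same); the sketch is not part of the commit itself.

# Minimal sketch (illustrative, not part of the commit). Assumes the public
# GPT-NeoX checkpoint "EleutherAI/pythia-70m" as an example model.
from transformers import AutoTokenizer, GPTNeoXForCausalLM

tokenizer = AutoTokenizer.from_pretrained("EleutherAI/pythia-70m")
model = GPTNeoXForCausalLM.from_pretrained("EleutherAI/pythia-70m")

inputs = tokenizer("The capital of France is", return_tensors="pt")
# Compute input embeddings by hand, e.g. to prepend learned soft-prompt vectors.
inputs_embeds = model.get_input_embeddings()(inputs.input_ids)

# Per the branch added above, `inputs_embeds` are consumed only on the first
# generation step; subsequent steps feed back the generated token ids.
output_ids = model.generate(
    inputs_embeds=inputs_embeds,
    attention_mask=inputs.attention_mask,
    max_new_tokens=20,
)
# When generating from embeddings only, the returned ids contain just the
# newly generated tokens (there are no prompt ids to prepend).
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))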