Move labels to the same device as logits for LlamaForSequenceClassification and Blip2 (#22596)

* (feat): Move labels to the same device as logits
* Trigger CI
* Trigger CI
* Trigger CI
* (feat): Making changes for Blip2

Move labels to the same device as logits for LlamaForSequenceClassification and Blip2 (#22596)
* (feat): Move labels to the same device as logits
* Trigger CI
* Trigger CI
* Trigger CI
* (feat): Making changes for Blip2
1de8ce9e · Shikhar Chauhan · GitHub · d59034ff · 1de8ce9e · 1de8ce9e
Unverified Commit 1de8ce9e authored Apr 07, 2023 by Shikhar Chauhan Committed by GitHub Apr 07, 2023
Showing 2 changed files with 3 additions and 0 deletions

src/transformers/models/blip_2/modeling_blip_2.py (+2 -0)

src/transformers/models/llama/modeling_llama.py (+1 -0)

No files found.
--- a/src/transformers/models/blip_2/modeling_blip_2.py
+++ b/src/transformers/models/blip_2/modeling_blip_2.py
@@ -1522,6 +1522,7 @@ class Blip2Model(Blip2PreTrainedModel):
            loss = None
            # we compute the loss here since we need to take into account the sequence length of the query embeds
            if labels is not None:
+                labels = labels.to(logits.device)
                logits = logits[:, -labels.size(1) :, :]
                # Shift so that tokens < n predict n
                shift_logits = logits[..., :-1, :].contiguous()
@@ -1757,6 +1758,7 @@ class Blip2ForConditionalGeneration(Blip2PreTrainedModel):
            loss = None
            # we compute the loss here since we need to take into account the sequence length of the query embeds
            if labels is not None:
+                labels = labels.to(logits.device)
                logits = logits[:, -labels.size(1) :, :]
                # Shift so that tokens < n predict n
                shift_logits = logits[..., :-1, :].contiguous()

--- a/src/transformers/models/llama/modeling_llama.py
+++ b/src/transformers/models/llama/modeling_llama.py
@@ -850,6 +850,7 @@ class LlamaForSequenceClassification(LlamaPreTrainedModel):
        loss = None
        if labels is not None:
+            labels = labels.to(logits.device)
            if self.config.problem_type is None:
                if self.num_labels == 1:
                    self.config.problem_type = "regression"