"examples/vscode:/vscode.git/clone" did not exist on "f7eba090077a443d4a2fd1cd341c822a8fb4dcbc"
Unverified Commit 6678350c authored by Alara Dirik's avatar Alara Dirik Committed by GitHub
Browse files

fixes bugs to handle non-dict output (#18897)

parent 998a90bc
...@@ -1277,7 +1277,7 @@ class OwlViTForObjectDetection(OwlViTPreTrainedModel): ...@@ -1277,7 +1277,7 @@ class OwlViTForObjectDetection(OwlViTPreTrainedModel):
) )
# Resize class token # Resize class token
image_embeds = outputs.image_embeds image_embeds = outputs[-3]
new_size = tuple(np.array(image_embeds.shape) - np.array((0, 1, 0))) new_size = tuple(np.array(image_embeds.shape) - np.array((0, 1, 0)))
class_token_out = torch.broadcast_to(image_embeds[:, :1, :], new_size) class_token_out = torch.broadcast_to(image_embeds[:, :1, :], new_size)
...@@ -1293,11 +1293,11 @@ class OwlViTForObjectDetection(OwlViTPreTrainedModel): ...@@ -1293,11 +1293,11 @@ class OwlViTForObjectDetection(OwlViTPreTrainedModel):
image_embeds.shape[-1], image_embeds.shape[-1],
) )
image_embeds = image_embeds.reshape(new_size) image_embeds = image_embeds.reshape(new_size)
text_embeds = outputs.text_embeds text_embeds = outputs[-4]
# Last hidden states from text and vision transformers # Last hidden states from text and vision transformers
text_model_last_hidden_state = outputs.text_model_output.last_hidden_state text_model_last_hidden_state = outputs[-2][0]
vision_model_last_hidden_state = outputs.vision_model_output.last_hidden_state vision_model_last_hidden_state = outputs[-1][0]
return (text_embeds, image_embeds, text_model_last_hidden_state, vision_model_last_hidden_state) return (text_embeds, image_embeds, text_model_last_hidden_state, vision_model_last_hidden_state)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment