Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
chenpangpang
transformers
Commits
a64bcb56
Unverified
Commit
a64bcb56
authored
Jul 29, 2022
by
Yih-Dar
Committed by
GitHub
Jul 29, 2022
Browse files
Fix OwlViT torchscript tests (#18347)
Co-authored-by:
ydshieh
<
ydshieh@users.noreply.github.com
>
parent
a4ee463d
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
7 additions
and
6 deletions
+7
-6
src/transformers/models/owlvit/modeling_owlvit.py
src/transformers/models/owlvit/modeling_owlvit.py
+7
-6
No files found.
src/transformers/models/owlvit/modeling_owlvit.py
View file @
a64bcb56
...
...
@@ -1153,7 +1153,6 @@ class OwlViTClassPredictionHead(nn.Module):
class
OwlViTForObjectDetection
(
OwlViTPreTrainedModel
):
config_class
=
OwlViTConfig
main_input_name
=
"pixel_values"
def
__init__
(
self
,
config
:
OwlViTConfig
):
super
().
__init__
(
config
)
...
...
@@ -1246,8 +1245,8 @@ class OwlViTForObjectDetection(OwlViTPreTrainedModel):
def
image_text_embedder
(
self
,
pixel_values
:
torch
.
FloatTensor
,
input_ids
:
torch
.
Tensor
,
pixel_values
:
torch
.
FloatTensor
,
attention_mask
:
torch
.
Tensor
,
output_attentions
:
Optional
[
bool
]
=
None
,
)
->
torch
.
FloatTensor
:
...
...
@@ -1284,8 +1283,8 @@ class OwlViTForObjectDetection(OwlViTPreTrainedModel):
@
replace_return_docstrings
(
output_type
=
OwlViTObjectDetectionOutput
,
config_class
=
OwlViTConfig
)
def
forward
(
self
,
pixel_values
:
torch
.
FloatTensor
,
input_ids
:
torch
.
Tensor
,
pixel_values
:
torch
.
FloatTensor
,
attention_mask
:
Optional
[
torch
.
Tensor
]
=
None
,
output_attentions
:
Optional
[
bool
]
=
None
,
output_hidden_states
:
Optional
[
bool
]
=
None
,
...
...
@@ -1338,8 +1337,8 @@ class OwlViTForObjectDetection(OwlViTPreTrainedModel):
if
output_hidden_states
:
outputs
=
self
.
owlvit
(
pixel_values
=
pixel_values
,
input_ids
=
input_ids
,
pixel_values
=
pixel_values
,
attention_mask
=
attention_mask
,
output_attentions
=
output_attentions
,
output_hidden_states
=
output_hidden_states
,
...
...
@@ -1350,8 +1349,8 @@ class OwlViTForObjectDetection(OwlViTPreTrainedModel):
# Embed images and text queries
feature_map
,
query_embeds
=
self
.
image_text_embedder
(
pixel_values
=
pixel_values
,
input_ids
=
input_ids
,
pixel_values
=
pixel_values
,
attention_mask
=
attention_mask
,
output_attentions
=
output_attentions
,
)
...
...
@@ -1374,7 +1373,7 @@ class OwlViTForObjectDetection(OwlViTPreTrainedModel):
pred_boxes
=
self
.
box_predictor
(
image_feats
,
feature_map
)
if
not
return_dict
:
return
(
output
=
(
pred_logits
,
pred_boxes
,
query_embeds
,
...
...
@@ -1383,6 +1382,8 @@ class OwlViTForObjectDetection(OwlViTPreTrainedModel):
text_model_last_hidden_states
,
vision_model_last_hidden_states
,
)
output
=
tuple
(
x
for
x
in
output
if
x
is
not
None
)
return
output
return
OwlViTObjectDetectionOutput
(
image_embeds
=
feature_map
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment