chenpangpang / transformers · Commits
Commit 50c746ee (unverified)
Authored Sep 22, 2021 by Gunjan Chhablani, committed by GitHub on Sep 22, 2021
Allow only textual inputs to VisualBert (#13687)
Parent: 93624bfe
Showing 1 changed file with 12 additions and 11 deletions.

src/transformers/models/visual_bert/modeling_visual_bert.py  (+12 / -11)
@@ -778,29 +778,30 @@ class VisualBertModel(VisualBertPreTrainedModel):
...
         else:
             raise ValueError("You have to specify either input_ids or inputs_embeds")

-        if visual_embeds is None:
-            raise ValueError(
-                f"`visual_embeds` can not be of type {type(visual_embeds)} when using a VisualBert Model."
-            )
-
         batch_size, seq_length = input_shape
         device = input_ids.device if input_ids is not None else inputs_embeds.device

-        visual_input_shape = visual_embeds.size()[:-1]
+        if visual_embeds is not None:
+            visual_input_shape = visual_embeds.size()[:-1]

         if attention_mask is None:
             attention_mask = torch.ones(input_shape, device=device)

-        if visual_attention_mask is None:
+        if visual_embeds is not None and visual_attention_mask is None:
             visual_attention_mask = torch.ones(visual_input_shape, device=device)

         # We can provide a self-attention mask of dimensions [batch_size, from_seq_length, to_seq_length]
         # ourselves in which case we just need to make it broadcastable to all heads.
-        combined_attention_mask = torch.cat((attention_mask, visual_attention_mask), dim=-1)
-        extended_attention_mask: torch.Tensor = self.get_extended_attention_mask(
-            combined_attention_mask, [batch_size, input_shape + visual_input_shape], device
-        )
+        if visual_embeds is not None:
+            combined_attention_mask = torch.cat((attention_mask, visual_attention_mask), dim=-1)
+            extended_attention_mask: torch.Tensor = self.get_extended_attention_mask(
+                combined_attention_mask, [batch_size, input_shape + visual_input_shape], device
+            )
+        else:
+            extended_attention_mask: torch.Tensor = self.get_extended_attention_mask(
+                attention_mask, [batch_size, input_shape], device
+            )

         # Prepare head mask if needed
         # 1.0 in head_mask indicate we keep the head
...
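For context, a minimal sketch of how VisualBertModel might be exercised after this change. The checkpoint name, example sentence, and dummy visual features below are illustrative assumptions, not part of the commit.

```python
import torch
from transformers import BertTokenizer, VisualBertModel

# Illustrative checkpoint; any VisualBERT checkpoint should behave similarly here.
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
model = VisualBertModel.from_pretrained("uclanlp/visualbert-vqa-coco-pre")

inputs = tokenizer("Who is eating the apple?", return_tensors="pt")

# Text-only forward pass, which this commit is meant to allow: with
# visual_embeds=None, the attention mask is built from the text alone.
outputs = model(**inputs)
print(outputs.last_hidden_state.shape)  # (1, text_seq_len, hidden_size)

# The usual multimodal call still works; dummy region features stand in for a
# real visual backbone (here, 36 regions of size config.visual_embedding_dim).
visual_embeds = torch.ones(1, 36, model.config.visual_embedding_dim)
visual_token_type_ids = torch.ones(visual_embeds.shape[:-1], dtype=torch.long)
visual_attention_mask = torch.ones(visual_embeds.shape[:-1], dtype=torch.float)
outputs = model(
    **inputs,
    visual_embeds=visual_embeds,
    visual_token_type_ids=visual_token_type_ids,
    visual_attention_mask=visual_attention_mask,
)
print(outputs.last_hidden_state.shape)  # (1, text_seq_len + 36, hidden_size)
```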