chenpangpang / transformers / Commits / eb6c59bc

Unverified commit eb6c59bc, authored Feb 13, 2023 by Joao Gante, committed by GitHub on Feb 13, 2023.

Generate: TF supports multiple eos tokens (#21571)
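In practice, the change lets the TF `generate()` method accept either a single id or a list of ids for `eos_token_id`, matching the existing PyTorch behaviour. A minimal usage sketch (the checkpoint name and token ids are taken from the tests in this commit and are illustrative only):

from transformers import AutoTokenizer, TFAutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-gpt2")
model = TFAutoModelForCausalLM.from_pretrained("hf-internal-testing/tiny-random-gpt2")

tokens = tokenizer("Hello, my dog is cute and", return_tensors="tf")

# Generation now stops as soon as any of the listed ids is produced.
outputs = model.generate(**tokens, eos_token_id=[873, 198], do_sample=False, num_beams=1)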
parent c836f772

Changes: showing 5 changed files with 310 additions and 224 deletions (+310 -224).
src/transformers/generation/tf_utils.py        +49  -14
src/transformers/generation/utils.py           +14  -14
tests/generation/test_framework_agnostic.py    +172 -0
tests/generation/test_tf_utils.py              +38  -0
tests/generation/test_utils.py                 +37  -196
src/transformers/generation/tf_utils.py

@@ -1230,7 +1230,7 @@ class TFGenerationMixin:
     ) -> tf.Tensor:
         if self.config.is_encoder_decoder and encoder_outputs is not None:
             # make dummy input_ids with value -100, as a sanity check ensuring that they won't be used for encoding
-            shape = encoder_outputs.last_hidden_state.size()[:-1]
+            shape = encoder_outputs.last_hidden_state.shape[:-1]
             return tf.ones(shape, dtype=tf.int32) * -100

         if bos_token_id is None:

@@ -1515,8 +1515,8 @@ class TFGenerationMixin:
                 The maximum length of the sequence to be generated.
             pad_token_id (`int`, *optional*):
                 The id of the *padding* token.
-            eos_token_id (`int`, *optional*):
-                The id of the *end-of-sequence* token.
+            eos_token_id (`Union[int, List[int]]`, *optional*):
+                The id of the *end-of-sequence* token. Optionally, use a list to set multiple *end-of-sequence* tokens.
             output_attentions (`bool`, *optional*, defaults to `False`):
                 Whether or not to return the attentions tensors of all attention layers. See `attentions` under
                 returned tensors for more details.

@@ -1575,6 +1575,8 @@ class TFGenerationMixin:
         max_length = max_length if max_length is not None else self.generation_config.max_length
         pad_token_id = pad_token_id if pad_token_id is not None else self.generation_config.pad_token_id
         eos_token_id = eos_token_id if eos_token_id is not None else self.generation_config.eos_token_id
+        if isinstance(eos_token_id, int):
+            eos_token_id = [eos_token_id]
         output_scores = output_scores if output_scores is not None else self.generation_config.output_scores
         output_attentions = (
             output_attentions if output_attentions is not None else self.generation_config.output_attentions

@@ -1660,7 +1662,13 @@ class TFGenerationMixin:
                 raise ValueError("If `eos_token_id` is defined, make sure that `pad_token_id` is defined.")
             unfinished_seq = 1 - tf.cast(finished_sequences, tf.int32)
             next_tokens = next_tokens * unfinished_seq + pad_token_id * (1 - unfinished_seq)
-            finished_sequences = finished_sequences | (next_tokens == eos_token_id)
+            next_token_is_eos = tf.math.reduce_any(
+                tf.equal(
+                    tf.broadcast_to(next_tokens, (len(eos_token_id), batch_size)), tf.expand_dims(eos_token_id, -1)
+                ),
+                axis=0,
+            )
+            finished_sequences = finished_sequences | next_token_is_eos

             # update `generated` and `cur_len`
             update_indices = tf.stack([tf.range(batch_size), tf.broadcast_to(cur_len, [batch_size])], axis=-1)

@@ -1776,8 +1784,8 @@ class TFGenerationMixin:
                 The maximum length of the sequence to be generated.
             pad_token_id (`int`, *optional*):
                 The id of the *padding* token.
-            eos_token_id (`int`, *optional*):
-                The id of the *end-of-sequence* token.
+            eos_token_id (`Union[int, List[int]]`, *optional*):
+                The id of the *end-of-sequence* token. Optionally, use a list to set multiple *end-of-sequence* tokens.
             seed (`List[int]`, *optional*):
                 Random seed to control sampling, containing two integers, used when `do_sample` is `True`. See the
                 `seed` argument from stateless functions in `tf.random`.

@@ -1852,6 +1860,8 @@ class TFGenerationMixin:
         max_length = max_length if max_length is not None else self.generation_config.max_length
         pad_token_id = pad_token_id if pad_token_id is not None else self.generation_config.pad_token_id
         eos_token_id = eos_token_id if eos_token_id is not None else self.generation_config.eos_token_id
+        if isinstance(eos_token_id, int):
+            eos_token_id = [eos_token_id]
         output_scores = output_scores if output_scores is not None else self.generation_config.output_scores
         output_attentions = (
             output_attentions if output_attentions is not None else self.generation_config.output_attentions

@@ -1943,7 +1953,13 @@ class TFGenerationMixin:
                 raise ValueError("If `eos_token_id` is defined, make sure that `pad_token_id` is defined.")
             unfinished_seq = 1 - tf.cast(finished_sequences, tf.int32)
             next_tokens = next_tokens * unfinished_seq + pad_token_id * (1 - unfinished_seq)
-            finished_sequences = finished_sequences | (next_tokens == eos_token_id)
+            next_token_is_eos = tf.math.reduce_any(
+                tf.equal(
+                    tf.broadcast_to(next_tokens, (len(eos_token_id), batch_size)), tf.expand_dims(eos_token_id, -1)
+                ),
+                axis=0,
+            )
+            finished_sequences = finished_sequences | next_token_is_eos

             # update `generated` and `cur_len`
             update_indices = tf.stack([tf.range(batch_size), tf.broadcast_to(cur_len, [batch_size])], axis=-1)

@@ -2079,8 +2095,8 @@ class TFGenerationMixin:
                 The maximum length of the sequence to be generated.
             pad_token_id (`int`, *optional*):
                 The id of the *padding* token.
-            eos_token_id (`int`, *optional*):
-                The id of the *end-of-sequence* token.
+            eos_token_id (`Union[int, List[int]]`, *optional*):
+                The id of the *end-of-sequence* token. Optionally, use a list to set multiple *end-of-sequence* tokens.
             length_penalty (`float`, *optional*, defaults to 1.0):
                 Exponential penalty to the length that is used with beam-based generation. It is applied as an exponent
                 to the sequence length, which in turn is used to divide the score of the sequence. Since the score is

@@ -2180,6 +2196,8 @@ class TFGenerationMixin:
         max_length = max_length if max_length is not None else self.generation_config.max_length
         pad_token_id = pad_token_id if pad_token_id is not None else self.generation_config.pad_token_id
         eos_token_id = eos_token_id if eos_token_id is not None else self.generation_config.eos_token_id
+        if isinstance(eos_token_id, int):
+            eos_token_id = [eos_token_id]
         num_return_sequences = (
             num_return_sequences if num_return_sequences is not None else self.generation_config.num_return_sequences
         )

@@ -2401,9 +2419,18 @@ class TFGenerationMixin:
             # Update current sequences: Did the top `num_beams` sequences reach an end marker?
             # To prevent these just finished sequences from being added to the current sequences
             # set of active beam search sequences, set their log probs to a very large negative value.
-            eos_in_next_token = topk_sequences[:, :, cur_len] == eos_token_id
             if eos_token_id is None:
-                eos_in_next_token = tf.broadcast_to(eos_in_next_token, topk_sequences[:, :, cur_len].shape)
+                eos_in_next_token = tf.zeros(topk_sequences[:, :, cur_len].shape, dtype=tf.bool)
+            else:
+                eos_in_next_token = tf.math.reduce_any(
+                    tf.equal(
+                        tf.broadcast_to(
+                            topk_sequences[:, :, cur_len], [len(eos_token_id)] + topk_sequences[:, :, cur_len].shape
+                        ),
+                        tf.expand_dims(tf.expand_dims(eos_token_id, -1), -1),
+                    ),
+                    axis=0,
+                )
             did_topk_just_finished = eos_in_next_token & tf.broadcast_to(
                 tf.concat((tf.ones((num_beams), dtype=tf.bool), tf.zeros((num_beams), dtype=tf.bool)), axis=0),
                 shape_list(eos_in_next_token),

@@ -2649,8 +2676,8 @@ class TFGenerationMixin:
                 The maximum length of the sequence to be generated.
             pad_token_id (`int`, *optional*):
                 The id of the *padding* token.
-            eos_token_id (`int`, *optional*):
-                The id of the *end-of-sequence* token.
+            eos_token_id (`Union[int, List[int]]`, *optional*):
+                The id of the *end-of-sequence* token. Optionally, use a list to set multiple *end-of-sequence* tokens.
             output_attentions (`bool`, *optional*, defaults to `False`):
                 Whether or not to return the attentions tensors of all attention layers. See `attentions` under
                 returned tensors for more details.

@@ -2700,6 +2727,8 @@ class TFGenerationMixin:
         max_length = max_length if max_length is not None else self.generation_config.max_length
         pad_token_id = pad_token_id if pad_token_id is not None else self.generation_config.pad_token_id
         eos_token_id = eos_token_id if eos_token_id is not None else self.generation_config.eos_token_id
+        if isinstance(eos_token_id, int):
+            eos_token_id = [eos_token_id]
         output_scores = output_scores if output_scores is not None else self.generation_config.output_scores
         output_attentions = (
             output_attentions if output_attentions is not None else self.generation_config.output_attentions

@@ -2924,7 +2953,13 @@ class TFGenerationMixin:
                 raise ValueError("If `eos_token_id` is defined, make sure that `pad_token_id` is defined.")
             unfinished_seq = 1 - tf.cast(finished_sequences, tf.int32)
             next_tokens = next_tokens * unfinished_seq + pad_token_id * (1 - unfinished_seq)
-            finished_sequences = finished_sequences | (next_tokens == eos_token_id)
+            next_token_is_eos = tf.math.reduce_any(
+                tf.equal(
+                    tf.broadcast_to(next_tokens, (len(eos_token_id), batch_size)), tf.expand_dims(eos_token_id, -1)
+                ),
+                axis=0,
+            )
+            finished_sequences = finished_sequences | next_token_is_eos

             # update `generated` and `cur_len`
             update_indices = tf.stack([tf.range(batch_size), tf.broadcast_to(cur_len, [batch_size])], axis=-1)
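The core of the TF change is the stopping check above: instead of comparing `next_tokens` against a single scalar, the candidate EOS ids are broadcast against the batch and reduced with `tf.math.reduce_any`. A small standalone sketch of that computation, with made-up ids and tokens:

import tensorflow as tf

eos_token_id = [873, 198]                       # illustrative ids
next_tokens = tf.constant([5, 198, 42, 873])    # one next token per sequence in the batch
batch_size = next_tokens.shape[0]

# Compare every candidate EOS id against every sequence's next token, then
# collapse over the EOS axis: a sequence is finished if it matched any id.
next_token_is_eos = tf.math.reduce_any(
    tf.equal(
        tf.broadcast_to(next_tokens, (len(eos_token_id), batch_size)),
        tf.expand_dims(eos_token_id, -1),
    ),
    axis=0,
)
print(next_token_is_eos.numpy())  # [False  True False  True]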
src/transformers/generation/utils.py

@@ -1702,8 +1702,8 @@ class GenerationMixin:
                 used to tell if the generation loop should stop.
             pad_token_id (`int`, *optional*):
                 The id of the *padding* token.
-            eos_token_id (`int`, *optional*):
-                The id of the *end-of-sequence* token.
+            eos_token_id (`Union[int, List[int]]`, *optional*):
+                The id of the *end-of-sequence* token. Optionally, use a list to set multiple *end-of-sequence* tokens.
             output_attentions (`bool`, *optional*, defaults to `False`):
                 Whether or not to return the attentions tensors of all attention layers. See `attentions` under
                 returned tensors for more details.

@@ -2057,8 +2057,8 @@ class GenerationMixin:
                 tokens. The maximum length of the sequence to be generated.
             pad_token_id (`int`, *optional*):
                 The id of the *padding* token.
-            eos_token_id (`int`, *optional*):
-                The id of the *end-of-sequence* token.
+            eos_token_id (`Union[int, List[int]]`, *optional*):
+                The id of the *end-of-sequence* token. Optionally, use a list to set multiple *end-of-sequence* tokens.
             output_attentions (`bool`, *optional*, defaults to `False`):
                 Whether or not to return the attentions tensors of all attention layers. See `attentions` under
                 returned tensors for more details.

The five remaining hunks (@@ -2306,8 +2306,8 @@, @@ -2574,8 +2574,8 @@, @@ -2902,8 +2902,8 @@, @@ -3230,8 +3230,8 @@ and @@ -3613,8 +3613,8 @@) make the identical two-line replacement in the `eos_token_id` docstring of the other decoding methods, with the same surrounding context lines.
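The PyTorch hunks above only touch documentation; they describe the same normalisation that the new TF code paths perform (a bare int is wrapped into a single-element list before the stopping check). A tiny sketch of that normalisation pattern, written here for illustration rather than copied from the library:

from typing import List, Optional, Union

def normalize_eos(eos_token_id: Optional[Union[int, List[int]]]) -> Optional[List[int]]:
    # A single id and a list of ids are treated the same way downstream:
    # both become a list the stopping check can iterate over.
    if isinstance(eos_token_id, int):
        return [eos_token_id]
    return eos_token_id

assert normalize_eos(2) == [2]
assert normalize_eos([2, 50256]) == [2, 50256]
assert normalize_eos(None) is None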
tests/generation/test_framework_agnostic.py

@@ -12,11 +12,15 @@ class GenerationIntegrationTestsMixin:
     # To be populated by the child classes
     framework_dependent_parameters = {
         "AutoModelForCausalLM": None,
+        "AutoModelForSpeechSeq2Seq": None,
         "AutoModelForSeq2SeqLM": None,
+        "AutoModelForVision2Seq": None,
         "LogitsProcessorList": None,
         "MinLengthLogitsProcessor": None,
         "create_tensor_fn": None,
+        "floats_tensor": None,
         "return_tensors": None,
+        "set_seed": None,
     }

     def test_validate_generation_inputs(self):

@@ -486,3 +490,171 @@ class GenerationIntegrationTestsMixin:
The hunk keeps the last lines of the existing test as context:

         input_ids = tokenizer(article, return_tensors=return_tensors).input_ids
         with self.assertRaises(ValueError):
             model.generate(input_ids, input_ids=input_ids)

and appends the following new framework-agnostic tests:

    def test_generate_too_many_encoder_kwargs(self):
        model_cls = self.framework_dependent_parameters["AutoModelForSeq2SeqLM"]
        return_tensors = self.framework_dependent_parameters["return_tensors"]

        article = """I need input_ids to generate"""
        tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-bart")
        model = model_cls.from_pretrained("hf-internal-testing/tiny-random-bart", max_length=10)
        input_ids = tokenizer(article, return_tensors=return_tensors).input_ids
        with self.assertRaises(ValueError):
            model.generate(input_ids=input_ids, inputs_embeds=input_ids)

    def test_generate_input_features_as_encoder_kwarg(self):
        model_cls = self.framework_dependent_parameters["AutoModelForSpeechSeq2Seq"]
        floats_tensor = self.framework_dependent_parameters["floats_tensor"]
        is_pt = not model_cls.__name__.startswith("TF")

        input_features = floats_tensor((3, 80, 60))
        model = model_cls.from_pretrained("hf-internal-testing/tiny-random-WhisperForConditionalGeneration")
        if is_pt:
            input_features.to(torch_device)
            model = model.to(torch_device)

        output_sequences_kwargs = model.generate(input_features=input_features, max_length=5)
        output_sequences = model.generate(input_features, max_length=5)
        if is_pt:
            output_sequences_kwargs = output_sequences_kwargs.cpu().numpy()
            output_sequences = output_sequences.cpu().numpy()

        self.assertTrue(np.array_equal(output_sequences, output_sequences_kwargs))
        self.assertEqual(output_sequences.shape, (3, 5))

    def test_generate_pixel_values_as_encoder_kwarg(self):
        model_cls = self.framework_dependent_parameters["AutoModelForVision2Seq"]
        floats_tensor = self.framework_dependent_parameters["floats_tensor"]
        is_pt = not model_cls.__name__.startswith("TF")

        pixel_values = floats_tensor((2, 3, 30, 30))
        model = model_cls.from_pretrained("hf-internal-testing/tiny-random-VisionEncoderDecoderModel-vit-gpt2")
        model.config.decoder.eos_token_id = None
        if is_pt:
            pixel_values = pixel_values.to(torch_device)
            model = model.to(torch_device)

        output_sequences_kwargs = model.generate(pixel_values=pixel_values, max_length=5)
        output_sequences = model.generate(pixel_values, max_length=5)
        if is_pt:
            output_sequences_kwargs = output_sequences_kwargs.cpu().numpy()
            output_sequences = output_sequences.cpu().numpy()

        self.assertTrue(np.array_equal(output_sequences, output_sequences_kwargs))
        self.assertEqual(output_sequences.shape, (2, 5))

    def test_generate_encoder_outputs_attention_mask(self):
        model_cls = self.framework_dependent_parameters["AutoModelForSpeechSeq2Seq"]
        floats_tensor = self.framework_dependent_parameters["floats_tensor"]
        create_tensor_fn = self.framework_dependent_parameters["create_tensor_fn"]
        is_pt = not model_cls.__name__.startswith("TF")

        input_features = floats_tensor((3, 80, 60))
        attention_mask = create_tensor_fn(np.ones(input_features.shape))
        model = model_cls.from_pretrained("hf-internal-testing/tiny-random-WhisperForConditionalGeneration")
        if is_pt:
            input_features = input_features.to(torch_device)
            attention_mask = attention_mask.to(torch_device)
            model = model.to(torch_device)

        encoder = model.get_encoder()
        encoder_outputs = encoder(input_features)

        output_sequences_no_mask = model.generate(encoder_outputs=encoder_outputs)
        output_sequences_with_mask = model.generate(encoder_outputs=encoder_outputs, attention_mask=attention_mask)
        if is_pt:
            output_sequences_no_mask = output_sequences_no_mask.cpu().numpy()
            output_sequences_with_mask = output_sequences_with_mask.cpu().numpy()

        self.assertTrue(np.array_equal(output_sequences_no_mask, output_sequences_with_mask))

    def test_eos_token_id_int_and_list_greedy_search(self):
        model_cls = self.framework_dependent_parameters["AutoModelForCausalLM"]
        return_tensors = self.framework_dependent_parameters["return_tensors"]
        is_pt = not model_cls.__name__.startswith("TF")

        generation_kwargs = {
            "do_sample": False,
            "num_beams": 1,
        }
        expectation = 13

        tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-gpt2")
        text = """Hello, my dog is cute and"""
        tokens = tokenizer(text, return_tensors=return_tensors)
        model = model_cls.from_pretrained("hf-internal-testing/tiny-random-gpt2")
        if is_pt:
            model = model.to(torch_device)
            tokens = tokens.to(torch_device)

        eos_token_id = 873
        generated_tokens = model.generate(**tokens, eos_token_id=eos_token_id, **generation_kwargs)
        self.assertTrue(expectation == len(generated_tokens[0]))

        eos_token_id = [873, 198]
        generated_tokens = model.generate(**tokens, eos_token_id=eos_token_id, **generation_kwargs)
        self.assertTrue(expectation == len(generated_tokens[0]))

    def test_eos_token_id_int_and_list_contrastive_search(self):
        model_cls = self.framework_dependent_parameters["AutoModelForCausalLM"]
        return_tensors = self.framework_dependent_parameters["return_tensors"]
        is_pt = not model_cls.__name__.startswith("TF")

        generation_kwargs = {
            "do_sample": False,
            "num_beams": 1,
            "penalty_alpha": 0.6,
            "top_k": 4,
        }
        expectation = 17

        tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-gpt2")
        text = """Hello, my dog is cute and"""
        tokens = tokenizer(text, return_tensors=return_tensors)
        model = model_cls.from_pretrained("hf-internal-testing/tiny-random-gpt2")
        if is_pt:
            model = model.to(torch_device)
            tokens = tokens.to(torch_device)

        eos_token_id = 225
        generated_tokens = model.generate(**tokens, eos_token_id=eos_token_id, **generation_kwargs)
        self.assertTrue(expectation == len(generated_tokens[0]))

        eos_token_id = [225, 198]
        generated_tokens = model.generate(**tokens, eos_token_id=eos_token_id, **generation_kwargs)
        self.assertTrue(expectation == len(generated_tokens[0]))

    def test_eos_token_id_int_and_list_beam_search(self):
        model_cls = self.framework_dependent_parameters["AutoModelForCausalLM"]
        return_tensors = self.framework_dependent_parameters["return_tensors"]
        is_pt = not model_cls.__name__.startswith("TF")

        generation_kwargs = {
            "do_sample": False,
            "num_beams": 3,
        }
        expectation = 13

        tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-gpt2")
        text = """Hello, my dog is cute and"""
        tokens = tokenizer(text, return_tensors=return_tensors)
        model = model_cls.from_pretrained("hf-internal-testing/tiny-random-gpt2")
        if is_pt:
            model = model.to(torch_device)
            tokens = tokens.to(torch_device)

        eos_token_id = 873
        generated_tokens = model.generate(**tokens, eos_token_id=eos_token_id, **generation_kwargs)
        unpadded_correct_condition = expectation == len(generated_tokens[0])
        padded_correct_condition = expectation < len(generated_tokens[0]) and all(
            [token == model.config.pad_token_id for token in generated_tokens[0][expectation:]]
        )
        self.assertTrue(unpadded_correct_condition or padded_correct_condition)

        eos_token_id = [873, 198]
        generated_tokens = model.generate(**tokens, eos_token_id=eos_token_id, **generation_kwargs)
        unpadded_correct_condition = expectation == len(generated_tokens[0])
        padded_correct_condition = expectation < len(generated_tokens[0]) and all(
            [token == model.config.pad_token_id for token in generated_tokens[0][expectation:]]
        )
        self.assertTrue(unpadded_correct_condition or padded_correct_condition)
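The new tests are written once against `self.framework_dependent_parameters` and reused by the PT and TF suites. A self-contained toy illustration of that mixin pattern (class and key names below are invented for the example, not taken from the repository):

import unittest


class SharedChecksMixin:
    # To be populated by the concrete, framework-specific subclass.
    framework_dependent_parameters = {"create_tensor_fn": None}

    def test_tensor_roundtrip(self):
        create_tensor_fn = self.framework_dependent_parameters["create_tensor_fn"]
        tensor = create_tensor_fn([1, 2, 3])
        self.assertEqual(int(tensor[0]), 1)


class ListBackedChecks(SharedChecksMixin, unittest.TestCase):
    # A stand-in "framework": plain Python lists instead of torch/tf tensors.
    framework_dependent_parameters = {"create_tensor_fn": list}


if __name__ == "__main__":
    unittest.main()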
tests/generation/test_tf_utils.py

@@ -19,6 +19,7 @@ import unittest
 from transformers import is_tf_available
 from transformers.testing_utils import require_tf, slow

+from ..test_modeling_tf_common import floats_tensor
 from .test_framework_agnostic import GenerationIntegrationTestsMixin

@@ -26,8 +27,11 @@ if is_tf_available():
     import tensorflow as tf

     from transformers import (
+        AutoTokenizer,
         TFAutoModelForCausalLM,
         TFAutoModelForSeq2SeqLM,
+        TFAutoModelForSpeechSeq2Seq,
+        TFAutoModelForVision2Seq,
         TFLogitsProcessorList,
         TFMinLengthLogitsProcessor,
         tf_top_k_top_p_filtering,

@@ -136,15 +140,19 @@ class TFGenerationIntegrationTests(unittest.TestCase, GenerationIntegrationTestsMixin):
     if is_tf_available():
         framework_dependent_parameters = {
             "AutoModelForCausalLM": TFAutoModelForCausalLM,
+            "AutoModelForSpeechSeq2Seq": TFAutoModelForSpeechSeq2Seq,
             "AutoModelForSeq2SeqLM": TFAutoModelForSeq2SeqLM,
+            "AutoModelForVision2Seq": TFAutoModelForVision2Seq,
             "LogitsProcessorList": TFLogitsProcessorList,
             "MinLengthLogitsProcessor": TFMinLengthLogitsProcessor,
             "create_tensor_fn": tf.convert_to_tensor,
+            "floats_tensor": floats_tensor,
             "return_tensors": "tf",
         }

     @slow
     def test_generate_tf_function_export_fixed_input_length(self):
+        # TF-only test: tf.saved_model export
         test_model = TFAutoModelForCausalLM.from_pretrained("hf-internal-testing/tiny-random-gpt2")
         input_length = 2
         max_new_tokens = 2

@@ -187,6 +195,7 @@ class TFGenerationIntegrationTests(unittest.TestCase, GenerationIntegrationTestsMixin):
     @slow
     def test_generate_tf_function_export_fixed_batch_size(self):
+        # TF-only test: tf.saved_model export
         test_model = TFAutoModelForCausalLM.from_pretrained("hf-internal-testing/tiny-random-gpt2")
         batch_size = 1
         max_new_tokens = 2

@@ -226,3 +235,32 @@ class TFGenerationIntegrationTests(unittest.TestCase, GenerationIntegrationTestsMixin):
         tf_func_outputs = serving_func(**inputs)["sequences"]
         tf_model_outputs = test_model.generate(**inputs, max_new_tokens=max_new_tokens)
         tf.debugging.assert_equal(tf_func_outputs, tf_model_outputs)
+
+    def test_eos_token_id_int_and_list_top_k_top_sampling(self):
+        # Has PT equivalent: this test relies on random sampling
+        generation_kwargs = {
+            "do_sample": True,
+            "num_beams": 1,
+            "top_p": 0.7,
+            "top_k": 10,
+            "temperature": 0.7,
+        }
+        expectation = 14
+
+        tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-gpt2")
+        text = """Hello, my dog is cute and"""
+        tokens = tokenizer(text, return_tensors="tf")
+        model = TFAutoModelForCausalLM.from_pretrained("hf-internal-testing/tiny-random-gpt2")
+
+        eos_token_id = 638
+        # forces the generation to happen on CPU, to avoid GPU-related quirks
+        with tf.device(":/CPU:0"):
+            tf.random.set_seed(0)
+            generated_tokens = model.generate(**tokens, eos_token_id=eos_token_id, **generation_kwargs)
+        self.assertTrue(expectation == len(generated_tokens[0]))
+
+        eos_token_id = [638, 198]
+        with tf.device(":/CPU:0"):
+            tf.random.set_seed(0)
+            generated_tokens = model.generate(**tokens, eos_token_id=eos_token_id, **generation_kwargs)
+        self.assertTrue(expectation == len(generated_tokens[0]))
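The TF sampling test above relies on pinning execution to the CPU and resetting the global seed so that the two `generate()` calls draw the same tokens. A minimal standalone sketch of that reproducibility pattern, using plain `tf.random.uniform` instead of `generate` (device string simplified to the conventional "/CPU:0"):

import tensorflow as tf

# Resetting the global seed before an identical sequence of random ops
# reproduces the same draws, which is what makes the length check stable.
with tf.device("/CPU:0"):
    tf.random.set_seed(0)
    draw_a = tf.random.uniform((3,))
with tf.device("/CPU:0"):
    tf.random.set_seed(0)
    draw_b = tf.random.uniform((3,))
tf.debugging.assert_near(draw_a, draw_b)  # identical because the seed was reset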
tests/generation/test_utils.py

@@ -30,15 +30,15 @@ if is_torch_available():
     from transformers import (
         AutoModelForCausalLM,
         AutoModelForSeq2SeqLM,
+        AutoModelForSpeechSeq2Seq,
+        AutoModelForVision2Seq,
         AutoTokenizer,
         BartForConditionalGeneration,
         BartTokenizer,
         GPT2LMHeadModel,
         GPT2Tokenizer,
         ImageGPTForCausalImageModeling,
-        Speech2TextForConditionalGeneration,
         SpeechEncoderDecoderModel,
-        VisionEncoderDecoderModel,
         top_k_top_p_filtering,
     )
     from transformers.generation import (

@@ -1790,10 +1790,13 @@ class GenerationIntegrationTests(unittest.TestCase, GenerationIntegrationTestsMixin):
     if is_torch_available():
         framework_dependent_parameters = {
             "AutoModelForCausalLM": AutoModelForCausalLM,
+            "AutoModelForSpeechSeq2Seq": AutoModelForSpeechSeq2Seq,
             "AutoModelForSeq2SeqLM": AutoModelForSeq2SeqLM,
+            "AutoModelForVision2Seq": AutoModelForVision2Seq,
             "LogitsProcessorList": LogitsProcessorList,
             "MinLengthLogitsProcessor": MinLengthLogitsProcessor,
             "create_tensor_fn": torch.tensor,
+            "floats_tensor": floats_tensor,
             "return_tensors": "pt",
         }

@@ -2093,7 +2096,7 @@ class GenerationIntegrationTests(unittest.TestCase, GenerationIntegrationTestsMixin):
         self.assertEqual(output, [{"generated_text": "Hello I believe in in in number"}])

     def test_generate_non_nlp_input_ids_as_kwarg(self):
-        # PT-only test: AFAIK there is no non-NLP model architecture in TF that supports `input_ids` as its only input
+        # PT-only test: AFAIK there's no non-NLP model architecture in TF that supports `input_ids` as its only input
         model = ImageGPTForCausalImageModeling.from_pretrained(
             "hf-internal-testing/tiny-random-imagegpt", max_length=10
         ).to(torch_device)

@@ -2105,17 +2108,8 @@ class GenerationIntegrationTests(unittest.TestCase, GenerationIntegrationTestsMixin):
         self.assertListEqual(output_sequences.tolist(), output_sequences_kwargs.tolist())
         self.assertEqual(output_sequences.shape, (3, 10))

-    def test_generate_too_many_encoder_kwargs(self):
-        article = """I need input_ids to generate"""
-        tokenizer = BartTokenizer.from_pretrained("hf-internal-testing/tiny-random-bart")
-        model = BartForConditionalGeneration.from_pretrained("hf-internal-testing/tiny-random-bart", max_length=10).to(
-            torch_device
-        )
-        input_ids = tokenizer(article, return_tensors="pt").input_ids.to(torch_device)
-        with self.assertRaises(ValueError):
-            model.generate(input_ids=input_ids, inputs_embeds=input_ids)
-
     def test_generate_input_values_as_encoder_kwarg(self):
+        # PT-only test: AFAIK there's no generate-capable architecture in TF that supports `input_values` as its input
         input_values = floats_tensor((2, 250))
         model = SpeechEncoderDecoderModel.from_pretrained("hf-internal-testing/tiny-random-speech-encoder-decoder")
         model = model.to(torch_device)

@@ -2125,43 +2119,8 @@ class GenerationIntegrationTests(unittest.TestCase, GenerationIntegrationTestsMixin):
         self.assertListEqual(output_sequences.tolist(), output_sequences_kwargs.tolist())
         self.assertEqual(output_sequences.shape, (2, 5))

-    def test_generate_input_features_as_encoder_kwarg(self):
-        input_features = floats_tensor((3, 20, 24))
-        model = Speech2TextForConditionalGeneration.from_pretrained("hf-internal-testing/tiny-random-speech_to_text")
-        model = model.to(torch_device)
-
-        output_sequences_kwargs = model.generate(input_features=input_features, max_length=5).cpu()
-        output_sequences = model.generate(input_features, max_length=5).cpu()
-
-        self.assertListEqual(output_sequences.tolist(), output_sequences_kwargs.tolist())
-        self.assertEqual(output_sequences.shape, (3, 5))
-
-    def test_generate_pixel_values_as_encoder_kwarg(self):
-        pixel_values = floats_tensor((2, 3, 30, 30))
-        model = VisionEncoderDecoderModel.from_pretrained("hf-internal-testing/tiny-random-vision-encoder-decoder")
-        model = model.to(torch_device)
-
-        output_sequences_kwargs = model.generate(pixel_values=pixel_values, max_length=5).cpu()
-        output_sequences = model.generate(pixel_values, max_length=5).cpu()
-
-        self.assertListEqual(output_sequences.tolist(), output_sequences_kwargs.tolist())
-        self.assertEqual(output_sequences.shape, (2, 5))
-
-    def test_generate_encoder_outputs_attention_mask(self):
-        input_values = floats_tensor((2, 250)).to(torch_device)
-        attention_mask = torch.ones_like(input_values)
-        model = SpeechEncoderDecoderModel.from_pretrained("hf-internal-testing/tiny-random-speech-encoder-decoder")
-        model = model.to(torch_device)
-
-        encoder = model.get_encoder()
-        encoder_outputs = encoder(input_values)
-
-        output_sequences_no_mask = model.generate(encoder_outputs=encoder_outputs).cpu()
-        output_sequences_with_mask = model.generate(encoder_outputs=encoder_outputs, attention_mask=attention_mask)
-        output_sequences_with_mask = output_sequences_with_mask.cpu()
-
-        self.assertListEqual(output_sequences_no_mask.tolist(), output_sequences_with_mask.tolist())
-
     def test_transition_scores_group_beam_search_encoder_decoder(self):
+        # PT-only test: TF doesn't have group beam search
         articles = [
             "Justin Timberlake and Jessica Biel, welcome to parenthood.",
             "Michael Phelps is arguably the most decorated Olympian of all time.",

@@ -2188,64 +2147,9 @@ class GenerationIntegrationTests(unittest.TestCase, GenerationIntegrationTestsMixin):
         self.assertTrue(torch.allclose(transition_scores_sum, outputs.sequences_scores, atol=1e-3))

-    def test_log_scores_sample_decoder_only(self):
-        articles = ["I need input_ids to generate", "Short and"]
-        tokenizer = GPT2Tokenizer.from_pretrained("hf-internal-testing/tiny-random-gpt2")
-        tokenizer.padding_side = "left"
-        tokenizer.pad_token = tokenizer.eos_token
-
-        model = GPT2LMHeadModel.from_pretrained("hf-internal-testing/tiny-random-gpt2").to(torch_device)
-
-        inputs = tokenizer(articles, return_tensors="pt", padding=True).to(torch_device)
-
-        result = model.generate(
-            **inputs,
-            max_length=15,
-            return_dict_in_generate=True,
-            do_sample=False,
-            output_scores=True,
-        )
-
-        # decoder-only starts generating from `input_ids`
-        begin_generation = inputs.input_ids.shape[-1]
-
-        gen_sequences = result.sequences[:, begin_generation:]
-        probs = torch.stack(result.scores, dim=1).softmax(-1)
-
-        gen_probs = torch.gather(probs, 2, gen_sequences[:, :, None]).squeeze(-1)
-        expected_probs = torch.tensor([[0.0014, 0.0015], [0.0014, 0.0014]])
-
-        self.assertTrue(torch.allclose(gen_probs.cpu(), expected_probs, atol=1e-3))
-
-    def test_log_scores_sample_encoder_decoder(self):
-        articles = ["I need input_ids to generate", "Short and"]
-        tokenizer = BartTokenizer.from_pretrained("hf-internal-testing/tiny-random-bart")
-        model = BartForConditionalGeneration.from_pretrained("hf-internal-testing/tiny-random-bart").to(torch_device)
-
-        inputs = tokenizer(articles, return_tensors="pt", padding=True).to(torch_device)
-
-        result = model.generate(
-            **inputs,
-            max_length=3,
-            return_dict_in_generate=True,
-            do_sample=False,
-            num_beams=1,
-            output_scores=True,
-        )
-
-        # encoder-decoder has one decoder_start_token_id by default
-        begin_generation = 1
-
-        gen_sequences = result.sequences[:, begin_generation:]
-        probs = torch.stack(result.scores, dim=1).softmax(-1)
-
-        gen_probs = torch.gather(probs, 2, gen_sequences[:, :, None]).squeeze(-1)
-        expected_probs = torch.tensor([[0.0013, 1.0000], [0.0013, 1.0000]])
-
-        self.assertTrue(torch.allclose(gen_probs.cpu(), expected_probs, atol=1e-3))
-
     @slow
     def test_beam_search_example_integration(self):
+        # PT-only test: TF doesn't have a BeamSearchScorer
         # exactly the example provided in the docstrings of beam search, which previously
         # failed after directly copying from it. Refer to PR #15555
         tokenizer = AutoTokenizer.from_pretrained("t5-base")

Six further hunks (@@ -2288,6 +2192,7 @@, @@ -2325,6 +2230,7 @@, @@ -2365,6 +2271,7 @@, @@ -2402,6 +2309,7 @@, @@ -2426,6 +2334,7 @@ and @@ -2469,6 +2378,7 @@) each add the single comment line `# PT-only test: TF doesn't have constrained beam search` at the top of `test_constrained_beam_search`, `test_constrained_beam_search_mixed`, `test_constrained_beam_search_mixed_mixin`, `test_constrained_beam_search_example_translation_mixin`, `test_constrained_beam_search_example_integration` and `test_constrained_beam_search_mixin_type_checks`; @@ -2509,6 +2419,7 @@ adds the same comment line at the top of `test_contrastive_search_batched`.

@@ -2533,55 +2444,32 @@ class GenerationIntegrationTests(unittest.TestCase, GenerationIntegrationTestsMixin):
         max_score_diff = (output_sequences_batched.scores[0][1] - output_sequences.scores[0][0]).abs().max()
         self.assertTrue(max_score_diff < 1e-5)

The PT-specific copies of `test_eos_token_id_int_and_list_greedy_search` and `test_eos_token_id_int_and_list_contrastive_search` are deleted here (their bodies match the framework-agnostic versions added above, using `GPT2Tokenizer`/`GPT2LMHeadModel` and `torch.manual_seed(0)` before each `generate` call), and `test_generate_from_input_embeds_decoder_only` is moved into this position with a new leading comment:

+    def test_generate_from_input_embeds_decoder_only(self):
+        # PT-only test: TF doesn't have a model with support to generate from input embeds (yet ;))
+        # Note: the model must support generation from input embeddings
+        model = AutoModelForCausalLM.from_pretrained("hf-internal-testing/tiny-random-gpt2")
+        tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-gpt2")
+
+        text = "Hello world"
+        input_ids = tokenizer.encode(text, return_tensors="pt")
+
+        # Traditional way of generating text
+        outputs_from_ids = model.generate(input_ids)
+
+        # Same thing, but from input embeddings
+        inputs_embeds = model.transformer.wte(input_ids)
+        outputs_from_embeds = model.generate(input_ids, inputs_embeds=inputs_embeds)
+        self.assertListEqual(outputs_from_ids.tolist(), outputs_from_embeds.tolist())
+
+        # But if we pass different inputs_embeds, we should get different outputs
+        torch.manual_seed(0)
+        random_embeds = torch.rand_like(inputs_embeds)
+        outputs_from_rand_embeds = model.generate(input_ids, inputs_embeds=random_embeds)
+        with self.assertRaises(AssertionError):
+            self.assertListEqual(outputs_from_rand_embeds.tolist(), outputs_from_embeds.tolist())

     def test_eos_token_id_int_and_list_top_k_top_sampling(self):
+        # Has TF equivalent: this test relies on random sampling
         generation_kwargs = {
             "do_sample": True,
             "num_beams": 1,

@@ -2591,11 +2479,10 @@ class GenerationIntegrationTests(unittest.TestCase, GenerationIntegrationTestsMixin):
         }
         expectation = 15

-        tokenizer = GPT2Tokenizer.from_pretrained("hf-internal-testing/tiny-random-gpt2")
+        tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-gpt2")
         text = """Hello, my dog is cute and"""
         tokens = tokenizer(text, return_tensors="pt").to(torch_device)
-        model = GPT2LMHeadModel.from_pretrained("hf-internal-testing/tiny-random-gpt2").to(torch_device)
+        model = AutoModelForCausalLM.from_pretrained("hf-internal-testing/tiny-random-gpt2").to(torch_device)

         torch.manual_seed(0)
         eos_token_id = 846

@@ -2606,49 +2493,3 @@ class GenerationIntegrationTests(unittest.TestCase, GenerationIntegrationTestsMixin):
         eos_token_id = [846, 198]
         generated_tokens = model.generate(**tokens, eos_token_id=eos_token_id, **generation_kwargs)
         self.assertTrue(expectation == len(generated_tokens[0]))

The old trailing copies of `test_eos_token_id_int_and_list_beam_search` (the `GPT2Tokenizer`/`GPT2LMHeadModel` variant with `num_beams=3`, `expectation = 13`, checking the int form `eos_token_id = 873` and the list form `eos_token_id = [873, 198]`) and of `test_generate_from_input_embeds_decoder_only` (re-added above) are removed from the end of the file.