Unverified Commit 2406dbdc authored by Yih-Dar's avatar Yih-Dar Committed by GitHub
Browse files

Less flaky `test_assisted_decoding_matches_greedy_search` (#23451)



* fix

* fix

---------
Co-authored-by: default avatarydshieh <ydshieh@users.noreply.github.com>
parent 21f7e81b
...@@ -1477,46 +1477,57 @@ class GenerationTesterMixin: ...@@ -1477,46 +1477,57 @@ class GenerationTesterMixin:
): ):
return return
# enable cache # This for loop is a naive and temporary effort to make the test less flaky.
config, input_ids, attention_mask, max_length = self._get_input_ids_and_config(batch_size=1) failed = 0
for i in range(10):
# enable cache
config, input_ids, attention_mask, max_length = self._get_input_ids_and_config(batch_size=1)
# NOTE: assisted generation only works with cache on at the moment.
if not hasattr(config, "use_cache"):
return
config.use_cache = True
config.is_decoder = True
model = model_class(config).to(torch_device).eval()
output_greedy = model.generate(
input_ids,
attention_mask=attention_mask,
max_length=max_length,
num_beams=1,
do_sample=False,
output_scores=True,
output_hidden_states=True,
output_attentions=True,
return_dict_in_generate=True,
)
# Note: with assisted generate, if the same model is used as assistant, then all assistant tokens will
# be correct
output_assisted = model.generate(
input_ids,
attention_mask=attention_mask,
max_length=max_length,
num_beams=1,
do_sample=False,
assistant_model=model,
output_scores=True,
output_hidden_states=True,
output_attentions=True,
return_dict_in_generate=True,
)
# NOTE: assisted generation only works with cache on at the moment. try:
if not hasattr(config, "use_cache"): self.assertListEqual(output_greedy.sequences.tolist(), output_assisted.sequences.tolist())
return
config.use_cache = True for output in (output_greedy, output_assisted):
config.is_decoder = True self._check_outputs(output, input_ids, model.config, use_cache=True)
model = model_class(config).to(torch_device).eval() except AssertionError:
output_greedy = model.generate( failed += 1
input_ids, if failed > 1:
attention_mask=attention_mask, self.assertListEqual(output_greedy.sequences.tolist(), output_assisted.sequences.tolist())
max_length=max_length,
num_beams=1,
do_sample=False,
output_scores=True,
output_hidden_states=True,
output_attentions=True,
return_dict_in_generate=True,
)
# Note: with assisted generate, if the same model is used as assistant, then all assistant tokens will
# be correct
output_assisted = model.generate(
input_ids,
attention_mask=attention_mask,
max_length=max_length,
num_beams=1,
do_sample=False,
assistant_model=model,
output_scores=True,
output_hidden_states=True,
output_attentions=True,
return_dict_in_generate=True,
)
self.assertListEqual(output_greedy.sequences.tolist(), output_assisted.sequences.tolist()) for output in (output_greedy, output_assisted):
self._check_outputs(output, input_ids, model.config, use_cache=True)
for output in (output_greedy, output_assisted):
self._check_outputs(output, input_ids, model.config, use_cache=True)
def test_assisted_decoding_sample(self): def test_assisted_decoding_sample(self):
# Seeded assisted decoding will not match sample for the same seed, as the forward pass does not return the # Seeded assisted decoding will not match sample for the same seed, as the forward pass does not return the
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment