Unverified commit 2e27291c, authored by Joao Gante and committed by GitHub
Browse files

Generate: assistant should be greedy in assisted decoding (#30778)



* assistant should be greedy

* better comment

* Update src/transformers/generation/candidate_generator.py
Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>

---------
Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>
parent 94306352
...@@ -150,6 +150,12 @@ class AssistedCandidateGenerator(CandidateGenerator): ...@@ -150,6 +150,12 @@ class AssistedCandidateGenerator(CandidateGenerator):
self.generation_config.return_dict_in_generate = True self.generation_config.return_dict_in_generate = True
self.generation_config.output_scores = True self.generation_config.output_scores = True
# Disable sampling -- this implementation of assisted generation/speculative decoding uses the assistant
# greedily to maximize matches. Disables sampling-related flags to prevent warnings
self.generation_config.do_sample = False
for attr in ("temperature", "top_p", "min_p", "typical_p", "top_k", "epsilon_cutoff", "eta_cutoff"):
setattr(self.generation_config, attr, None)
# avoid unnecessary warnings that min_length is larger than max_new_tokens # avoid unnecessary warnings that min_length is larger than max_new_tokens
# remove the `MinLengthLogitsProcessor` if exists (NOTE: no need to check for `MinNewTokensLogitsProcessor`) # remove the `MinLengthLogitsProcessor` if exists (NOTE: no need to check for `MinNewTokensLogitsProcessor`)
self.main_model_min_length = self.generation_config.min_length self.main_model_min_length = self.generation_config.min_length
......
...@@ -496,6 +496,11 @@ class GenerationConfig(PushToHubMixin): ...@@ -496,6 +496,11 @@ class GenerationConfig(PushToHubMixin):
greedy_wrong_parameter_msg.format(flag_name="top_p", flag_value=self.top_p), greedy_wrong_parameter_msg.format(flag_name="top_p", flag_value=self.top_p),
UserWarning, UserWarning,
) )
if self.min_p is not None:
warnings.warn(
greedy_wrong_parameter_msg.format(flag_name="min_p", flag_value=self.min_p),
UserWarning,
)
if self.typical_p is not None and self.typical_p != 1.0: if self.typical_p is not None and self.typical_p != 1.0:
warnings.warn( warnings.warn(
greedy_wrong_parameter_msg.format(flag_name="typical_p", flag_value=self.typical_p), greedy_wrong_parameter_msg.format(flag_name="typical_p", flag_value=self.typical_p),
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment