Unverified Commit 53d89124 authored by Joao Gante, committed by GitHub

Pipeline: use tokenizer pad token at generation time if the model pad token is unset. (#29614)

parent c47fcd08
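To illustrate the commit message, a minimal, hypothetical usage sketch ("some-model" is a placeholder for any checkpoint whose tokenizer defines a pad token while the model's generation config leaves it unset):

from transformers import pipeline

# "some-model" is a placeholder: assume tokenizer.pad_token_id is set but
# model.generation_config.pad_token_id is None.
pipe = pipeline("text-generation", model="some-model")

# After this commit, the pipeline copies tokenizer.pad_token_id into its
# stored forward params, so `generate` receives a pad token without the
# caller passing one explicitly:
out = pipe("Hello world", max_new_tokens=5)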
@@ -311,14 +311,14 @@ class AutomaticSpeechRecognitionPipeline(ChunkPipeline):
         forward_params = defaultdict(dict)
         if max_new_tokens is not None:
-            forward_params["generate_kwargs"]["max_new_tokens"] = max_new_tokens
+            forward_params["max_new_tokens"] = max_new_tokens
         if generate_kwargs is not None:
             if max_new_tokens is not None and "max_new_tokens" in generate_kwargs:
                 raise ValueError(
                     "`max_new_tokens` is defined both as an argument and inside `generate_kwargs` argument, please use"
                     " only 1 version"
                 )
-            forward_params["generate_kwargs"].update(generate_kwargs)
+            forward_params.update(generate_kwargs)

         postprocess_params = {}
         if decoder_kwargs is not None:
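The hunk above flattens the nested `generate_kwargs` dict: generation arguments now sit at the top level of `forward_params`. A standalone sketch of the two dict shapes, with illustrative values:

from collections import defaultdict

# Before: generation arguments nested one level down.
old = defaultdict(dict)
old["generate_kwargs"]["max_new_tokens"] = 128

# After: flattened, so _forward can receive them as plain keyword arguments.
new = defaultdict(dict)
new["max_new_tokens"] = 128
new.update({"num_beams": 4})  # user-supplied generate_kwargs merge in directly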
@@ -456,10 +456,7 @@ class AutomaticSpeechRecognitionPipeline(ChunkPipeline):
             processed["stride"] = stride
         yield {"is_last": True, **processed, **extra}

-    def _forward(self, model_inputs, return_timestamps=False, generate_kwargs=None):
-        if generate_kwargs is None:
-            generate_kwargs = {}
-
+    def _forward(self, model_inputs, return_timestamps=False, **generate_kwargs):
         attention_mask = model_inputs.pop("attention_mask", None)
         stride = model_inputs.pop("stride", None)
         is_last = model_inputs.pop("is_last")
...
@@ -885,6 +885,16 @@ class Pipeline(_ScikitCompat):
         self._num_workers = kwargs.pop("num_workers", None)
         self._preprocess_params, self._forward_params, self._postprocess_params = self._sanitize_parameters(**kwargs)

+        # Pipelines calling `generate`: if the tokenizer has a pad token but the model doesn't, set it in the
+        # forward params so that `generate` is aware of the pad token.
+        if (
+            self.tokenizer is not None
+            and self.model.can_generate()
+            and self.tokenizer.pad_token_id is not None
+            and self.model.generation_config.pad_token_id is None
+        ):
+            self._forward_params["pad_token_id"] = self.tokenizer.pad_token_id
+
         if self.image_processor is None and self.feature_extractor is not None:
             if isinstance(self.feature_extractor, BaseImageProcessor):
                 # Backward compatible change, if users called
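The new guard can be read in isolation. A minimal sketch with stand-in classes (not real transformers objects) showing exactly when the fallback fires:

class FakeTokenizer:
    pad_token_id = 0  # the tokenizer defines a pad token

class FakeGenerationConfig:
    pad_token_id = None  # ...while the model's generation config does not

class FakeModel:
    generation_config = FakeGenerationConfig()

    def can_generate(self):
        return True

tokenizer, model = FakeTokenizer(), FakeModel()
forward_params = {}
if (
    tokenizer is not None
    and model.can_generate()
    and tokenizer.pad_token_id is not None
    and model.generation_config.pad_token_id is None
):
    forward_params["pad_token_id"] = tokenizer.pad_token_id

assert forward_params == {"pad_token_id": 0}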
...
@@ -196,9 +196,7 @@ class Conversation:
     build_pipeline_init_args(has_tokenizer=True),
     r"""
        min_length_for_response (`int`, *optional*, defaults to 32):
-            The minimum length (in number of tokens) for a response.
-        minimum_tokens (`int`, *optional*, defaults to 10):
-            The minimum length of tokens to leave for a response.""",
+            The minimum length (in number of tokens) for a response.""",
 )
 class ConversationalPipeline(Pipeline):
     """
@@ -241,17 +239,13 @@ class ConversationalPipeline(Pipeline):
         if self.tokenizer.pad_token_id is None:
             self.tokenizer.pad_token = self.tokenizer.eos_token

-    def _sanitize_parameters(
-        self, min_length_for_response=None, minimum_tokens=None, clean_up_tokenization_spaces=None, **generate_kwargs
-    ):
+    def _sanitize_parameters(self, min_length_for_response=None, clean_up_tokenization_spaces=None, **generate_kwargs):
         preprocess_params = {}
         forward_params = {}
         postprocess_params = {}

         if min_length_for_response is not None:
             preprocess_params["min_length_for_response"] = min_length_for_response
-        if minimum_tokens is not None:
-            forward_params["minimum_tokens"] = minimum_tokens

         if "max_length" in generate_kwargs:
             forward_params["max_length"] = generate_kwargs["max_length"]
@@ -304,7 +298,7 @@ class ConversationalPipeline(Pipeline):
             input_ids = tf.constant([input_ids])
         return {"input_ids": input_ids, "conversation": conversation}

-    def _forward(self, model_inputs, minimum_tokens=10, **generate_kwargs):
+    def _forward(self, model_inputs, **generate_kwargs):
         n = model_inputs["input_ids"].shape[1]
         conversation = model_inputs.pop("conversation")
         if "max_length" not in generate_kwargs and "max_new_tokens" not in generate_kwargs:
...
@@ -419,14 +419,14 @@ class DocumentQuestionAnsweringPipeline(ChunkPipeline):
                "is_last": span_idx == num_spans - 1,
            }

-    def _forward(self, model_inputs):
+    def _forward(self, model_inputs, **generate_kwargs):
         p_mask = model_inputs.pop("p_mask", None)
         word_ids = model_inputs.pop("word_ids", None)
         words = model_inputs.pop("words", None)
         is_last = model_inputs.pop("is_last", False)

         if self.model_type == ModelType.VisionEncoderDecoder:
-            model_outputs = self.model.generate(**model_inputs)
+            model_outputs = self.model.generate(**model_inputs, **generate_kwargs)
         else:
             model_outputs = self.model(**model_inputs)
...
@@ -74,7 +74,7 @@ class ImageToTextPipeline(Pipeline):
     )
     def _sanitize_parameters(self, max_new_tokens=None, generate_kwargs=None, prompt=None, timeout=None):
-        forward_kwargs = {}
+        forward_params = {}
         preprocess_params = {}

         if prompt is not None:
@@ -82,18 +82,17 @@ class ImageToTextPipeline(Pipeline):
         if timeout is not None:
             preprocess_params["timeout"] = timeout

-        if generate_kwargs is not None:
-            forward_kwargs["generate_kwargs"] = generate_kwargs
         if max_new_tokens is not None:
-            if "generate_kwargs" not in forward_kwargs:
-                forward_kwargs["generate_kwargs"] = {}
-            if "max_new_tokens" in forward_kwargs["generate_kwargs"]:
+            forward_params["max_new_tokens"] = max_new_tokens
+        if generate_kwargs is not None:
+            if max_new_tokens is not None and "max_new_tokens" in generate_kwargs:
                 raise ValueError(
-                    "'max_new_tokens' is defined twice, once in 'generate_kwargs' and once as a direct parameter,"
-                    " please use only one"
+                    "`max_new_tokens` is defined both as an argument and inside `generate_kwargs` argument, please use"
+                    " only 1 version"
                 )
-            forward_kwargs["generate_kwargs"]["max_new_tokens"] = max_new_tokens
+            forward_params.update(generate_kwargs)

-        return preprocess_params, forward_kwargs, {}
+        return preprocess_params, forward_params, {}

     def __call__(self, images: Union[str, List[str], "Image.Image", List["Image.Image"]], **kwargs):
         """
@@ -164,7 +163,7 @@ class ImageToTextPipeline(Pipeline):
         return model_inputs

-    def _forward(self, model_inputs, generate_kwargs=None):
+    def _forward(self, model_inputs, **generate_kwargs):
         # Git model sets `model_inputs["input_ids"] = None` in `preprocess` (when `prompt=None`). In batch model, the
         # pipeline will group them into a list of `None`, which fail `_forward`. Avoid this by checking it first.
         if (
@@ -174,8 +173,6 @@ class ImageToTextPipeline(Pipeline):
         ):
             model_inputs["input_ids"] = None

-        if generate_kwargs is None:
-            generate_kwargs = {}
         # FIXME: We need to pop here due to a difference in how `generation.py` and `generation.tf_utils.py`
         # parse inputs. In the Tensorflow version, `generate` raises an error if we don't use `input_ids` whereas
         # the PyTorch version matches it with `self.model.main_input_name` or `self.model.encoder.main_input_name`
...
@@ -376,7 +376,7 @@ class TableQuestionAnsweringPipeline(Pipeline):
             inputs["table"] = table
         return inputs

-    def _forward(self, model_inputs, sequential=False):
+    def _forward(self, model_inputs, sequential=False, **generate_kwargs):
         table = model_inputs.pop("table")

         if self.type == "tapas":
@@ -385,7 +385,7 @@ class TableQuestionAnsweringPipeline(Pipeline):
             else:
                 outputs = self.batch_inference(**model_inputs)
         else:
-            outputs = self.model.generate(**model_inputs)
+            outputs = self.model.generate(**model_inputs, **generate_kwargs)
         model_outputs = {"model_inputs": model_inputs, "table": table, "outputs": outputs}
         return model_outputs
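Accepting `**generate_kwargs` in `_forward` is what lets the forward params stored on the pipeline (including the `pad_token_id` injected above) reach `generate`. A simplified stand-in for that call flow, not the real base-class code:

class TinyPipeline:
    def __init__(self):
        # In the real base class this dict is filled by _sanitize_parameters
        # and, after this commit, may gain {"pad_token_id": ...} from the tokenizer.
        self._forward_params = {"pad_token_id": 0}

    def forward(self, model_inputs):
        return self._forward(model_inputs, **self._forward_params)

    def _forward(self, model_inputs, sequential=False, **generate_kwargs):
        # generate_kwargs now carries pad_token_id, which the real pipeline
        # splats into self.model.generate(**model_inputs, **generate_kwargs).
        return generate_kwargs

assert TinyPipeline().forward({}) == {"pad_token_id": 0}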
...