"git@developer.sourcefind.cn:chenpangpang/transformers.git" did not exist on "37bba2a32d2742a10216ffd925bb8f145a732ce1"
Unverified Commit 7a22a02a authored by Stas Bekman, committed by GitHub

[tokenizer.prepare_seq2seq_batch] change deprecation to be easily actionable (#12669)



* change deprecation to be easily actionable

* Update src/transformers/tokenization_utils_base.py
Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>

* rework as suggested

* one warning together

* fix format
Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
parent 711d901c
...
@@ -3274,13 +3274,23 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
         The full set of keys ``[input_ids, attention_mask, labels]``, will only be returned if tgt_texts is passed.
         Otherwise, input_ids, attention_mask will be the only keys.
         """
-        warnings.warn(
-            "`prepare_seq2seq_batch` is deprecated and will be removed in version 5 of 🤗 Transformers. Use the "
-            "regular `__call__` method to prepare your inputs and the tokenizer under the `as_target_tokenizer` "
-            "context manager to prepare your targets. See the documentation of your specific tokenizer for more "
-            "details",
-            FutureWarning,
-        )
+        # docstyle-ignore
+        formatted_warning = """
+`prepare_seq2seq_batch` is deprecated and will be removed in version 5 of HuggingFace Transformers. Use the regular
+`__call__` method to prepare your inputs and the tokenizer under the `as_target_tokenizer` context manager to prepare
+your targets.
+
+Here is a short example:
+
+model_inputs = tokenizer(src_texts, ...)
+with tokenizer.as_target_tokenizer():
+    labels = tokenizer(tgt_texts, ...)
+model_inputs["labels"] = labels["input_ids"]
+
+See the documentation of your specific tokenizer for more details on the specific arguments to the tokenizer of choice.
+For a more complete example, see the implementation of `prepare_seq2seq_batch`.
+"""
+        warnings.warn(formatted_warning, FutureWarning)
         # mBART-specific kwargs that should be ignored by other models.
         kwargs.pop("src_lang", None)
         kwargs.pop("tgt_lang", None)
...
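For reference, a minimal sketch of the migration the new warning describes. The checkpoint name, the example sentences, and the padding/return_tensors arguments are illustrative assumptions, not part of this commit; any seq2seq tokenizer that supports target tokenization works the same way.

from transformers import AutoTokenizer

# Illustrative checkpoint (assumed for this sketch).
tokenizer = AutoTokenizer.from_pretrained("facebook/mbart-large-en-ro")

src_texts = ["UN Chief Says There Is No Military Solution in Syria"]
tgt_texts = ["Şeful ONU declară că nu există o soluţie militară în Siria"]

# Deprecated call that now emits the FutureWarning added in this commit:
# batch = tokenizer.prepare_seq2seq_batch(src_texts, tgt_texts, return_tensors="pt")

# Recommended replacement: tokenize the sources with the regular __call__,
# then tokenize the targets inside the as_target_tokenizer context manager.
model_inputs = tokenizer(src_texts, padding=True, return_tensors="pt")
with tokenizer.as_target_tokenizer():
    labels = tokenizer(tgt_texts, padding=True, return_tensors="pt")
model_inputs["labels"] = labels["input_ids"]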