Unverified commit 0b8c1b69, authored by Yih-Dar, committed by GitHub
Browse files

Change audio kwarg to images in TROCR processor (#18421)


Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
parent dd21fb37
...@@ -54,27 +54,23 @@ class TrOCRProcessor(ProcessorMixin): ...@@ -54,27 +54,23 @@ class TrOCRProcessor(ProcessorMixin):
if self._in_target_context_manager: if self._in_target_context_manager:
return self.current_processor(*args, **kwargs) return self.current_processor(*args, **kwargs)
if "raw_speech" in kwargs: images = kwargs.pop("images", None)
warnings.warn("Using `raw_speech` as a keyword argument is deprecated. Use `audio` instead.")
audio = kwargs.pop("raw_speech")
else:
audio = kwargs.pop("audio", None)
text = kwargs.pop("text", None) text = kwargs.pop("text", None)
if len(args) > 0: if len(args) > 0:
audio = args[0] images = args[0]
args = args[1:] args = args[1:]
if audio is None and text is None: if images is None and text is None:
raise ValueError("You need to specify either an `audio` or `text` input to process.") raise ValueError("You need to specify either an `images` or `text` input to process.")
if audio is not None: if images is not None:
inputs = self.feature_extractor(audio, *args, **kwargs) inputs = self.feature_extractor(images, *args, **kwargs)
if text is not None: if text is not None:
encodings = self.tokenizer(text, **kwargs) encodings = self.tokenizer(text, **kwargs)
if text is None: if text is None:
return inputs return inputs
elif audio is None: elif images is None:
return encodings return encodings
else: else:
inputs["labels"] = encodings["input_ids"] inputs["labels"] = encodings["input_ids"]
...@@ -102,7 +98,7 @@ class TrOCRProcessor(ProcessorMixin): ...@@ -102,7 +98,7 @@ class TrOCRProcessor(ProcessorMixin):
warnings.warn( warnings.warn(
"`as_target_processor` is deprecated and will be removed in v5 of Transformers. You can process your " "`as_target_processor` is deprecated and will be removed in v5 of Transformers. You can process your "
"labels by using the argument `text` of the regular `__call__` method (either in the same call as " "labels by using the argument `text` of the regular `__call__` method (either in the same call as "
"your audio inputs, or in a separate call." "your images inputs, or in a separate call."
) )
self._in_target_context_manager = True self._in_target_context_manager = True
self.current_processor = self.tokenizer self.current_processor = self.tokenizer
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment