fix: Fixed raising `TypeError` instead of `ValueError` for invalid type (#32111)

* Raised TypeError instead of ValueError for invalid types. * Updated formatting using ruff. * Retrieved few changes. * Retrieved few changes. * Updated tests accordingly.

fix: Fixed raising `TypeError` instead of `ValueError` for invalid type (#32111)
* Raised TypeError instead of ValueError for invalid types. * Updated formatting using ruff. * Retrieved few changes. * Retrieved few changes. * Updated tests accordingly.
12b6880c · Sai-Suraj-27 · GitHub · d1ec36b9 · 12b6880c · 12b6880c
Unverified Commit 12b6880c authored Jul 22, 2024 by Sai-Suraj-27 Committed by GitHub Jul 22, 2024
18 changed files
--- a/src/transformers/models/owlvit/modeling_owlvit.py
+++ b/src/transformers/models/owlvit/modeling_owlvit.py
@@ -998,13 +998,13 @@ class OwlViTModel(OwlViTPreTrainedModel):
        super().__init__(config)

        if not isinstance(config.text_config, OwlViTTextConfig):
-            raise ValueError(
+            raise TypeError(
                "config.text_config is expected to be of type OwlViTTextConfig but is of type"
                f" {type(config.text_config)}."
            )

        if not isinstance(config.vision_config, OwlViTVisionConfig):
-            raise ValueError(
+            raise TypeError(
                "config.vision_config is expected to be of type OwlViTVisionConfig but is of type"
                f" {type(config.vision_config)}."
            )

--- a/src/transformers/models/rag/retrieval_rag.py
+++ b/src/transformers/models/rag/retrieval_rag.py
@@ -204,7 +204,7 @@ class HFIndexBase(Index):

    def _check_dataset_format(self, with_index: bool):
        if not isinstance(self.dataset, Dataset):
-            raise ValueError(f"Dataset should be a datasets.Dataset object, but got {type(self.dataset)}")
+            raise TypeError(f"Dataset should be a datasets.Dataset object, but got {type(self.dataset)}")
        if len({"title", "text", "embeddings"} - set(self.dataset.column_names)) > 0:
            raise ValueError(
                "Dataset should be a dataset with the following columns: "

--- a/src/transformers/models/siglip/modeling_siglip.py
+++ b/src/transformers/models/siglip/modeling_siglip.py
@@ -1202,13 +1202,13 @@ class SiglipModel(SiglipPreTrainedModel):
        super().__init__(config)

        if not isinstance(config.text_config, SiglipTextConfig):
-            raise ValueError(
+            raise TypeError(
                "config.text_config is expected to be of type SiglipTextConfig but is of type"
                f" {type(config.text_config)}."
            )

        if not isinstance(config.vision_config, SiglipVisionConfig):
-            raise ValueError(
+            raise TypeError(
                "config.vision_config is expected to be of type SiglipVisionConfig but is of type"
                f" {type(config.vision_config)}."
            )

--- a/src/transformers/models/udop/configuration_udop.py
+++ b/src/transformers/models/udop/configuration_udop.py
@@ -135,7 +135,7 @@ class UdopConfig(PretrainedConfig):
        self.patch_size = patch_size
        self.num_channels = num_channels
        if not isinstance(relative_bias_args, list):
-            raise ValueError("`relative_bias_args` should be a list of dictionaries.")
+            raise TypeError("`relative_bias_args` should be a list of dictionaries.")
        self.relative_bias_args = relative_bias_args

        act_info = self.feed_forward_proj.split("-")

--- a/src/transformers/models/wav2vec2_with_lm/processing_wav2vec2_with_lm.py
+++ b/src/transformers/models/wav2vec2_with_lm/processing_wav2vec2_with_lm.py
@@ -92,7 +92,7 @@ class Wav2Vec2ProcessorWithLM(ProcessorMixin):

        super().__init__(feature_extractor, tokenizer)
        if not isinstance(decoder, BeamSearchDecoderCTC):
-            raise ValueError(f"`decoder` has to be of type {BeamSearchDecoderCTC.__class__}, but is {type(decoder)}")
+            raise TypeError(f"`decoder` has to be of type {BeamSearchDecoderCTC.__class__}, but is {type(decoder)}")

        if feature_extractor.__class__.__name__ not in ["Wav2Vec2FeatureExtractor", "SeamlessM4TFeatureExtractor"]:
            raise ValueError(

--- a/src/transformers/models/x_clip/modeling_x_clip.py
+++ b/src/transformers/models/x_clip/modeling_x_clip.py
@@ -1242,13 +1242,13 @@ class XCLIPModel(XCLIPPreTrainedModel):
        super().__init__(config)

        if not isinstance(config.text_config, XCLIPTextConfig):
-            raise ValueError(
+            raise TypeError(
                "config.text_config is expected to be of type XCLIPTextConfig but is of type"
                f" {type(config.text_config)}."
            )

        if not isinstance(config.vision_config, XCLIPVisionConfig):
-            raise ValueError(
+            raise TypeError(
                "config.vision_config is expected to be of type XCLIPVisionConfig but is of type"
                f" {type(config.vision_config)}."
            )

--- a/src/transformers/models/zoedepth/modeling_zoedepth.py
+++ b/src/transformers/models/zoedepth/modeling_zoedepth.py
@@ -334,7 +334,7 @@ class ZoeDepthNeck(nn.Module):
                List of hidden states from the backbone.
        """
        if not isinstance(hidden_states, (tuple, list)):
-            raise ValueError("hidden_states should be a tuple or list of tensors")
+            raise TypeError("hidden_states should be a tuple or list of tensors")

        if len(hidden_states) != len(self.config.neck_hidden_sizes):
            raise ValueError("The number of hidden states should be equal to the number of neck hidden sizes.")

--- a/src/transformers/pipelines/audio_classification.py
+++ b/src/transformers/pipelines/audio_classification.py
@@ -190,7 +190,7 @@ class AudioClassificationPipeline(Pipeline):
                ).numpy()

        if not isinstance(inputs, np.ndarray):
-            raise ValueError("We expect a numpy ndarray as input")
+            raise TypeError("We expect a numpy ndarray as input")
        if len(inputs.shape) != 1:
            raise ValueError("We expect a single channel audio input for AudioClassificationPipeline")


--- a/src/transformers/pipelines/automatic_speech_recognition.py
+++ b/src/transformers/pipelines/automatic_speech_recognition.py
@@ -406,7 +406,7 @@ class AutomaticSpeechRecognitionPipeline(ChunkPipeline):
                # of the original length in the stride so we can cut properly.
                stride = (inputs.shape[0], int(round(stride[0] * ratio)), int(round(stride[1] * ratio)))
        if not isinstance(inputs, np.ndarray):
-            raise ValueError(f"We expect a numpy ndarray as input, got `{type(inputs)}`")
+            raise TypeError(f"We expect a numpy ndarray as input, got `{type(inputs)}`")
        if len(inputs.shape) != 1:
            raise ValueError("We expect a single channel audio input for AutomaticSpeechRecognitionPipeline")


--- a/src/transformers/pipelines/zero_shot_audio_classification.py
+++ b/src/transformers/pipelines/zero_shot_audio_classification.py
@@ -114,7 +114,7 @@ class ZeroShotAudioClassificationPipeline(Pipeline):
            audio = ffmpeg_read(audio, self.feature_extractor.sampling_rate)

        if not isinstance(audio, np.ndarray):
-            raise ValueError("We expect a numpy ndarray as input")
+            raise TypeError("We expect a numpy ndarray as input")
        if len(audio.shape) != 1:
            raise ValueError("We expect a single channel audio input for ZeroShotAudioClassificationPipeline")


--- a/src/transformers/processing_utils.py
+++ b/src/transformers/processing_utils.py
@@ -356,7 +356,7 @@ class ProcessorMixin(PushToHubMixin):
                proper_class = getattr(transformers_module, class_name)

            if not isinstance(arg, proper_class):
-                raise ValueError(
+                raise TypeError(
                    f"Received a {type(arg).__name__} for argument {attribute_name}, but a {class_name} was expected."
                )


--- a/src/transformers/tokenization_utils.py
+++ b/src/transformers/tokenization_utils.py
@@ -474,7 +474,7 @@ class PreTrainedTokenizer(PreTrainedTokenizerBase):
        # Always raise an error if string because users should define the behavior
        for index, token in value.items():
            if not isinstance(token, (str, AddedToken)) or not isinstance(index, int):
-                raise ValueError(
+                raise TypeError(
                    f"The provided `added_tokens_decoder` has an element of type {index.__class__, token.__class__}, should be a dict of {int, Union[AddedToken, str]}"
                )


--- a/src/transformers/utils/quantization_config.py
+++ b/src/transformers/utils/quantization_config.py
@@ -405,7 +405,7 @@ class BitsAndBytesConfig(QuantizationConfigMixin):
    @load_in_4bit.setter
    def load_in_4bit(self, value: bool):
        if not isinstance(value, bool):
-            raise ValueError("load_in_4bit must be a boolean")
+            raise TypeError("load_in_4bit must be a boolean")

        if self.load_in_8bit and value:
            raise ValueError("load_in_4bit and load_in_8bit are both True, but only one can be used at the same time")
@@ -418,7 +418,7 @@ class BitsAndBytesConfig(QuantizationConfigMixin):
    @load_in_8bit.setter
    def load_in_8bit(self, value: bool):
        if not isinstance(value, bool):
-            raise ValueError("load_in_8bit must be a boolean")
+            raise TypeError("load_in_8bit must be a boolean")

        if self.load_in_4bit and value:
            raise ValueError("load_in_4bit and load_in_8bit are both True, but only one can be used at the same time")
@@ -429,30 +429,30 @@ class BitsAndBytesConfig(QuantizationConfigMixin):
        Safety checker that arguments are correct - also replaces some NoneType arguments with their default values.
        """
        if not isinstance(self.load_in_4bit, bool):
-            raise ValueError("load_in_4bit must be a boolean")
+            raise TypeError("load_in_4bit must be a boolean")

        if not isinstance(self.load_in_8bit, bool):
-            raise ValueError("load_in_8bit must be a boolean")
+            raise TypeError("load_in_8bit must be a boolean")

        if not isinstance(self.llm_int8_threshold, float):
-            raise ValueError("llm_int8_threshold must be a float")
+            raise TypeError("llm_int8_threshold must be a float")

        if self.llm_int8_skip_modules is not None and not isinstance(self.llm_int8_skip_modules, list):
-            raise ValueError("llm_int8_skip_modules must be a list of strings")
+            raise TypeError("llm_int8_skip_modules must be a list of strings")
        if not isinstance(self.llm_int8_enable_fp32_cpu_offload, bool):
-            raise ValueError("llm_int8_enable_fp32_cpu_offload must be a boolean")
+            raise TypeError("llm_int8_enable_fp32_cpu_offload must be a boolean")

        if not isinstance(self.llm_int8_has_fp16_weight, bool):
-            raise ValueError("llm_int8_has_fp16_weight must be a boolean")
+            raise TypeError("llm_int8_has_fp16_weight must be a boolean")

        if self.bnb_4bit_compute_dtype is not None and not isinstance(self.bnb_4bit_compute_dtype, torch.dtype):
-            raise ValueError("bnb_4bit_compute_dtype must be torch.dtype")
+            raise TypeError("bnb_4bit_compute_dtype must be torch.dtype")

        if not isinstance(self.bnb_4bit_quant_type, str):
-            raise ValueError("bnb_4bit_quant_type must be a string")
+            raise TypeError("bnb_4bit_quant_type must be a string")

        if not isinstance(self.bnb_4bit_use_double_quant, bool):
-            raise ValueError("bnb_4bit_use_double_quant must be a boolean")
+            raise TypeError("bnb_4bit_use_double_quant must be a boolean")

        if self.load_in_4bit and not version.parse(importlib.metadata.version("bitsandbytes")) >= version.parse(
            "0.39.0"
@@ -957,13 +957,13 @@ class AqlmConfig(QuantizationConfigMixin):
        Safety checker that arguments are correct - also replaces some NoneType arguments with their default values.
        """
        if not isinstance(self.in_group_size, int):
-            raise ValueError("in_group_size must be a float")
+            raise TypeError("in_group_size must be a float")
        if not isinstance(self.out_group_size, int):
-            raise ValueError("out_group_size must be a float")
+            raise TypeError("out_group_size must be a float")
        if not isinstance(self.num_codebooks, int):
-            raise ValueError("num_codebooks must be a float")
+            raise TypeError("num_codebooks must be a float")
        if not isinstance(self.nbits_per_codebook, int):
-            raise ValueError("nbits_per_codebook must be a float")
+            raise TypeError("nbits_per_codebook must be a float")

        if self.linear_weights_not_to_quantize is not None and not isinstance(
            self.linear_weights_not_to_quantize, list

--- a/tests/agents/test_tools_common.py
+++ b/tests/agents/test_tools_common.py
@@ -60,7 +60,7 @@ def output_type(output):
    elif isinstance(output, (torch.Tensor, AgentAudio)):
        return "audio"
    else:
-        raise ValueError(f"Invalid output: {output}")
+        raise TypeError(f"Invalid output: {output}")


 @is_agent_test

--- a/tests/models/luke/test_tokenization_luke.py
+++ b/tests/models/luke/test_tokenization_luke.py
@@ -188,7 +188,7 @@ class LukeTokenizerTest(TokenizerTesterMixin, unittest.TestCase):
        with self.assertRaises(ValueError):
            tokenizer(sentence, entities=tuple(entities), entity_spans=spans)

-        with self.assertRaises(ValueError):
+        with self.assertRaises(TypeError):
            tokenizer(sentence, entities=entities, entity_spans=tuple(spans))

        with self.assertRaises(ValueError):

--- a/tests/models/mluke/test_tokenization_mluke.py
+++ b/tests/models/mluke/test_tokenization_mluke.py
@@ -151,7 +151,7 @@ class MLukeTokenizerTest(TokenizerTesterMixin, unittest.TestCase):
        with self.assertRaises(ValueError):
            tokenizer(sentence, entities=tuple(entities), entity_spans=spans)

-        with self.assertRaises(ValueError):
+        with self.assertRaises(TypeError):
            tokenizer(sentence, entities=entities, entity_spans=tuple(spans))

        with self.assertRaises(ValueError):

--- a/tests/pipelines/test_pipelines_feature_extraction.py
+++ b/tests/pipelines/test_pipelines_feature_extraction.py
@@ -171,7 +171,7 @@ class FeatureExtractionPipelineTests(unittest.TestCase):
        elif isinstance(input_, float):
            return 0
        else:
-            raise ValueError("We expect lists of floats, nothing else")
+            raise TypeError("We expect lists of floats, nothing else")
        return shape

    def get_test_pipeline(self, model, tokenizer, processor, torch_dtype="float32"):

--- a/tests/test_pipeline_mixin.py
+++ b/tests/test_pipeline_mixin.py
@@ -145,7 +145,7 @@ class PipelineTesterMixin:
        if not isinstance(model_architectures, tuple):
            model_architectures = (model_architectures,)
        if not isinstance(model_architectures, tuple):
-            raise ValueError(f"`model_architectures` must be a tuple. Got {type(model_architectures)} instead.")
+            raise TypeError(f"`model_architectures` must be a tuple. Got {type(model_architectures)} instead.")

        for model_architecture in model_architectures:
            model_arch_name = model_architecture.__name__