Unverified commit da79b180 authored by Sai-Suraj-27, committed by GitHub

fix: Removed `duplicate` field definitions in some classes (#31888)

Removed duplicate field definitions in classes.
parent 9d98706b
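
For context, a minimal sketch (not part of this commit; the class name below is a hypothetical stand-in for the example scripts touched here) of why the removed lines were dead code: when a name is bound twice in a class body, the second assignment silently replaces the first before the @dataclass decorator collects the fields, so only one field ever exists.

from dataclasses import dataclass, field, fields


@dataclass
class DataTrainingArgumentsSketch:
    overwrite_cache: bool = field(
        default=False, metadata={"help": "Overwrite the cached training and evaluation sets"}
    )
    # Duplicate definition: Python raises no error; this binding simply replaces
    # the one above before @dataclass runs.
    overwrite_cache: bool = field(
        default=False, metadata={"help": "Overwrite the cached training and evaluation sets"}
    )


# Only one field is registered, so dropping the duplicate changes nothing.
print([f.name for f in fields(DataTrainingArgumentsSketch)])  # ['overwrite_cache']

The same reasoning applies to the duplicated plain class attributes removed below (e.g. supports_gradient_checkpointing, _no_split_modules, test_pruning, test_model_parallel): the second assignment just rebinds the same name to the same value.
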
@@ -225,9 +225,6 @@ class DataTrainingArguments:
             )
         },
     )
-    overwrite_cache: bool = field(
-        default=False, metadata={"help": "Overwrite the cached training and evaluation sets"}
-    )
     validation_split_percentage: Optional[int] = field(
         default=5,
         metadata={

@@ -163,9 +163,6 @@ class DataTrainingArguments:
     overwrite_cache: bool = field(
         default=False, metadata={"help": "Overwrite the cached training and evaluation sets"}
     )
-    overwrite_cache: bool = field(
-        default=False, metadata={"help": "Overwrite the cached training and evaluation sets"}
-    )
     preprocessing_num_workers: Optional[int] = field(
         default=None,
         metadata={"help": "The number of processes to use for the preprocessing."},

@@ -156,9 +156,6 @@ class DataTrainingArguments:
             )
         },
     )
-    overwrite_cache: bool = field(
-        default=False, metadata={"help": "Overwrite the cached training and evaluation sets"}
-    )
     preprocessing_num_workers: Optional[int] = field(
         default=None,
         metadata={"help": "The number of processes to use for the preprocessing."},

@@ -1080,7 +1080,6 @@ class DeformableDetrPreTrainedModel(PreTrainedModel):
     main_input_name = "pixel_values"
     supports_gradient_checkpointing = True
     _no_split_modules = [r"DeformableDetrConvEncoder", r"DeformableDetrEncoderLayer", r"DeformableDetrDecoderLayer"]
-    supports_gradient_checkpointing = True

     def _init_weights(self, module):
         std = self.config.init_std

@@ -126,7 +126,6 @@ class VideoLlavaPreTrainedModel(PreTrainedModel):
     _no_split_modules = ["VideoLlavaVisionAttention"]
     _skip_keys_device_placement = "past_key_values"
     _supports_flash_attn_2 = True
-    _no_split_modules = ["VideoLlavaVisionAttention"]

     def _init_weights(self, module):
         std = (

@@ -295,7 +295,6 @@ class FNetModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
     # Skip Tests
     test_pruning = False
     test_head_masking = False
-    test_pruning = False

     # TODO: Fix the failed tests
     def is_pipeline_test_to_skip(

@@ -258,7 +258,6 @@ class MambaModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixi
     test_model_parallel = False
     test_pruning = False
     test_head_masking = False  # Mamba does not have attention heads
-    test_model_parallel = False
     pipeline_model_mapping = (
         {"feature-extraction": MambaModel, "text-generation": MambaForCausalLM} if is_torch_available() else {}
     )

@@ -298,7 +298,6 @@ class RecurrentGemmaModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineT
     test_model_parallel = False
     test_pruning = False
     test_head_masking = False  # RecurrentGemma does not have attention heads
-    test_model_parallel = False

     # Need to remove 0.9 in `test_cpu_offload`
     # This is because we are hitting edge cases with the causal_mask buffer