Unverified Commit 63424273 authored by Yih-Dar, committed by GitHub

Remove more unused attributes in config classes (#21327)



* remove unused classifier_dropout

* remove unused dropout

* remove unused pooler_fn

* remove unnecessary is_encoder_decoder

* remove unnecessary drop_rate

* remove unused classifier_dropout

* remove unused classifier_dropout

* remove unused dropout

* remove unused dropout

* remove unused summary_* attributes

* remove unused tie_word_embeddings

* remove unused summary_* attributes

* fix

---------
Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
parent da2a4d95
@@ -63,8 +63,6 @@ class TrOCRConfig(PretrainedConfig):
             The dropout ratio for the attention probabilities.
         activation_dropout (`float`, *optional*, defaults to 0.0):
             The dropout ratio for activations inside the fully connected layer.
-        classifier_dropout (`float`, *optional*, defaults to 0.0):
-            The dropout ratio for classifier.
         init_std (`float`, *optional*, defaults to 0.02):
             The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
         decoder_layerdrop (`float`, *optional*, defaults to 0.0):
@@ -114,7 +112,6 @@ class TrOCRConfig(PretrainedConfig):
         attention_dropout=0.0,
         activation_dropout=0.0,
         decoder_start_token_id=2,
-        classifier_dropout=0.0,
         init_std=0.02,
         decoder_layerdrop=0.0,
         use_cache=True,
@@ -136,7 +133,6 @@ class TrOCRConfig(PretrainedConfig):
         self.dropout = dropout
         self.attention_dropout = attention_dropout
         self.activation_dropout = activation_dropout
-        self.classifier_dropout = classifier_dropout
         self.init_std = init_std
         self.decoder_layerdrop = decoder_layerdrop
         self.use_cache = use_cache
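Nothing in the TrOCR decoder ever read `classifier_dropout`, so removing it changes no behavior. And because `PretrainedConfig` stores unrecognized keyword arguments as plain attributes, configs serialized with the old key keep loading. A minimal sketch of both points (assuming a `transformers` release that includes this change):

```python
from transformers import TrOCRConfig

# Freshly created configs no longer expose the dead attribute:
config = TrOCRConfig()
print(hasattr(config, "classifier_dropout"))  # False

# Older serialized configs that still carry the key load fine, because
# PretrainedConfig keeps unknown kwargs as plain attributes:
legacy = TrOCRConfig(classifier_dropout=0.1)
print(legacy.classifier_dropout)  # 0.1 (stored, just never consumed)
```

This catch-all kwargs path is what makes pruning dead config attributes low risk for existing checkpoints.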
@@ -129,8 +129,6 @@ class WhisperConfig(PretrainedConfig):
             Begin of stream token id.
         eos_token_id (`int`, *optional*, defaults to 50257):
             End of stream token id.
-        tie_word_embeddings (`bool`, *optional*, defaults to `True`):
-            Whether to tie input and output embeddings.
         suppress_tokens (`List[int]`, *optional*):
             A list containing the non-speech tokens that will be used by the logit processor in the `generate`
             function. NON_SPEECH_TOKENS and NON_SPEECH_TOKENS_MULTI each correspond to the `english-only` and the
@@ -185,7 +183,6 @@ class WhisperConfig(PretrainedConfig):
         pad_token_id=50256,
         bos_token_id=50257,
         eos_token_id=50256,
-        tie_word_embeddings=True,
         suppress_tokens=None,
         begin_suppress_tokens=[220, 50256],
         **kwargs
@@ -209,7 +206,6 @@ class WhisperConfig(PretrainedConfig):
         self.use_cache = use_cache
         self.num_hidden_layers = encoder_layers
         self.scale_embedding = scale_embedding  # scale factor will be sqrt(d_model) if True
-        self.tie_word_embeddings = tie_word_embeddings
         self.max_source_positions = max_source_positions
         self.max_target_positions = max_target_positions
         super().__init__(
@@ -218,7 +214,6 @@ class WhisperConfig(PretrainedConfig):
             eos_token_id=eos_token_id,
             is_encoder_decoder=is_encoder_decoder,
             decoder_start_token_id=decoder_start_token_id,
-            tie_word_embeddings=tie_word_embeddings,
             suppress_tokens=suppress_tokens,
             begin_suppress_tokens=begin_suppress_tokens,
             **kwargs,
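The Whisper change is subtly different: `tie_word_embeddings` is not being removed as a feature, it was just declared redundantly, since `PretrainedConfig.__init__` already defaults it to `True` and Whisper never overrode that. A quick sanity check (a sketch under the same assumption as above):

```python
from transformers import WhisperConfig

# The default now comes from the PretrainedConfig base class instead of a
# duplicated keyword argument, so behavior is unchanged:
print(WhisperConfig().tie_word_embeddings)  # True

# Untying is still possible; the kwarg is forwarded to the base class:
print(WhisperConfig(tie_word_embeddings=False).tie_word_embeddings)  # False
```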
@@ -62,8 +62,6 @@ class XCLIPTextConfig(PretrainedConfig):
             The epsilon used by the layer normalization layers.
         attention_dropout (`float`, *optional*, defaults to 0.0):
             The dropout ratio for the attention probabilities.
-        dropout (`float`, *optional*, defaults to 0.0):
-            The dropout probabilitiy for all fully connected layers in the embeddings, encoder, and pooler.
         initializer_range (`float`, *optional*, defaults to 0.02):
             The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
         initializer_factor (`float``, *optional*, defaults to 1):
@@ -96,7 +94,6 @@ class XCLIPTextConfig(PretrainedConfig):
         max_position_embeddings=77,
         hidden_act="quick_gelu",
         layer_norm_eps=1e-5,
-        dropout=0.0,
         attention_dropout=0.0,
         initializer_range=0.02,
         initializer_factor=1.0,
@@ -110,7 +107,6 @@ class XCLIPTextConfig(PretrainedConfig):
         self.vocab_size = vocab_size
         self.hidden_size = hidden_size
         self.intermediate_size = intermediate_size
-        self.dropout = dropout
         self.num_hidden_layers = num_hidden_layers
         self.num_attention_heads = num_attention_heads
         self.max_position_embeddings = max_position_embeddings
@@ -176,8 +172,6 @@ class XCLIPVisionConfig(PretrainedConfig):
             `"relu"`, `"selu"`, `"gelu_new"` and ``"quick_gelu"` are supported.
         layer_norm_eps (`float`, *optional*, defaults to 1e-5):
             The epsilon used by the layer normalization layers.
-        dropout (`float`, *optional*, defaults to 0.0):
-            The dropout probabilitiy for all fully connected layers in the embeddings, encoder, and pooler.
         attention_dropout (`float`, *optional*, defaults to 0.0):
             The dropout ratio for the attention probabilities.
         initializer_range (`float`, *optional*, defaults to 0.02):
@@ -221,7 +215,6 @@ class XCLIPVisionConfig(PretrainedConfig):
         num_frames=8,
         hidden_act="quick_gelu",
         layer_norm_eps=1e-5,
-        dropout=0.0,
         attention_dropout=0.0,
         initializer_range=0.02,
         initializer_factor=1.0,
@@ -232,7 +225,6 @@ class XCLIPVisionConfig(PretrainedConfig):
         self.hidden_size = hidden_size
         self.intermediate_size = intermediate_size
-        self.dropout = dropout
         self.num_hidden_layers = num_hidden_layers
         self.num_attention_heads = num_attention_heads
         self.mit_hidden_size = mit_hidden_size
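The X-CLIP edits mirror the TrOCR one: both sub-configs declared a fully connected `dropout` that the modeling code never read, while `attention_dropout` remains a real hyperparameter. A short check (same assumption as above):

```python
from transformers import XCLIPTextConfig, XCLIPVisionConfig

for cls in (XCLIPTextConfig, XCLIPVisionConfig):
    cfg = cls()
    # `dropout` is gone; `attention_dropout` is still configurable:
    print(cls.__name__, hasattr(cfg, "dropout"), cfg.attention_dropout)
    # -> XCLIPTextConfig False 0.0, then XCLIPVisionConfig False 0.0
```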