"docs/git@developer.sourcefind.cn:OpenDAS/opencompass.git" did not exist on "bd50bad8b509be5321258bf41887a03d743c4d34"
Unverified Commit e697c912 authored by Yih-Dar, committed by GitHub

Remove more unused attributes in config classes (#20858)

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
parent 9c6f7485
@@ -52,8 +52,6 @@ class CTRLConfig(PretrainedConfig):
             The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.
         embd_pdrop (`int`, *optional*, defaults to 0.1):
             The dropout ratio for the embeddings.
-        attn_pdrop (`float`, *optional*, defaults to 0.1):
-            The dropout ratio for the attention.
         layer_norm_epsilon (`float`, *optional*, defaults to 1e-6):
             The epsilon to use in the layer normalization layers
         initializer_range (`float`, *optional*, defaults to 0.02):
@@ -96,7 +94,6 @@ class CTRLConfig(PretrainedConfig):
         n_head=16,
         resid_pdrop=0.1,
         embd_pdrop=0.1,
-        attn_pdrop=0.1,
         layer_norm_epsilon=1e-6,
         initializer_range=0.02,
         summary_type="cls_index",
@@ -115,7 +112,6 @@ class CTRLConfig(PretrainedConfig):
         self.dff = dff
         self.resid_pdrop = resid_pdrop
         self.embd_pdrop = embd_pdrop
-        self.attn_pdrop = attn_pdrop
         self.layer_norm_epsilon = layer_norm_epsilon
         self.initializer_range = initializer_range
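As a quick illustration of what the hunks above do (a minimal sketch, assuming the Hugging Face `transformers` package these config classes belong to is installed, and not part of the commit itself): a freshly constructed `CTRLConfig` simply no longer sets the unused `attn_pdrop` attribute, while the remaining dropout settings are untouched.

```python
from transformers import CTRLConfig  # assumes the transformers library is installed

config = CTRLConfig()
# `attn_pdrop` is no longer a constructor argument, so a default config
# does not carry it anymore; the other dropout knobs keep their defaults.
print(hasattr(config, "attn_pdrop"))       # expected: False
print(config.resid_pdrop, config.embd_pdrop)  # expected: 0.1 0.1
```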
@@ -40,9 +40,6 @@ class Data2VecVisionConfig(PretrainedConfig):
     [facebook/data2vec-vision-base](https://huggingface.co/facebook/data2vec-vision-base) architecture.
     Args:
-        vocab_size (`int`, *optional*, defaults to 8092):
-            Vocabulary size of the Data2VecVision model. Defines the number of different image tokens that can be used
-            during pre-training.
         hidden_size (`int`, *optional*, defaults to 768):
             Dimensionality of the encoder layers and the pooler layer.
         num_hidden_layers (`int`, *optional*, defaults to 12):
@@ -118,7 +115,6 @@ class Data2VecVisionConfig(PretrainedConfig):
     def __init__(
         self,
-        vocab_size=8192,
         hidden_size=768,
         num_hidden_layers=12,
         num_attention_heads=12,
@@ -150,7 +146,6 @@ class Data2VecVisionConfig(PretrainedConfig):
     ):
         super().__init__(**kwargs)
-        self.vocab_size = vocab_size
         self.hidden_size = hidden_size
         self.num_hidden_layers = num_hidden_layers
         self.num_attention_heads = num_attention_heads
@@ -59,9 +59,6 @@ class EsmConfig(PretrainedConfig):
             Number of attention heads for each attention layer in the Transformer encoder.
         intermediate_size (`int`, *optional*, defaults to 3072):
             Dimensionality of the "intermediate" (often named feed-forward) layer in the Transformer encoder.
-        hidden_act (`str` or `Callable`, *optional*, defaults to `"gelu"`):
-            The non-linear activation function (function or string) in the encoder and pooler. If string, `"gelu"`,
-            `"relu"`, `"silu"` and `"gelu_new"` are supported.
         hidden_dropout_prob (`float`, *optional*, defaults to 0.1):
             The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.
         attention_probs_dropout_prob (`float`, *optional*, defaults to 0.1):
@@ -113,7 +110,6 @@ class EsmConfig(PretrainedConfig):
         num_hidden_layers=12,
         num_attention_heads=12,
         intermediate_size=3072,
-        hidden_act="gelu",
         hidden_dropout_prob=0.1,
         attention_probs_dropout_prob=0.1,
         max_position_embeddings=1026,
@@ -135,7 +131,6 @@ class EsmConfig(PretrainedConfig):
         self.hidden_size = hidden_size
         self.num_hidden_layers = num_hidden_layers
         self.num_attention_heads = num_attention_heads
-        self.hidden_act = hidden_act
         self.intermediate_size = intermediate_size
         self.hidden_dropout_prob = hidden_dropout_prob
         self.attention_probs_dropout_prob = attention_probs_dropout_prob
@@ -221,7 +221,6 @@ class GroupViTVisionConfig(PretrainedConfig):
         initializer_factor=1.0,
         assign_eps=1.0,
         assign_mlp_ratio=[0.5, 4],
-        qkv_bias=True,
         **kwargs
     ):
         super().__init__(**kwargs)
@@ -249,7 +248,6 @@ class GroupViTVisionConfig(PretrainedConfig):
         self.initializer_factor = initializer_factor
         self.assign_eps = assign_eps
         self.assign_mlp_ratio = assign_mlp_ratio
-        self.qkv_bias = qkv_bias
     @classmethod
     def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig":
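The other three hunks follow the same pattern; a similar sketch (again an illustration assuming the `transformers` package, not part of the commit) shows that the removed arguments no longer appear on default instances of the remaining classes.

```python
from transformers import Data2VecVisionConfig, EsmConfig, GroupViTVisionConfig

# Each class drops one constructor argument that the commit identifies as unused.
for cls, removed in [
    (Data2VecVisionConfig, "vocab_size"),
    (EsmConfig, "hidden_act"),
    (GroupViTVisionConfig, "qkv_bias"),
]:
    cfg = cls()  # default construction works as before
    print(f"{cls.__name__}.{removed} present: {hasattr(cfg, removed)}")  # expected: False
```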