Unverified Commit 9faa9f9d authored by Shijie Wu, committed by GitHub

remove unused activation dropout (#18842)

parent a2611477
@@ -67,8 +67,6 @@ class OPTConfig(PretrainedConfig):
             The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.
         attention_dropout (`float`, *optional*, defaults to 0.0):
             The dropout ratio for the attention probabilities.
-        activation_dropout (`float`, *optional*, defaults to 0.0):
-            The dropout ratio for activations inside the fully connected layer.
         layerdrop (`float`, *optional*, defaults to 0.0):
             The LayerDrop probability. See the [LayerDrop paper](https://arxiv.org/abs/1909.11556) for more
             details.
@@ -106,7 +104,6 @@ class OPTConfig(PretrainedConfig):
         word_embed_proj_dim=None,
         dropout=0.1,
         attention_dropout=0.0,
-        activation_dropout=0.0,
         num_attention_heads=12,
         activation_function="relu",
         layerdrop=0.0,
@@ -132,7 +129,6 @@ class OPTConfig(PretrainedConfig):
         self.num_hidden_layers = num_hidden_layers
         self.dropout = dropout
         self.attention_dropout = attention_dropout
-        self.activation_dropout = activation_dropout
         self.activation_function = activation_function
         self.init_std = init_std
         self.layerdrop = layerdrop
...
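After this change, `activation_dropout` is no longer a constructor argument or an attribute of `OPTConfig`. A minimal usage sketch (assuming the Hugging Face `transformers` package with this commit applied; the values below are illustrative, not the defaults of any released checkpoint):

from transformers import OPTConfig, OPTModel

config = OPTConfig(
    num_hidden_layers=12,
    num_attention_heads=12,
    dropout=0.1,            # dropout for embeddings and fully connected layers
    attention_dropout=0.0,  # dropout on the attention probabilities
    layerdrop=0.0,          # probability of skipping a whole decoder layer during training
)
# The removed parameter is gone from the config:
assert not hasattr(config, "activation_dropout")

model = OPTModel(config)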
@@ -281,8 +281,6 @@ class OPTDecoderLayer(nn.Module):
         self.dropout = config.dropout
         self.activation_fn = ACT2FN[config.activation_function]
-        self.activation_dropout = config.activation_dropout
         self.self_attn_layer_norm = nn.LayerNorm(self.embed_dim)
         self.fc1 = nn.Linear(self.embed_dim, config.ffn_dim)
         self.fc2 = nn.Linear(config.ffn_dim, self.embed_dim)
...
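For context, a simplified sketch of where dropout actually enters the decoder layer's feed-forward path (paraphrased, not verbatim library code; the attention block, layer norms, and reshaping are omitted, and the helper name `feed_forward` is hypothetical). Only `self.dropout` is applied after `fc2`; the `self.activation_dropout` attribute removed above was assigned but never read, which is why it can be dropped without changing behavior:

import torch.nn as nn

def feed_forward(self, hidden_states):
    # hidden_states: (batch, seq_len, embed_dim)
    residual = hidden_states
    hidden_states = self.fc1(hidden_states)            # embed_dim -> ffn_dim
    hidden_states = self.activation_fn(hidden_states)  # e.g. ReLU (config.activation_function)
    hidden_states = self.fc2(hidden_states)             # ffn_dim -> embed_dim
    hidden_states = nn.functional.dropout(hidden_states, p=self.dropout, training=self.training)
    return residual + hidden_states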