"docs/source/zh/internal/image_processing_utils.md" did not exist on "4c7e8d09008ea4e46dd09dccfbd518bb2b792e75"
Unverified commit 53cffeb3, authored by Chi, committed by GitHub

Enhancing Code Readability and Maintainability with Simplified Activation Function Selection. (#28349)

* Slightly change the code in get_activation()

* Define gelu_activation() in the proper place in these two files

* Fix GitHub issue

* Fix some typos

* Fix my mistake of using self to call config

* Reformat the two files

* Update src/transformers/activations.py
Co-authored-by: Arthur <48595927+ArthurZucker@users.noreply.github.com>

* Update src/transformers/models/electra/modeling_electra.py
Co-authored-by: Arthur <48595927+ArthurZucker@users.noreply.github.com>

* Update src/transformers/models/convbert/modeling_convbert.py
Co-authored-by: Arthur <48595927+ArthurZucker@users.noreply.github.com>

* Rename gelu_act to activation

---------
Co-authored-by: Arthur <48595927+ArthurZucker@users.noreply.github.com>
parent 3eddda11
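
The change follows one pattern across all touched modules: call get_activation() once in __init__, store the returned nn.Module as self.activation, and reuse it in forward() instead of re-resolving the activation on every forward pass. Below is a minimal sketch of that pattern outside the library; ToyPredictionHead and its sizes are illustrative and not code from this commit.

```python
import torch
from torch import nn

from transformers.activations import get_activation


class ToyPredictionHead(nn.Module):
    """Illustrative head following the pattern used in this PR."""

    def __init__(self, hidden_size: int = 768, embedding_size: int = 128):
        super().__init__()
        # Resolve the activation a single time instead of on every forward call.
        self.activation = get_activation("gelu")
        self.dense = nn.Linear(hidden_size, embedding_size)
        self.LayerNorm = nn.LayerNorm(embedding_size, eps=1e-12)

    def forward(self, hidden_states: torch.FloatTensor) -> torch.FloatTensor:
        hidden_states = self.dense(hidden_states)
        hidden_states = self.activation(hidden_states)  # reuse the stored module
        hidden_states = self.LayerNorm(hidden_states)
        return hidden_states


head = ToyPredictionHead()
print(head(torch.randn(2, 4, 768)).shape)  # torch.Size([2, 4, 128])
```

The behavior is unchanged; only where the activation lookup happens moves, which is what the diff below does for ConvBert and Electra.
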
src/transformers/models/convbert/modeling_convbert.py
@@ -856,12 +856,13 @@ class ConvBertGeneratorPredictions(nn.Module):
     def __init__(self, config):
         super().__init__()
+        self.activation = get_activation("gelu")
         self.LayerNorm = nn.LayerNorm(config.embedding_size, eps=config.layer_norm_eps)
         self.dense = nn.Linear(config.hidden_size, config.embedding_size)

     def forward(self, generator_hidden_states: torch.FloatTensor) -> torch.FloatTensor:
         hidden_states = self.dense(generator_hidden_states)
-        hidden_states = get_activation("gelu")(hidden_states)
+        hidden_states = self.activation(hidden_states)
         hidden_states = self.LayerNorm(hidden_states)
         return hidden_states
src/transformers/models/electra/modeling_electra.py
@@ -631,12 +631,13 @@ class ElectraDiscriminatorPredictions(nn.Module):
         super().__init__()
         self.dense = nn.Linear(config.hidden_size, config.hidden_size)
+        self.activation = get_activation(config.hidden_act)
         self.dense_prediction = nn.Linear(config.hidden_size, 1)
         self.config = config

     def forward(self, discriminator_hidden_states):
         hidden_states = self.dense(discriminator_hidden_states)
-        hidden_states = get_activation(self.config.hidden_act)(hidden_states)
+        hidden_states = self.activation(hidden_states)
         logits = self.dense_prediction(hidden_states).squeeze(-1)
         return logits
@@ -648,12 +649,13 @@ class ElectraGeneratorPredictions(nn.Module):
     def __init__(self, config):
         super().__init__()
+        self.activation = get_activation("gelu")
         self.LayerNorm = nn.LayerNorm(config.embedding_size, eps=config.layer_norm_eps)
         self.dense = nn.Linear(config.hidden_size, config.embedding_size)

     def forward(self, generator_hidden_states):
         hidden_states = self.dense(generator_hidden_states)
-        hidden_states = get_activation("gelu")(hidden_states)
+        hidden_states = self.activation(hidden_states)
         hidden_states = self.LayerNorm(hidden_states)
         return hidden_states
@@ -933,6 +935,7 @@ class ElectraClassificationHead(nn.Module):
         classifier_dropout = (
             config.classifier_dropout if config.classifier_dropout is not None else config.hidden_dropout_prob
         )
+        self.activation = get_activation("gelu")
         self.dropout = nn.Dropout(classifier_dropout)
         self.out_proj = nn.Linear(config.hidden_size, config.num_labels)
@@ -940,7 +943,7 @@ class ElectraClassificationHead(nn.Module):
         x = features[:, 0, :]  # take <s> token (equiv. to [CLS])
         x = self.dropout(x)
         x = self.dense(x)
-        x = get_activation("gelu")(x)  # although BERT uses tanh here, it seems Electra authors used gelu here
+        x = self.activation(x)  # although BERT uses tanh here, it seems Electra authors used gelu here
         x = self.dropout(x)
         x = self.out_proj(x)
         return x